#ifndef V8_TOKEN_H_
#define V8_TOKEN_H_
namespace v8 { namespace internal {
// TOKEN_LIST takes a list of 3 macros M, all of which satisfy the
// same signature M(name, string, precedence), where name is the
// symbolic token name, string is the corresponding syntactic symbol
// (or NULL, for literals), and precedence is the precedence (or 0).
// The parameters are invoked for token categories as follows:
//
// T: Non-keyword tokens
// K: Keyword tokens
// F: Future (reserved) keyword tokens
// IGNORE_TOKEN is a convenience macro that can be supplied as
// an argument (at any position) for a TOKEN_LIST call. It does
// nothing with tokens belonging to the respective category.
#define IGNORE_TOKEN(name, string, precedence)
#define TOKEN_LIST(T, K, F) \
/* End of source indicator. */ \
T(EOS, "EOS", 0) \
\
/* Punctuators (ECMA-262, section 7.7, page 15). */ \
T(LPAREN, "(", 0) \
T(RPAREN, ")", 0) \
T(LBRACK, "[", 0) \
T(RBRACK, "]", 0) \
T(LBRACE, "{", 0) \
T(RBRACE, "}", 0) \
T(COLON, ":", 0) \
T(SEMICOLON, ";", 0) \
T(PERIOD, ".", 0) \
T(CONDITIONAL, "?", 3) \
T(INC, "++", 0) \
T(DEC, "--", 0) \
\
/* Assignment operators. */ \
/* IsAssignmentOp() relies on this block of enum values */ \
/* being contiguous and sorted in the same order! */ \
T(INIT_VAR, "=init_var", 2) /* AST-use only. */ \
T(INIT_CONST, "=init_const", 2) /* AST-use only. */ \
T(ASSIGN, "=", 2) \
T(ASSIGN_BIT_OR, "|=", 2) \
T(ASSIGN_BIT_XOR, "^=", 2) \
T(ASSIGN_BIT_AND, "&=", 2) \
T(ASSIGN_SHL, "<<=", 2) \
T(ASSIGN_SAR, ">>=", 2) \
T(ASSIGN_SHR, ">>>=", 2) \
T(ASSIGN_ADD, "+=", 2) \
T(ASSIGN_SUB, "-=", 2) \
T(ASSIGN_MUL, "*=", 2) \
T(ASSIGN_DIV, "/=", 2) \
T(ASSIGN_MOD, "%=", 2) \
\
/* Binary operators sorted by precedence. */ \
/* IsBinaryOp() relies on this block of enum values */ \
/* being contiguous and sorted in the same order! */ \
T(COMMA, ",", 1) \
T(OR, "||", 4) \
T(AND, "&&", 5) \
T(BIT_OR, "|", 6) \
T(BIT_XOR, "^", 7) \
T(BIT_AND, "&", 8) \
T(SHL, "<<", 11) \
T(SAR, ">>", 11) \
T(SHR, ">>>", 11) \
T(ADD, "+", 12) \
T(SUB, "-", 12) \
T(MUL, "*", 13) \
T(DIV, "/", 13) \
T(MOD, "%", 13) \
\
/* Compare operators sorted by precedence. */ \
/* IsCompareOp() relies on this block of enum values */ \
/* being contiguous and sorted in the same order! */ \
T(EQ, "==", 9) \
T(NE, "!=", 9) \
T(EQ_STRICT, "===", 9) \
T(NE_STRICT, "!==", 9) \
T(LT, "<", 10) \
T(GT, ">", 10) \
T(LTE, "<=", 10) \
T(GTE, ">=", 10) \
K(INSTANCEOF, "instanceof", 10) \
K(IN, "in", 10) \
\
/* Unary operators. */ \
/* IsUnaryOp() relies on this block of enum values */ \
/* being contiguous and sorted in the same order! */ \
T(NOT, "!", 0) \
T(BIT_NOT, "~", 0) \
K(DELETE, "delete", 0) \
K(TYPEOF, "typeof", 0) \
K(VOID, "void", 0) \
\
/* Keywords (ECMA-262, section 7.5.2, page 13). */ \
K(BREAK, "break", 0) \
K(CASE, "case", 0) \
K(CATCH, "catch", 0) \
K(CONTINUE, "continue", 0) \
K(DEBUGGER, "debugger", 0) \
K(DEFAULT, "default", 0) \
/* DELETE */ \
K(DO, "do", 0) \
K(ELSE, "else", 0) \
K(FINALLY, "finally", 0) \
K(FOR, "for", 0) \
K(FUNCTION, "function", 0) \
K(IF, "if", 0) \
/* IN */ \
/* INSTANCEOF */ \
K(NEW, "new", 0) \
K(RETURN, "return", 0) \
K(SWITCH, "switch", 0) \
K(THIS, "this", 0) \
K(THROW, "throw", 0) \
K(TRY, "try", 0) \
/* TYPEOF */ \
K(VAR, "var", 0) \
/* VOID */ \
K(WHILE, "while", 0) \
K(WITH, "with", 0) \
\
/* Future reserved words (ECMA-262, section 7.5.3, page 14). */ \
F(ABSTRACT, "abstract", 0) \
F(BOOLEAN, "boolean", 0) \
F(BYTE, "byte", 0) \
F(CHAR, "char", 0) \
F(CLASS, "class", 0) \
K(CONST, "const", 0) \
F(DOUBLE, "double", 0) \
F(ENUM, "enum", 0) \
F(EXPORT, "export", 0) \
F(EXTENDS, "extends", 0) \
F(FINAL, "final", 0) \
F(FLOAT, "float", 0) \
F(GOTO, "goto", 0) \
F(IMPLEMENTS, "implements", 0) \
F(IMPORT, "import", 0) \
F(INT, "int", 0) \
F(INTERFACE, "interface", 0) \
F(LONG, "long", 0) \
K(NATIVE, "native", 0) \
F(PACKAGE, "package", 0) \
F(PRIVATE, "private", 0) \
F(PROTECTED, "protected", 0) \
F(PUBLIC, "public", 0) \
F(SHORT, "short", 0) \
F(STATIC, "static", 0) \
F(SUPER, "super", 0) \
F(SYNCHRONIZED, "synchronized", 0) \
F(THROWS, "throws", 0) \
F(TRANSIENT, "transient", 0) \
F(VOLATILE, "volatile", 0) \
\
/* Literals (ECMA-262, section 7.8, page 16). */ \
K(NULL_LITERAL, "null", 0) \
K(TRUE_LITERAL, "true", 0) \
K(FALSE_LITERAL, "false", 0) \
T(NUMBER, NULL, 0) \
T(STRING, NULL, 0) \
\
/* Identifiers (not keywords or future reserved words). */ \
T(IDENTIFIER, NULL, 0) \
\
/* Illegal token - not able to scan. */ \
T(ILLEGAL, "ILLEGAL", 0) \
\
/* Scanner-internal use only. */ \
T(COMMENT, NULL, 0)
class Token {
public:
// All token values.
// 定义宏T
#define T(name, string, precedence) name,
enum Value {
/*
TOKEN_LIST宏展开后变成
T(EOS, "EOS", 0)
...
然后T宏展开后变成
EOS
...
F开头的会被忽略,因为F等于IGNORE_TOKEN
*/
TOKEN_LIST(T, T, IGNORE_TOKEN)
NUM_TOKENS
};
#undef T
#ifdef DEBUG
// Returns a string corresponding to the C++ token name
// (e.g. "LT" for the token LT).
static const char* Name(Value tok) {
ASSERT(0 <= tok && tok < NUM_TOKENS);
return name_[tok];
}
#endif
// 判断token字符语义的函数
// Predicates
static bool IsAssignmentOp(Value tok) {
return INIT_VAR <= tok && tok <= ASSIGN_MOD;
}
static bool IsBinaryOp(Value op) {
return COMMA <= op && op <= MOD;
}
static bool IsCompareOp(Value op) {
return EQ <= op && op <= IN;
}
static bool IsBitOp(Value op) {
return (BIT_OR <= op && op <= SHR) || op == BIT_NOT;
}
static bool IsUnaryOp(Value op) {
return (NOT <= op && op <= VOID) || op == ADD || op == SUB;
}
static bool IsCountOp(Value op) {
return op == INC || op == DEC;
}
// Returns a string corresponding to the JS token string
// (.e., "<" for the token LT) or NULL if the token doesn't
// have a (unique) string (e.g. an IDENTIFIER).
// 见token.cc关于string_的定义
static const char* String(Value tok) {
ASSERT(0 <= tok && tok < NUM_TOKENS);
return string_[tok];
}
// Returns the precedence > 0 for binary and compare
// operators; returns 0 otherwise.
static int Precedence(Value tok) {
ASSERT(0 <= tok && tok < NUM_TOKENS);
return precedence_[tok];
}
// Returns the keyword value if str is a keyword;
// returns IDENTIFIER otherwise. The class must
// have been initialized.
static Value Lookup(const char* str);
// Must be called once to initialize the class.
// Multiple calls are ignored.
static void Initialize();
private:
#ifdef DEBUG
static const char* name_[NUM_TOKENS];
#endif
static const char* string_[NUM_TOKENS];
static int8_t precedence_[NUM_TOKENS];
};
} } // namespace v8::internal
#endif // V8_TOKEN_H_
token.cc
#include "v8.h"
#include "token.h"
namespace v8 { namespace internal {
#ifdef DEBUG
#define T(name, string, precedence) #name,
const char* Token::name_[NUM_TOKENS] = {
TOKEN_LIST(T, T, IGNORE_TOKEN)
};
#undef T
#endif
#define T(name, string, precedence) string,
const char* Token::string_[NUM_TOKENS] = {
/*
TOKEN_LIST宏展开后变成
T(EOS, "EOS", 0)
...
然后T宏展开后变成
"EOS"
...
F开头的会被忽略,因为F等于IGNORE_TOKEN
*/
TOKEN_LIST(T, T, IGNORE_TOKEN)
};
#undef T
#define T(name, string, precedence) precedence,
int8_t Token::precedence_[NUM_TOKENS] = {
/*
TOKEN_LIST宏展开后变成
T(EOS, "EOS", 0)
...
然后T宏展开后变成
0
...
F开头的会被忽略,因为F等于IGNORE_TOKEN
*/
TOKEN_LIST(T, T, IGNORE_TOKEN)
};
#undef T
// A perfect (0 collision) hash table of keyword token values.
// larger N will reduce the number of collisions (power of 2 for fast %)
const unsigned int N = 128;
// make this small since we have <= 256 tokens
static uint8_t Hashtable[N];
static bool IsInitialized = false;
// 哈希算法
static unsigned int Hash(const char* s) {
// The following constants have been found using trial-and-error. If the
// keyword set changes, they may have to be recomputed (make them flags
// and play with the flag values). Increasing N is the simplest way to
// reduce the number of collisions.
// we must use at least 4 or more chars ('const' and 'continue' share
// 'con')
const unsigned int L = 5;
// smaller S tend to reduce the number of collisions
const unsigned int S = 4;
// make this a prime, or at least an odd number
const unsigned int M = 3;
unsigned int h = 0;
for (unsigned int i = 0; s[i] != '\0' && i < L; i++) {
h += (h << S) + s[i];
}
// unsigned int % by a power of 2 (otherwise this will not be a bit mask)
return h * M % N;
}
// 查找str字符串
Token::Value Token::Lookup(const char* str) {
ASSERT(IsInitialized);
// Hashtable[Hash(str)]得到str在string_中的索引
Value k = static_cast<Value>(Hashtable[Hash(str)]);
// 得到对应的字符串
const char* s = string_[k];
ASSERT(s != NULL || k == IDENTIFIER);
// 对比
if (s == NULL || strcmp(s, str) == 0) {
return k;
}
return IDENTIFIER;
}
#ifdef DEBUG
// We need this function because C++ doesn't allow the expression
// NULL == NULL, which is a result of macro expansion below. What
// the hell?
static bool IsNull(const char* s) {
return s == NULL;
}
#endif
// 建立哈希表,用于Lookup查找
void Token::Initialize() {
if (IsInitialized) return;
// A list of all keywords, terminated by ILLEGAL.
#define T(name, string, precedence) name,
static Value keyword[] = {
/*
TOKEN_LIST宏展开后变成
T(EOS, "EOS", 0)
...
然后T宏展开后变成
EOS
...
但是只有K开头,在宏展开后会保留,因为T和F都等于IGNORE_TOKEN
*/
TOKEN_LIST(IGNORE_TOKEN, T, IGNORE_TOKEN)
ILLEGAL
};
#undef T
// Assert that the keyword array contains the 25 keywords, 3 future
// reserved words (const, debugger, and native), and the 3 named literals
// defined by ECMA-262 standard.
ASSERT(ARRAY_SIZE(keyword) == 25 + 3 + 3 + 1); // +1 for ILLEGAL sentinel
// Initialize Hashtable.
ASSERT(NUM_TOKENS <= 256); // Hashtable contains uint8_t elements
// 初始化哈希表
for (unsigned int i = 0; i < N; i++) {
Hashtable[i] = IDENTIFIER;
}
// Insert all keywords into Hashtable.
int collisions = 0;
for (int i = 0; keyword[i] != ILLEGAL; i++) {
Value k = keyword[i];
/*
string_的内容和Value的枚举范围是一一对应的,keyword是他们的子集,
即string_里包括关键字和非关键字(T和K),keyword包括了关键字的在string_中的索引,
string_[k]是一个字符串
*/
unsigned int h = Hash(string_[k]);
//不等于IDENTIFIER说明已经赋过值,即冲突了
if (Hashtable[h] != IDENTIFIER) collisions++;
// 保存哈希值到关键字索引的映射
Hashtable[h] = k;
}
if (collisions > 0) {
PrintF("%d collisions in keyword hashtable\n", collisions);
FATAL("Fix keyword lookup!");
}
IsInitialized = true;
// Verify hash table.
#define T(name, string, precedence) \
ASSERT(IsNull(string) || Lookup(string) == IDENTIFIER);
#define K(name, string, precedence) \
ASSERT(Lookup(string) == name);
TOKEN_LIST(T, K, IGNORE_TOKEN)
#undef K
#undef T
}
} } // namespace v8::internal