|
1 module("jslex", package.seeall); |
|
2 |
|
--- Look at the stream's pending character without consuming it.
-- @param stream lexer stream table
-- @return whatever `stream.next_char` currently holds
function peek_char(stream)
	local lookahead = stream.next_char;
	return lookahead;
end
|
6 |
|
--- Consume the pending character and refill the lookahead from the file.
-- When the consumed character is a newline, `stream.line` is incremented
-- (an unset `stream.line` counts as line 1).
-- @param stream lexer stream table with `file` and `next_char` fields
-- @param param unused by this function
-- @return the character that was pending before the call
function read_char(stream, param)
	local consumed = stream.next_char;
	-- Refill the one-character lookahead
	stream.next_char = stream.file:read(1);
	if consumed == "\n" then
		local current_line = stream.line or 1;
		stream.line = current_line + 1;
	end
	return consumed;
end
|
13 |
|
--- Consume characters until the pending character equals `char` or the
-- input runs out. The matching character itself is left in the stream.
-- @param stream lexer stream table
-- @param char the character to stop in front of
-- @return the consumed characters joined into one string (possibly empty)
function read_until(stream, char)
	local pieces = {};
	while true do
		local upcoming = stream.next_char;
		if not upcoming or upcoming == char then
			break;
		end
		pieces[#pieces + 1] = read_char(stream);
	end
	return table.concat(pieces);
end
|
21 |
|
--- Consume characters until the pending character matches the Lua pattern
-- `pattern` or the input runs out. The matching character is left in the
-- stream.
-- @param stream lexer stream table
-- @param pattern Lua pattern tested against each single pending character
-- @return the consumed characters joined into one string (possibly empty)
function read_until_match(stream, pattern)
	local pieces = {};
	while true do
		local upcoming = stream.next_char;
		if not upcoming or upcoming:match(pattern) then
			break;
		end
		pieces[#pieces + 1] = read_char(stream);
	end
	return table.concat(pieces);
end
|
29 |
|
-- Words the lexer reports as "keyword" tokens rather than "name" tokens.
local keywords = {
	"abstract", "as", "break", "case", "catch", "class", "const",
	"continue", "debugger", "default", "delete", "do", "else", "enum",
	"export", "extends", "false", "final", "finally", "for", "function",
	"goto", "if", "implements", "import", "in", "instanceof", "interface",
	"is", "namespace", "native", "new", "null", "package", "private",
	"protected", "public", "return", "static", "super", "switch",
	"synchronized", "this", "throw", "throws", "transient", "true", "try",
	"typeof", "use", "var", "volatile", "while", "with"
};

-- Also index the table by word, so a membership test is a single lookup.
for index = 1, #keywords do
	keywords[keywords[index]] = true;
end

--- Look `name` up in the keyword table.
-- @param name candidate identifier
-- @return true for a listed keyword, nil for any other string
local function is_keyword(name)
	local entry = keywords[name];
	return entry;
end
|
50 |
|
-- Every character the operator handler accepts as the start of an operator.
local operators = '+-/*(),={}.&|<>![]:?';
|
52 |
|
--- Hand a token to whoever resumed the lexer coroutine.
-- Records the token type on the stream (as `last_token_type`) and then
-- yields the type/value pair; must therefore run inside a coroutine.
-- @param stream lexer stream table
-- @param token_type token category string, e.g. "op" or "name"
-- @param token_value token text
local function push_token(stream, token_type, token_value)
	local emit = coroutine.yield;
	stream.last_token_type = token_type;
	emit(token_type, token_value);
end
|
57 |
|
-- Read characters up to (not including) an unescaped `terminator`, or to the
-- end of input. A backslash and the character after it are kept verbatim and
-- never end the run, so literals such as "a\"b" or /a\/b/ stay in one piece.
local function read_escaped_until(stream, terminator)
	local r = {};
	while stream.next_char and stream.next_char ~= terminator do
		local c = read_char(stream);
		r[#r+1] = c;
		if c == "\\" and stream.next_char then
			r[#r+1] = read_char(stream);
		end
	end
	return table.concat(r);
end

-- Token handlers, tried in list order. Each one peeks at the pending
-- character; if it recognises it, it consumes input (emitting tokens through
-- push_token where appropriate) and returns true. Otherwise it returns
-- nothing and leaves the stream untouched. Handlers expect a non-nil pending
-- character.
local handlers = {
	-- Whitespace: skipped, no token
	function (stream)
		local c = peek_char(stream);
		if c:match("%s") then
			read_until_match(stream, "%S");
			return true;
		end
	end;
	-- Strings: the token value is the raw text between the quotes
	function (stream)
		local c = peek_char(stream);
		if c == [["]] or c == [[']] then
			read_char(stream); -- Use up the string marker
			-- Escape-aware, so a backslash-escaped quote does not end the string
			push_token(stream, "string", read_escaped_until(stream, c));
			read_char(stream); -- Use up the string terminator
			return true;
		end
	end;
	-- Identifiers (and keywords)
	function (stream)
		local c = peek_char(stream);
		if c:match("[_a-zA-Z$]") then
			local name = read_until_match(stream, "[^a-zA-Z0-9_$]");
			if is_keyword(name) then
				push_token(stream, "keyword", name);
			else
				push_token(stream, "name", name);
			end
			return true;
		end
	end;
	-- Numbers: a run of digits and dots
	function (stream)
		local c = peek_char(stream);
		if c:match("%d") then
			push_token(stream, "number", read_until_match(stream, "[^%d%.]"));
			return true;
		end
	end;
	-- Operators (and comments!)
	function (stream)
		local c = peek_char(stream);
		-- Plain search: `c` comes from the input and must not be interpreted
		-- as part of a pattern ("%p", "%A", "%f", ... would misfire or error)
		if operators:find(c, 1, true) then
			local op = read_char(stream);
			local following = peek_char(stream);
			if op == "/" and (following == "/" or following == "*") then -- A comment
				if following == "/" then -- Comment until end of line
					read_until(stream, "\n");
					return true;
				else
					-- Consume the opening "*" so that it cannot double as the
					-- "*" of the closing "*/" (as in "/*/")
					read_char(stream);
					while true do
						read_until(stream, "*");
						read_char(stream); -- Read "*"
						local after_star = peek_char(stream);
						if after_star == "/" then
							read_char(stream); -- Read /
							return true;
						elseif after_star == nil then -- Unterminated comment
							return true;
						end
					end
				end
			elseif op == "/" and stream.last_token_type == "op" then -- Regex
				-- A "/" directly after an operator token is taken to start a
				-- regex literal
				local regex = read_escaped_until(stream, "/");
				read_char(stream); -- Use up the closing "/"
				local flags = read_until_match(stream, "%A");
				push_token(stream, "regex", op..regex.."/"..flags);
				-- Done: emit no extra "/" operator and leave the character
				-- after the flags for the next handler
				return true;
			elseif op == "=" and following == "=" then -- Equality
				op = "==";
				read_char(stream);
				if peek_char(stream) == "=" then
					op = "===";
					read_char(stream);
				end
			elseif (op == "&" or op == "|") and following == op then -- && and ||
				op = op:rep(2);
				read_char(stream);
			elseif op == "!" and following == "=" then -- Inequality
				op = "!=";
				read_char(stream);
				if peek_char(stream) == "=" then
					op = "!==";
					read_char(stream);
				end
			end
			push_token(stream, "op", op);
			return true;
		end
	end;
	-- Semi-colons
	function (stream)
		local c = peek_char(stream);
		if c == ";" then
			read_char(stream);
			push_token(stream, "eos", c);
			return true;
		end
	end;
};
|
153 |
|
154 |
|
--- Create a lexer stream over `file`.
-- The first character is read immediately into `next_char`. The returned
-- table's `get_token` is a wrapped coroutine: tokens are whatever the
-- handlers yield, and it returns nothing once the input is exhausted.
-- @param file object with a `read` method that is called as `file:read(1)`
-- @return the stream table (`file`, `next_char`, `get_token`)
function new_stream(file)
	local stream = { file = file; };
	stream.next_char = stream.file:read(1);

	local function tokenize()
		while stream.next_char do
			local handled;
			-- One pass over every handler, in order
			for index = 1, #handlers do
				local outcome = handlers[index](stream);
				if outcome then
					handled = outcome;
				end
				if not stream.next_char then
					break; -- Input ran out part-way through the pass
				end
			end
			if not handled then
				-- No handler consumed anything, so the pending character is
				-- one the lexer does not understand
				error("Unexpected character on line "..(stream.line or 1)..": "..stream.next_char);
			end
		end
	end

	stream.get_token = coroutine.wrap(tokenize);

	return stream;
end
|
174 |
|
175 return _M; |