Fri, 15 Oct 2010 15:17:17 +0100
Initial commit
0 | 1 | module("jslex", package.seeall); |
2 | ||
-- Look at the stream's buffered lookahead character without consuming it.
-- Returns whatever is currently stored in stream.next_char (nil when the
-- lookahead slot is empty).
function peek_char(stream)
	local upcoming = stream.next_char;
	return upcoming;
end
6 | ||
-- Consume and return the buffered lookahead character.
-- The lookahead slot is refilled with the next single character read from
-- stream.file, and stream.line is bumped whenever the consumed character is
-- a newline (counting starts from 1 if stream.line was never set).
-- `param` is accepted but not used.
function read_char(stream, param)
	local consumed, source = stream.next_char, stream.file;
	stream.next_char = source:read(1);
	if consumed ~= "\n" then
		return consumed;
	end
	stream.line = (stream.line or 1) + 1;
	return consumed;
end
13 | ||
-- Consume characters until the lookahead equals `char` or input runs out.
-- The terminating character itself is left unread in stream.next_char.
-- Returns the consumed characters joined into one string ("" if none).
function read_until(stream, char)
	local pieces, count = {}, 0;
	while true do
		local upcoming = stream.next_char;
		if not upcoming or upcoming == char then
			break;
		end
		count = count + 1;
		pieces[count] = read_char(stream);
	end
	return table.concat(pieces);
end
21 | ||
-- Consume characters until the lookahead matches the Lua pattern `pattern`
-- or input runs out. The matching character is left unread.
-- Returns the consumed characters joined into one string ("" if none).
function read_until_match(stream, pattern)
	local pieces, count = {}, 0;
	while true do
		local upcoming = stream.next_char;
		if not upcoming or upcoming:match(pattern) then
			break;
		end
		count = count + 1;
		pieces[count] = read_char(stream);
	end
	return table.concat(pieces);
end
29 | ||
-- Reserved words recognised by the lexer. The table is both a list
-- (keywords[1..n] hold the words in declaration order) and a set
-- (keywords[word] == true) so membership checks are a single lookup.
local keywords = {};
for index, word in ipairs({
	"abstract", "as", "break", "case", "catch",
	"class", "const", "continue", "debugger",
	"default", "delete", "do", "else", "enum",
	"export", "extends", "false", "final",
	"finally", "for", "function", "goto", "if",
	"implements", "import", "in", "instanceof",
	"interface", "is", "namespace", "native",
	"new", "null", "package", "private",
	"protected", "public", "return", "static",
	"super", "switch", "synchronized", "this",
	"throw", "throws", "transient", "true",
	"try", "typeof", "use", "var", "volatile",
	"while", "with"
}) do
	keywords[index] = word;
	keywords[word] = true;
end

-- Returns true when `name` is one of the reserved words above, nil otherwise.
local function is_keyword(name)
	local known = keywords[name];
	return known;
end
50 | ||
-- Every character that can start an operator/punctuator token.
-- "%", "^" and "~" are included so that valid JavaScript such as `a % b`,
-- `a ^ b` or `~a` is tokenised instead of aborting the lexer with
-- "Unexpected character".
local operators = "+-/*(),={}.&|<>![]:?%^~";
52 | ||
-- Emit one token to whoever resumed the lexer coroutine.
-- The token type is first recorded on stream.last_token_type, then the
-- (token_type, token_value) pair is yielded out of the running coroutine.
-- Must therefore be called from inside a coroutine.
local function push_token(stream, token_type, token_value)
	stream.last_token_type = token_type;
	local yield = coroutine.yield;
	yield(token_type, token_value);
end
57 | ||
-- Consume characters up to (not including) the first *unescaped* occurrence
-- of `terminator`, or until input runs out. A backslash and the character
-- following it are copied through verbatim, so an escaped terminator such as
-- \" or \/ does not end the literal. Returns the raw consumed text.
local function read_escaped_until(stream, terminator)
	local parts = {};
	while stream.next_char and stream.next_char ~= terminator do
		local ch = read_char(stream);
		parts[#parts+1] = ch;
		if ch == "\\" and stream.next_char then
			parts[#parts+1] = read_char(stream);
		end
	end
	return table.concat(parts);
end

-- Token handlers, tried in order. Each one is called with a stream whose
-- lookahead character is non-nil, inspects that character, and either
-- consumes some input (possibly emitting tokens via push_token) and returns
-- true, or leaves the stream untouched and returns nothing.
local handlers = {
	-- Whitespace: skipped, produces no token
	function (stream)
		local c = peek_char(stream);
		if c:match("%s") then
			read_until_match(stream, "%S");
			return true;
		end
	end;
	-- Strings: the token value is the raw text between the quotes
	-- (escape sequences are kept as written, not decoded)
	function (stream)
		local c = peek_char(stream);
		if c == [["]] or c == [[']] then
			read_char(stream); -- Use up the string marker
			push_token(stream, "string", read_escaped_until(stream, c));
			read_char(stream); -- Use up the string terminator
			return true;
		end
	end;
	-- Identifiers: emitted as "keyword" or "name"
	function (stream)
		local c = peek_char(stream);
		if c:match("[_a-zA-Z$]") then
			local name = read_until_match(stream, "[^a-zA-Z0-9_$]");
			if is_keyword(name) then
				push_token(stream, "keyword", name);
			else
				push_token(stream, "name", name);
			end
			return true;
		end
	end;
	-- Numbers: a run of digits and dots
	function (stream)
		local c = peek_char(stream);
		if c:match("%d") then
			push_token(stream, "number", read_until_match(stream, "[^%d%.]"));
			return true;
		end
	end;
	-- Operators (and comments!)
	function (stream)
		local c = peek_char(stream);
		-- Plain find: `c` must be compared literally. Building a pattern
		-- from it ("%"..c) turns letters into character classes, e.g. "p"
		-- would match as %p (punctuation) and "f"/"b" raise pattern errors.
		if operators:find(c, 1, true) then
			-- Two statements so the read happens strictly before the peek
			local op = read_char(stream);
			local nxt = peek_char(stream);
			if op == "/" and (nxt == "/" or nxt == "*") then -- A comment
				if nxt == "/" then -- Comment until end of line
					read_until(stream, "\n");
					return true;
				else
					-- Use up the opening "*" first, otherwise "/*/" would
					-- be mistaken for a complete comment
					read_char(stream);
					while true do
						read_until(stream, "*");
						read_char(stream); -- Read "*"
						if peek_char(stream) == "/" then
							read_char(stream); -- Read /
							return true;
						elseif peek_char(stream) == nil then
							return true; -- Unterminated comment at end of input
						end
					end
				end
			elseif op == "/" and stream.last_token_type == "op" then -- Regex
				-- NOTE: a "/" inside a [...] character class still ends the
				-- literal early; only backslash escapes are honoured.
				local regex = read_escaped_until(stream, "/");
				read_char(stream); -- Use up the closing "/"
				local flags = read_until_match(stream, "%A");
				push_token(stream, "regex", op..regex.."/"..flags);
				-- Done: do not swallow the character after the flags and do
				-- not fall through to emit an extra "/" operator token
				return true;
			elseif op == "=" and (nxt == "=") then -- Equality
				op = "==";
				read_char(stream);
				if peek_char(stream) == "=" then
					op = "===";
					read_char(stream);
				end
			elseif op:match("[&|]") and nxt == op then
				op = op:rep(2);
				read_char(stream);
			elseif op == "!" and (nxt == "=") then -- Inequality
				op = "!=";
				read_char(stream);
				if peek_char(stream) == "=" then -- Mirror the "===" handling
					op = "!==";
					read_char(stream);
				end
			end
			push_token(stream, "op", op);
			return true;
		end
	end;
	-- Semi-colons: emitted as end-of-statement ("eos") tokens
	function (stream)
		local c = peek_char(stream);
		if c == ";" then
			read_char(stream);
			push_token(stream, "eos", c);
			return true;
		end
	end;
};
153 | ||
154 | ||
-- Create a token stream over `file`, an object with a :read(1) method that
-- returns the next character or nil at end of input.
--
-- The returned table exposes:
--   file, next_char  - the source and its one-character lookahead
--   line             - current line number, starting at 1
--   get_token()      - returns the next (token_type, token_value) pair;
--                      returns nil once the input is exhausted, and keeps
--                      returning nil on every later call
--
-- get_token raises an error if no handler accepts the current character.
function new_stream(file)
	-- line is initialised so callers can read it before the first newline
	local stream = { file = file; line = 1; };
	stream.next_char = stream.file:read(1);

	local finished = false;
	local resume_lexer = coroutine.wrap(
		function ()
			while stream.next_char do
				local handled;
				for _, handler in ipairs(handlers) do
					handled = handler(stream) or handled;
					-- Handlers index the lookahead, so stop at end of input
					if not stream.next_char then break; end
				end
				if not handled then error("Unexpected character on line "..(stream.line or 1)..": "..stream.next_char); end
			end
		end);

	function stream.get_token()
		-- Resuming a finished coroutine raises "cannot resume dead
		-- coroutine"; report end of input as nil instead.
		if finished then return nil; end
		local ok, token_type, token_value = pcall(resume_lexer);
		if not ok then
			finished = true;
			error(token_type, 0); -- Re-raise the lexer error unchanged
		end
		if token_type == nil then finished = true; end
		return token_type, token_value;
	end

	return stream;
end
174 | ||
175 | return _M; |