lib/jslex.lua

Fri, 15 Oct 2010 15:17:17 +0100

author
Matthew Wild <mwild1@gmail.com>
date
Fri, 15 Oct 2010 15:17:17 +0100
changeset 0
b2e55f320d48
permissions
-rw-r--r--

Initial commit

0
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
1 module("jslex", package.seeall);
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
2
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
--- Look at the stream's one-character lookahead without consuming it.
-- @param stream lexer stream table
-- @return the current value of `stream.next_char` (nil when the field is unset)
function peek_char(stream)
	local lookahead = stream.next_char;
	return lookahead;
end
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
6
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
--- Consume the lookahead character and refill it from the underlying file.
-- Also maintains `stream.line`: each consumed "\n" bumps the counter, which
-- is treated as 1 when it has not been set yet.
-- @param stream lexer stream table with `file` and `next_char` fields
-- @param param  unused; kept so existing call sites stay valid
-- @return the character that was the lookahead before this call
function read_char(stream, param)
	local consumed = stream.next_char;
	-- Pull exactly one more character from the file into the lookahead slot.
	stream.next_char = stream.file:read(1);
	if consumed ~= "\n" then
		return consumed;
	end
	local current_line = stream.line;
	if not current_line then
		current_line = 1;
	end
	stream.line = current_line + 1;
	return consumed;
end
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
13
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
--- Consume characters until the lookahead equals `char` or is nil.
-- The stopping character itself is left unread in the stream.
-- @param stream lexer stream table
-- @param char   single-character string to stop in front of
-- @return the consumed characters joined into one string (possibly "")
function read_until(stream, char)
	local pieces, count = {}, 0;
	while true do
		local upcoming = stream.next_char;
		if not upcoming or upcoming == char then
			break;
		end
		count = count + 1;
		pieces[count] = read_char(stream);
	end
	return table.concat(pieces);
end
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
21
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
--- Consume characters until the lookahead matches `pattern` or is nil.
-- The matching character is left unread in the stream.
-- @param stream  lexer stream table
-- @param pattern Lua pattern tested against each single lookahead character
-- @return the consumed characters joined into one string (possibly "")
function read_until_match(stream, pattern)
	local pieces, count = {}, 0;
	while true do
		local upcoming = stream.next_char;
		if not upcoming then
			break;
		end
		if upcoming:match(pattern) then
			break;
		end
		count = count + 1;
		pieces[count] = read_char(stream);
	end
	return table.concat(pieces);
end
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
29
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
-- Words the lexer reports as "keyword" tokens instead of "name" tokens.
local keywords = {
	"abstract", "as", "break", "case", "catch", "class", "const",
	"continue", "debugger", "default", "delete", "do", "else", "enum",
	"export", "extends", "false", "final", "finally", "for", "function",
	"goto", "if", "implements", "import", "in", "instanceof", "interface",
	"is", "namespace", "native", "new", "null", "package", "private",
	"protected", "public", "return", "static", "super", "switch",
	"synchronized", "this", "throw", "throws", "transient", "true", "try",
	"typeof", "use", "var", "volatile", "while", "with"
};

-- Add a word -> true entry for every listed word so membership is a single
-- table lookup; the numbered entries are left in place.
for index = 1, #keywords do
	keywords[keywords[index]] = true;
end

--- Look `name` up in the keyword table.
-- @param name candidate identifier
-- @return true for a listed keyword, nil for an unlisted string
local function is_keyword(name)
	local flag = keywords[name];
	return flag;
end
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
50
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
-- Every single character that can start an operator/punctuator token.
-- "%", "^" and "~" are JavaScript punctuators too (modulo, bitwise xor,
-- bitwise not); without them such input is rejected as an unexpected character.
local operators = "+-/*%(),={}.&|<>![]:?^~";
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
52
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
--- Hand one token to whoever resumed the lexer coroutine.
-- The token type is stored on the stream before yielding, so the stream
-- always records the kind of the most recently emitted token.
-- Must be called from inside a coroutine.
-- @param stream      lexer stream table
-- @param token_type  token kind string (e.g. "op", "name", "string")
-- @param token_value token text
local function push_token(stream, token_type, token_value)
	stream["last_token_type"] = token_type;
	coroutine.yield(token_type, token_value);
	return;
end
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
57
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
--- Read raw text up to (not including) the next unescaped `terminator`.
-- A backslash and the character after it are copied through verbatim, so an
-- escaped delimiter such as \" or \/ does not end the literal early.
-- Stops at end of input as well. The terminator is left unread.
local function read_delimited(stream, terminator)
	local parts = {};
	while true do
		local c = peek_char(stream);
		if c == nil or c == terminator then break; end
		parts[#parts+1] = read_char(stream);
		if c == "\\" and peek_char(stream) ~= nil then
			parts[#parts+1] = read_char(stream); -- the escaped character
		end
	end
	return table.concat(parts);
end

-- Token recognisers, tried in order. Each one peeks at the lookahead
-- character; if it recognises it, it consumes input (emitting tokens through
-- push_token where appropriate) and returns true. Otherwise it returns nil
-- and leaves the stream untouched. Callers must ensure the lookahead is
-- non-nil before invoking a handler.
local handlers = {
	-- Whitespace: skipped, no token emitted
	function (stream)
		local c = peek_char(stream);
		if c:match("%s") then
			read_until_match(stream, "%S");
			return true;
		end
	end;
	-- Strings: token value is the raw text between the quotes
	function (stream)
		local c = peek_char(stream);
		if c == [["]] or c == [[']] then
			read_char(stream); -- Use up the string marker
			push_token(stream, "string", read_delimited(stream, c));
			read_char(stream); -- Use up the string terminator
			return true;
		end
	end;
	-- Identifiers (reported as "keyword" when in the keyword table)
	function (stream)
		local c = peek_char(stream);
		if c:match("[_a-zA-Z$]") then
			local name = read_until_match(stream, "[^a-zA-Z0-9_$]");
			if is_keyword(name) then
				push_token(stream, "keyword", name);
			else
				push_token(stream, "name", name);
			end
			return true;
		end
	end;
	-- Numbers: a run of digits and dots
	function (stream)
		local c = peek_char(stream);
		if c:match("%d") then
			push_token(stream, "number", read_until_match(stream, "[^%d%.]"));
			return true;
		end
	end;
	-- Operators (and comments!)
	function (stream)
		local c = peek_char(stream);
		-- Plain find: the character must never be interpreted as a pattern
		-- class (e.g. "p" would otherwise become "%p" and match punctuation).
		if operators:find(c, 1, true) then
			-- Two statements so the read is guaranteed to happen before the peek.
			local op = read_char(stream);
			c = peek_char(stream);
			if op == "/" and (c == "/" or c == "*") then -- A comment
				if c == "/" then -- Comment until end of line
					read_until(stream, "\n");
					return true;
				else
					-- Consume the opening "*" first so that "/*/" is not
					-- mistaken for a complete comment.
					read_char(stream);
					while true do
						read_until(stream, "*");
						read_char(stream); -- Read "*"
						if peek_char(stream) == "/" then
							read_char(stream); -- Read /
							return true;
						elseif peek_char(stream) == nil then
							-- Unterminated comment: stop quietly at end of input
							return true;
						end
					end
				end
			elseif op == "/" and stream.last_token_type == "op" then -- Regex
				local regex = read_delimited(stream, "/");
				read_char(stream); -- Read the closing "/"
				local flags = read_until_match(stream, "%A");
				push_token(stream, "regex", op..regex.."/"..flags);
				-- Done: do not swallow the following character and do not
				-- fall through to emit an extra "/" operator token.
				return true;
			elseif op == "=" and (c == "=") then -- Equality
				op = "==";
				read_char(stream);
				if peek_char(stream) == "=" then
					op = "===";
					read_char(stream);
				end
			elseif op:match("[&|]") and c == op then
				op = op:rep(2);
				read_char(stream);
			elseif op == "!" and (c == "=") then -- Inequality
				op = "!=";
				read_char(stream);
				if peek_char(stream) == "=" then
					op = "!==";
					read_char(stream);
				end
			end
			push_token(stream, "op", op);
			return true;
		end
	end;
	-- Semi-colons: emitted as end-of-statement ("eos") tokens
	function (stream)
		local c = peek_char(stream);
		if c == ";" then
			read_char(stream);
			push_token(stream, "eos", c);
			return true;
		end
	end;
};
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
153
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
154
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
--- Create a lexer stream over an open file-like object.
-- The returned table holds the file, a one-character lookahead
-- (`next_char`) and `get_token`, a coroutine-backed function. Each call to
-- `get_token()` returns the next `token_type, token_value` pair; once the
-- input is exhausted it returns nothing, and keeps doing so on every later
-- call.
-- @param file object with a `read(self, n)` method (e.g. an io file handle)
-- @return the stream table
-- Raises an error from `get_token()` when no handler accepts the lookahead
-- character.
function new_stream(file)
	local stream = { file = file; };
	stream.next_char = stream.file:read(1);

	stream.get_token = coroutine.wrap(
		function ()
			while stream.next_char do
				local handled;
				for _, handler in ipairs(handlers) do
					handled = handler(stream) or handled;
					-- Handlers index the lookahead, so stop as soon as it runs out.
					if not stream.next_char then break; end
				end
				if not handled then error("Unexpected character on line "..(stream.line or 1)..": "..stream.next_char); end
			end
			-- End of input. Park the coroutine instead of letting it finish,
			-- so extra get_token() calls return nothing rather than raising
			-- "cannot resume dead coroutine".
			while true do
				coroutine.yield();
			end
		end);

	return stream;
end
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
174
b2e55f320d48 Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
175 return _M;

mercurial