-- jslex: a small coroutine-driven tokenizer for JavaScript source text.
--
-- A stream is a table holding a file-like object (anything with a
-- :read(1) method returning one character or nil), a one-character
-- lookahead in `next_char`, and a `get_token` function that returns
-- (token_type, token_value) pairs until the input is exhausted.
-- Token types produced: "string", "keyword", "name", "number", "regex",
-- "op" and "eos". Whitespace and comments are skipped.
module("jslex", package.seeall);

--- Return the next unread character without consuming it (nil at end of input).
function peek_char(stream)
	return stream.next_char;
end

--- Consume and return the next character, refilling the lookahead.
-- Bumps stream.line whenever the consumed character is a newline.
-- @param stream lexer state table
-- @param param unused; kept so existing two-argument calls stay valid
function read_char(stream, param)
	local c = stream.next_char;
	stream.next_char = stream.file:read(1);
	if c == "\n" then stream.line = (stream.line or 1) + 1; end
	return c;
end

--- Consume characters up to (not including) the first occurrence of `char`,
-- or to end of input, and return them as one string.
function read_until(stream, char)
	local r = {};
	while stream.next_char and stream.next_char ~= char do
		r[#r+1] = read_char(stream);
	end
	return table.concat(r);
end

--- Consume characters up to (not including) the first character matching the
-- Lua pattern `pattern`, or to end of input, and return them as one string.
function read_until_match(stream, pattern)
	local r = {};
	while stream.next_char and not stream.next_char:match(pattern) do
		r[#r+1] = read_char(stream);
	end
	return table.concat(r);
end

-- Like read_until, but a backslash protects the character that follows it,
-- so an escaped terminator (e.g. \" inside "...") does not end the read.
-- The text is returned raw: escapes are kept verbatim, not decoded.
local function read_escaped_until(stream, terminator)
	local r = {};
	while stream.next_char and stream.next_char ~= terminator do
		local c = read_char(stream);
		r[#r+1] = c;
		if c == "\\" and stream.next_char then
			r[#r+1] = read_char(stream);
		end
	end
	return table.concat(r);
end

-- Read the body of a regex literal up to (not including) the closing "/".
-- A "/" that is backslash-escaped or sits inside a [...] character class
-- does not terminate the literal.
local function read_regex_body(stream)
	local r, in_class = {}, false;
	while stream.next_char and (in_class or stream.next_char ~= "/") do
		local c = read_char(stream);
		r[#r+1] = c;
		if c == "\\" and stream.next_char then
			r[#r+1] = read_char(stream);
		elseif c == "[" then
			in_class = true;
		elseif c == "]" then
			in_class = false;
		end
	end
	return table.concat(r);
end

local keywords = {
	"abstract", "as", "break", "case", "catch",
	"class", "const", "continue", "debugger",
	"default", "delete", "do", "else", "enum",
	"export", "extends", "false", "final",
	"finally", "for", "function", "goto", "if",
	"implements", "import", "in", "instanceof",
	"interface", "is", "namespace", "native",
	"new", "null", "package", "private",
	"protected", "public", "return", "static",
	"super", "switch", "synchronized", "this",
	"throw", "throws", "transient", "true",
	"try", "typeof", "use", "var", "volatile",
	"while", "with"
};

-- Index the list by name as well, so membership is a single table lookup.
for _, k in ipairs(keywords) do keywords[k] = true; end
local function is_keyword(name)
	return keywords[name];
end

local operators = "+-/*(),={}.&|<>![]:?";

-- Hand a token to the consumer of get_token, remembering its type and value
-- so the "/" handling can tell a regex literal from a division.
local function push_token(stream, token_type, token_value)
	stream.last_token_type = token_type;
	stream.last_token_value = token_value;
	coroutine.yield(token_type, token_value);
end

-- A "/" starts a regex literal only when the previous token was an operator
-- other than a closing ")" or "]" (after those, "/" is a division).
local function regex_allowed(stream)
	if stream.last_token_type ~= "op" then return false; end
	local prev = stream.last_token_value;
	return prev ~= ")" and prev ~= "]";
end

-- Each handler inspects the lookahead character; if it recognises it, it
-- consumes input (possibly yielding a token) and returns true.
local handlers = {
	-- Whitespace
	function (stream)
		local c = peek_char(stream);
		if c:match("%s") then
			read_until_match(stream, "%S");
			return true;
		end
	end;
	-- Strings
	function (stream)
		local c = peek_char(stream);
		if c == [["]] or c == [[']] then
			read_char(stream); -- Use up the string marker
			push_token(stream, "string", read_escaped_until(stream, c));
			read_char(stream); -- Use up the string terminator
			return true;
		end
	end;
	-- Identifiers
	function (stream)
		local c = peek_char(stream);
		if c:match("[_a-zA-Z$]") then
			local name = read_until_match(stream, "[^a-zA-Z0-9_$]");
			if is_keyword(name) then
				push_token(stream, "keyword", name);
			else
				push_token(stream, "name", name);
			end
			return true;
		end
	end;
	-- Numbers
	function (stream)
		local c = peek_char(stream);
		if c:match("%d") then
			push_token(stream, "number", read_until_match(stream, "[^%d%.]"));
			return true;
		end
	end;
	-- Operators (and comments!)
	function (stream)
		local c = peek_char(stream);
		-- Plain find: building a pattern from the input character would turn
		-- letters into character classes ("%p", "%A", ...) or raise ("%b").
		if not operators:find(c, 1, true) then return; end

		local op = read_char(stream);
		local nxt = peek_char(stream);
		if op == "/" and nxt == "/" then -- Comment until end of line
			read_until(stream, "\n");
			return true;
		elseif op == "/" and nxt == "*" then -- Block comment
			read_char(stream); -- Opener's "*", so "/*/" is not seen as closed
			while true do
				read_until(stream, "*");
				if read_char(stream) == nil then -- Unterminated comment at EOF
					return true;
				end
				if peek_char(stream) == "/" then
					read_char(stream); -- Read /
					return true;
				end
			end
		elseif op == "/" and regex_allowed(stream) then -- Regex
			local regex = read_regex_body(stream);
			read_char(stream); -- Closing "/"
			local flags = read_until_match(stream, "%A");
			push_token(stream, "regex", op..regex.."/"..flags);
			return true; -- The literal is the whole token; no extra "op"
		elseif op == "=" and nxt == "=" then -- Equality
			op = "==";
			read_char(stream);
			if peek_char(stream) == "=" then
				op = "===";
				read_char(stream);
			end
		elseif op:match("[&|]") and nxt == op then
			op = op:rep(2);
			read_char(stream);
		elseif op == "!" and nxt == "=" then -- Inequality
			op = "!=";
			read_char(stream);
			if peek_char(stream) == "=" then
				op = "!==";
				read_char(stream);
			end
		end
		push_token(stream, "op", op);
		return true;
	end;
	-- Semi-colons
	function (stream)
		local c = peek_char(stream);
		if c == ";" then
			read_char(stream);
			push_token(stream, "eos", c);
			return true;
		end
	end;
};

--- Create a token stream over `file`.
-- @param file object with a :read(1) method yielding one character, or nil at
--   end of input
-- @return stream table; call stream.get_token() repeatedly to receive
--   (token_type, token_value). It returns nothing once input is exhausted and
--   raises an error on a character no handler recognises.
function new_stream(file)
	local stream = { file = file; };
	stream.next_char = stream.file:read(1);

	stream.get_token = coroutine.wrap(
		function ()
			while stream.next_char do
				local handled;
				for _, handler in ipairs(handlers) do
					handled = handler(stream) or handled;
					-- Handlers index the lookahead, so stop at end of input.
					if not stream.next_char then break; end
				end
				if not handled then error("Unexpected character on line "..(stream.line or 1)..": "..stream.next_char); end
			end
		end);

	return stream;
end

return _M;