Fri, 15 Oct 2010 15:17:17 +0100
Initial commit
-- jslex: a small, approximate tokenizer for JavaScript source.
--
-- Usage:
--   local stream = jslex.new_stream(file);        -- file must support file:read(1)
--   local token_type, token_value = stream.get_token();
--
-- Token types produced: "string", "keyword", "name", "number", "regex",
-- "op" and "eos" (a semi-colon). Whitespace and comments are skipped.
module("jslex", package.seeall);

-- Return the next unread character without consuming it (nil at end of input).
function peek_char(stream)
	return stream.next_char;
end

-- Consume and return the next character (nil at end of input), advancing the
-- stream by one character and keeping stream.line up to date.
-- `param` is unused; it is kept so existing callers keep working.
function read_char(stream, param)
	local c = stream.next_char;
	stream.next_char = stream.file:read(1);
	if c == "\n" then
		stream.line = (stream.line or 1) + 1;
	end
	return c;
end

-- Consume characters up to (but not including) the first occurrence of `char`,
-- or up to end of input, and return them as a string.
function read_until(stream, char)
	local r = {};
	while stream.next_char and stream.next_char ~= char do
		r[#r+1] = read_char(stream);
	end
	return table.concat(r);
end

-- Consume characters up to (but not including) the first character matching
-- the Lua pattern `pattern`, or up to end of input, and return them as a string.
function read_until_match(stream, pattern)
	local r = {};
	while stream.next_char and not stream.next_char:match(pattern) do
		r[#r+1] = read_char(stream);
	end
	return table.concat(r);
end

-- Like read_until(), but a backslash protects the character that follows it,
-- so an escaped terminator (\" inside a string, \/ inside a regex) does not end
-- the run. Escape sequences are returned verbatim (backslash included).
local function read_escaped_until(stream, terminator)
	local r = {};
	while stream.next_char and stream.next_char ~= terminator do
		local c = read_char(stream);
		r[#r+1] = c;
		if c == "\\" and stream.next_char then
			r[#r+1] = read_char(stream);
		end
	end
	return table.concat(r);
end

local keywords = {
	"abstract", "as", "break", "case", "catch", "class", "const", "continue",
	"debugger", "default", "delete", "do", "else", "enum", "export", "extends",
	"false", "final", "finally", "for", "function", "goto", "if", "implements",
	"import", "in", "instanceof", "interface", "is", "namespace", "native",
	"new", "null", "package", "private", "protected", "public", "return",
	"static", "super", "switch", "synchronized", "this", "throw", "throws",
	"transient", "true", "try", "typeof", "use", "var", "volatile", "while",
	"with"
};

-- Turn the list into a set so membership is a single table lookup
for _, k in ipairs(keywords) do
	keywords[k] = true;
end

local function is_keyword(name)
	return keywords[name];
end

-- Every character that may start an operator token. "%", "^" and "~" are
-- included so that modulo, xor and bitwise-not do not abort the lexer with
-- "Unexpected character".
local operators = "+-/*(),={}.&|<>![]:?%^~";

-- Hand a token to the consumer of stream.get_token(), remembering its type and
-- value: the operator handler needs both to tell a regex from a division.
local function push_token(stream, token_type, token_value)
	stream.last_token_type = token_type;
	stream.last_token_value = token_value;
	coroutine.yield(token_type, token_value);
end

-- A "/" starts a regex literal only where an operand is expected. That is
-- approximated as "the previous token was an operator", except for ")" and
-- "]", which end an expression, so the "/" in "(a+b)/2" or "a[0]/2" is a
-- division.
local function regex_allowed(stream)
	if stream.last_token_type ~= "op" then
		return false;
	end
	local previous = stream.last_token_value;
	return previous ~= ")" and previous ~= "]";
end

-- Each handler looks at the next character and, if it recognises it, consumes
-- input (possibly emitting a token) and returns true. Handlers are tried in
-- order; they are only called while there is input left.
local handlers = {
	-- Whitespace
	function (stream)
		local c = peek_char(stream);
		if c:match("%s") then
			read_until_match(stream, "%S");
			return true;
		end
	end;

	-- Strings (the token value is the raw text between the quotes)
	function (stream)
		local c = peek_char(stream);
		if c == [["]] or c == [[']] then
			read_char(stream); -- Use up the string marker
			push_token(stream, "string", read_escaped_until(stream, c));
			read_char(stream); -- Use up the string terminator
			return true;
		end
	end;

	-- Identifiers
	function (stream)
		local c = peek_char(stream);
		if c:match("[_a-zA-Z$]") then
			local name = read_until_match(stream, "[^a-zA-Z0-9_$]");
			if is_keyword(name) then
				push_token(stream, "keyword", name);
			else
				push_token(stream, "name", name);
			end
			return true;
		end
	end;

	-- Numbers
	function (stream)
		local c = peek_char(stream);
		if c:match("%d") then
			push_token(stream, "number", read_until_match(stream, "[^%d%.]"));
			return true;
		end
	end;

	-- Operators (and comments!)
	function (stream)
		local c = peek_char(stream);
		-- Plain-text search: building a pattern from the input ("%"..c) would
		-- misfire on letters and digits ("%p" is a character class, "%b" and
		-- "%1" are pattern errors).
		if not operators:find(c, 1, true) then
			return;
		end

		local op, following = read_char(stream), peek_char(stream);
		if op == "/" and (following == "/" or following == "*") then
			-- A comment
			if following == "/" then
				-- Comment until end of line
				read_until(stream, "\n");
				return true;
			end
			-- Use up the opening "*" so that "/*/" is not taken for a
			-- complete comment.
			read_char(stream);
			while true do
				read_until(stream, "*");
				read_char(stream); -- Read "*"
				if peek_char(stream) == "/" then
					read_char(stream); -- Read /
					return true;
				elseif peek_char(stream) == nil then
					-- Unterminated comment: stop quietly at end of input
					return true;
				end
			end
		elseif op == "/" and regex_allowed(stream) then
			-- Regex literal. Note: an unescaped "/" inside a character class
			-- such as [/] still ends the body.
			local body = read_escaped_until(stream, "/");
			read_char(stream); -- Use up the closing "/"
			local flags = read_until_match(stream, "%A");
			push_token(stream, "regex", op..body.."/"..flags);
			-- Done: the character after the flags belongs to the next token,
			-- and no extra "/" operator must be emitted.
			return true;
		elseif op == "=" and following == "=" then
			-- Equality
			op = "==";
			read_char(stream);
			if peek_char(stream) == "=" then
				op = "===";
				read_char(stream);
			end
		elseif (op == "&" or op == "|") and following == op then
			-- Logical and/or
			op = op:rep(2);
			read_char(stream);
		elseif op == "!" and following == "=" then
			-- Inequality, mirroring the handling of == and ===
			op = "!=";
			read_char(stream);
			if peek_char(stream) == "=" then
				op = "!==";
				read_char(stream);
			end
		end
		push_token(stream, "op", op);
		return true;
	end;

	-- Semi-colons
	function (stream)
		local c = peek_char(stream);
		if c == ";" then
			read_char(stream);
			push_token(stream, "eos", c);
			return true;
		end
	end;
};

-- Create a token stream over `file`, which is read one character at a time
-- with file:read(1).
--
-- stream.get_token() returns the next (token_type, token_value) pair, and
-- returns nothing once the input is exhausted. It raises an error when a
-- character is not recognised by any handler.
function new_stream(file)
	local stream = { file = file; };
	stream.next_char = stream.file:read(1);
	stream.get_token = coroutine.wrap(function ()
		while stream.next_char do
			local handled;
			for _, handler in ipairs(handlers) do
				handled = handler(stream) or handled;
				if not stream.next_char then
					break;
				end
			end
			if not handled then
				error("Unexpected character on line "..(stream.line or 1)..": "..stream.next_char);
			end
		end
	end);
	return stream;
end

return _M;