lib/jslex.lua

changeset 0
b2e55f320d48
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/jslex.lua	Fri Oct 15 15:17:17 2010 +0100
@@ -0,0 +1,175 @@
+module("jslex", package.seeall); -- Lua 5.1 module declaration: functions below become fields of `jslex`; package.seeall keeps globals readable
+
+--- Look at the next unread character without consuming it.
+-- @param stream lexer stream table; only its `next_char` field is read
+-- @return whatever is currently held in `stream.next_char` (nil when nothing is buffered)
+function peek_char(stream)
+	local lookahead = stream.next_char;
+	return lookahead;
+end
+
+--- Consume one character from the stream.
+-- Hands back the buffered lookahead character and refills the buffer with a
+-- single character read from `stream.file`. Consuming a newline bumps
+-- `stream.line` (which counts from 1 when it has not been set yet).
+-- @param stream lexer stream table (`file`, `next_char`, `line`)
+-- @param param  unused; kept so the signature stays unchanged
+-- @return the character that was buffered before the call (may be nil)
+function read_char(stream, param)
+	local consumed = stream.next_char;
+	stream.next_char = stream.file:read(1);
+	if consumed ~= "\n" then
+		return consumed;
+	end
+	stream.line = (stream.line or 1) + 1;
+	return consumed;
+end
+
+--- Consume characters until the lookahead equals `char` or input runs out.
+-- The stopping character itself is left unread in the stream.
+-- @param stream lexer stream table
+-- @param char   one-character string to stop in front of
+-- @return the consumed characters joined into a single string (possibly "")
+function read_until(stream, char)
+	local pieces, count = {}, 0;
+	while true do
+		local upcoming = stream.next_char;
+		if not upcoming or upcoming == char then
+			break;
+		end
+		count = count + 1;
+		pieces[count] = read_char(stream);
+	end
+	return table.concat(pieces);
+end
+
+--- Consume characters until the lookahead matches `pattern` or input runs out.
+-- The first matching character is left unread in the stream.
+-- @param stream  lexer stream table
+-- @param pattern Lua pattern tested against each single lookahead character
+-- @return the consumed characters joined into a single string (possibly "")
+function read_until_match(stream, pattern)
+	local pieces, count = {}, 0;
+	while true do
+		local upcoming = stream.next_char;
+		if not upcoming or upcoming:match(pattern) then
+			break;
+		end
+		count = count + 1;
+		pieces[count] = read_char(stream);
+	end
+	return table.concat(pieces);
+end
+
+-- Words reported as "keyword" tokens instead of "name" tokens.
+-- The table serves two purposes: its array part is the list itself, and the
+-- loop below adds each word as a `word = true` key for direct lookup.
+local keywords = {
+	"abstract", "as",
+	"break",
+	"case", "catch", "class", "const", "continue",
+	"debugger", "default", "delete", "do",
+	"else", "enum", "export", "extends",
+	"false", "final", "finally", "for", "function",
+	"goto",
+	"if", "implements", "import", "in", "instanceof", "interface", "is",
+	"namespace", "native", "new", "null",
+	"package", "private", "protected", "public",
+	"return",
+	"static", "super", "switch", "synchronized",
+	"this", "throw", "throws", "transient", "true", "try", "typeof",
+	"use",
+	"var", "volatile",
+	"while", "with"
+};
+
+-- Mirror every list entry as a string key so membership is a single index.
+for index = 1, #keywords do
+	keywords[keywords[index]] = true;
+end
+-- Look `name` up in the keywords table.
+-- Returns true for a listed word and nil for any other string.
+local function is_keyword(name)
+	local flag = keywords[name];
+	return flag;
+end
+
+-- Characters that may start an "op" token (";" is lexed separately as "eos").
+-- "%", "^" and "~" are included so that modulo, XOR and bitwise-NOT in valid
+-- JavaScript are tokenised instead of raising "Unexpected character".
+local operators = "+-/*(),={}.&|<>![]:?%^~";
+
+-- Emit one token to whoever resumed the lexer coroutine.
+-- The token type is also remembered on the stream as `last_token_type`
+-- before the coroutine yields the (type, value) pair.
+local function push_token(stream, token_type, token_value)
+	stream["last_token_type"] = token_type;
+	local yield = coroutine.yield;
+	yield(token_type, token_value);
+end
+
+-- Read up to (not including) `terminator`, treating a backslash as an escape:
+-- the character after a backslash never ends the literal. The text is returned
+-- raw, i.e. backslashes are kept exactly as they appear in the input.
+local function read_escaped_until(stream, terminator)
+	local parts = {};
+	while true do
+		local c = peek_char(stream);
+		if c == nil or c == terminator then break; end
+		parts[#parts+1] = read_char(stream);
+		if c == "\\" and peek_char(stream) ~= nil then
+			parts[#parts+1] = read_char(stream); -- Escaped character, kept verbatim
+		end
+	end
+	return table.concat(parts);
+end
+
+-- Skip the rest of a /* ... */ comment. Called with the leading "/" already
+-- consumed and the opening "*" still pending. Stops quietly at end of input.
+local function skip_block_comment(stream)
+	read_char(stream); -- Use up the opening "*" so that "/*/" is not seen as a whole comment
+	while peek_char(stream) do
+		read_until(stream, "*");
+		read_char(stream); -- Read "*"
+		if peek_char(stream) == "/" then
+			read_char(stream); -- Read "/"
+			return;
+		end
+	end
+end
+
+-- Token handlers, tried in table order. Each one peeks at the next character;
+-- when it recognises it, it consumes input (yielding a token via push_token
+-- where appropriate) and returns true. Otherwise it returns nothing and
+-- leaves the stream untouched.
+local handlers = {
+	-- Whitespace: skipped, no token
+	function (stream)
+		local c = peek_char(stream);
+		if c:match("%s") then
+			read_until_match(stream, "%S");
+			return true;
+		end
+	end;
+	-- Strings: single or double quoted, value is the raw text between the quotes
+	function (stream)
+		local quote = peek_char(stream);
+		if quote == [["]] or quote == [[']] then
+			read_char(stream); -- Use up the string marker
+			push_token(stream, "string", read_escaped_until(stream, quote));
+			read_char(stream); -- Use up the string terminator
+			return true;
+		end
+	end;
+	-- Identifiers: reported as "keyword" when listed, otherwise as "name"
+	function (stream)
+		local c = peek_char(stream);
+		if c:match("[_a-zA-Z$]") then
+			local name = read_until_match(stream, "[^a-zA-Z0-9_$]");
+			if is_keyword(name) then
+				push_token(stream, "keyword", name);
+			else
+				push_token(stream, "name", name);
+			end
+			return true;
+		end
+	end;
+	-- Numbers: a run of digits and dots (no hex or exponent forms)
+	function (stream)
+		local c = peek_char(stream);
+		if c:match("%d") then
+			push_token(stream, "number", read_until_match(stream, "[^%d%.]"));
+			return true;
+		end
+	end;
+	-- Operators (and comments, and regex literals)
+	function (stream)
+		local c = peek_char(stream);
+		-- Plain search: building a pattern from the input character would turn
+		-- letters such as "p" or "b" into pattern classes ("%p", "%b")
+		if not operators:find(c, 1, true) then
+			return;
+		end
+		local op = read_char(stream);
+		local following = peek_char(stream);
+		if op == "/" then
+			if following == "/" then -- Comment until end of line
+				read_until(stream, "\n");
+				return true;
+			elseif following == "*" then -- Block comment
+				skip_block_comment(stream);
+				return true;
+			elseif stream.last_token_type == "op" then -- Regex
+				local regex = read_escaped_until(stream, "/");
+				read_char(stream); -- Use up the closing "/"
+				local flags = read_until_match(stream, "%A");
+				push_token(stream, "regex", op..regex.."/"..flags);
+				-- Done: the character after the flags stays in the stream and
+				-- no separate "/" operator token is emitted
+				return true;
+			end
+		elseif (op == "=" or op == "!") and following == "=" then
+			op = op..read_char(stream); -- "==" or "!="
+			if peek_char(stream) == "=" then
+				op = op..read_char(stream); -- "===" or "!=="
+			end
+		elseif (op == "&" or op == "|") and following == op then
+			op = op..read_char(stream); -- "&&" or "||"
+		end
+		push_token(stream, "op", op);
+		return true;
+	end;
+	-- Semi-colons: end of statement
+	function (stream)
+		local c = peek_char(stream);
+		if c == ";" then
+			read_char(stream);
+			push_token(stream, "eos", c);
+			return true;
+		end
+	end;
+};
+
+
+--- Create a lexer stream over `file`.
+-- One character is read from `file` immediately to prime the lookahead.
+-- The returned table carries `file`, `next_char` and `get_token`, a wrapped
+-- coroutine: each call resumes the lexer until a handler yields the next
+-- (type, value) pair, and returns nothing once the input is used up.
+-- Raises an error when no handler accepts the current character.
+-- @param file object with a `read` method, called as file:read(1)
+-- @return the new stream table
+function new_stream(file)
+	local stream = {};
+	stream.file = file;
+	stream.next_char = file:read(1);
+
+	-- Body of the lexer coroutine: offer the current position to every
+	-- handler in turn until the input is exhausted.
+	local function tokenise()
+		while stream.next_char do
+			local recognised = false;
+			for _, handler in ipairs(handlers) do
+				if handler(stream) then
+					recognised = true;
+				end
+				if not stream.next_char then
+					break;
+				end
+			end
+			if not recognised then
+				error("Unexpected character on line "..(stream.line or 1)..": "..stream.next_char);
+			end
+		end
+	end
+
+	stream.get_token = coroutine.wrap(tokenise);
+	return stream;
+end
+
+return _M; -- hand the module table (set up by the module() call at the top) back to the loader

mercurial