lib/jslex.lua

Fri, 15 Oct 2010 15:17:17 +0100

author
Matthew Wild <mwild1@gmail.com>
date
Fri, 15 Oct 2010 15:17:17 +0100
changeset 0
b2e55f320d48
permissions
-rw-r--r--

Initial commit

module("jslex", package.seeall);

-- Returns the character that the next read_char() call would return,
-- without consuming it. Yields nil when the stream has no look-ahead
-- character stored.
function peek_char(stream)
	local upcoming = stream.next_char;
	return upcoming;
end

-- Consumes and returns the current look-ahead character, refilling
-- stream.next_char with the next single character read from stream.file.
-- Each newline consumed bumps stream.line (which counts from 1 when unset).
-- The second parameter is accepted but not used.
function read_char(stream, param)
	local current = stream.next_char;
	stream.next_char = stream.file:read(1);
	if current == "\n" then
		local line_no = stream.line or 1;
		stream.line = line_no + 1;
	end
	return current;
end

-- Consumes characters up to, but not including, the first occurrence of
-- `char` (or the end of input) and returns them as a single string.
-- The terminating character is left in stream.next_char.
function read_until(stream, char)
	local buffer, count = {}, 0;
	while true do
		local upcoming = stream.next_char;
		if not upcoming or upcoming == char then
			break;
		end
		count = count + 1;
		buffer[count] = read_char(stream);
	end
	return table.concat(buffer);
end

-- Consumes characters until the look-ahead character matches the given Lua
-- pattern (or the input ends) and returns everything consumed as one string.
-- The matching character itself is not consumed.
function read_until_match(stream, pattern)
	local pieces = {};
	local upcoming = stream.next_char;
	while upcoming and not upcoming:match(pattern) do
		pieces[#pieces + 1] = read_char(stream);
		upcoming = stream.next_char;
	end
	return table.concat(pieces);
end

-- Reserved words recognised by the lexer. The table starts out as a plain
-- list and is then indexed by word as well, so it doubles as a lookup set.
local keywords = {
	"abstract", "as",
	"break",
	"case", "catch", "class", "const", "continue",
	"debugger", "default", "delete", "do",
	"else", "enum", "export", "extends",
	"false", "final", "finally", "for", "function",
	"goto",
	"if", "implements", "import", "in", "instanceof", "interface", "is",
	"namespace", "native", "new", "null",
	"package", "private", "protected", "public",
	"return",
	"static", "super", "switch", "synchronized",
	"this", "throw", "throws", "transient", "true", "try", "typeof",
	"use",
	"var", "volatile",
	"while", "with"
};

-- Add a `word -> true` entry for every listed keyword
for index = 1, #keywords do
	local word = keywords[index];
	keywords[word] = true;
end

-- Returns true when `name` is one of the reserved words above, nil otherwise.
local function is_keyword(name)
	local found = keywords[name];
	return found;
end

-- Every character that can start an operator token; the operator handler
-- checks the look-ahead character against this set. Includes "%" (modulo),
-- "^" (bitwise xor) and "~" (bitwise not) so that expressions using them are
-- tokenised instead of being rejected as unexpected characters.
local operators = "+-/*(),={}.&|<>![]:?%^~";

-- Emits one token: remembers its type on the stream as last_token_type and
-- then yields the (type, value) pair out of the running coroutine.
-- Must be called from inside a coroutine, since it yields.
local function push_token(stream, token_type, token_value)
	local emit = coroutine.yield;
	stream.last_token_type = token_type;
	emit(token_type, token_value);
end

-- Reads up to (but not including) the next unescaped `terminator`, or to the
-- end of input, and returns the raw text. A backslash and the character that
-- follows it are copied verbatim, so an escaped terminator does not end the
-- read.
local function read_delimited(stream, terminator)
	local parts = {};
	while true do
		local c = peek_char(stream);
		if c == nil or c == terminator then break; end
		parts[#parts+1] = read_char(stream);
		if c == "\\" and peek_char(stream) ~= nil then
			parts[#parts+1] = read_char(stream);
		end
	end
	return table.concat(parts);
end

-- Token handlers, tried in order. Each one peeks at the look-ahead character
-- and, if it recognises it, consumes the input it is responsible for (emitting
-- any token through push_token) and returns true. Returning nothing means the
-- handler did not recognise the character. Every handler indexes the peeked
-- character, so they must only be called while stream.next_char is non-nil.
local handlers = {
	-- Whitespace (skipped, no token emitted)
	function (stream)
		local c = peek_char(stream);
		if c:match("%s") then
			read_until_match(stream, "%S");
			return true;
		end
	end;
	-- Strings (token value is the raw text between the quotes; escape
	-- sequences are kept as written, not decoded)
	function (stream)
		local quote = peek_char(stream);
		if quote == [["]] or quote == [[']] then
			read_char(stream); -- Use up the string marker
			-- An escaped quote (\" or \') must not terminate the string
			push_token(stream, "string", read_delimited(stream, quote));
			read_char(stream); -- Use up the string terminator
			return true;
		end
	end;
	-- Identifiers
	function (stream)
		local c = peek_char(stream);
		if c:match("[_a-zA-Z$]") then
			local name = read_until_match(stream, "[^a-zA-Z0-9_$]");
			if is_keyword(name) then
				push_token(stream, "keyword", name);
			else
				push_token(stream, "name", name);
			end
			return true;
		end
	end;
	--Numbers (a run of digits and dots; the text is not validated)
	function (stream)
		local c = peek_char(stream);
		if c:match("%d") then
			push_token(stream, "number", read_until_match(stream, "[^%d%.]"));
			return true;
		end
	end;
	-- Operators (and comments!)
	function (stream)
		local c = peek_char(stream);
		-- Plain find: building a pattern from the character ("%"..c) would
		-- turn letters into character classes ("%p" matches punctuation) or
		-- into malformed patterns ("%b").
		if operators:find(c, 1, true) then
			local op = read_char(stream);
			local following = peek_char(stream);
			if op == "/" and (following == "/" or following == "*") then -- A comment
				if following == "/" then -- Comment until end of line
					read_until(stream, "\n");
				else
					-- Use up the opening "*" so that "/*/" is not mistaken
					-- for a complete comment
					read_char(stream);
					while true do
						read_until(stream, "*");
						read_char(stream); -- Read "*"
						local after = peek_char(stream);
						if after == "/" then
							read_char(stream); -- Read /
							break;
						elseif after == nil then -- Unterminated comment
							break;
						end
					end
				end
				return true;
			elseif op == "/" and stream.last_token_type == "op" then -- Regex
				-- Heuristic: a "/" directly after another operator is taken as
				-- the start of a regex literal. A "/" inside a character class
				-- is not treated specially.
				local regex = read_delimited(stream, "/");
				read_char(stream); -- Use up the closing "/"
				local flags = read_until_match(stream, "%A");
				push_token(stream, "regex", op..regex.."/"..flags);
				-- Done: the character after the flags belongs to the next
				-- token, and no separate "/" operator token must follow
				return true;
			elseif op == "=" and (following == "=") then -- Equality
				op = "==";
				read_char(stream);
				if peek_char(stream) == "=" then
					op = "===";
					read_char(stream);
				end
			elseif op:match("[&|]") and following == op then
				op = op:rep(2);
				read_char(stream);
			elseif op == "!" and (following == "=") then -- Inequality
				op = "!=";
				read_char(stream);
				if peek_char(stream) == "=" then
					op = "!==";
					read_char(stream);
				end
			end
			push_token(stream, "op", op);
			return true;
		end
	end;
	-- Semi-colons
	function (stream)
		local c = peek_char(stream);
		if c == ";" then
			read_char(stream);
			push_token(stream, "eos", c);
			return true;
		end
	end;
};


-- Creates a token stream over `file`, which must provide a :read(1) method
-- returning the next character, or nil at end of input.
--
-- The returned table carries the lexer state (file, next_char, and later
-- line / last_token_type) plus a get_token() function. Each call to
-- get_token() returns the next token as (type, value). Once the input is
-- exhausted it returns nil, and keeps returning nil on further calls.
-- Raises an error when no handler recognises the current character.
function new_stream(file)
	local stream = { file = file; };
	stream.next_char = stream.file:read(1);

	local finished = false;
	local next_token = coroutine.wrap(
	function ()
		while stream.next_char do
			local handled;
			for _, handler in ipairs(handlers) do
				handled = handler(stream) or handled;
				-- Handlers index the look-ahead character, so stop as soon
				-- as the input runs out
				if not stream.next_char then break; end
			end
			if not handled then error("Unexpected character on line "..(stream.line or 1)..": "..stream.next_char); end
		end
		finished = true;
	end);

	-- Resuming a finished coroutine raises "cannot resume dead coroutine",
	-- so end of input is remembered and reported as nil from then on.
	function stream.get_token()
		if finished then return nil; end
		return next_token();
	end

	return stream;
end

return _M;

mercurial