fallbacks/lxp.lua: Pure Lua pseudo-XML parser. Implements the same API as LuaExpat.

Thu, 02 Dec 2010 17:11:51 +0500

author
Waqas Hussain <waqas20@gmail.com>
date
Thu, 02 Dec 2010 17:11:51 +0500
changeset 3680
408a19977125
parent 3679
afdce92d07be
child 3681
3dbdcc79bd66

fallbacks/lxp.lua: Pure Lua pseudo-XML parser. Implements the same API as LuaExpat.

fallbacks/lxp.lua file | annotate | diff | comparison | revisions
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fallbacks/lxp.lua	Thu Dec 02 17:11:51 2010 +0500
@@ -0,0 +1,149 @@
+
+local coroutine = coroutine;
+local tonumber = tonumber;
+local string = string;
+local setmetatable, getmetatable = setmetatable, getmetatable;
+local pairs = pairs;
+
+-- A coroutine that has already finished: it is created from an empty function
+-- and resumed once so that it runs to completion. new() assigns it in place of
+-- the real parser coroutine after end-of-input or an error.
+local deadroutine = coroutine.create(function() end);
+coroutine.resume(deadroutine);
+
+module("lxp")
+
+-- Encode a Unicode code point as a UTF-8 byte string.
+-- Returns nil for values that are not valid code points (surrogates or
+-- anything above U+10FFFF). Only arithmetic operators and the localized
+-- `string` table are used, because no other globals are reachable after
+-- module("lxp").
+local function codepoint_to_utf8(code)
+	if code < 0x80 then
+		return string.char(code);
+	elseif code < 0x800 then
+		local b2 = code % 0x40;
+		return string.char(0xC0 + (code - b2) / 0x40, 0x80 + b2);
+	elseif code < 0x10000 then
+		if code >= 0xD800 and code <= 0xDFFF then
+			return nil; -- UTF-16 surrogates are not characters
+		end
+		local b3 = code % 0x40;
+		local rest = (code - b3) / 0x40;
+		local b2 = rest % 0x40;
+		return string.char(0xE0 + (rest - b2) / 0x40, 0x80 + b2, 0x80 + b3);
+	elseif code < 0x110000 then
+		local b4 = code % 0x40;
+		local rest = (code - b4) / 0x40;
+		local b3 = rest % 0x40;
+		rest = (rest - b3) / 0x40;
+		local b2 = rest % 0x40;
+		return string.char(0xF0 + (rest - b2) / 0x40, 0x80 + b2, 0x80 + b3, 0x80 + b4);
+	end
+	return nil;
+end
+
+-- Lookup table from entity name (the text between "&" and ";") to its
+-- replacement string. The five predefined XML entities are stored directly;
+-- numeric character references ("#NNN" decimal, "#xHHH" hexadecimal) are
+-- resolved on demand by the __index metamethod and returned as UTF-8.
+-- Unknown names and malformed or out-of-range numeric references yield nil,
+-- which makes string.gsub keep the original text instead of raising an error.
+local entity_map = setmetatable({
+	["amp"] = "&";
+	["gt"] = ">";
+	["lt"] = "<";
+	["apos"] = "'";
+	["quot"] = "\"";
+}, {__index = function(_, s)
+		local hex = s:match("^#x(%x+)$");
+		local code;
+		if hex then
+			code = tonumber(hex, 16);
+		else
+			local dec = s:match("^#(%d+)$");
+			code = dec and tonumber(dec);
+		end
+		if code then
+			return codepoint_to_utf8(code);
+		end
+	end
+});
+-- Expand every "&...;" sequence in str by looking up the text between the
+-- ampersand and the semicolon in entity_map. Sequences for which the lookup
+-- gives nil are left unchanged by gsub. Returns only the resulting string
+-- (the substitution count is discarded).
+local function xml_unescape(str)
+	local unescaped = str:gsub("&(.-);", entity_map);
+	return unescaped;
+end
+-- Split the inside of a tag (the text between "<" and ">", without any
+-- leading or trailing "/") into the element name and an attribute table.
+-- s: tag contents, e.g. [[stream:features xmlns='jabber:client']]
+-- Returns: name (the first run of non-whitespace characters) and a table
+-- mapping each attribute name to its entity-unescaped value.
+-- A value must be closed by the same quote character that opened it, so a
+-- double-quoted value may contain apostrophes and vice versa. Whitespace
+-- around "=" is accepted.
+local function parse_tag(s)
+	local name, sattr = s:match("([^%s]+)(.*)");
+	local attr = {};
+	-- %2 is a back-reference to the captured opening quote.
+	for key, _, value in (sattr or ""):gmatch("([^=%s]+)%s*=%s*(['\"])(.-)%2") do
+		attr[key] = xml_unescape(value);
+	end
+	return name, attr;
+end
+
+-- Coroutine body of the streaming parser. It consumes XML text incrementally
+-- and calls handlers:StartElement(name, attr), handlers:EndElement(name) and
+-- handlers:CharacterData(text) as markup is recognised. Whenever more input is
+-- needed it yields; the first value passed to the next resume is used as
+-- additional input text. The main loop has no exit, so the function never
+-- returns normally.
+-- data: first chunk of input text
+-- handlers: object providing the three callback methods named above
+-- ns_separator: string inserted between a namespace URI and the local name
+-- NOTE(review): apply_ns concatenates ns_separator unconditionally, so a nil
+-- separator would raise as soon as a prefix resolves -- confirm callers
+-- always pass one.
+local function parser(data, handlers, ns_separator)
+	-- Wait (yielding for more input) until str occurs in the buffer, then
+	-- return the buffer up to and including the position where str starts and
+	-- remove that part from the buffer. The search is a plain-text find.
+	local function read_until(str)
+		local pos = data:find(str, nil, true);
+		while not pos do
+			data = data..coroutine.yield();
+			pos = data:find(str, nil, true);
+		end
+		local r = data:sub(1, pos);
+		data = data:sub(pos+1);
+		return r;
+	end
+	-- Like read_until, but returns only the text before str and leaves str
+	-- itself at the front of the buffer.
+	local function read_before(str)
+		local pos = data:find(str, nil, true);
+		while not pos do
+			data = data..coroutine.yield();
+			pos = data:find(str, nil, true);
+		end
+		local r = data:sub(1, pos-1);
+		data = data:sub(pos);
+		return r;
+	end
+	-- Return the first character of the buffer without consuming it, yielding
+	-- for input while the buffer is empty.
+	local function peek()
+		while #data == 0 do data = coroutine.yield(); end
+		return data:sub(1,1);
+	end
+	
+	-- Current namespace scope: maps prefix -> URI, with "" used as the key of
+	-- the default namespace. Each scope is also used as the metatable of its
+	-- child scope (via __index), so lookups fall back to enclosing elements.
+	local ns = { xml = "http://www.w3.org/XML/1998/namespace" };
+	ns.__index = ns;
+	-- Expand a possibly prefixed name to URI..ns_separator..localname when the
+	-- prefix is known in the current scope. If dodefault is true and a default
+	-- namespace is in scope, unprefixed (or unknown-prefix) names get the
+	-- default namespace; otherwise the name is returned unchanged.
+	local function apply_ns(name, dodefault)
+		local prefix,n = name:match("^([^:]*):(.*)$");
+		if prefix and ns[prefix] then
+			return ns[prefix]..ns_separator..n;
+		end
+		if dodefault and ns[""] then
+			return ns[""]..ns_separator..name;
+		end
+		return name;
+	end
+	-- Enter a new element: open a child namespace scope, move xmlns / xmlns:*
+	-- declarations from attr into it, and return the expanded tag name plus a
+	-- new attribute table. The new table holds the expanded attribute names in
+	-- its array part (in pairs() order) and name -> value in its hash part.
+	local function push(tag, attr)
+		ns = setmetatable({}, ns);
+		for k,v in pairs(attr) do
+			local xmlns = k == "xmlns" and "" or k:match("^xmlns:(.*)$");
+			if xmlns then
+				ns[xmlns] = v;
+				attr[k] = nil;
+			end
+		end
+		local newattr, n = {}, 0;
+		for k,v in pairs(attr) do
+			n = n+1;
+			k = apply_ns(k);
+			newattr[n] = k;
+			newattr[k] = v;
+		end
+		tag = apply_ns(tag, true);
+		-- Slot 0 remembers the expanded element name for the matching pop().
+		ns[0] = tag;
+		ns.__index = ns;
+		return tag, newattr;
+	end
+	-- Leave the current element: return the name stored by push() and restore
+	-- the enclosing scope (the metatable of the current one).
+	local function pop()
+		local tag = ns[0];
+		ns = getmetatable(ns);
+		return tag;
+	end
+	
+	-- Main loop: a leading "<" starts markup, anything else is character data
+	-- running up to the next "<".
+	while true do
+		if peek() == "<" then
+			-- Strip the surrounding "<" and ">".
+			-- NOTE(review): markup is cut at the first ">", so a ">" inside a
+			-- comment or attribute value would end the tag early.
+			local elem = read_until(">"):sub(2,-2);
+			if elem:sub(1,1) == "!" or elem:sub(1,1) == "?" then -- neglect comments and processing-instructions
+			elseif elem:sub(1,1) == "/" then -- end tag
+				-- The name written in the end tag is not used below; the name
+				-- recorded when the element was pushed is reported instead.
+				elem = elem:sub(2);
+				local name = pop();
+				handlers:EndElement(name); -- TODO check for start-end tag name match
+			elseif elem:sub(-1,-1) == "/" then -- empty tag
+				-- Self-closing element: report start and end back to back.
+				elem = elem:sub(1,-2);
+				local name,attr = parse_tag(elem);
+				name,attr = push(name,attr);
+				handlers:StartElement(name,attr);
+				name = pop();
+				handlers:EndElement(name);
+			else -- start tag
+				local name,attr = parse_tag(elem);
+				name,attr = push(name,attr);
+				handlers:StartElement(name,attr);
+			end
+		else
+			-- Text is delivered only once the following "<" has arrived.
+			local text = read_before("<");
+			handlers:CharacterData(xml_unescape(text));
+		end
+	end
+end
+
+-- Create a new parser object.
+-- handlers: object whose StartElement, EndElement and CharacterData methods
+--   are called by the parser coroutine
+-- ns_separator: string placed between namespace URI and local name in
+--   expanded names
+-- Returns a table with a single method:
+--   parser:parse(data) feeds a chunk of text to the parser. Passing nil (or
+--   false) signals end of input. Returns true on success, or nil plus the
+--   error value if parsing or a handler raised an error. After end of input
+--   or an error the parser is dead, and further data yields nil plus an error.
+function new(handlers, ns_separator)
+	local co = coroutine.create(parser);
+	return {
+		parse = function(self, data)
+			if not data then
+				co = deadroutine;
+				return true; -- eof
+			end
+			-- On the first call the arguments become the parser's parameters;
+			-- on later calls the first one is the value returned by yield().
+			local success, result = coroutine.resume(co, data, handlers, ns_separator);
+			-- Decide on the status flag rather than on the error value, so an
+			-- error raised with a nil/false value is not mistaken for success.
+			if not success then
+				co = deadroutine;
+				return nil, result; -- error
+			end
+			return true; -- success
+		end;
+	};
+end
+
+return _M;

mercurial