ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes

Thu, 23 Jul 2009 01:38:13 +0100

author
Sergei Golovan
date
Thu, 23 Jul 2009 01:38:13 +0100
changeset 1572
1b87dfb76caa
parent 1571
063d7be32fdd
child 1573
43cf3d027455
child 1574
3692706f78e3

ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes

tools/erlparse.lua file | annotate | diff | comparison | revisions
--- a/tools/erlparse.lua	Tue Jul 21 19:23:03 2009 +0100
+++ b/tools/erlparse.lua	Thu Jul 23 01:38:13 2009 +0100
@@ -27,18 +27,18 @@
 	return last;
 end
 
-local _A, _a, _Z, _z, _0, _9, __, _space = string.byte("AaZz09_ ", 1, 8);
-local function isAlpha(ch)
+local _A, _a, _Z, _z, _0, _9, __, _at, _space = string.byte("AaZz09@_ ", 1, 9);
+local function isLowerAlpha(ch)
 	ch = string.byte(ch) or 0;
-	return (ch >= _A and ch <= _Z) or (ch >= _a and ch <= _z);
+	return (ch >= _a and ch <= _z);
 end
 local function isNumeric(ch)
 	ch = string.byte(ch) or 0;
 	return (ch >= _0 and ch <= _9);
 end
-local function isVar(ch)
+local function isAtom(ch)
 	ch = string.byte(ch) or 0;
-	return (ch >= _A and ch <= _Z) or (ch >= _a and ch <= _z) or (ch >= _0 and ch <= _9) or ch == __;
+	return (ch >= _A and ch <= _Z) or (ch >= _a and ch <= _z) or (ch >= _0 and ch <= _9) or ch == __ or ch == _at;
 end
 local function isSpace(ch)
 	ch = string.byte(ch) or "x";
@@ -57,24 +57,23 @@
 	str = str:gsub("\\.", {["\\b"]="\b", ["\\d"]="\d", ["\\e"]="\e", ["\\f"]="\f", ["\\n"]="\n", ["\\r"]="\r", ["\\s"]="\s", ["\\t"]="\t", ["\\v"]="\v", ["\\\""]="\"", ["\\'"]="'", ["\\\\"]="\\"});
 	return str;
 end
-local function readSpecialString()
-	read("<"); read("<"); -- read <<
-	local str = "";
-	if peek() == "\"" then
-		str = readString();
-	elseif peek() ~= ">" then
-		error();
-	end
-	read(">"); read(">"); -- read >>
-	return str;
-end
-local function readVar()
+local function readAtom1()
 	local var = read();
-	while isVar(peek()) do
+	while isAtom(peek()) do
 		var = var..read();
 	end
 	return var;
 end
+local function readAtom2()
+	local str = read("'");
+	local slash = nil;
+	while true do
+		local ch = read();
+		str = str..ch;
+		if ch == "'" and not slash then break; end
+	end
+	return str;
+end
 local function readNumber()
 	local num = read();
 	while isNumeric(peek()) do
@@ -85,28 +84,60 @@
 local readItem = nil;
 local function readTuple()
 	local t = {};
-	read(); -- read { or [
+	local s = ""; -- string representation
+	read(); -- read {, or [, or <
 	while true do
 		local item = readItem();
 		if not item then break; end
+		if type(item) ~= type(0) or item > 255 then
+			s = nil;
+		elseif s then
+			s = s..string.char(item);
+		end
 		table.insert(t, item);
 	end
-	read(); -- read } or ]
-	return t;
+	read(); -- read }, or ], or >
+	if s and s ~= "" then
+		return s
+	else
+		return t
+	end;
+end
+local function readBinary()
+	read("<"); -- read <
+	local t = readTuple();
+	read(">") -- read >
+	local ch = peek();
+	if type(t) == type("") then
+		-- binary is a list of integers
+		return t;
+	elseif type(t) == type({}) then
+		if t[1] then
+			-- binary contains string
+			return t[1];
+		else
+			-- binary is empty
+			return "";
+		end;
+	else
+		error();
+	end
 end
 readItem = function()
 	local ch = peek();
 	if ch == nil then return nil end
 	if ch == "{" or ch == "[" then
 		return readTuple();
-	elseif isAlpha(ch) then
-		return readVar();
+	elseif isLowerAlpha(ch) then
+		return readAtom1();
+	elseif ch == "'" then
+		return readAtom2();
 	elseif isNumeric(ch) then
 		return readNumber();
 	elseif ch == "\"" then
 		return readString();
 	elseif ch == "<" then
-		return readSpecialString();
+		return readBinary();
 	elseif isSpace(ch) or ch == "," or ch == "|" then
 		read();
 		return readItem();

mercurial