tools/erlparse.lua

Sat, 22 May 2010 01:48:31 +0200

author
Tobias Markmann <tm@ayena.de>
date
Sat, 22 May 2010 01:48:31 +0200
changeset 3099
2c4d06e7e3d3
parent 2947
ff7f6668b34f
permissions
-rw-r--r--

util.sasl.scram: Check nonce in client final message. Check channel binding flag in client first message. Adding some TODOs on more strict parsing. (thanks Marc Santamaria)

1523
841d61be198f Remove version number from copyright headers
Matthew Wild <mwild1@gmail.com>
parents: 894
diff changeset
1 -- Prosody IM
2923
b7049746bd29 Update copyright headers for 2010
Matthew Wild <mwild1@gmail.com>
parents: 1783
diff changeset
2 -- Copyright (C) 2008-2010 Matthew Wild
b7049746bd29 Update copyright headers for 2010
Matthew Wild <mwild1@gmail.com>
parents: 1783
diff changeset
3 -- Copyright (C) 2008-2010 Waqas Hussain
519
cccd610a0ef9 Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents: 489
diff changeset
4 --
758
b1885732e979 GPL->MIT!
Matthew Wild <mwild1@gmail.com>
parents: 615
diff changeset
5 -- This project is MIT/X11 licensed. Please see the
b1885732e979 GPL->MIT!
Matthew Wild <mwild1@gmail.com>
parents: 615
diff changeset
6 -- COPYING file in the source package for more information.
519
cccd610a0ef9 Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents: 489
diff changeset
7 --
cccd610a0ef9 Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents: 489
diff changeset
8
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
9 local string_byte, string_char = string.byte, string.char;
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
10 local t_concat, t_insert = table.concat, table.insert;
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
11 local type, tonumber, tostring = type, tonumber, tostring;
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
12
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
13 local file = nil;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
14 local last = nil;
2946
ad306c5ae689 tools/erlparse: Report the line number when showing a syntax error in the input file
Matthew Wild <mwild1@gmail.com>
parents: 2945
diff changeset
15 local line = 1;
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
16 local function read(expected)
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
17 local ch;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
18 if last then
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
19 ch = last; last = nil;
2946
ad306c5ae689 tools/erlparse: Report the line number when showing a syntax error in the input file
Matthew Wild <mwild1@gmail.com>
parents: 2945
diff changeset
20 else
ad306c5ae689 tools/erlparse: Report the line number when showing a syntax error in the input file
Matthew Wild <mwild1@gmail.com>
parents: 2945
diff changeset
21 ch = file:read(1);
ad306c5ae689 tools/erlparse: Report the line number when showing a syntax error in the input file
Matthew Wild <mwild1@gmail.com>
parents: 2945
diff changeset
22 if ch == "\n" then line = line + 1; end
ad306c5ae689 tools/erlparse: Report the line number when showing a syntax error in the input file
Matthew Wild <mwild1@gmail.com>
parents: 2945
diff changeset
23 end
ad306c5ae689 tools/erlparse: Report the line number when showing a syntax error in the input file
Matthew Wild <mwild1@gmail.com>
parents: 2945
diff changeset
24 if expected and ch ~= expected then error("expected: "..expected.."; got: "..(ch or "nil").." on line "..line); end
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
25 return ch;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
26 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
27 local function pushback(ch)
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
28 if last then error(); end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
29 last = ch;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
30 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
31 local function peek()
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
32 if not last then last = read(); end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
33 return last;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
34 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
35
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
36 local _A, _a, _Z, _z, _0, _9, __, _at, _space, _minus = string_byte("AaZz09@_ -", 1, 10);
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
37 local function isLowerAlpha(ch)
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
38 ch = string_byte(ch) or 0;
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
39 return (ch >= _a and ch <= _z);
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
40 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
41 local function isNumeric(ch)
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
42 ch = string_byte(ch) or 0;
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
43 return (ch >= _0 and ch <= _9) or ch == _minus;
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
44 end
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
45 local function isAtom(ch)
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
46 ch = string_byte(ch) or 0;
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
47 return (ch >= _A and ch <= _Z) or (ch >= _a and ch <= _z) or (ch >= _0 and ch <= _9) or ch == __ or ch == _at;
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
48 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
49 local function isSpace(ch)
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
50 ch = string_byte(ch) or "x";
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
51 return ch <= _space;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
52 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
53
1783
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
54 local escapes = {["\\b"]="\b", ["\\d"]="\d", ["\\e"]="\e", ["\\f"]="\f", ["\\n"]="\n", ["\\r"]="\r", ["\\s"]="\s", ["\\t"]="\t", ["\\v"]="\v", ["\\\""]="\"", ["\\'"]="'", ["\\\\"]="\\"};
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
55 local function readString()
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
56 read("\""); -- skip quote
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
57 local slash = nil;
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
58 local str = {};
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
59 while true do
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
60 local ch = read();
1783
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
61 if slash then
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
62 slash = slash..ch;
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
63 if not escapes[slash] then error("Unknown escape sequence: "..slash); end
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
64 str[#str+1] = escapes[slash];
1783
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
65 slash = nil;
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
66 elseif ch == "\"" then
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
67 break;
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
68 elseif ch == "\\" then
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
69 slash = ch;
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
70 else
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
71 str[#str+1] = ch;
1783
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
72 end
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
73 end
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
74 return t_concat(str);
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
75 end
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
76 local function readAtom1()
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
77 local var = { read() };
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
78 while isAtom(peek()) do
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
79 var[#var+1] = read();
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
80 end
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
81 return t_concat(var);
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
82 end
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
83 local function readAtom2()
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
84 local str = { read("'") };
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
85 local slash = nil;
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
86 while true do
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
87 local ch = read();
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
88 str[#str+1] = ch;
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
89 if ch == "'" and not slash then break; end
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
90 end
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
91 return t_concat(str);
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
92 end
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
93 local function readNumber()
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
94 local num = { read() };
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
95 while isNumeric(peek()) do
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
96 num[#num+1] = read();
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
97 end
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
98 return tonumber(t_concat(num));
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
99 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
100 local readItem = nil;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
101 local function readTuple()
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
102 local t = {};
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
103 local s = {}; -- string representation
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
104 read(); -- read {, or [, or <
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
105 while true do
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
106 local item = readItem();
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
107 if not item then break; end
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
108 if type(item) ~= "number" or item > 255 then
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
109 s = nil;
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
110 elseif s then
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
111 s[#s+1] = string_char(item);
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
112 end
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
113 t_insert(t, item);
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
114 end
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
115 read(); -- read }, or ], or >
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
116 if s and #s > 0 then
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
117 return t_concat(s)
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
118 else
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
119 return t
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
120 end;
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
121 end
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
122 local function readBinary()
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
123 read("<"); -- read <
2947
ff7f6668b34f tools/erlparse: Discard erlang PIDs instead of choking on them as a syntax error
Matthew Wild <mwild1@gmail.com>
parents: 2946
diff changeset
124 -- Discard PIDs
ff7f6668b34f tools/erlparse: Discard erlang PIDs instead of choking on them as a syntax error
Matthew Wild <mwild1@gmail.com>
parents: 2946
diff changeset
125 if isNumeric(peek()) then
ff7f6668b34f tools/erlparse: Discard erlang PIDs instead of choking on them as a syntax error
Matthew Wild <mwild1@gmail.com>
parents: 2946
diff changeset
126 while peek() ~= ">" do read(); end
ff7f6668b34f tools/erlparse: Discard erlang PIDs instead of choking on them as a syntax error
Matthew Wild <mwild1@gmail.com>
parents: 2946
diff changeset
127 read(">");
ff7f6668b34f tools/erlparse: Discard erlang PIDs instead of choking on them as a syntax error
Matthew Wild <mwild1@gmail.com>
parents: 2946
diff changeset
128 return {};
ff7f6668b34f tools/erlparse: Discard erlang PIDs instead of choking on them as a syntax error
Matthew Wild <mwild1@gmail.com>
parents: 2946
diff changeset
129 end
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
130 local t = readTuple();
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
131 read(">") -- read >
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
132 local ch = peek();
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
133 if type(t) == "string" then
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
134 -- binary is a list of integers
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
135 return t;
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
136 elseif type(t) == "table" then
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
137 if t[1] then
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
138 -- binary contains string
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
139 return t[1];
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
140 else
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
141 -- binary is empty
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
142 return "";
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
143 end;
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
144 else
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
145 error();
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
146 end
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
147 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
148 readItem = function()
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
149 local ch = peek();
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
150 if ch == nil then return nil end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
151 if ch == "{" or ch == "[" then
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
152 return readTuple();
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
153 elseif isLowerAlpha(ch) then
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
154 return readAtom1();
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
155 elseif ch == "'" then
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
156 return readAtom2();
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
157 elseif isNumeric(ch) then
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
158 return readNumber();
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
159 elseif ch == "\"" then
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
160 return readString();
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
161 elseif ch == "<" then
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
162 return readBinary();
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
163 elseif isSpace(ch) or ch == "," or ch == "|" then
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
164 read();
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
165 return readItem();
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
166 else
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
167 --print("Unknown char: "..ch);
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
168 return nil;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
169 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
170 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
171 local function readChunk()
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
172 local x = readItem();
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
173 if x then read("."); end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
174 return x;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
175 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
176 local function readFile(filename)
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
177 file = io.open(filename);
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
178 if not file then error("File not found: "..filename); os.exit(0); end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
179 return function()
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
180 local x = readChunk();
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
181 if not x and peek() then error("Invalid char: "..peek()); end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
182 return x;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
183 end;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
184 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
185
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
186 module "erlparse"
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
187
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
188 function parseFile(file)
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
189 return readFile(file);
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
190 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
191
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
192 return _M;

mercurial