Thu, 25 Mar 2010 19:32:35 +0000
tools/erlparse: Optimisations aplenty for faster processing of large files
1523
841d61be198f
Remove version number from copyright headers
Matthew Wild <mwild1@gmail.com>
parents:
894
diff
changeset
|
1 | -- Prosody IM |
2923
b7049746bd29
Update copyright headers for 2010
Matthew Wild <mwild1@gmail.com>
parents:
1783
diff
changeset
|
2 | -- Copyright (C) 2008-2010 Matthew Wild |
b7049746bd29
Update copyright headers for 2010
Matthew Wild <mwild1@gmail.com>
parents:
1783
diff
changeset
|
3 | -- Copyright (C) 2008-2010 Waqas Hussain |
519
cccd610a0ef9
Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents:
489
diff
changeset
|
4 | -- |
758 | 5 | -- This project is MIT/X11 licensed. Please see the |
6 | -- COPYING file in the source package for more information. | |
519
cccd610a0ef9
Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents:
489
diff
changeset
|
7 | -- |
cccd610a0ef9
Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents:
489
diff
changeset
|
8 | |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
9 | local string_byte, string_char = string.byte, string.char; |
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
10 | local t_concat, t_insert = table.concat, table.insert; |
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
11 | local type, tonumber, tostring = type, tonumber, tostring; |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
12 | |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
-- Parser input state, shared by every reader function in this file.
local file = nil; -- handle of the file currently being parsed (assigned by readFile)
local last = nil; -- one character of lookahead, or nil when nothing is buffered

-- Consume and return the next character of input.
-- A buffered lookahead character, if present, is handed out before the file is
-- touched. When `expected` is given, an error is raised unless the character
-- obtained equals it. Returns nil once the file has no more data.
local function read(expected)
	local ch = last;
	if ch then
		last = nil;
	else
		ch = file:read(1);
	end
	if expected and ch ~= expected then
		error("expected: "..expected.."; got: "..(ch or "nil"));
	end
	return ch;
end
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
-- Store `ch` as the lookahead character so that the next read() returns it.
-- Only one character can be buffered: an error (without a message) is raised
-- if a lookahead character is already pending.
local function pushback(ch)
	if not last then
		last = ch;
		return;
	end
	error();
end
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
-- Return the next character of input without consuming it.
-- The character is fetched with read() on first use and kept in `last`, from
-- where the following read() call will pick it up. Returns nil at end of input.
local function peek()
	local ahead = last;
	if not ahead then
		ahead = read();
		last = ahead;
	end
	return ahead;
end
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
31 | |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
-- Byte values of the characters used by the character classifier functions.
-- The variables are listed in the same order as the characters in the string:
-- "A" "a" "Z" "z" "0" "9" "_" "@" " " "-"  (so `__` is "_" and `_at` is "@").
local _A, _a, _Z, _z, _0, _9, __, _at, _space, _minus = string_byte("AaZz09_@ -", 1, 10);
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
-- Return true when `ch` is an ASCII lowercase letter ("a".."z").
-- nil (end of input) and the empty string are classified as false; passing
-- nil straight to string.byte would raise an error instead.
local function isLowerAlpha(ch)
	local byte = ch and string_byte(ch) or 0;
	return (byte >= _a and byte <= _z);
end
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
-- Return true when `ch` may be part of an integer literal: a digit or "-".
-- nil (end of input, as returned by peek()) and the empty string yield false;
-- passing nil straight to string.byte would raise an error instead.
local function isNumeric(ch)
	local byte = ch and string_byte(ch) or 0;
	return (byte >= _0 and byte <= _9) or byte == _minus;
end
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
-- Return true when `ch` may appear inside an unquoted atom:
-- an ASCII letter, a digit, "_" or "@".
-- nil (end of input, as returned by peek()) and the empty string yield false;
-- passing nil straight to string.byte would raise an error instead.
local function isAtom(ch)
	local byte = ch and string_byte(ch) or 0;
	return (byte >= _A and byte <= _Z) or (byte >= _a and byte <= _z) or (byte >= _0 and byte <= _9) or byte == __ or byte == _at;
end
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
-- Return true when `ch` is a space or any control character (byte value <= 32).
-- nil (end of input) and the empty string are not whitespace. The previous
-- fallback value "x" was a string and made the numeric comparison raise.
local function isSpace(ch)
	local byte = ch and string_byte(ch);
	if not byte then return false; end
	return byte <= _space;
end
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
49 | |
1783
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
-- Maps an Erlang escape sequence (backslash plus one character) to the
-- character it stands for.
-- \d (DEL), \e (ESC) and \s (space) are Erlang escapes but not Lua ones, so
-- their values are spelled out with decimal escapes / a literal space.
local escapes = {
	["\\b"] = "\b", ["\\d"] = "\127", ["\\e"] = "\027", ["\\f"] = "\f",
	["\\n"] = "\n", ["\\r"] = "\r", ["\\s"] = " ", ["\\t"] = "\t", ["\\v"] = "\v",
	["\\\""] = "\"", ["\\'"] = "'", ["\\\\"] = "\\",
};
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
-- Parse a double-quoted string and return its contents with escape sequences
-- decoded through the `escapes` table. The opening quote must be the next
-- character of input.
-- Raises an error on an unknown escape sequence, or when the input ends
-- before the closing quote.
local function readString()
	read("\""); -- skip quote
	local escaping = false; -- true right after an unescaped backslash
	local str = {};
	while true do
		local ch = read();
		if ch == nil then
			-- Without this check a truncated input would loop forever, or fail
			-- with a nil-concatenation error right after a backslash.
			error("Unterminated string: end of input reached");
		end
		if escaping then
			local seq = "\\"..ch;
			local decoded = escapes[seq];
			if not decoded then error("Unknown escape sequence: "..seq); end
			str[#str+1] = decoded;
			escaping = false;
		elseif ch == "\"" then
			break;
		elseif ch == "\\" then
			escaping = true;
		else
			str[#str+1] = ch;
		end
	end
	return t_concat(str);
end
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
-- Read an unquoted atom and return it as a string.
-- The first character is taken unconditionally (the caller has already
-- checked it); further characters are consumed for as long as isAtom()
-- accepts the lookahead character.
local function readAtom1()
	local name = { read() };
	local ahead = peek();
	while isAtom(ahead) do
		name[#name+1] = read();
		ahead = peek();
	end
	return t_concat(name);
end
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
-- Parse a single-quoted atom and return its text.
-- The returned string keeps the surrounding quotes and any backslashes
-- exactly as they appear in the input. A character that follows a backslash
-- is taken literally, so an escaped quote does not end the atom.
-- Raises an error when the input ends before the closing quote.
local function readAtom2()
	local str = { read("'") };
	local slash = false; -- true when the previous character was an unescaped backslash
	while true do
		local ch = read();
		if ch == nil then
			-- Without this check a truncated input would loop forever.
			error("Unterminated quoted atom: end of input reached");
		end
		str[#str+1] = ch;
		if slash then
			slash = false;
		elseif ch == "\\" then
			slash = true;
		elseif ch == "'" then
			break;
		end
	end
	return t_concat(str);
end
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
-- Read an integer literal and return it as a Lua number.
-- The first character is taken unconditionally; further characters are
-- consumed while isNumeric() accepts the lookahead. The collected text is
-- converted with tonumber(), which yields nil if it is not a valid number.
local function readNumber()
	local digits = { read() };
	while true do
		if not isNumeric(peek()) then break; end
		digits[#digits+1] = read();
	end
	return tonumber(t_concat(digits));
end
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
-- Forward declaration: readTuple calls readItem and readItem calls readTuple,
-- so the local has to exist before readTuple is defined. The function itself
-- is assigned further down in the file.
local readItem = nil;

-- Read a bracketed sequence of items (tuple, list, or the inner part of a
-- binary). The opening bracket must be the next character of input; items are
-- read with readItem() until it returns nothing, then one closing character
-- is consumed.
-- Returns a string when the sequence is non-empty and every item is an
-- integer in 0..255 (the items are treated as bytes); otherwise returns an
-- array table of the items.
local function readTuple()
	local t = {};
	local s = {}; -- string representation; nil once an item cannot be a byte
	read(); -- read {, or [, or <
	while true do
		local item = readItem();
		if not item then break; end
		-- string.char raises on values outside 0..255, so negative numbers
		-- must disqualify the byte-string form just like large ones do.
		if type(item) ~= "number" or item < 0 or item > 255 then
			s = nil;
		elseif s then
			s[#s+1] = string_char(item);
		end
		t_insert(t, item);
	end
	read(); -- read }, or ], or >
	if s and #s > 0 then
		return t_concat(s);
	end
	return t;
end
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
-- Parse a binary written as << ... >> and return it as a string.
-- The outer "<" is consumed here; readTuple() consumes the inner "<", the
-- items and the inner ">"; the outer ">" is consumed here again.
-- Returns the byte string when the binary is a list of integers, the first
-- item when readTuple() returned a table, or "" for an empty binary.
local function readBinary()
	read("<"); -- read <
	local t = readTuple();
	read(">") -- read >
	if type(t) == "string" then
		-- binary is a list of integers
		return t;
	end
	if type(t) ~= "table" then
		error("Invalid binary: unexpected "..type(t).." value");
	end
	if t[1] then
		-- binary contains string
		return t[1];
	end
	-- binary is empty
	return "";
end
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
-- Read the next term from the input and return it.
-- Separators (whitespace/control characters, "," and "|") are skipped first.
-- The lookahead character then selects the reader: "{" or "[" -> readTuple,
-- lowercase letter -> readAtom1, "'" -> readAtom2, digit or "-" -> readNumber,
-- '"' -> readString, "<" -> readBinary.
-- Returns nil at end of input or when the next character starts no known
-- term; in the latter case the character is left unconsumed.
readItem = function()
	local ch = peek();
	while ch ~= nil and (isSpace(ch) or ch == "," or ch == "|") do
		read();
		ch = peek();
	end
	if ch == nil then return nil; end
	if ch == "{" or ch == "[" then return readTuple(); end
	if isLowerAlpha(ch) then return readAtom1(); end
	if ch == "'" then return readAtom2(); end
	if isNumeric(ch) then return readNumber(); end
	if ch == "\"" then return readString(); end
	if ch == "<" then return readBinary(); end
	-- unknown character: nothing to read here
	return nil;
end
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
-- Read one top-level term together with the "." that terminates it.
-- Returns the term, or whatever falsy value readItem() produced when there
-- was no term to read (in which case no "." is required).
local function readChunk()
	local term = readItem();
	if not term then return term; end
	read(".");
	return term;
end
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
-- Open `filename` and return an iterator function over its top-level terms.
-- Each call of the iterator returns the next term, or nil once the input is
-- exhausted; the file is closed at that point and later calls keep returning
-- nil. Raises an error if the file cannot be opened, or if a character that
-- starts no known term is encountered.
-- The parser state lives in the file-level `file` and `last` variables, so
-- only one file can be iterated at a time.
local function readFile(filename)
	local handle = io.open(filename);
	if not handle then error("File not found: "..filename); end
	file = handle;
	last = nil; -- drop lookahead left over from a previously parsed file
	local finished = false;
	return function()
		if finished then return nil; end
		local x = readChunk();
		if not x then
			if peek() then error("Invalid char: "..peek()); end
			-- End of input: release the handle instead of leaking it.
			finished = true;
			handle:close();
		end
		return x;
	end;
end
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
175 | |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
-- Declare the "erlparse" module. module() creates the module table and makes
-- it the environment of the rest of this chunk, so the function defined
-- below becomes a field of that table.
module "erlparse"

-- Public entry point: open the given file and return an iterator function
-- that yields its top-level terms one per call (see readFile).
-- Note that the parameter shadows the file-level `file` variable here.
function parseFile(file)
	local nextTerm = readFile(file);
	return nextTerm;
end

return _M;