Tue, 06 Oct 2009 18:05:31 +0100
util.stanza: Make xml_escape publicly accessible
1523
841d61be198f
Remove version number from copyright headers
Matthew Wild <mwild1@gmail.com>
parents:
894
diff
changeset
|
1 | -- Prosody IM |
760
90ce865eebd8
Update copyright notices for 2009
Matthew Wild <mwild1@gmail.com>
parents:
759
diff
changeset
|
2 | -- Copyright (C) 2008-2009 Matthew Wild |
90ce865eebd8
Update copyright notices for 2009
Matthew Wild <mwild1@gmail.com>
parents:
759
diff
changeset
|
3 | -- Copyright (C) 2008-2009 Waqas Hussain |
519
cccd610a0ef9
Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents:
509
diff
changeset
|
4 | -- |
758 | 5 | -- This project is MIT/X11 licensed. Please see the |
6 | -- COPYING file in the source package for more information. | |
519
cccd610a0ef9
Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents:
509
diff
changeset
|
7 | -- |
cccd610a0ef9
Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents:
509
diff
changeset
|
8 | |
cccd610a0ef9
Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents:
509
diff
changeset
|
9 | |
509
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
10 | --- WARNING! --- |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
11 | -- This file contains a mix of encodings below. |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
12 | -- Many editors will unquestioningly convert these for you. |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
13 | -- Please be careful :( (I recommend Scite) |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
14 | --------------------------------- |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
15 | |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
16 | local gmatch = string.gmatch; |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
17 | local t_concat, t_insert = table.concat, table.insert; |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
18 | local to_byte, to_char = string.byte, string.char; |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
19 | |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
20 | local function _latin1toutf8(str) |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
21 | if not str then return str; end |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
22 | local p = {}; |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
23 | for ch in gmatch(str, ".") do |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
24 | ch = to_byte(ch); |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
25 | if (ch < 0x80) then |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
26 | t_insert(p, to_char(ch)); |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
27 | elseif (ch < 0xC0) then |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
28 | t_insert(p, to_char(0xC2, ch)); |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
29 | else |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
30 | t_insert(p, to_char(0xC3, ch - 64)); |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
31 | end |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
32 | end |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
33 | return t_concat(p); |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
34 | end |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
35 | |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
36 | function latin1toutf8() |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
37 | local function assert_utf8(latin, utf8) |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
38 | assert_equal(_latin1toutf8(latin), utf8, "Incorrect UTF8 from Latin1: "..tostring(latin)); |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
39 | end |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
40 | |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
41 | assert_utf8("", "") |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
42 | assert_utf8("test", "test") |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
43 | assert_utf8(nil, nil) |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
44 | assert_utf8("foobar.råkat.se", "foobar.rÃ¥kat.se") |
32899c8a6fe5
Add test for latin1toutf8 (which passes)
Matthew Wild <mwild1@gmail.com>
parents:
diff
changeset
|
45 | end |