6 -- COPYING file in the source package for more information. |
6 -- COPYING file in the source package for more information. |
7 -- |
7 -- |
8 |
8 |
9 |
9 |
10 --- WARNING! --- |
10 --- WARNING! --- |
11 -- This file contains a mix of encodings below. |
11 -- This file contains a mix of encodings below. |
12 -- Many editors will unquestioningly convert these for you. |
12 -- Many editors will unquestioningly convert these for you. |
13 -- Please be careful :( (I recommend Scite) |
13 -- Please be careful :( (I recommend Scite) |
14 --------------------------------- |
14 --------------------------------- |
15 |
15 |
16 local gmatch = string.gmatch; |
16 local gmatch = string.gmatch; |
17 local t_concat, t_insert = table.concat, table.insert; |
17 local t_concat, t_insert = table.concat, table.insert; |
18 local to_byte, to_char = string.byte, string.char; |
18 local to_byte, to_char = string.byte, string.char; |
19 |
19 |
--- Re-encode a Latin-1 (ISO-8859-1) byte string as UTF-8.
-- Each input byte is treated as one code point in U+0000..U+00FF. Bytes below
-- 0x80 are copied unchanged; every other byte becomes a two-byte sequence.
-- @param str the Latin-1 string; a nil or false value is handed back as-is
-- @return the UTF-8 encoded string
local function _latin1toutf8(str)
	if not str then
		return str;
	end
	local pieces = {};
	for char in gmatch(str, ".") do
		local code = to_byte(char);
		local encoded;
		if code >= 0xC0 then
			-- 0xC0..0xFF: lead byte 0xC3, continuation is the byte minus 0x40.
			encoded = to_char(0xC3, code - 0x40);
		elseif code >= 0x80 then
			-- 0x80..0xBF: lead byte 0xC2, the byte itself is the continuation.
			encoded = to_char(0xC2, code);
		else
			-- 7-bit ASCII is already valid UTF-8.
			encoded = char;
		end
		t_insert(pieces, encoded);
	end
	return t_concat(pieces);
end
35 |
35 |
36 function latin1toutf8() |
36 function latin1toutf8() |
-- Convert `latin` with _latin1toutf8 and check the result against `utf8`,
-- reporting the offending input in the failure message.
local function assert_utf8(latin, utf8)
	local converted = _latin1toutf8(latin);
	local message = "Incorrect UTF8 from Latin1: "..tostring(latin);
	assert_equal(converted, utf8, message);
end