|
1 --- WARNING! --- |
|
2 -- This file contains a mix of encodings below. |
|
3 -- Many editors will unquestioningly convert these for you. |
|
4 -- Please be careful :( (I recommend Scite) |
|
5 --------------------------------- |
|
6 |
|
7 local gmatch = string.gmatch; |
|
8 local t_concat, t_insert = table.concat, table.insert; |
|
9 local to_byte, to_char = string.byte, string.char; |
|
10 |
|
--- Re-encode a Latin-1 byte string as UTF-8.
-- Each input byte is treated as one code point in the range 0x00..0xFF.
-- @param str byte string to convert; a falsy value (nil/false) is returned as-is
-- @return the UTF-8 encoded string, or `str` itself when it is falsy
local function _latin1toutf8(str)
	if not str then
		return str;
	end

	local out = {};
	for char in gmatch(str, ".") do
		local code = to_byte(char);
		if code >= 0xC0 then
			-- 0xC0..0xFF: lead byte 0xC3, continuation byte is the code minus 0x40.
			t_insert(out, to_char(0xC3, code - 0x40));
		elseif code >= 0x80 then
			-- 0x80..0xBF: lead byte 0xC2, continuation byte is the code itself.
			t_insert(out, to_char(0xC2, code));
		else
			-- Below 0x80 the single byte is emitted unchanged.
			t_insert(out, char);
		end
	end
	return t_concat(out);
end
|
26 |
|
--- Test case for `_latin1toutf8`.
-- Non-ASCII fixtures are written with decimal byte escapes rather than raw
-- high bytes, so the literals stay byte-exact no matter which encoding an
-- editor or tool saves this file in.
-- NOTE(review): `assert_equal` is not defined in this block; presumably it is
-- supplied by the test harness as assert_equal(actual, expected, message).
function latin1toutf8()
	local function assert_utf8(latin, utf8)
		assert_equal(_latin1toutf8(latin), utf8, "Incorrect UTF8 from Latin1: "..tostring(latin));
	end

	-- Empty, pure-ASCII and nil input come back unchanged.
	assert_utf8("", "")
	assert_utf8("test", "test")
	assert_utf8(nil, nil)

	-- "\229" (0xE5) is a-with-ring in Latin-1; "\195\165" (0xC3 0xA5) is its UTF-8 form.
	assert_utf8("foobar.r\229kat.se", "foobar.r\195\165kat.se")

	-- Boundaries of the three byte ranges handled by the converter.
	assert_utf8("\127", "\127")       -- 0x7F: last single-byte value
	assert_utf8("\128", "\194\128")   -- 0x80: first value using lead byte 0xC2
	assert_utf8("\191", "\194\191")   -- 0xBF: last value using lead byte 0xC2
	assert_utf8("\192", "\195\128")   -- 0xC0: first value using lead byte 0xC3
	assert_utf8("\255", "\195\191")   -- 0xFF: last value using lead byte 0xC3
end