# HG changeset patch
# User Matthew Wild
# Date 1248536687 -3600
# Node ID f62f83d9dc43bac772af4909cb995973e524f18d
# Parent 0db12f8037f7107f9b7c85b0214f742df5475318
uglify: New specialised Lua 'compression' filter

diff -r 0db12f8037f7 -r f62f83d9dc43 squishy
--- a/squishy	Sat Jul 25 16:43:52 2009 +0100
+++ b/squishy	Sat Jul 25 16:44:47 2009 +0100
@@ -19,3 +19,10 @@
 	Main "minify/squish.minify.lua"
 end
+
+-- Compress Lua scripts (an excellent hack :) )
+if opts.with_uglify then
+	Module "llex"		"uglify/llex.lua"
+
+	Main "uglify/squish.uglify.lua"
+end
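
For reference, the squishy manifest language in the hunk above has two directives in play: Module "name" "path" registers a source file to be bundled under the given module name, and Main "path" appends a script to run in the built output. As a minimal sketch, a squishy for an ordinary project could use the same two directives like this (module names and paths are made up for illustration):

    Module "myapp.util"		"myapp/util.lua"
    Module "myapp.config"	"myapp/config.lua"
    Main "main.lua"
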
diff -r 0db12f8037f7 -r f62f83d9dc43 uglify/llex.lua
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/uglify/llex.lua	Sat Jul 25 16:44:47 2009 +0100
@@ -0,0 +1,355 @@
+--[[--------------------------------------------------------------------
+
+  llex.lua: Lua 5.1 lexical analyzer in Lua
+  This file is part of LuaSrcDiet, based on Yueliang material.
+
+  Copyright (c) 2008 Kein-Hong Man
+  The COPYRIGHT file describes the conditions
+  under which this software may be distributed.
+
+  See the ChangeLog for more information.
+
+----------------------------------------------------------------------]]
+
+--[[--------------------------------------------------------------------
+-- NOTES:
+-- * This is a version of the native 5.1.x lexer from Yueliang 0.4.0,
+--   with significant modifications to handle LuaSrcDiet's needs:
+--   (1) llex.error is an optional error function handler
+--   (2) seminfo for strings include their delimiters and no
+--       translation operations are performed on them
+-- * ADDED: shbang handling to support executable scripts
+-- * NO localized decimal point replacement magic
+-- * NO limit to number of lines
+-- * NO support for compatible long strings (LUA_COMPAT_LSTR)
+-- * Please read technotes.txt for more technical details.
+----------------------------------------------------------------------]]
+
+local base = _G
+local string = require "string"
+module "llex"
+
+local find = string.find
+local match = string.match
+local sub = string.sub
+
+----------------------------------------------------------------------
+-- initialize keyword list, variables
+----------------------------------------------------------------------
+
+local kw = {}
+for v in string.gmatch([[
+and break do else elseif end false for function if in
+local nil not or repeat return then true until while]], "%S+") do
+  kw[v] = true
+end
+
+-- NOTE: see init() for module variables (externally visible):
+--       tok, seminfo, tokln
+
+local z,                -- source stream
+      sourceid,         -- name of source
+      I,                -- position of lexer
+      buff,             -- buffer for strings
+      ln                -- line number
+
+----------------------------------------------------------------------
+-- add information to token listing
+----------------------------------------------------------------------
+
+local function addtoken(token, info)
+  local i = #tok + 1
+  tok[i] = token
+  seminfo[i] = info
+  tokln[i] = ln
+end
+
+----------------------------------------------------------------------
+-- handles line number incrementation and end-of-line characters
+----------------------------------------------------------------------
+
+local function inclinenumber(i, is_tok)
+  local sub = sub
+  local old = sub(z, i, i)
+  i = i + 1  -- skip '\n' or '\r'
+  local c = sub(z, i, i)
+  if (c == "\n" or c == "\r") and (c ~= old) then
+    i = i + 1  -- skip '\n\r' or '\r\n'
+    old = old..c
+  end
+  if is_tok then addtoken("TK_EOL", old) end
+  ln = ln + 1
+  I = i
+  return i
+end
+
+----------------------------------------------------------------------
+-- initialize lexer for given source _z and source name _sourceid
+----------------------------------------------------------------------
+
+function init(_z, _sourceid)
+  z = _z                        -- source
+  sourceid = _sourceid          -- name of source
+  I = 1                         -- lexer's position in source
+  ln = 1                        -- line number
+  tok = {}                      -- lexed token list*
+  seminfo = {}                  -- lexed semantic information list*
+  tokln = {}                    -- line numbers for messages*
+                                -- (*) externally visible thru' module
+  --------------------------------------------------------------------
+  -- initial processing (shbang handling)
+  --------------------------------------------------------------------
+  local p, _, q, r = find(z, "^(#[^\r\n]*)(\r?\n?)")
+  if p then                             -- skip first line
+    I = I + #q
+    addtoken("TK_COMMENT", q)
+    if #r > 0 then inclinenumber(I, true) end
+  end
+end
+
+----------------------------------------------------------------------
+-- returns a chunk name or id, no truncation for long names
+----------------------------------------------------------------------
+
+function chunkid()
+  if sourceid and match(sourceid, "^[=@]") then
+    return sub(sourceid, 2)  -- remove first char
+  end
+  return "[string]"
+end
+
+----------------------------------------------------------------------
+-- formats error message and throws error
+-- * a simplified version, does not report what token was responsible
+----------------------------------------------------------------------
+
+function errorline(s, line)
+  local e = error or base.error
+  e(string.format("%s:%d: %s", chunkid(), line or ln, s))
+end
+local errorline = errorline
+
+------------------------------------------------------------------------
+-- count separators ("=") in a long string delimiter
+------------------------------------------------------------------------
+
+local function skip_sep(i)
+  local sub = sub
+  local s = sub(z, i, i)
+  i = i + 1
+  local count = #match(z, "=*", i)  -- note, take the length
+  i = i + count
+  I = i
+  return (sub(z, i, i) == s) and count or (-count) - 1
+end
+
+----------------------------------------------------------------------
+-- reads a long string or long comment
+----------------------------------------------------------------------
+
+local function read_long_string(is_str, sep)
+  local i = I + 1  -- skip 2nd '['
+  local sub = sub
+  local c = sub(z, i, i)
+  if c == "\r" or c == "\n" then  -- string starts with a newline?
+    i = inclinenumber(i)  -- skip it
+  end
+  local j = i
+  while true do
+    local p, q, r = find(z, "([\r\n%]])", i) -- (long range)
+    if not p then
+      errorline(is_str and "unfinished long string" or
+                "unfinished long comment")
+    end
+    i = p
+    if r == "]" then                    -- delimiter test
+      if skip_sep(i) == sep then
+        buff = sub(z, buff, I)
+        I = I + 1  -- skip 2nd ']'
+        return buff
+      end
+      i = I
+    else                                -- newline
+      buff = buff.."\n"
+      i = inclinenumber(i)
+    end
+  end--while
+end
+
+----------------------------------------------------------------------
+-- reads a string
+----------------------------------------------------------------------
+
+local function read_string(del)
+  local i = I
+  local find = find
+  local sub = sub
+  while true do
+    local p, q, r = find(z, "([\n\r\\\"\'])", i) -- (long range)
+    if p then
+      if r == "\n" or r == "\r" then
+        errorline("unfinished string")
+      end
+      i = p
+      if r == "\\" then                 -- handle escapes
+        i = i + 1
+        r = sub(z, i, i)
+        if r == "" then break end -- (EOZ error)
+        p = find("abfnrtv\n\r", r, 1, true)
+        ------------------------------------------------------
+        if p then                       -- special escapes
+          if p > 7 then
+            i = inclinenumber(i)
+          else
+            i = i + 1
+          end
+        ------------------------------------------------------
+        elseif find(r, "%D") then       -- other non-digits
+          i = i + 1
+        ------------------------------------------------------
+        else                            -- \xxx sequence
+          local p, q, s = find(z, "^(%d%d?%d?)", i)
+          i = q + 1
+          if s + 1 > 256 then           -- UCHAR_MAX
+            errorline("escape sequence too large")
+          end
+        ------------------------------------------------------
+        end--if p
+      else
+        i = i + 1
+        if r == del then                -- ending delimiter
+          I = i
+          return sub(z, buff, i - 1)    -- return string
+        end
+      end--if r
+    else
+      break -- (error)
+    end--if p
+  end--while
+  errorline("unfinished string")
+end
+
+------------------------------------------------------------------------
+-- main lexer function
+------------------------------------------------------------------------
+
+function llex()
+  local find = find
+  local match = match
+  while true do--outer
+    local i = I
+    -- inner loop allows break to be used to nicely section tests
+    while true do--inner
+      ----------------------------------------------------------------
+      local p, _, r = find(z, "^([_%a][_%w]*)", i)
+      if p then
+        I = i + #r
+        if kw[r] then
+          addtoken("TK_KEYWORD", r)     -- reserved word (keyword)
+        else
+          addtoken("TK_NAME", r)        -- identifier
+        end
+        break -- (continue)
+      end
+      ----------------------------------------------------------------
+      local p, _, r = find(z, "^(%.?)%d", i)
+      if p then                         -- numeral
+        if r == "." then i = i + 1 end
+        local _, q, r = find(z, "^%d*[%.%d]*([eE]?)", i)
+        i = q + 1
+        if #r == 1 then                 -- optional exponent
+          if match(z, "^[%+%-]", i) then        -- optional sign
+            i = i + 1
+          end
+        end
+        local _, q = find(z, "^[_%w]*", i)
+        I = q + 1
+        local v = sub(z, p, q)                  -- string equivalent
+        if not base.tonumber(v) then            -- handles hex test also
+          errorline("malformed number")
+        end
+        addtoken("TK_NUMBER", v)
+        break -- (continue)
+      end
+      ----------------------------------------------------------------
+      local p, q, r, t = find(z, "^((%s)[ \t\v\f]*)", i)
+      if p then
+        if t == "\n" or t == "\r" then          -- newline
+          inclinenumber(i, true)
+        else
+          I = q + 1                             -- whitespace
+          addtoken("TK_SPACE", r)
+        end
+        break -- (continue)
+      end
+      ----------------------------------------------------------------
+      local r = match(z, "^%p", i)
+      if r then
+        buff = i
+        local p = find("-[\"\'.=<>~", r, 1, true)
+        if p then
+          -- two-level if block for punctuation/symbols
+          --------------------------------------------------------
+          if p <= 2 then
+            if p == 1 then                      -- minus
+              local c = match(z, "^%-%-(%[?)", i)
+              if c then
+                i = i + 2
+                local sep = -1
+                if c == "[" then
+                  sep = skip_sep(i)
+                end
+                if sep >= 0 then                -- long comment
+                  addtoken("TK_LCOMMENT", read_long_string(false, sep))
+                else                            -- short comment
+                  I = find(z, "[\n\r]", i) or (#z + 1)
+                  addtoken("TK_COMMENT", sub(z, buff, I - 1))
+                end
+                break -- (continue)
+              end
+              -- (fall through for "-")
+            else                                -- [ or long string
+              local sep = skip_sep(i)
+              if sep >= 0 then
+                addtoken("TK_LSTRING", read_long_string(true, sep))
+              elseif sep == -1 then
+                addtoken("TK_OP", "[")
+              else
+                errorline("invalid long string delimiter")
+              end
+              break -- (continue)
+            end
+          --------------------------------------------------------
+          elseif p <= 5 then
+            if p < 5 then                       -- strings
+              I = i + 1
+              addtoken("TK_STRING", read_string(r))
+              break -- (continue)
+            end
+            r = match(z, "^%.%.?%.?", i)        -- .|..|... dots
+            -- (fall through)
+          --------------------------------------------------------
+          else                                  -- relational
+            r = match(z, "^%p=?", i)
+            -- (fall through)
+          end
+        end
+        I = i + #r
+        addtoken("TK_OP", r)  -- for other symbols, fall through
+        break -- (continue)
+      end
+      ----------------------------------------------------------------
+      local r = sub(z, i, i)
+      if r ~= "" then
+        I = i + 1
+        addtoken("TK_OP", r)                    -- other single-char tokens
+        break
+      end
+      addtoken("TK_EOS", "")                    -- end of stream,
+      return                                    -- exit here
+      ----------------------------------------------------------------
+    end--while inner
+  end--while outer
+end
+
+return base.getfenv()
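
The lexer's whole contract is visible above: init() loads a source string, llex() tokenizes it in a single pass, and the results are left in the module-level parallel arrays tok (token types), seminfo (exact source text, delimiters included) and tokln (starting line numbers). A minimal sketch of a caller, mirroring how squish.uglify.lua below drives the module (the source string and chunk name are illustrative):

    local llex = require "llex"

    local source = "local x = 1 -- counter\nprint(x)\n"
    llex.init(source, "@example.lua")  -- "@" marks a file-style chunk name
    llex.llex()                        -- one pass over the whole source

    -- tok, seminfo and tokln are parallel arrays exposed by the module
    for i, t in ipairs(llex.tok) do
      print(t, string.format("%q", llex.seminfo[i]), llex.tokln[i])
    end
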
diff -r 0db12f8037f7 -r f62f83d9dc43 uglify/squish.uglify.lua
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/uglify/squish.uglify.lua	Sat Jul 25 16:44:47 2009 +0100
@@ -0,0 +1,94 @@
+local llex = require "llex"
+
+local base_char = 128;
+local keywords = { "and", "break", "do", "else", "elseif",
+	"end", "false", "for", "function", "if",
+	"in", "local", "nil", "not", "or", "repeat",
+	"return", "then", "true", "until", "while" }
+
+function uglify_file(infile_fn, outfile_fn)
+	local infile, err = io.open(infile_fn);
+	if not infile then
+		print_err("Can't open input file for reading: "..tostring(err));
+		return;
+	end
+
+	local outfile, err = io.open(outfile_fn..".uglified", "w+");
+	if not outfile then
+		print_err("Can't open output file for writing: "..tostring(err));
+		return;
+	end
+
+	local data = infile:read("*a");
+	infile:close();
+
+	local shebang, newdata = data:match("^(#.-\n)(.+)$");
+	local code = newdata or data;
+	if shebang then
+		outfile:write(shebang)
+	end
+
+	-- Find a byte range the code doesn't already use: keyword i is
+	-- encoded as the single byte base_char+i, so bytes base_char+1
+	-- through base_char+#keywords must all be free
+	while base_char + #keywords < 255 and code:find("["..string.char(base_char+1).."-"..string.char(base_char+#keywords).."]") do
+		base_char = base_char + 1;
+	end
+	if base_char + #keywords >= 255 then
+		-- Sorry, can't uglify this file :(
+		-- We /could/ use a multi-byte marker, but that would complicate
+		-- things and lower the compression ratio (there are quite a few
+		-- 2-letter keywords)
+		outfile:write(code);
+		outfile:close();
+		os.rename(outfile_fn..".uglified", outfile_fn);
+		return;
+	end
+
+	local keyword_map_to_char = {}
+	for i, keyword in ipairs(keywords) do
+		keyword_map_to_char[keyword] = string.char(base_char + i);
+	end
+
+	outfile:write("local base_char,keywords=", tostring(base_char), ",{");
+	for _, keyword in ipairs(keywords) do
+		outfile:write('"', keyword, '",');
+	end
+	outfile:write[[}; function prettify(code) return code:gsub("["..string.char(base_char).."-"..string.char(base_char+#keywords).."]",
+		function (c) return keywords[c:byte()-base_char]; end) end ]]
+
+	-- Write loadstring and open string
+	local maxequals = 0;
+	data:gsub("(=+)", function (equals_string) maxequals = math.max(maxequals, #equals_string); end);
+
+	outfile:write [[assert(loadstring(prettify]]
+	outfile:write("[", string.rep("=", maxequals+1), "[");
+
+	-- Write code, substituting tokens as we go
+	llex.init(code, "@"..infile_fn);
+	llex.llex()
+	local seminfo = llex.seminfo;
+	for k,v in ipairs(llex.tok) do
+		if v == "TK_KEYWORD" then
+			local keyword_char = keyword_map_to_char[seminfo[k]];
+			if keyword_char then
+				outfile:write(keyword_char);
+			else -- Those who think Lua shouldn't have 'continue', fix this please :)
+				outfile:write(seminfo[k]);
+			end
+		else
+			outfile:write(seminfo[k]);
+		end
+	end
+
+	-- Close string/functions
+	outfile:write("]", string.rep("=", maxequals+1), "]");
+	outfile:write("))()");
+	outfile:close();
+	os.rename(outfile_fn..".uglified", outfile_fn);
+end
+
+if opts.uglify then
+	print_info("Uglifying "..out_fn.."...");
+	uglify_file(out_fn, out_fn);
+	print_info("OK!");
+end
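
The substitution uglify_file performs is reversible by construction: keyword number i becomes the single byte base_char+i, and the emitted prettify() maps those bytes back to keywords before loadstring ever sees the code. A standalone Lua 5.1 sketch of that round trip (with a fixed base_char and a keyword subset; the real filter scans for a collision-free range, uses all 21 keywords, and substitutes via the lexer's token stream rather than pattern matching):

    local base_char = 128
    local keywords = { "local", "function", "return", "end" }

    local to_char = {}
    for i, kw in ipairs(keywords) do to_char[kw] = string.char(base_char + i) end

    local function uglify(code)
      -- plain word matching is enough for a demo; gsub keeps any word
      -- for which the replacement function returns nil
      return (code:gsub("%a+", function (w) return to_char[w] end))
    end

    local function prettify(code)
      return (code:gsub("["..string.char(base_char).."-"..string.char(base_char + #keywords).."]",
        function (c) return keywords[c:byte() - base_char] end))
    end

    local src = "local function f() return 1 end"
    assert(prettify(uglify(src)) == src)
    print(#uglify(src), #src)  --> 13  31 (each keyword shrinks to one byte)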
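
For completeness, the file the filter emits is an ordinary self-decoding Lua 5.1 script: the keyword list and prettify() come first, then the encoded source wrapped in a long string, fed through prettify and loadstring. A runnable miniature of that shape (base_char pinned at 128, and the encoded chunk built with string.char here, where the real output embeds the raw bytes in a [=[...]=] long string):

    local base_char, keywords = 128, { "and", "break", "do", "else", "elseif",
      "end", "false", "for", "function", "if", "in", "local", "nil", "not",
      "or", "repeat", "return", "then", "true", "until", "while" }
    function prettify(code)
      return (code:gsub("["..string.char(base_char).."-"..string.char(base_char + #keywords).."]",
        function (c) return keywords[c:byte() - base_char]; end))
    end
    -- "local" is keywords[12], "return" is keywords[17]
    local encoded = string.char(base_char + 12).." x = 6 * 7 "..string.char(base_char + 17).." x"
    print(assert(loadstring(prettify(encoded)))())  --> 42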