Sat, 10 Nov 2012 04:02:30 +0000
parsers.markdown: Make module callable, to allow parsing text as a module
0 | 1 | #!/usr/bin/env lua |
2 | ||
3 | --[[ | |
4 | # markdown.lua -- version 0.32 | |
5 | ||
6 | <http://www.frykholm.se/files/markdown.lua> | |
7 | ||
8 | **Author:** Niklas Frykholm, <niklas@frykholm.se> | |
9 | **Date:** 31 May 2008 | |
10 | ||
11 | This is an implementation of the popular text markup language Markdown in pure Lua. | |
12 | Markdown can convert documents written in a simple and easy to read text format | |
13 | to well-formatted HTML. For a more thorough description of Markdown and the Markdown | |
14 | syntax, see <http://daringfireball.net/projects/markdown>. | |
15 | ||
16 | The original Markdown source is written in Perl and makes heavy use of advanced | |
17 | regular expression techniques (such as negative look-ahead, etc) which are not available | |
18 | in Lua's simple regex engine. Therefore this Lua port has been rewritten from the ground | |
19 | up. It is probably not completely bug free. If you notice any bugs, please report them to | |
20 | me. A unit test that exposes the error is helpful. | |
21 | ||
22 | ## Usage | |
23 | ||
24 | require "markdown" | |
25 | markdown(source) | |
26 | ||
27 | ``markdown.lua`` exposes a single global function named ``markdown(s)`` which applies the | |
28 | Markdown transformation to the specified string. | |
29 | ||
30 | ``markdown.lua`` can also be used directly from the command line: | |
31 | ||
32 | lua markdown.lua test.md | |
33 | ||
34 | Creates a file ``test.html`` with the converted content of ``test.md``. Run: | |
35 | ||
36 | lua markdown.lua -h | |
37 | ||
38 | For a description of the command-line options. | |
39 | ||
40 | ``markdown.lua`` uses the same license as Lua, the MIT license. | |
41 | ||
42 | ## License | |
43 | ||
44 | Copyright © 2008 Niklas Frykholm. | |
45 | ||
46 | Permission is hereby granted, free of charge, to any person obtaining a copy of this | |
47 | software and associated documentation files (the "Software"), to deal in the Software | |
48 | without restriction, including without limitation the rights to use, copy, modify, merge, | |
49 | publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons | |
50 | to whom the Software is furnished to do so, subject to the following conditions: | |
51 | ||
52 | The above copyright notice and this permission notice shall be included in all copies | |
53 | or substantial portions of the Software. | |
54 | ||
55 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
56 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
57 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
58 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
59 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
60 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
61 | THE SOFTWARE. | |
62 | ||
63 | ## Version history | |
64 | ||
65 | - **0.32** -- 31 May 2008 | |
66 | - Fix for links containing brackets | |
67 | - **0.31** -- 1 Mar 2008 | |
68 | - Fix for link definitions followed by spaces | |
69 | - **0.30** -- 25 Feb 2008 | |
70 | - Consistent behavior with Markdown when the same link reference is reused | |
71 | - **0.29** -- 24 Feb 2008 | |
72 | - Fix for <pre> blocks with spaces in them | |
73 | - **0.28** -- 18 Feb 2008 | |
74 | - Fix for link encoding | |
75 | - **0.27** -- 14 Feb 2008 | |
76 | - Fix for link database links with () | |
77 | - **0.26** -- 06 Feb 2008 | |
78 | - Fix for nested italic and bold markers | |
79 | - **0.25** -- 24 Jan 2008 | |
80 | - Fix for encoding of naked < | |
81 | - **0.24** -- 21 Jan 2008 | |
82 | - Fix for link behavior. | |
83 | - **0.23** -- 10 Jan 2008 | |
84 | - Fix for a regression bug in longer expressions in italic or bold. | |
85 | - **0.22** -- 27 Dec 2007 | |
86 | - Fix for crash when processing blocks with a percent sign in them. | |
87 | - **0.21** -- 27 Dec 2007 | |
88 | - Fix for combined strong and emphasis tags | |
89 | - **0.20** -- 13 Oct 2007 | |
90 | - Fix for < as well in image titles, now matches Dingus behavior | |
91 | - **0.19** -- 28 Sep 2007 | |
92 | - Fix for quotation marks " and ampersands & in link and image titles. | |
93 | - **0.18** -- 28 Jul 2007 | |
94 | - Does not crash on unmatched tags (behaves like standard markdown) | |
95 | - **0.17** -- 12 Apr 2007 | |
96 | - Fix for links with %20 in them. | |
97 | - **0.16** -- 12 Apr 2007 | |
98 | - Do not require arg global to exist. | |
99 | - **0.15** -- 28 Aug 2006 | |
100 | - Better handling of links with underscores in them. | |
101 | - **0.14** -- 22 Aug 2006 | |
102 | - Bug for *`foo()`* | |
103 | - **0.13** -- 12 Aug 2006 | |
104 | - Added -l option for including stylesheet inline in document. | |
105 | - Fixed bug in -s flag. | |
106 | - Fixed emphasis bug. | |
107 | - **0.12** -- 15 May 2006 | |
108 | - Fixed several bugs to comply with MarkdownTest 1.0 <http://six.pairlist.net/pipermail/markdown-discuss/2004-December/000909.html> | |
109 | - **0.11** -- 12 May 2006 | |
110 | - Fixed bug for escaping `*` and `_` inside code spans. | |
111 | - Added license terms. | |
112 | - Changed join() to table.concat(). | |
113 | - **0.10** -- 3 May 2006 | |
114 | - Initial public release. | |
115 | ||
116 | // Niklas | |
117 | ]] | |
118 | ||
119 | ||
-- Set up a table for holding local functions to avoid polluting the global namespace.
-- Reads that miss in M fall through to _G, and calling the module table itself
-- forwards the arguments to M.markdown.
local M = {}
local MT = {__index = _G, __call = function (M, ...) return M.markdown(...); end }
setmetatable(M, MT)
-- From here on, global assignments made by this chunk are stored in M (Lua 5.1 setfenv).
setfenv(1, M)
125 | ||
126 | ---------------------------------------------------------------------- | |
127 | -- Utility functions | |
128 | ---------------------------------------------------------------------- | |
129 | ||
-- Locks table t from changes: creating a new field on t afterwards raises an error.
-- This is useful for detecting variables that have "accidentally" been made global.
--
-- Entries of an existing metatable (for example __index and __call) are carried
-- over to the new metatable, so the table keeps its other behaviour once locked.
function lock(t)
  -- Kept local so that locking a module does not itself add a field to it.
  local function lock_new_index(t, k, v)
    error("module has been locked -- " .. tostring(k) .. " must be declared local", 2)
  end

  local mt = {__newindex = lock_new_index}
  local orig_mt = getmetatable(t)
  if orig_mt then
    for k, v in pairs(orig_mt) do
      -- An inherited __newindex must not replace the lock handler.
      if k ~= "__newindex" then
        mt[k] = v
      end
    end
  end
  setmetatable(t, mt)
end
149 | ||
-- Returns a new table that holds f(value, key) for every key/value pair of t,
-- stored under the same key. t itself is not modified.
function map(t, f)
  local out = {}
  for k, v in pairs(t) do
    out[k] = f(v, k)
  end
  return out
end
156 | ||
-- The identity function, useful as a placeholder: returns its argument unchanged.
function identity(text)
  return text
end
159 | ||
-- Functional style if statement: returns a when t is truthy, otherwise b.
-- (NOTE: no short circuit evaluation -- both a and b are evaluated by the caller.)
function iff(t, a, b)
  if t then
    return a
  end
  return b
end
162 | ||
-- Splits the text into an array of pieces separated by sep.
--   text: the string to split.
--   sep:  a Lua pattern used as separator (defaults to "\n").
-- Always returns at least one element; text without any separator comes back
-- as a single piece.
function split(text, sep)
  sep = sep or "\n"
  local lines = {}
  local pos = 1
  while true do
    local b, e = text:find(sep, pos)
    -- A separator that matches the empty string (e < b) would never advance
    -- pos, so it is treated like "no further separator".
    if not b or e < b then
      table.insert(lines, text:sub(pos))
      break
    end
    table.insert(lines, text:sub(pos, b - 1))
    pos = e + 1
  end
  return lines
end
176 | ||
-- Converts tabs to spaces, using tab stops every 4 characters.
-- Each tab is replaced by as many spaces as are needed to reach the next tab
-- stop (always at least one, at most four).
function detab(text)
  local tab_width = 4
  -- prefix is the text between the previous tab (or line start) and this tab.
  local function expand(prefix)
    local padding = tab_width - (prefix:len() % tab_width)
    return prefix .. string.rep(" ", padding)
  end
  text = text:gsub("([^\n]-)\t", expand)
  return text
end
188 | ||
-- Applies string.find for every pattern in the list and returns the results of
-- the match that starts earliest in s (start, stop and any captures).
--   s:        the string to search.
--   patterns: array of Lua patterns.
--   index:    optional position at which the search starts.
-- Returns nothing when no pattern matches. On a tie the pattern listed first wins.
function find_first(s, patterns, index)
  local unpack_results = unpack or table.unpack
  local best = {}
  for _, p in ipairs(patterns) do
    local match = {s:find(p, index)}
    if #match > 0 and (#best == 0 or match[1] < best[1]) then
      best = match
    end
  end
  return unpack_results(best)
end
198 | ||
-- If a replacement array is specified, the range [start, stop] in the array is replaced
-- with the elements of the replacement array (in their original order) and the
-- modified array is returned. Without a replacement array a new array holding the
-- elements between start and stop is returned and the input is left untouched.
function splice(array, start, stop, replacement)
  if replacement then
    for _ = start, stop do
      table.remove(array, start)
    end
    for i, v in ipairs(replacement) do
      -- Offset by i so that several replacement elements keep their order.
      table.insert(array, start + i - 1, v)
    end
    return array
  end

  local res = {}
  for i = start, stop do
    table.insert(res, array[i])
  end
  return res
end
221 | ||
-- Outdents the text one step: removes up to four leading spaces from every line.
function outdent(text)
  -- A newline is prepended so that the first line is handled like all others.
  text = "\n" .. text
  text = text:gsub("\n ? ? ? ?", "\n")
  text = text:sub(2)
  return text
end
229 | ||
-- Indents the text one step: inserts four spaces after every newline.
-- The first line is left as it is; callers supply its indentation themselves.
function indent(text)
  text = text:gsub("\n", "\n" .. string.rep(" ", 4))
  return text
end
235 | ||
-- Does a simple tokenization of html data. Returns the data as a list of tokens.
-- Each token is a table with a type field (which is either "tag" or "text") and
-- a text field (which contains the original token data).
--
-- Comments (<!-- ... -->), processing instructions (<? ... ?>) and ordinary tags
-- each become one "tag" token. A "<" that looks like a tag start but is never
-- closed is emitted as a one-character "text" token.
function tokenize_html(html)
  local tag_openers = {"<!%-%-", "<[a-z/!$]", "<%?"}
  local tokens = {}
  local pos = 1
  while true do
    local start = find_first(html, tag_openers, pos)
    if not start then
      -- No further tags: the remainder is one text token.
      table.insert(tokens, {type="text", text=html:sub(pos)})
      break
    end
    if start ~= pos then
      table.insert(tokens, {type="text", text=html:sub(pos, start-1)})
    end

    local _, stop
    if html:match("^<!%-%-", start) then
      _, stop = html:find("%-%->", start)
    elseif html:match("^<%?", start) then
      _, stop = html:find("?>", start, true)
    else
      _, stop = html:find("%b<>", start)
    end

    if not stop then
      -- Unmatched tag: keep the "<" as text and continue right after it.
      table.insert(tokens, {type="text", text=html:sub(start, start)})
      pos = start + 1
    else
      table.insert(tokens, {type="tag", text=html:sub(start, stop)})
      pos = stop + 1
    end
  end
  return tokens
end
269 | ||
270 | ---------------------------------------------------------------------- | |
271 | -- Hash | |
272 | ---------------------------------------------------------------------- | |
273 | ||
-- This is used to "hash" data into alphanumeric strings that are unique
-- in the document. (Note that this is not a cryptographic hash; the hash
-- function is not one-way.) The hash procedure is used to protect parts
-- of the document from further processing.

local HASH = {
  -- Has the hash been inited.
  inited = false,

  -- The unique string prepended to all hash values. This is to ensure
  -- that hash values do not accidentally coincide with an actual existing
  -- string in the document.
  identifier = "",

  -- Counter that counts up for each new hash instance.
  counter = 0,

  -- Hash table: maps each hashed string to the id generated for it.
  table = {}
}
294 | ||
-- Inits hashing. Resets the counter and the hash table and picks an identifier
-- of the form "HASH<n>" (smallest n from 0 upwards) that doesn't occur anywhere
-- in the text.
function init_hash(text)
  HASH.inited = true
  HASH.identifier = ""
  HASH.counter = 0
  HASH.table = {}

  local prefix = "HASH"
  local n = 0
  -- Plain (non-pattern) search for each candidate identifier.
  while text:find(prefix .. n, 1, true) do
    n = n + 1
  end
  HASH.identifier = prefix .. n
end
313 | ||
-- Returns the hashed value for s. The same string always gets the same id;
-- a new id is the identifier chosen by init_hash, followed by a running
-- counter and a closing "X". Requires init_hash to have been called.
function hash(s)
  assert(HASH.inited)
  local id = HASH.table[s]
  if not id then
    HASH.counter = HASH.counter + 1
    id = HASH.identifier .. HASH.counter .. "X"
    HASH.table[s] = id
  end
  return id
end
324 | ||
325 | ---------------------------------------------------------------------- | |
326 | -- Protection | |
327 | ---------------------------------------------------------------------- | |
328 | ||
-- The protection module is used to "protect" parts of a document
-- so that they are not modified by subsequent processing steps.
-- Protected parts are saved in a table for later unprotection.

-- Protection data
local PD = {
  -- Saved blocks that have been converted (hash id -> original text)
  blocks = {},

  -- Block level tags that will be protected
  tags = {"p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote",
    "pre", "table", "dl", "ol", "ul", "script", "noscript", "form", "fieldset",
    "iframe", "math", "ins", "del"}
}
343 | ||
-- Pattern for matching a block tag that begins and ends in the leftmost
-- column and may contain indented subtags, i.e.
-- <div>
--    A nested block.
--    <div>
--        Nested data.
--    </div>
-- </div>
-- The match includes the newline before the opening tag and the newline after
-- the closing tag.
function block_pattern(tag)
  local opening = "\n<" .. tag
  local closing = "\n</" .. tag .. ">[ \t]*\n"
  return opening .. ".-" .. closing
end
355 | ||
-- Pattern for matching a block tag that begins right after a newline and whose
-- closing tag is followed only by optional blanks and a newline.
function line_pattern(tag)
  local opening = "\n<" .. tag
  local closing = "</" .. tag .. ">[ \t]*\n"
  return opening .. ".-" .. closing
end
360 | ||
-- Protects the range of characters from start to stop in the text and
-- returns the protected string.
--
-- The characters at positions start and stop are stored as part of the block
-- and are also left in the text around the hash. The patterns used by protect
-- begin and end with a newline, so the hash ends up on a line of its own.
function protect_range(text, start, stop)
  local block = text:sub(start, stop)
  local id = hash(block)
  PD.blocks[id] = block
  return text:sub(1, start) .. id .. text:sub(stop)
end
370 | ||
-- Protect every part of the text that matches any of the patterns. The
-- earliest match in the text is protected first; the search is then repeated
-- on the updated text until nothing matches any more.
function protect_matches(text, patterns)
  local start, stop = find_first(text, patterns)
  while start do
    text = protect_range(text, start, stop)
    start, stop = find_first(text, patterns)
  end
  return text
end
381 | ||
-- Protects blocklevel tags in the specified text and returns the text with the
-- protected parts replaced by their hashes.
function protect(text)
  local passes = {
    -- First protect potentially nested block tags
    map(PD.tags, block_pattern),
    -- Then protect block tags at the line level.
    map(PD.tags, line_pattern),
    -- Protect <hr> and comment tags
    {"\n<hr[^>]->[ \t]*\n"},
    {"\n<!%-%-.-%-%->[ \t]*\n"},
  }
  for _, patterns in ipairs(passes) do
    text = protect_matches(text, patterns)
  end
  return text
end
393 | ||
-- Returns a true value if the string s is a hash resulting from protection:
-- the protected text stored for s, or nil when s is not such a hash.
function is_protected(s)
  return PD.blocks[s]
end
398 | ||
-- Unprotects the specified text by expanding every hash created by the
-- protection functions back into the text it stands for.
function unprotect(text)
  for id, block in pairs(PD.blocks) do
    -- "%" is special in gsub replacement strings, so it has to be doubled.
    local replacement = block:gsub("%%", "%%%%")
    text = text:gsub(id, replacement)
  end
  return text
end
407 | ||
408 | ||
409 | ---------------------------------------------------------------------- | |
410 | -- Block transform | |
411 | ---------------------------------------------------------------------- | |
412 | ||
413 | -- The block transform functions transform the text on the block level. | |
414 | -- They work with the text as an array of lines rather than as individual | |
415 | -- characters. | |
416 | ||
-- Returns true if the line is a ruler of (char) characters.
-- The line must contain at least three char characters and contain only spaces and
-- char characters. char is a single punctuation character; it is escaped
-- before being used in the patterns.
function is_ruler_of(line, char)
  local c = "%" .. char
  if not line:match("^[ " .. c .. "]*$") then
    return false
  end
  return line:match(c .. ".*" .. c .. ".*" .. c) ~= nil
end
425 | ||
-- Identifies the block level formatting present in the line.
-- Returns an info table with the fields line (the original line), text (the
-- line with its block marker removed, where applicable) and type, which is one
-- of "indented", "ruler", "blank", "header", "list_item", "blockquote", "raw"
-- or "normal". Some types carry extra fields (outdented, ruler_char, level,
-- list_type, number, bullet, html).
function classify(line)
  local info = {line = line, text = line}

  -- Four leading spaces mark an indented line; line:sub(5) drops exactly those.
  if line:sub(1, 4) == string.rep(" ", 4) then
    info.type = "indented"
    info.outdented = line:sub(5)
    return info
  end

  for _, c in ipairs({'*', '-', '_', '='}) do
    if is_ruler_of(line, c) then
      info.type = "ruler"
      info.ruler_char = c
      return info
    end
  end

  if line == "" then
    info.type = "blank"
    return info
  end

  -- "# Title" style header; trailing "#" characters are ignored.
  local hashes, title = line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$")
  if hashes then
    info.type = "header"
    info.level = hashes:len()
    info.text = title
    return info
  end

  -- Numbered list item: up to three leading spaces, digits, a dot, blanks, text.
  local number, numbered_text = line:match("^ ? ? ?(%d+)%.[ \t]+(.+)")
  if number then
    info.type = "list_item"
    info.list_type = "numeric"
    info.number = tonumber(number)
    info.text = numbered_text
    return info
  end

  -- Bulleted list item introduced by *, + or -.
  local bullet, bullet_text = line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)")
  if bullet then
    info.type = "list_item"
    info.list_type = "bullet"
    info.bullet = bullet
    info.text = bullet_text
    return info
  end

  local quoted = line:match("^>[ \t]?(.*)")
  if quoted then
    info.type = "blockquote"
    info.text = quoted
    return info
  end

  if is_protected(line) then
    info.type = "raw"
    info.html = unprotect(line)
    return info
  end

  info.type = "normal"
  return info
end
490 | ||
-- Finds headers consisting of a normal line followed by a ruler of "=" or "-"
-- characters and converts them to header entries ("=" gives level 1, "-" gives
-- level 2). The ruler line is removed. The array is modified in place and returned.
function headers(array)
  local i = 1
  while i <= #array - 1 do
    local current, following = array[i], array[i + 1]
    if current.type == "normal" and following.type == "ruler" and
      (following.ruler_char == "-" or following.ruler_char == "=") then
      local level = 2
      if following.ruler_char == "=" then level = 1 end
      table.remove(array, i + 1)
      array[i] = {
        line = current.line,
        text = current.line,
        type = "header",
        level = level,
      }
    end
    i = i + 1
  end
  return array
end
509 | ||
-- Find list blocks and convert them to protected data blocks.
--   array:   array of classified lines (see classify).
--   sublist: when true, a list may start at any list_item line instead of only
--            at the top of the array or after a blank line.
-- Each list found is replaced by a single "raw" entry that holds its HTML.
-- list_item lines that do not end up in a list are turned into "normal" lines.
function lists(array, sublist)
  -- Indentation placed in front of every <li> element.
  local item_indent = string.rep(" ", 4)

  local function process_list(arr)
    local function any_blanks(arr)
      for i = 1, #arr do
        if arr[i].type == "blank" then return true end
      end
      return false
    end

    -- Groups the lines into one array per list item; every list_item line
    -- after the first starts a new group.
    local function split_list_items(arr)
      local acc = {arr[1]}
      local res = {}
      for i = 2, #arr do
        if arr[i].type == "list_item" then
          table.insert(res, acc)
          acc = {arr[i]}
        else
          table.insert(acc, arr[i])
        end
      end
      table.insert(res, acc)
      return res
    end

    -- Converts the lines of one item to an <li> element. With block set the
    -- item text goes through the full block transform; otherwise only nested
    -- lists are handled and no paragraph tags are produced.
    local function process_list_item(lines, block)
      while lines[#lines].type == "blank" do
        table.remove(lines)
      end

      local parts = {lines[1].text}
      for i = 2, #lines do
        parts[#parts + 1] = outdent(lines[i].line)
      end
      local itemtext = table.concat(parts, "\n")

      if block then
        itemtext = block_transform(itemtext, true)
      else
        local sublines = map(split(itemtext), classify)
        sublines = lists(sublines, true)
        sublines = blocks_to_html(sublines, true)
        itemtext = table.concat(sublines, "\n")
      end
      -- Preformatted content must keep its exact whitespace, so it is not indented.
      if not itemtext:find("<pre>") then itemtext = indent(itemtext) end
      return item_indent .. "<li>" .. itemtext .. "</li>"
    end

    local block_list = any_blanks(arr)
    local items = split_list_items(arr)
    local out = {}
    for _, item in ipairs(items) do
      out[#out + 1] = process_list_item(item, block_list) .. "\n"
    end
    local body = table.concat(out)
    if arr[1].list_type == "numeric" then
      return "<ol>\n" .. body .. "</ol>"
    else
      return "<ul>\n" .. body .. "</ul>"
    end
  end

  -- Finds the range of lines composing the first list in the array. A list
  -- starts with (^ list_item) or (blank list_item) and ends with
  -- (blank* $) or (blank normal).
  --
  -- A sublist can start with just (list_item) and does not need a blank.
  local function find_list(array, sublist)
    local function find_list_start(array, sublist)
      if array[1] and array[1].type == "list_item" then return 1 end
      if sublist then
        for i = 1, #array do
          if array[i].type == "list_item" then return i end
        end
      else
        for i = 1, #array - 1 do
          if array[i].type == "blank" and array[i+1].type == "list_item" then
            return i + 1
          end
        end
      end
      return nil
    end

    local function find_list_end(array, start)
      local pos = #array
      for i = start, #array - 1 do
        if array[i].type == "blank" and array[i+1].type ~= "list_item"
          and array[i+1].type ~= "indented" and array[i+1].type ~= "blank" then
          pos = i - 1
          break
        end
      end
      -- Trailing blank lines do not belong to the list.
      while pos > start and array[pos].type == "blank" do
        pos = pos - 1
      end
      return pos
    end

    local start = find_list_start(array, sublist)
    if not start then return nil end
    return start, find_list_end(array, start)
  end

  while true do
    local start, stop = find_list(array, sublist)
    if not start then break end
    local text = process_list(splice(array, start, stop))
    local info = {
      line = text,
      type = "raw",
      html = text
    }
    array = splice(array, start, stop, {info})
  end

  -- Convert any remaining list items to normal
  for _, line in ipairs(array) do
    if line.type == "list_item" then line.type = "normal" end
  end

  return array
end
632 | ||
-- Find and convert blockquote markers.
-- Every run of lines that forms a blockquote is replaced by one "raw" entry
-- holding the <blockquote> HTML. Returns the updated array of lines.
function blockquotes(lines)
  -- Returns the first and last index of the first blockquote, or nil.
  local function find_blockquote(lines)
    local start
    for i, line in ipairs(lines) do
      if line.type == "blockquote" then
        start = i
        break
      end
    end
    if not start then return nil end

    local stop = #lines
    for i = start + 1, #lines do
      local kind = lines[i].type
      if kind == "blank" or kind == "blockquote" then
        -- still inside the quote
      elseif kind == "normal" then
        -- A normal line continues the quote unless a blank line precedes it.
        if lines[i-1].type == "blank" then stop = i - 1 break end
      else
        stop = i - 1 break
      end
    end
    while lines[stop].type == "blank" do stop = stop - 1 end
    return start, stop
  end

  -- Runs the block transform on the quoted text and wraps the result.
  local function process_blockquote(lines)
    local texts = {}
    for i = 1, #lines do
      texts[i] = lines[i].text
    end
    local bt = block_transform(table.concat(texts, "\n"))
    -- Preformatted content must keep its exact whitespace, so it is not indented.
    if not bt:find("<pre>") then bt = indent(bt) end
    return "<blockquote>\n" .. string.rep(" ", 4) .. bt ..
      "\n</blockquote>"
  end

  while true do
    local start, stop = find_blockquote(lines)
    if not start then break end
    local text = process_blockquote(splice(lines, start, stop))
    local info = {
      line = text,
      type = "raw",
      html = text
    }
    lines = splice(lines, start, stop, {info})
  end
  return lines
end
682 | ||
-- Find and convert codeblocks.
-- Every run of indented lines (blank lines in between are allowed) is replaced
-- by one "raw" entry holding a <pre><code> element. Returns the updated array.
function codeblocks(lines)
  -- Returns the first and last index of the first code block, or nil.
  local function find_codeblock(lines)
    local start
    for i, line in ipairs(lines) do
      if line.type == "indented" then start = i break end
    end
    if not start then return nil end

    local stop = #lines
    for i = start + 1, #lines do
      if lines[i].type ~= "indented" and lines[i].type ~= "blank" then
        stop = i - 1
        break
      end
    end
    -- Trailing blank lines do not belong to the code block.
    while lines[stop].type == "blank" do stop = stop - 1 end
    return start, stop
  end

  -- Outdents, encodes and detabs each line and wraps the result.
  local function process_codeblock(lines)
    local encoded = {}
    for i = 1, #lines do
      encoded[i] = detab(encode_code(outdent(lines[i].line)))
    end
    return "<pre><code>" .. table.concat(encoded, "\n") .. "\n</code></pre>"
  end

  while true do
    local start, stop = find_codeblock(lines)
    if not start then break end
    local text = process_codeblock(splice(lines, start, stop))
    local info = {
      line = text,
      type = "raw",
      html = text
    }
    lines = splice(lines, start, stop, {info})
  end
  return lines
end
724 | ||
-- Convert lines to html code.
--   lines:         array of classified lines.
--   no_paragraphs: when true, runs of normal lines are not wrapped in <p> tags.
-- Returns an array of HTML strings: rulers become <hr/>, raw entries are
-- copied, consecutive normal lines are merged and span-transformed, headers
-- become <hN> elements, and every other line is passed through unchanged.
function blocks_to_html(lines, no_paragraphs)
  local out = {}
  local i = 1
  while i <= #lines do
    local line = lines[i]
    if line.type == "ruler" then
      table.insert(out, "<hr/>")
    elseif line.type == "raw" then
      table.insert(out, line.html)
    elseif line.type == "normal" then
      local s = line.line

      -- Merge the following normal lines into the same paragraph.
      while i + 1 <= #lines and lines[i+1].type == "normal" do
        i = i + 1
        s = s .. "\n" .. lines[i].line
      end

      if no_paragraphs then
        table.insert(out, span_transform(s))
      else
        table.insert(out, "<p>" .. span_transform(s) .. "</p>")
      end
    elseif line.type == "header" then
      local s = "<h" .. line.level .. ">" .. span_transform(line.text) .. "</h" .. line.level .. ">"
      table.insert(out, s)
    else
      table.insert(out, line.line)
    end
    i = i + 1
  end
  return out
end
758 | ||
-- Perform all the block level transforms on the text and return the resulting
-- HTML as one string. sublist is passed on to lists().
function block_transform(text, sublist)
  local lines = map(split(text), classify)
  lines = headers(lines)
  lines = lists(lines, sublist)
  lines = codeblocks(lines)
  lines = blockquotes(lines)
  lines = blocks_to_html(lines)
  return table.concat(lines, "\n")
end
771 | ||
-- Debug function for printing a line array to see the result
-- of partial transforms. Prints index, type and text (or the raw line when
-- there is no text) for every entry.
function print_lines(lines)
  for i, line in ipairs(lines) do
    print(i, line.type, line.text or line.line)
  end
end
779 | ||
780 | ---------------------------------------------------------------------- | |
781 | -- Span transform | |
782 | ---------------------------------------------------------------------- | |
783 | ||
784 | -- Functions for transforming the text at the span level. | |
785 | ||
-- These characters may need to be escaped because they have a special
-- meaning in markdown.
escape_chars = "'\\`*_{}[]()>#+-.!'"
-- Maps each escaped text to the hash that stands in for it.
escape_table = {}

-- Rebuilds escape_table so that every character of escape_chars maps to its hash.
function init_escape_table()
  escape_table = {}
  for i = 1, #escape_chars do
    local c = escape_chars:sub(i, i)
    escape_table[c] = hash(c)
  end
end
798 | ||
-- Adds a new escape to the escape table (if the text is not in it yet) and
-- returns the hash that stands for the text.
function add_escape(text)
  local escaped = escape_table[text]
  if not escaped then
    escaped = hash(text)
    escape_table[text] = escaped
  end
  return escaped
end
806 | ||
-- Escape characters that should not be disturbed by markdown.
-- The text is tokenized as html; inside tags "*" and "_" are replaced by their
-- hashes, outside tags backslash escapes are encoded.
function escape_special_chars(text)
  local out = {}
  for _, token in ipairs(tokenize_html(text)) do
    local t = token.text
    if token.type == "tag" then
      -- In tags, encode * and _ so they don't conflict with their use in markdown.
      t = t:gsub("%*", escape_table["*"])
      t = t:gsub("%_", escape_table["_"])
    else
      t = encode_backslash_escapes(t)
    end
    out[#out + 1] = t
  end
  return table.concat(out)
end
825 | ||
-- Encode backslash-escaped characters in the markdown source: a backslash
-- followed by one of the characters in escape_chars is replaced by that
-- character's hash from escape_table.
function encode_backslash_escapes(t)
  for i = 1, escape_chars:len() do
    local c = escape_chars:sub(i, i)
    t = t:gsub("\\%" .. c, escape_table[c])
  end
  return t
end
834 | ||
-- Unescape characters that have been encoded: every hash found in escape_table
-- is replaced by the text it stands for. The replacement is repeated until the
-- text no longer changes, because an unescaped text may itself contain hashes.
function unescape_special_chars(t)
  repeat
    local before = t
    for original, hashed in pairs(escape_table) do
      -- "%" is special in gsub replacement strings, so it has to be doubled.
      local replacement = original:gsub("%%", "%%%%")
      t = t:gsub(hashed, replacement)
    end
  until t == before
  return t
end
845 | ||
-- Encode/escape certain characters inside Markdown code runs.
-- The point is that in code, these characters are literals,
-- and lose their special Markdown meanings.
--
-- &, < and > become HTML entities; after that every key of escape_table that
-- occurs in the text is replaced by its hash token.
function encode_code(s)
	-- "&" has to go first so the entities produced below are not re-encoded.
	s = s:gsub("&", "&amp;")
	s = s:gsub("<", "&lt;")
	s = s:gsub(">", "&gt;")
	for k, v in pairs(escape_table) do
		-- escape_table also holds multi-character keys (see add_escape), so
		-- escape every non-alphanumeric character, not just the first one.
		local literal = k:gsub("%W", "%%%0")
		s = s:gsub(literal, v)
	end
	return s
end
858 | ||
-- Handle backtick blocks: each run of backticks that is closed by an equally
-- long run on the same line is replaced by the hash token of a
-- <code>...</code> element.
function code_spans(s)
	local backslash, backtick = escape_table["\\"], escape_table["`"]
	-- Hide escaped backslashes and escaped backticks before scanning.
	s = s:gsub("\\\\", backslash)
	s = s:gsub("\\`", backtick)

	local cursor = 1
	local open_from, open_to = s:find("`+", cursor)
	while open_from do
		-- Look for a closing run with the same number of backticks
		local fence = string.rep("`", open_to - open_from + 1)
		local close_from, close_to = s:find(fence, open_to + 1)
		local newline_at = s:find("\n", open_to + 1)
		if close_from and not (newline_at and newline_at <= close_from) then
			local body = s:sub(open_to + 1, close_from - 1)
			body = body:gsub("^[ \t]+", ""):gsub("[ \t]+$", "")
			-- Inside code the hidden escapes stand for the literal two
			-- characters, so put the backslash token back in front of them.
			body = body:gsub(backslash, backslash .. backslash)
			body = body:gsub(backtick, backslash .. backtick)
			local html = add_escape("<code>" .. encode_code(body) .. "</code>")
			s = s:sub(1, open_from - 1) .. html .. s:sub(close_to + 1)
			cursor = open_from + html:len()
		else
			cursor = open_to + 1
		end
		open_from, open_to = s:find("`+", cursor)
	end
	return s
end
888 | ||
-- Encode text for use inside a double-quoted HTML attribute (alt text, URLs
-- and titles are passed through here): &, " and < become HTML entities.
-- A nil or false argument is returned unchanged.
function encode_alt(s)
	if not s then return s end
	-- "&" first, so the entities produced below are not encoded twice.
	s = s:gsub("&", "&amp;")
	s = s:gsub('"', "&quot;")
	s = s:gsub("<", "&lt;")
	return s
end
897 | ||
-- Handle image references: ![alt][id] (looked up in link_database) and
-- ![alt](url "title"). Each recognised image is replaced by the hash token
-- of its <img .../> tag; unresolved references are left untouched.
function images(text)
	local function reference_link(alt, id)
		local label = alt:match("%b[]"):sub(2, -2)
		id = id:match("%[(.*)%]"):lower()
		-- ![alt][] is an implicit reference: the alt text itself is the id.
		if id == "" then id = label:lower() end
		link_database[id] = link_database[id] or {}
		local entry = link_database[id]
		-- Returning nil makes gsub keep the original text.
		if not entry.url then return nil end
		local url = encode_alt(entry.url)
		local title = encode_alt(entry.title)
		if title then title = " title=\"" .. title .. "\"" else title = "" end
		return add_escape('<img src="' .. url .. '" alt="' .. encode_alt(label) .. '"' .. title .. "/>")
	end

	local function inline_link(alt, link)
		alt = encode_alt(alt:match("%b[]"):sub(2, -2))
		-- Try the (url "title") form first, then the plain (url) form.
		local url, title = link:match("%(<?(.-)>?[ \t]*['\"](.+)['\"]")
		url = url or link:match("%(<?(.-)>?%)")
		url = encode_alt(url)
		title = encode_alt(title)
		if title then
			return add_escape('<img src="' .. url .. '" alt="' .. alt .. '" title="' .. title .. '"/>')
		else
			return add_escape('<img src="' .. url .. '" alt="' .. alt .. '"/>')
		end
	end

	text = text:gsub("!(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link)
	text = text:gsub("!(%b[])(%b())", inline_link)
	return text
end
930 | ||
-- Handle anchor references: [text][id] (looked up in link_database) and
-- [text](url "title"). The opening and closing tags are replaced by hash
-- tokens while the link text stays in place; unresolved references are left
-- untouched.
function anchors(text)
	local function reference_link(text, id)
		text = text:match("%b[]"):sub(2, -2)
		id = id:match("%b[]"):sub(2, -2):lower()
		-- [text][] is an implicit reference: the link text itself is the id.
		if id == "" then id = text:lower() end
		link_database[id] = link_database[id] or {}
		local entry = link_database[id]
		-- Returning nil makes gsub keep the original text.
		if not entry.url then return nil end
		local url = encode_alt(entry.url)
		local title = encode_alt(entry.title)
		if title then title = " title=\"" .. title .. "\"" else title = "" end
		return add_escape("<a href=\"" .. url .. "\"" .. title .. ">") .. text .. add_escape("</a>")
	end

	local function inline_link(text, link)
		text = text:match("%b[]"):sub(2, -2)
		-- Try the (url "title") form first, then the plain (url) form.
		local url, title = link:match("%(<?(.-)>?[ \t]*['\"](.+)['\"]")
		title = encode_alt(title)
		url = url or link:match("%(<?(.-)>?%)") or ""
		url = encode_alt(url)
		local open_tag
		if title then
			open_tag = "<a href=\"" .. url .. "\" title=\"" .. title .. "\">"
		else
			open_tag = "<a href=\"" .. url .. "\">"
		end
		-- The closing tag is hidden in every case, so a title does not change
		-- how later passes see the link.
		return add_escape(open_tag) .. text .. add_escape("</a>")
	end

	text = text:gsub("(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link)
	text = text:gsub("(%b[])(%b())", inline_link)
	return text
end
963 | ||
-- Handle auto links, i.e. <http://www.google.com/>, and auto mail links such
-- as <mailto:someone@example.com> or <someone@example.com>.
function auto_links(text)
	local function link(url)
		local open_tag = add_escape("<a href=\"" .. url .. "\">")
		return open_tag .. url .. "</a>"
	end

	-- Writes s as a mix of hex entities, decimal entities and plain
	-- characters to (perhaps) fool spambots. The mix is deterministic so the
	-- output can be unit tested: 45% hex, 45% dec, 10% plain.
	local function encode_email_address(s)
		local encoders = {
			{rate = 0.45, count = 1, code = function(c) return string.format("&#x%x;", c:byte()) end},
			{rate = 0.45, count = 0, code = function(c) return string.format("&#%d;", c:byte()) end},
			{rate = 0.1, count = 0, code = function(c) return c end},
		}
		local plain = encoders[3]

		-- Moves the encoder with the larger count in front.
		local function order(i, j)
			if encoders[i].count < encoders[j].count then
				encoders[i], encoders[j] = encoders[j], encoders[i]
			end
		end

		local out = {}
		for i = 1, s:len() do
			for k = 1, 3 do
				local encoder = encoders[k]
				encoder.count = encoder.count + encoder.rate
			end
			order(1, 2)
			order(2, 3)
			order(1, 2)

			local chosen = encoders[1]
			local c = s:sub(i, i)
			-- Force encoding of "@" to make the address less visible.
			if c == "@" and chosen == plain then chosen = encoders[2] end
			out[i] = chosen.code(c)
			chosen.count = chosen.count - 1
		end
		return table.concat(out)
	end

	local function mail(s)
		s = unescape_special_chars(s)
		local href = encode_email_address("mailto:" .. s)
		local label = encode_email_address(s)
		return add_escape("<a href=\"" .. href .. "\">") .. label .. "</a>"
	end

	-- links
	text = text:gsub("<(https?:[^'\">%s]+)>", link)
	text = text:gsub("<(ftp:[^'\">%s]+)>", link)

	-- mail addresses
	text = text:gsub("<mailto:([^'\">%s]+)>", mail)
	text = text:gsub("<([-.%w]+%@[-.%w]+)>", mail)
	return text
end
1011 | ||
-- Encode free standing amps (&) and angles (<) as &amp; and &lt;. Note that
-- this does not encode free >.
function amps_and_angles(s)
	-- Encode amps that are not part of an &..; expression: an "&" is left
	-- alone only if a ";" follows within 15 characters and before any
	-- whitespace or further "&".
	local pos = 1
	while true do
		local amp = s:find("&", pos, true)
		if not amp then break end
		local semi = s:find(";", amp + 1, true)
		local stop = s:find("[ \t\n&]", amp + 1)
		if not semi or (stop and stop < semi) or (semi - amp) > 15 then
			s = s:sub(1, amp - 1) .. "&amp;" .. s:sub(amp + 1)
		end
		pos = amp + 1
	end

	-- Encode naked <'s, i.e. those that cannot start a tag, or end the text
	s = s:gsub("<([^a-zA-Z/?$!])", "&lt;%1")
	s = s:gsub("<$", "&lt;")

	-- what about >, nothing done in the original markdown source to handle them
	return s
end
1037 | ||
-- Handles emphasis markers (* and _) in the text: doubled markers become
-- <strong>, single markers become <em>.
function emphasis(text)
	-- Wraps everything enclosed by marker, trying the inner patterns in order.
	local function wrap(marker, inner_patterns, tag)
		local replacement = "<" .. tag .. ">%1</" .. tag .. ">"
		for i = 1, #inner_patterns do
			text = text:gsub(marker .. inner_patterns[i] .. marker, replacement)
		end
	end

	local strong_inner = {
		"([^%s][%*%_]?)",
		"([^%s][^<>]-[^%s][%*%_]?)",
	}
	local em_inner = {
		"([^%s_])",
		"(<strong>[^%s_]</strong>)",
		"([^%s_][^<>_]-[^%s_])",
		"([^<>_]-<strong>[^<>_]-</strong>[^<>_]-)",
	}

	-- Strong has to be handled first, otherwise ** would be read as two *.
	wrap("%*%*", strong_inner, "strong")
	wrap("%_%_", strong_inner, "strong")
	wrap("%*", em_inner, "em")
	wrap("%_", em_inner, "em")
	return text
end
1052 | ||
-- Handles line break markers in the text: a line that ends in two or more
-- spaces gets a <br/> before its newline. A single trailing space is not a
-- break marker. Returns gsub's results, i.e. the new text followed by the
-- number of replacements.
function line_breaks(text)
	-- " [ ]+" is one space followed by at least one more space.
	return text:gsub(" [ ]+\n", " <br/>\n")
end
1057 | ||
-- Perform all span level transforms. The passes run in a fixed order; each
-- one receives the text produced by the previous one.
function span_transform(text)
	local passes = {
		code_spans,
		escape_special_chars,
		images,
		anchors,
		auto_links,
		amps_and_angles,
		emphasis,
		line_breaks,
	}
	for i = 1, 8 do
		text = passes[i](text)
	end
	return text
end
1070 | ||
1071 | ---------------------------------------------------------------------- | |
1072 | -- Markdown | |
1073 | ---------------------------------------------------------------------- | |
1074 | ||
-- Normalizes the text so that later stages have fewer variations to deal
-- with: unix line endings, tabs expanded by detab(), whitespace-only lines
-- emptied, and one newline added at each end.
function cleanup(text)
	-- DOS (\r\n) and Mac (\r) line endings both become \n
	text = text:gsub("\r\n?", "\n")

	-- Convert all tabs to spaces
	text = detab(text)

	-- Blank out lines holding only spaces and tabs. Adjacent lines share a
	-- newline, so a single gsub pass can miss some; repeat until none is left.
	local replaced
	repeat
		text, replaced = text:gsub("\n[ \t]+\n", "\n\n")
	until replaced == 0

	return "\n" .. text .. "\n"
end
1094 | ||
-- Strips link definitions ("[id]: url "title"") from the text and stores the
-- data in a lookup table keyed by the lower-cased id.
-- Returns the stripped text and the lookup table.
function strip_link_definitions(text)
	local linkdb = {}

	local function link_def(id, url, title)
		local key = id:match("%[(.+)%]")
		-- "[]: url" has no id to file the link under; returning nil makes
		-- gsub leave that text alone.
		if not key then return nil end
		key = key:lower()
		local entry = linkdb[key] or {}
		linkdb[key] = entry
		entry.url = url or entry.url
		entry.title = title or entry.title
		return ""
	end

	local def_no_title = "\n ? ? ?(%b[]):[ \t]*\n?[ \t]*<?([^%s>]+)>?[ \t]*"
	local patterns = {
		-- The titled variants go first so a title is not left behind.
		def_no_title .. "[ \t]+\n?[ \t]*[\"'(]([^\n]+)[\"')][ \t]*",
		def_no_title .. "[ \t]*\n[ \t]*[\"'(]([^\n]+)[\"')][ \t]*",
		def_no_title .. "[ \t]*\n?[ \t]+[\"'(]([^\n]+)[\"')][ \t]*",
		def_no_title,
	}
	for i = 1, #patterns do
		text = text:gsub(patterns[i], link_def)
	end
	return text, linkdb
end
1118 | ||
-- Link definitions collected from the document currently being processed.
link_database = {}

-- Main markdown processing function: converts markdown source text to HTML.
function markdown(text)
	init_hash(text)
	init_escape_table()

	local normalized = cleanup(text)
	local guarded = protect(normalized)
	local stripped, links = strip_link_definitions(guarded)
	link_database = links
	local html = block_transform(stripped)
	-- Swap the hash tokens back for the strings they stand for.
	html = unescape_special_chars(html)
	return html
end
1133 | ||
1134 | ---------------------------------------------------------------------- | |
1135 | -- End of module | |
1136 | ---------------------------------------------------------------------- | |
1137 | ||
-- Make _G the environment of this chunk again, then lock the module table.
-- NOTE(review): M.lock is defined outside this section; presumably it guards
-- M against further writes -- confirm against its definition.
setfenv(1, _G)
M:lock()

-- Expose markdown function to the world
_G.markdown = M.markdown
1143 | ||
-- Class for parsing command-line options
local OptionParser = {}
OptionParser.__index = OptionParser

-- Creates a new option parser
function OptionParser:new()
	local o = {short = {}, long = {}}
	setmetatable(o, self)
	return o
end

-- Calls f() whenever a flag with specified short and long name is encountered
function OptionParser:flag(short, long, f)
	local info = {type = "flag", f = f}
	if short then self.short[short] = info end
	if long then self.long[long] = info end
end

-- Calls f(param) whenever a parameter flag with specified short and long name is encountered
function OptionParser:param(short, long, f)
	local info = {type = "param", f = f}
	if short then self.short[short] = info end
	if long then self.long[long] = info end
end

-- Calls f(v) for each non-flag argument
function OptionParser:arg(f)
	self.arg = f
end

-- Runs the option parser for the specified set of arguments. Returns true if all arguments
-- were successfully parsed and false otherwise (after printing a message).
--
-- Understands "--name" and "--name VALUE", bundled short flags ("-abc"),
-- "-x VALUE" and "-xVALUE", and "--", after which every remaining argument
-- is treated as a non-flag argument.
function OptionParser:run(args)
	-- Read the handler with rawget: a plain self.arg would fall through
	-- __index to the arg() method above when no handler was registered.
	local on_arg = rawget(self, "arg")
	local pos = 1
	while pos <= #args do
		local arg = args[pos]
		if arg == "--" then
			-- Everything after "--" is a plain argument, however it looks.
			for i = pos + 1, #args do
				if on_arg then on_arg(args[i]) end
			end
			return true
		end
		if arg:match("^%-%-") then
			local info = self.long[arg:sub(3)]
			if not info then print("Unknown flag: " .. arg) return false end
			if info.type == "flag" then
				info.f()
				pos = pos + 1
			else
				local param = args[pos + 1]
				if not param then print("No parameter for flag: " .. arg) return false end
				info.f(param)
				pos = pos + 2
			end
		elseif arg:match("^%-") then
			for i = 2, arg:len() do
				local c = arg:sub(i, i)
				local info = self.short[c]
				if not info then print("Unknown flag: -" .. c) return false end
				if info.type == "flag" then
					info.f()
				else
					if i == arg:len() then
						-- "-x VALUE": the value is the next argument.
						local param = args[pos + 1]
						if not param then print("No parameter for flag: -" .. c) return false end
						info.f(param)
						pos = pos + 1
					else
						-- "-xVALUE": the value is the rest of this argument.
						info.f(arg:sub(i + 1))
					end
					break
				end
			end
			pos = pos + 1
		else
			if on_arg then on_arg(arg) end
			pos = pos + 1
		end
	end
	return true
end
1226 | ||
-- Handles the case when markdown is run from the command line.
-- arg is the list of command-line arguments; arg[0] (the script path) is
-- only used to locate markdown-tests.lua for the --test flag.
local function run_command_line(arg)
	-- Replaces every occurrence of placeholder in template with value, taken
	-- literally. A function is used as the replacement because gsub reads
	-- "%" in a replacement *string* as a capture reference, which would
	-- mangle or reject titles, paths and style sheets containing "%".
	local function fill(template, placeholder, value)
		return (template:gsub(placeholder, function() return value end))
	end

	-- Generate output for input s given options
	local function run(s, options)
		s = markdown(s)
		if not options.wrap_header then return s end
		local header
		if options.header then
			local f = io.open(options.header) or error("Could not open file: " .. options.header)
			header = f:read("*a")
			f:close()
		else
			header = [[
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=CHARSET" />
<title>TITLE</title>
<link rel="stylesheet" type="text/css" href="STYLESHEET" />
</head>
<body>
]]
			-- Without an explicit title, use the first heading found.
			local title = options.title or s:match("<h1>(.-)</h1>") or s:match("<h2>(.-)</h2>") or
				s:match("<h3>(.-)</h3>") or "Untitled"
			header = fill(header, "TITLE", title)
			if options.inline_style then
				local f = io.open(options.stylesheet)
				if not f then
					error("Could not include style sheet " .. options.stylesheet .. ": File not found")
				end
				local style = f:read("*a")
				f:close()
				header = fill(header, '<link rel="stylesheet" type="text/css" href="STYLESHEET" />',
					"<style type=\"text/css\"><!--\n" .. style .. "\n--></style>")
			else
				header = fill(header, "STYLESHEET", options.stylesheet)
			end
			header = fill(header, "CHARSET", options.charset)
		end
		local footer = "</body></html>"
		if options.footer then
			local f = io.open(options.footer) or error("Could not open file: " .. options.footer)
			footer = f:read("*a")
			f:close()
		end
		return header .. s .. footer
	end

	-- Generate output path name from input path name given options.
	local function outpath(path, options)
		if options.append then return path .. ".html" end
		local m = path:match("^(.+%.html)[^/\\]+$") if m then return m end
		m = path:match("^(.+%.)[^/\\]*$") if m and path ~= m .. "html" then return m .. "html" end
		return path .. ".html"
	end

	-- Default commandline options
	local options = {
		wrap_header = true,
		header = nil,
		footer = nil,
		charset = "utf-8",
		title = nil,
		stylesheet = "default.css",
		inline_style = false
	}
	local help = [[
Usage: markdown.lua [OPTION] [FILE]
Runs the markdown text markup to HTML converter on each file specified on the
command line. If no files are specified, runs on standard input.

No header:
-n, --no-wrap Don't wrap the output in <html>... tags.
Custom header:
-e, --header FILE Use content of FILE for header.
-f, --footer FILE Use content of FILE for footer.
Generated header:
-c, --charset SET Specifies charset (default utf-8).
-i, --title TITLE Specifies title (default from first <h1> tag).
-s, --style STYLE Specifies style sheet file (default default.css).
-l, --inline-style Include the style sheet file inline in the header.
Generated files:
-a, --append Append .html extension (instead of replacing).
Other options:
-h, --help Print this help text.
-t, --test Run the unit tests.
]]

	-- Cleared as soon as a file argument, --help or --test has been handled.
	local run_stdin = true
	local op = OptionParser:new()
	op:flag("n", "no-wrap", function() options.wrap_header = false end)
	op:param("e", "header", function(x) options.header = x end)
	op:param("f", "footer", function(x) options.footer = x end)
	op:param("c", "charset", function(x) options.charset = x end)
	op:param("i", "title", function(x) options.title = x end)
	op:param("s", "style", function(x) options.stylesheet = x end)
	op:flag("l", "inline-style", function() options.inline_style = true end)
	op:flag("a", "append", function() options.append = true end)
	op:flag("t", "test", function()
		-- The tests are looked up under the script's own path, with the
		-- file name swapped.
		local n = arg[0]:gsub("markdown.lua", "markdown-tests.lua")
		local f = io.open(n)
		if f then
			f:close() dofile(n)
		else
			error("Cannot find markdown-tests.lua")
		end
		run_stdin = false
	end)
	op:flag("h", "help", function() print(help) run_stdin = false end)
	op:arg(function(path)
		-- Files are converted as they are met, using the options seen so far.
		local file = io.open(path) or error("Could not open file: " .. path)
		local s = file:read("*a")
		file:close()
		s = run(s, options)
		file = io.open(outpath(path, options), "w") or error("Could not open output file: " .. outpath(path, options))
		file:write(s)
		file:close()
		run_stdin = false
	end)

	if not op:run(arg) then
		print(help)
		run_stdin = false
	end

	if run_stdin then
		local s = io.read("*a")
		s = run(s, options)
		io.write(s)
	end
end
1360 | ||
-- If we are being run from the command-line, act accordingly; otherwise
-- return the module table to whoever loaded this file.
-- arg[0] is checked for being a string first, because a host may set arg
-- without a script name in slot 0.
if arg and type(arg[0]) == "string" and arg[0]:find("markdown%.lua$") then
	run_command_line(arg)
else
	return M
end