# HG changeset patch
# User Matthew Wild
# Date 1333241769 -3600
# Node ID 5b24d66365ece617011a1f334b9674cf8f9bb2dd
# Parent 5c9d6eba65bcfe983a503dee5c6fd2e79d36123b
Fix handling of quoted CSV fields, allowing this year's CSV to be properly parsed

diff -r 5c9d6eba65bc -r 5b24d66365ec csv.lua
--- a/csv.lua Sat Mar 31 22:13:32 2012 +0100
+++ b/csv.lua Sun Apr 01 01:56:09 2012 +0100
@@ -12,7 +12,8 @@
 local numeric_escape = (lpeg.R"09"^1)^-3;
 local escape = (lpeg.P"\\" * (char_escape + numeric_escape));
 
-local value = (escape + (1-delim))^0;
+local quoted_value = lpeg.P"\"" * ((1-lpeg.P"\"")^0) * lpeg.P"\"";
+local value = quoted_value + (escape + (1-delim))^0;
 
 local escape_map = setmetatable({
 	t = "\t", b = "\b", f = "\f";
diff -r 5c9d6eba65bc -r 5b24d66365ec parse.lua
--- a/parse.lua Sat Mar 31 22:13:32 2012 +0100
+++ b/parse.lua Sun Apr 01 01:56:09 2012 +0100
@@ -5,11 +5,7 @@
 for line in io.lines(arg[1] or "gsoc12.csv") do
 	local project = {};
 	csv.read_record(line, function (v) project[#project+1] = v; end);
-	-- FIXME: The original code doesn't work this year,
-	-- I think csv.lua doesn't handle quoted values
-	-- (ie. the 'Tags' column) properly.
-	--local url = project[5]:match("https?://[^\"]+");
-	local url = line:match(",([^,]+)$");
+	local url = project[5] and project[5]:match("https?://[^\"]+");
 	if url and url:match("^http"..(have_https and "s?" or "")..":") then
 		local data, err = http.request(url);
 		if not data then