parse.lua

Sun, 01 Apr 2012 01:56:09 +0100

author
Matthew Wild <mwild1@gmail.com>
date
Sun, 01 Apr 2012 01:56:09 +0100
changeset 3
5b24d66365ec
parent 2
5c9d6eba65bc
child 4
0dc2ccd703e3
permissions
-rw-r--r--

Fix handling of quoted CSV fields, allowing this year's CSV to be properly parsed

local http = require "socket.http"
local have_https, https = pcall(require,"https");
local csv = require "csv"

-- Scan every record of the CSV file named by the first command-line argument
-- (default "gsoc12.csv"). For each record, fetch the first http(s) URL found
-- in field 5 and print the record's name (field 2) together with the URL when
-- the fetched page contains "lua" (any letter case) delimited by
-- non-alphanumeric characters. Records whose URL is missing, unsupported or
-- cannot be fetched are reported with a "[FAIL]" prefix.
for line in io.lines(arg[1] or "gsoc12.csv") do
	local project = {};
	csv.read_record(line, function (v) project[#project+1] = v; end);
	-- Blank or short records have no second field; use a placeholder so the
	-- message concatenations below cannot raise on nil and abort the scan.
	local name = project[2] or "(unknown)";
	local url = project[5] and project[5]:match("https?://[^\"]+");
	-- https:// URLs are only accepted when the optional https module loaded.
	if url and url:match("^http"..(have_https and "s?" or "")..":") then
		-- socket.http only handles plain HTTP, so send https:// URLs through the
		-- https module when it provides a request function; otherwise fall back
		-- to http.request as before.
		-- NOTE(review): assumes https.request is call-compatible with
		-- http.request (returns body, or nil plus an error) -- confirm.
		local request = http.request;
		if have_https and type(https) == "table" and https.request and url:match("^https:") then
			request = https.request;
		end
		local data, err = request(url);
		if not data then
			print("[FAIL] "..name..": "..tostring(err));
		elseif data:match("[%W%p][Ll][Uu][Aa][%W%p]") then
			print(name..": "..url);
		end
	elseif name ~= "Name" then
		-- A record whose second field is "Name" is skipped silently
		-- (presumably the CSV header row).
		print("[FAIL] "..name..": Couldn't fetch URL ("..(url or "couldn't find one")..")");
	end
end

mercurial