feeds.lua

Mon, 31 Jan 2011 03:27:53 +0100

author
Kim Alvefur <zash@zash.se>
date
Mon, 31 Jan 2011 03:27:53 +0100
changeset 6
4afd0e6206b2
parent 5
58d3cecc68b7
permissions
-rw-r--r--

Add special handling of author and link elements, and a fluffy bunny

local io, string = io, string;
local error = error;
local print = print;
local t_insert = table.insert;
local ipairs = ipairs;

local http = require "socket.http";
local st = require "stanza";
local new_stream = require "xmlstream".new;

local xmlns_atom = "http://www.w3.org/2005/Atom";

--[[
By popular request; a fluffy bunny:

 (\ /)
 (. .)
c(")(")

--]]

module "feeds"

local translate_entry = {};

-- Store one top-level Atom element in the feed table:
--   entry  -> appended to the array part of feed
--   author -> feed.author, a table keyed by child tag name
--   link   -> feed.links, keyed by rel, or appended when rel is absent
--   other  -> feed[element name] = text of the element
function translate_entry.atom(feed, stanza)
	local name = stanza.name;

	if name == "entry" then
		feed[#feed+1] = stanza;
		return;
	end

	if name == "author" then
		local author = {};
		feed.author = author;
		for _, child in ipairs(stanza) do
			-- Only children that have a name and a first child are recorded
			if child.name and child[1] then
				author[child.name] = child[1];
			end
		end
		return;
	end

	if name == "link" then
		local links = feed.links;
		if not links then
			links = {};
			feed.links = links;
		end
		local href = stanza.attr.href;
		if href then
			local rel = stanza.attr.rel;
			if rel then
				links[rel] = href;
			else
				t_insert(links, href);
			end
		end
		return;
	end

	feed[name] = stanza:get_text();
end

-- RSS->Atom translator

-- Helpers to translate item child elements
local rss2atom = {};

-- Every helper receives the Atom entry under construction and one child tag
-- of an RSS <item>, and adds the matching Atom element to the entry through
-- the entry's tag()/text()/up() calls.

-- <title> keeps its name and text
function rss2atom.title(atom_entry, tag)
	atom_entry:tag("title"):text(tag:get_text()):up();
end

-- The text of the RSS <link> becomes the href attribute of the Atom <link>
function rss2atom.link(atom_entry, tag)
	atom_entry:tag("link", { href = tag:get_text() }):up();
end

-- The text of the RSS <author> is stored as the <email> of an Atom <author>
function rss2atom.author(atom_entry, tag)
	atom_entry:tag("author")
		:tag("email"):text(tag:get_text()):up()
	:up();
end

-- <guid> is renamed to <id>
function rss2atom.guid(atom_entry, tag)
	atom_entry:tag("id"):text(tag:get_text()):up();
end

-- The category text becomes the term, the domain attribute becomes the scheme
function rss2atom.category(atom_entry, tag)
	atom_entry:tag("category", { term = tag:get_text(), scheme = tag.attr.domain }):up();
end

-- <description> is renamed to <summary>
function rss2atom.description(atom_entry, tag)
	atom_entry:tag("summary"):text(tag:get_text()):up();
end

-- Month numbers keyed by the lower-cased first three letters of the name
local months = {
	jan = "01", feb = "02", mar = "03", apr = "04", may = "05", jun = "06";
	jul = "07", aug = "08", sep = "09", oct = "10", nov = "11", dec = "12";
};

-- Offsets for the zone names of RFC 822 (plus the common "UTC")
local rss_zones = {
	UT = "Z", GMT = "Z", UTC = "Z", Z = "Z";
	EST = "-05:00", EDT = "-04:00";
	CST = "-06:00", CDT = "-05:00";
	MST = "-07:00", MDT = "-06:00";
	PST = "-08:00", PDT = "-07:00";
};

-- Convert an RFC 822 style <pubDate> ("Mon, 31 Jan 2011 03:27:53 +0100")
-- into <published> with a "YYYY-MM-DDThh:mm:ssZONE" timestamp.
-- Nothing is added to the entry when the text does not look like such a
-- date or the month name is not recognised.
function rss2atom.pubDate(atom_entry, tag)
	local text = tag:get_text();
	if not text then return; end

	-- Trim surrounding whitespace first, otherwise the anchored weekday
	-- pattern and the zone lookup below would not match
	local pubdate = text:gsub("^%s+", ""):gsub("%s+$", ""):gsub("^%a+,%s*", "");
	local date, month, year, hour, minute, second, zone =
		pubdate:match("^(%d%d?) (%a+) (%d+) (%d+):(%d+):?(%d*) ?(.*)$");
	if not date then return; end

	month = months[month:sub(1,3):lower()];
	if not month then return; end -- string.format would raise on a nil month

	if #date == 1 then
		date = "0"..date;
	end
	if #year == 2 then -- GAH!
		-- Both operands are two-digit strings, so comparing them as strings
		-- orders them like numbers. tonumber() must not be used here: it is
		-- not captured as a local and module() was called without
		-- package.seeall, so it would be nil at this point.
		if year > "80" then
			year = "19"..year;
		else
			year = "20"..year;
		end
	end
	if #second == 0 then
		second = "00";
	end

	-- "+0100" has to be written "+01:00"; named zones are mapped to their
	-- offset; anything else is passed through unchanged
	local sign, zone_hours, zone_minutes = zone:match("^([+-])(%d%d):?(%d%d)$");
	if sign then
		zone = sign..zone_hours..":"..zone_minutes;
	else
		zone = rss_zones[zone:upper()] or zone;
	end

	local date_string = string.format("%s-%s-%sT%s:%s:%s%s", year, month, date, hour, minute, second, zone);
	atom_entry:tag("published"):text(date_string):up();
end

-- Translate a single RSS element and hand the result to the Atom handler.
--   item -> a new Atom <entry> is built; every child tag that has a helper
--           in rss2atom is translated into it, other children are skipped
--   link -> RSS keeps the URL as element text while translate_entry.atom
--           only reads the href attribute, so the text is moved into href
--           first (otherwise the channel link would be silently dropped)
--   other elements are passed to translate_entry.atom unchanged
function translate_entry.rss(feed, stanza)
	if stanza.name == "item" then
		local atom_entry = st.stanza("entry", { xmlns = xmlns_atom });
		for tag in stanza:childtags() do
			local translator = rss2atom[tag.name];
			if translator then
				translator(atom_entry, tag);
			end
		end
		translate_entry.atom(feed, atom_entry:reset());
	else
		if stanza.name == "link" and not stanza.attr.href then
			local url = stanza:get_text();
			-- An empty <link/> is left as it is, nothing useful to store
			if url and url ~= "" then
				stanza = st.stanza("link", { xmlns = xmlns_atom, href = url });
			end
		end
		translate_entry.atom(feed, stanza);
	end
end

-- Create an XML stream (via new_stream) whose callbacks fill in the given
-- feed table while a document is parsed.
local function new_feed_stream(feed)
	local handlers = {};

	-- Decide the feed type from the opening element(s). Clearing
	-- state.notopen marks the stream as opened; for RSS that only happens
	-- once <channel> has been seen.
	function handlers.streamopened(state, attr, name)
		if name == "feed" and attr.xmlns == xmlns_atom then
			state.type = "atom";
			state.xmlns = xmlns_atom;
			state.notopen = nil;
			return;
		end

		if name == "rss" and attr.xmlns == "" then
			-- Don't open until channel
			state.type = "rss";
			state.xmlns = "";
			return;
		end

		if state.type == "rss" and name == "channel" and attr.xmlns == state.xmlns then
			state.notopen = nil;
			return;
		end

		local xmlns_text = attr.xmlns and (" xmlns='"..attr.xmlns.."'") or "";
		error("Unsupported feed type: <"..name..xmlns_text..">");
	end

	-- Nothing to do when the document ends
	function handlers.streamclosed(state)
	end

	-- Dispatch to the translator for this feed type, but only for elements
	-- in the feed's default namespace
	function handlers.handlestanza(state, stanza)
		if stanza.attr.xmlns == state.xmlns then
			translate_entry[state.type](state, stanza);
		end
	end

	-- Parse errors are raised as Lua errors
	handlers.error = function (state, err, d)
		error(err..": "..d);
	end;

	return new_stream(feed, handlers);
end

-- Parse a feed document held in a string.
-- Returns the feed table filled in by the stream callbacks; errors raised
-- while parsing propagate to the caller.
function feed_from_string(data)
	local parsed = { notopen = true };
	new_feed_stream(parsed):feed(data);
	return parsed;
end

-- Open a feed by URL. file:// URLs go to open_file, http:// and https://
-- URLs go to open_http. Any other URL yields false plus an error message.
function open(url)
	if url:find("^file://") then
		return open_file(url);
	end

	if url:find("^https?://") then
		return open_http(url);
	end

	return false, "Could not understand URL: "..url;
end

-- Read and parse a feed from a local file.
-- filename may be a plain path or a file:// URL (the prefix is removed).
-- Returns the feed table, or nil plus an error message when the file can
-- not be opened or read. Errors raised by the parser propagate.
function open_file(filename)
	local path = filename:gsub("^file://", "");
	local file, err = io.open(path);
	if not file then
		return file, err;
	end

	-- Read everything and close the handle before parsing, so that a parse
	-- error raised by feed_from_string can not leak the open file
	local data, read_err = file:read("*a");
	file:close();
	if not data then
		return nil, read_err;
	end

	local feed = feed_from_string(data);

	return feed;
end

-- Fetch and parse a feed over HTTP.
-- Returns the feed table. When the request itself fails, returns nil plus
-- the error from http.request; when the server answers with a status other
-- than 200, returns false plus an error message. Errors raised by the
-- parser propagate.
function open_http(url)
	-- On success http.request returns the body followed by the status code
	local data, code = http.request(url);
	if not data then
		return data, code;
	end

	-- The body of an error response (404 page etc.) is not a feed, so do
	-- not hand it to the parser
	if code and code ~= 200 then
		return false, "HTTP request failed with status "..code;
	end

	local feed = feed_from_string(data);

	return feed;
end

mercurial