feeds.lua

changeset 3
ab02540afcf3
parent 0
319733864f05
child 5
58d3cecc68b7
--- a/feeds.lua	Fri Sep 17 14:47:45 2010 +0100
+++ b/feeds.lua	Mon Dec 06 04:00:24 2010 +0000
@@ -1,4 +1,7 @@
-local io = io;
+local io, string = io, string;
+local error = error;
+local print = print;
+
 local http = require "socket.http";
 local st = require "stanza";
 local new_stream = require "xmppstream".new;
@@ -7,13 +10,111 @@
 
 module "feeds"
 
+-- Per-feed-type handlers, keyed by feed type name
+local translate_entry = {};
+
+-- Record one Atom stanza in the feed table.
+-- An <entry> stanza is appended to the array part of `feed`; for any other
+-- stanza its text is stored in `feed` under the stanza's name.
+function translate_entry.atom(feed, stanza)
+	if stanza.name ~= "entry" then
+		feed[stanza.name] = stanza:get_text();
+		return;
+	end
+	feed[#feed+1] = stanza;
+end
+
+-- RSS->Atom translator
+
+-- Helpers to translate item child elements
+-- Translators keyed by the name of the RSS item child element they handle
+local rss2atom = {};
+
+-- Copy the text of an RSS <title> into a <title> child of the Atom entry
+function rss2atom.title(atom_entry, tag)
+	local title = atom_entry:tag("title");
+	title:text(tag:get_text()):up();
+end
+
+-- The text of an RSS <link> becomes the href attribute of a <link> child
+function rss2atom.link(atom_entry, tag)
+	local attrs = { href = tag:get_text() };
+	atom_entry:tag("link", attrs):up();
+end
+
+-- The text of an RSS <author> is stored as <author><email>text</email></author>
+function rss2atom.author(atom_entry, tag)
+	local email = atom_entry:tag("author"):tag("email");
+	-- Close <email>, then <author>
+	email:text(tag:get_text()):up():up();
+end
+
+-- Copy the text of an RSS <guid> into an <id> child of the Atom entry
+function rss2atom.guid(atom_entry, tag)
+	local id = atom_entry:tag("id");
+	id:text(tag:get_text()):up();
+end
+
+-- An RSS <category> becomes a <category> child whose `term` is the element's
+-- text and whose `scheme` is the element's `domain` attribute (if any)
+function rss2atom.category(atom_entry, tag)
+	local attrs = {
+		term = tag:get_text();
+		scheme = tag.attr.domain;
+	};
+	atom_entry:tag("category", attrs):up();
+end
+
+-- Copy the text of an RSS <description> into a <summary> child of the Atom entry
+function rss2atom.description(atom_entry, tag)
+	local summary = atom_entry:tag("summary");
+	summary:text(tag:get_text()):up();
+end
+
+-- Lower-cased three-letter month abbreviation -> two-digit month number
+local months = {
+	jan = "01", feb = "02", mar = "03", apr = "04", may = "05", jun = "06";
+	jul = "07", aug = "08", sep = "09", oct = "10", nov = "11", dec = "12";
+};
+
+-- RFC 822 zone names -> RFC 3339 time offsets
+local rfc822_zones = {
+	UT = "Z", GMT = "Z", Z = "Z";
+	EST = "-05:00", EDT = "-04:00";
+	CST = "-06:00", CDT = "-05:00";
+	MST = "-07:00", MDT = "-06:00";
+	PST = "-08:00", PDT = "-07:00";
+};
+
+-- Translate an RSS <pubDate> (RFC 822 style, e.g. "Mon, 06 Dec 2010 04:00:24 +0000")
+-- into a <published> child of the Atom entry holding an RFC 3339 style timestamp
+-- (e.g. "2010-12-06T04:00:24+00:00").
+--   atom_entry: the Atom entry being built; receives the <published> child
+--   tag:        the RSS <pubDate> element
+-- Nothing is added when the text is missing, cannot be parsed, or names an
+-- unknown month.
+function rss2atom.pubDate(atom_entry, tag)
+	local text = tag:get_text();
+	if not text then return; end -- Guard against a missing text value
+
+	-- Trim leading whitespace, drop the optional "Mon," weekday, trim again
+	local pubdate = text:gsub("^%s+", ""):gsub("^%a+,", ""):gsub("^%s+", "");
+	-- Seconds and zone are optional; surrounding whitespace is not captured
+	local date, month, year, hour, minute, second, zone =
+		pubdate:match("^(%d%d?)%s+(%a+)%s+(%d+)%s+(%d+):(%d+):?(%d*)%s*(.-)%s*$");
+	if not date then return; end
+
+	month = months[month:sub(1,3):lower()];
+	if not month then return; end -- Unrecognised month name
+
+	if #date == 1 then
+		date = "0"..date;
+	end
+	if #hour == 1 then
+		hour = "0"..hour;
+	end
+	if #second == 0 then
+		second = "00";
+	end
+
+	if #year == 2 then -- GAH!
+		-- Compared as strings: for two-digit strings this orders the same as
+		-- the numeric comparison, and needs no global (tonumber is not
+		-- localised before module(), so it is unavailable here)
+		if year > "80" then
+			year = "19"..year;
+		else
+			year = "20"..year;
+		end
+	end
+
+	-- "+HHMM" / "-HHMM" -> "+HH:MM" / "-HH:MM"; known zone names -> offsets.
+	-- Anything else (including an absent zone) is passed through unchanged.
+	local sign, zone_hours, zone_minutes = zone:match("^([+-])(%d%d):?(%d%d)$");
+	if sign then
+		zone = sign..zone_hours..":"..zone_minutes;
+	else
+		zone = rfc822_zones[zone:upper()] or zone;
+	end
+
+	local date_string = string.format("%s-%s-%sT%s:%s:%s%s", year, month, date, hour, minute, second, zone);
+	atom_entry:tag("published"):text(date_string):up();
+end
+
+-- Translate a single item to atom
+-- Handle one RSS stanza.
+-- An <item> is rebuilt as an Atom <entry>: each child tag that has a
+-- translator in rss2atom is converted, children without one are ignored.
+-- The result (or, for any non-item stanza, the stanza itself) is then passed
+-- to translate_entry.atom.
+function translate_entry.rss(feed, stanza)
+	if stanza.name ~= "item" then
+		translate_entry.atom(feed, stanza);
+		return;
+	end
+
+	local entry = st.stanza("entry", { xmlns = xmlns_atom });
+	for child in stanza:childtags() do
+		local convert = rss2atom[child.name];
+		if convert then
+			convert(entry, child);
+		end
+	end
+	translate_entry.atom(feed, entry:reset());
+end
+
 local function new_feed_stream(feed)
 	local callbacks = {
-		default_ns = xmlns_atom;
-		stream_ns = xmlns_atom; stream_tag = "feed";
-		
-		streamopened = function (feed, attr)
-			feed.notopen = nil;
+		streamopened = function (feed, attr, name)
+			if name == "feed" and attr.xmlns == xmlns_atom then
+				feed.type = "atom";
+				feed.xmlns = xmlns_atom;
+				feed.notopen = nil;
+			elseif name == "rss" and attr.xmlns == "" then
+				feed.type = "rss";
+				feed.xmlns = "";
+				-- Don't open until channel
+			elseif feed.type == "rss" and name == "channel" and attr.xmlns == feed.xmlns then
+				feed.notopen = nil;
+			else
+				error("Unsupported feed type: <"
+					..name
+					..(attr.xmlns and (" xmlns='"..attr.xmlns.."'") or "")
+					..">"
+				);
+			end
 		end;
 		
 		streamclosed = function (feed)
@@ -21,15 +122,15 @@
 		
 		handlestanza = function (feed, stanza)
 			-- Skip tags not in the feed's default namespace
-			if stanza.attr.xmlns ~= nil then
+			if stanza.attr.xmlns ~= feed.xmlns then
 				return;
 			end
 			
-			if stanza.name == "entry" then
-				feed[#feed+1] = stanza;
-			else
-				feed[stanza.name] = stanza:get_text();
-			end
+			translate_entry[feed.type](feed, stanza);
+		end;
+		
+		error = function (feed, err, d)
+			error(err..": "..d);
 		end;
 	};
 	

mercurial