Mon, 06 Dec 2010 04:38:47 +0000
Rename xmppstream to xmlstream, and make some changes to make it compatible with Prosody
3
ab02540afcf3
A few changes to support RSS (converts to Atom \o/)
Matthew Wild <mwild1@gmail.com>
parents:
0
diff
changeset
|
1 | local io, string = io, string; |
ab02540afcf3
A few changes to support RSS (converts to Atom \o/)
Matthew Wild <mwild1@gmail.com>
parents:
0
diff
changeset
|
2 | local error = error; |
ab02540afcf3
A few changes to support RSS (converts to Atom \o/)
Matthew Wild <mwild1@gmail.com>
parents:
0
diff
changeset
|
3 | local print = print; |
ab02540afcf3
A few changes to support RSS (converts to Atom \o/)
Matthew Wild <mwild1@gmail.com>
parents:
0
diff
changeset
|
4 | |
0 | 5 | local http = require "socket.http"; |
6 | local st = require "stanza"; | |
7 | local new_stream = require "xmppstream".new; | |
8 | ||
9 | local xmlns_atom = "http://www.w3.org/2005/Atom"; | |
10 | ||
11 | module "feeds" | |
12 | ||
3
ab02540afcf3
A few changes to support RSS (converts to Atom \o/)
Matthew Wild <mwild1@gmail.com>
parents:
0
diff
changeset
|
-- Per-feed-type translators: translate_entry[feed.type](feed, stanza)
-- folds one element of the parsed document into the `feed` table.
local translate_entry = {};

-- Atom translator.
-- An <entry> element is appended to the array part of `feed`; any other
-- element is recorded as feed[<element name>] = <its text content>.
function translate_entry.atom(feed, stanza)
	local name = stanza.name;
	if name ~= "entry" then
		feed[name] = stanza:get_text();
		return;
	end
	feed[#feed+1] = stanza;
end
ab02540afcf3
A few changes to support RSS (converts to Atom \o/)
Matthew Wild <mwild1@gmail.com>
parents:
0
diff
changeset
|
22 | |
ab02540afcf3
A few changes to support RSS (converts to Atom \o/)
Matthew Wild <mwild1@gmail.com>
parents:
0
diff
changeset
|
23 | -- RSS->Atom translator |
ab02540afcf3
A few changes to support RSS (converts to Atom \o/)
Matthew Wild <mwild1@gmail.com>
parents:
0
diff
changeset
|
24 | |
ab02540afcf3
A few changes to support RSS (converts to Atom \o/)
Matthew Wild <mwild1@gmail.com>
parents:
0
diff
changeset
|
-- Helpers to translate item child elements.
-- rss2atom[<RSS element name>](atom_entry, tag) appends the Atom equivalent
-- of the RSS child element `tag` to `atom_entry`, the entry being built.
local rss2atom = {};

-- <title>text</title> -> <title>text</title>
function rss2atom.title(atom_entry, tag)
	atom_entry:tag("title"):text(tag:get_text()):up();
end

-- <link>url</link> -> <link href="url"/>
function rss2atom.link(atom_entry, tag)
	atom_entry:tag("link", { href = tag:get_text() }):up();
end

-- <author>text</author> -> <author><email>text</email></author>
function rss2atom.author(atom_entry, tag)
	atom_entry:tag("author")
		:tag("email"):text(tag:get_text()):up()
	:up();
end

-- <guid>text</guid> -> <id>text</id>
function rss2atom.guid(atom_entry, tag)
	atom_entry:tag("id"):text(tag:get_text()):up();
end

-- <category domain="d">text</category> -> <category term="text" scheme="d"/>
function rss2atom.category(atom_entry, tag)
	atom_entry:tag("category", { term = tag:get_text(), scheme = tag.attr.domain }):up();
end

-- <description>text</description> -> <summary>text</summary>
function rss2atom.description(atom_entry, tag)
	atom_entry:tag("summary"):text(tag:get_text()):up();
end

-- Lower-cased three-letter month abbreviation -> two-digit month number
local months = {
	jan = "01", feb = "02", mar = "03", apr = "04", may = "05", jun = "06";
	jul = "07", aug = "08", sep = "09", oct = "10", nov = "11", dec = "12";
};

-- <pubDate>Mon, 06 Dec 2010 04:38:47 +0000</pubDate>
--   -> <published>2010-12-06T04:38:47+00:00</published>
-- Nothing is appended when the text does not match the expected
-- "[Day,] D Mon YYYY HH:MM[:SS] [zone]" layout or the month is not recognised.
function rss2atom.pubDate(atom_entry, tag)
	-- Strip the optional leading day name ("Mon,") and the whitespace after it
	local pubdate = tag:get_text():gsub("^%a+,", ""):gsub("^%s*", "");
	local date, month, year, hour, minute, second, zone =
		pubdate:match("^(%d%d?) (%a+) (%d+) (%d+):(%d+):?(%d*) ?(.*)$");
	if not date then return; end
	if #date == 1 then
		date = "0"..date;
	end
	month = months[month:sub(1,3):lower()];
	-- An unrecognised month would otherwise reach string.format as nil
	if not month then return; end
	if #year == 2 then -- GAH!
		-- Both operands are two-digit strings, so string comparison orders
		-- them numerically. This avoids tonumber(), which this file does not
		-- localise before calling module().
		if year > "80" then
			year = "19"..year;
		else
			year = "20"..year;
		end
	end
	if zone == "UT" or zone == "GMT" then
		zone = "Z";
	else
		-- Numeric offsets arrive as +HHMM; Atom timestamps need +HH:MM.
		-- Any other zone text is passed through unchanged.
		local sign, zone_hours, zone_minutes = zone:match("^([+-])(%d%d)(%d%d)$");
		if sign then
			zone = sign..zone_hours..":"..zone_minutes;
		end
	end
	if #second == 0 then
		second = "00";
	end
	local date_string = string.format("%s-%s-%sT%s:%s:%s%s", year, month, date, hour, minute, second, zone);
	atom_entry:tag("published"):text(date_string):up();
end
ab02540afcf3
A few changes to support RSS (converts to Atom \o/)
Matthew Wild <mwild1@gmail.com>
parents:
0
diff
changeset
|
81 | |
ab02540afcf3
A few changes to support RSS (converts to Atom \o/)
Matthew Wild <mwild1@gmail.com>
parents:
0
diff
changeset
|
-- RSS translator.
-- An <item> is rebuilt as an Atom <entry>: each child element that has a
-- converter in rss2atom is converted, the rest are dropped. The result, and
-- every non-item element, is then handled by the Atom translator.
function translate_entry.rss(feed, stanza)
	if stanza.name ~= "item" then
		translate_entry.atom(feed, stanza);
		return;
	end

	local atom_entry = st.stanza("entry", { xmlns = xmlns_atom });
	for child in stanza:childtags() do
		local convert = rss2atom[child.name];
		if convert then
			convert(atom_entry, child);
		end
	end
	translate_entry.atom(feed, atom_entry:reset());
end
ab02540afcf3
A few changes to support RSS (converts to Atom \o/)
Matthew Wild <mwild1@gmail.com>
parents:
0
diff
changeset
|
97 | |
-- Create a parser stream (via new_stream) whose callbacks populate `feed`.
-- The callbacks record feed.type and feed.xmlns from the opening element,
-- clear feed.notopen once the document is recognised as a feed, and pass
-- each element in the feed's own namespace to translate_entry[feed.type].
local function new_feed_stream(feed)
	local callbacks = {
		streamopened = function (feed, attr, name)
			if name == "feed" and attr.xmlns == xmlns_atom then
				feed.type = "atom";
				feed.xmlns = xmlns_atom;
				feed.notopen = nil;
			elseif name == "rss" and attr.xmlns == "" then
				feed.type = "rss";
				feed.xmlns = "";
				-- Don't open until channel
			elseif feed.type == "rss" and name == "channel" and attr.xmlns == feed.xmlns then
				feed.notopen = nil;
			else
				error("Unsupported feed type: <"
					..name
					..(attr.xmlns and (" xmlns='"..attr.xmlns.."'") or "")
					..">"
				);
			end
		end;

		streamclosed = function (feed)
		end;

		handlestanza = function (feed, stanza)
			-- Skip tags not in the feed's default namespace
			if stanza.attr.xmlns ~= feed.xmlns then
				return;
			end

			translate_entry[feed.type](feed, stanza);
		end;

		error = function (feed, err, d)
			-- If no detail is supplied, raise the condition on its own:
			-- concatenating a nil detail would raise an unrelated
			-- "attempt to concatenate" error and hide the real message.
			if d == nil then
				error(err);
			end
			error(err..": "..d);
		end;
	};

	return new_stream(feed, callbacks);
end
139 | ||
-- Parse `data` as a feed document and return the resulting feed table.
-- The table starts out as { notopen = true } and is filled in by the
-- callbacks of the stream created by new_feed_stream.
function feed_from_string(data)
	local feed = { notopen = true };
	new_feed_stream(feed):feed(data);
	return feed;
end
148 | ||
-- Open a feed by URL, dispatching on the scheme:
--   file://...            -> open_file
--   http://..., https://  -> open_http
-- Any other URL yields false plus an error message.
function open(url)
	if url:match("^file://") then
		return open_file(url);
	end
	if url:match("^https?://") then
		return open_http(url);
	end
	return false, "Could not understand URL: "..url;
end
158 | ||
-- Read a feed from a local file. `filename` may carry a leading "file://",
-- which is stripped before opening.
-- Returns the parsed feed table, or nil plus an error message when the file
-- cannot be opened or read. Errors raised while parsing propagate to the caller.
function open_file(filename)
	local file, err = io.open((filename:gsub("^file://", "")));
	if not file then
		return file, err;
	end

	-- Read and close before parsing, so that an error raised by the parser
	-- cannot leave the file handle open.
	local data, read_err = file:read("*a");
	file:close();
	if not data then
		return nil, read_err;
	end

	local feed = feed_from_string(data);

	return feed;
end
171 | ||
-- Fetch a feed over HTTP(S) and parse it.
-- Returns the parsed feed table, or nil plus an error message when the
-- request fails or the server answers with a non-2xx status. Errors raised
-- while parsing propagate to the caller.
function open_http(url)
	local data, code = http.request(url);
	if not data then
		-- On failure the second result is the error message
		return data, code;
	end

	-- On success the second result is the numeric status code; an error
	-- page's body (404, 500, ...) must not be parsed as if it were the feed.
	if code < 200 or code >= 300 then
		return nil, "HTTP request failed with status "..code;
	end

	local feed = feed_from_string(data);

	return feed;
end