Created
August 10, 2014 09:57
-
-
Save anonymous/378e9522ffd8edb2e0e2 to your computer and use it in GitHub Desktop.
rss_proxy.d
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module rss_proxy; | |
import tools.base, std.file, tools.downloader; | |
/**
 * Fetches `url` with the external wget binary and returns the response body.
 *
 * wget writes the body to a temporary file, which is read back and then
 * removed again when the function exits (normally or through an exception).
 *
 * Params:
 *   url = address to fetch; handed to wget as one shell-quoted argument
 * Returns: the downloaded bytes, cast to a string
 * Throws: Exception if no temp file name could be obtained, or if the wget
 *         command could not be started or finished with a non-zero status
 */
string download(string url) {
  // Wrap an argument in single quotes so the shell takes it literally;
  // an embedded ' becomes '\'' (close quote, escaped quote, reopen quote).
  string shellQuote(string arg) {
    return "'" ~ arg.replace("'", "'\\''") ~ "'";
  }

  // NOTE(review): tmpnam only hands out a name and does not create the file,
  // so the name can be claimed by someone else before wget opens it.
  char* tmpfilp = tmpnam(null);
  if (!tmpfilp) throw new Exception("could not get temp file name");
  auto tmpfil = toString(tmpfilp);
  scope(exit) unlink(toStringz(tmpfil));

  auto cmd = "/usr/bin/wget --timeout=60 -q -O " ~ shellQuote(tmpfil) ~ " " ~ shellQuote(url);
  // system() yields -1 only when the command could not be spawned; any other
  // non-zero value means the shell or wget itself reported a failure.
  if (system(toStringz(cmd)) != 0) {
    throw new Exception("wget failed");
  }
  return cast(string) read(tmpfil);
}
/**
 * Wraps text in an opening and closing tag, followed by a newline.
 *
 * Params:
 *   tag = tag name, used verbatim in both the opening and the closing tag
 *   dg  = lazily evaluated content placed between the two tags
 * Returns: "<tag>content</tag>\n"; the content is not escaped
 */
string withTag(string tag, lazy string dg) {
  string opening = "<" ~ tag ~ ">";
  string closing = "</" ~ tag ~ ">\n";
  return opening ~ dg() ~ closing;
}
/**
 * In-memory model of a feed: channel-level fields plus a list of entries.
 * build() serialises the whole thing as nested tags via withTag.
 */
class RSSFile {
  /// Channel-level fields; build() emits them in this order.
  string title, link, description;

  /// A single feed item.
  static class Entry {
    string title, link, description;
  }

  /// Items, in the order build() writes them out.
  Entry[] entries;

  /**
   * Tells whether an equal entry is already stored.
   *
   * Params:
   *   entry = candidate to look for
   * Returns: true if some stored entry has the same title, link and
   *          description as `entry`, false otherwise
   */
  bool has(Entry entry) {
    foreach (known; entries) {
      if (known.title != entry.title) continue;
      if (known.link != entry.link) continue;
      if (known.description != entry.description) continue;
      return true;
    }
    return false;
  }

  /**
   * Serialises the feed.
   *
   * Returns: a "channel" element holding title, link and description,
   *          followed by one "item" element per entry; each item holds
   *          title, description and link, in that order
   */
  string build() {
    string items;
    foreach (item; entries) {
      string fields = withTag("title", item.title);
      fields ~= withTag("description", item.description);
      fields ~= withTag("link", item.link);
      items ~= withTag("item", fields);
    }

    string header = withTag("title", title);
    header ~= withTag("link", link);
    header ~= withTag("description", description);

    return withTag("channel", header ~ items);
  }
}
/**
 * Returns the link of the first entry of `rf`, with "://m." replaced by
 * "://www.".
 *
 * Params:
 *   rf = feed to inspect; must contain at least one entry
 * Returns: the rewritten link of rf.entries[0]
 * Throws: Exception if the feed has no entries, instead of indexing past the
 *         end of an empty array
 */
string getLastURL(RSSFile rf) {
  if (!rf.entries.length) {
    throw new Exception("feed has no entries, cannot determine last URL");
  }
  return rf.entries[0].link.replace("://m.", "://www.");
}
/**
 * Extracts the target of the "Next >" button from a downloaded page.
 *
 * Params:
 *   data = page markup to search
 * Returns: the text found between the button's onClick "self.location='"
 *          prefix and the "'\">Next >" suffix, as reported by between()
 */
string getNextLink(string data) {
  string opening = "TYPE=BUTTON onClick=\"self.location='";
  string closing = "'\">Next >";
  // NOTE(review): the meaning of the third argument to between() is defined
  // in tools.base and is not visible here - confirm before changing it.
  return data.between(opening, closing, true);
}
/// Exception type whose message is always "AbortEx".
class AbortException : Exception {
  this() {
    super("AbortEx");
  }
}
// fanfiction.net | |
/**
 * Walks the chapters of a fanfiction.net story and records each one as a
 * feed entry.
 *
 * Params:
 *   id    = story id, inserted into the URL of the first chapter
 *   start = optional existing feed; when given, crawling resumes at the URL
 *           returned by getLastURL(start) and new entries are added to it
 * Returns: `start` (extended) when it was supplied, otherwise a new feed
 *          whose title and link are taken from the first chapter page
 */
RSSFile parseStory(string id, RSSFile start = null) {
  bool resuming = start !is null;
  RSSFile feed = resuming ? start : new RSSFile;
  string url = resuming
    ? getLastURL(start)
    : Format("http://www.fanfiction.net/s/", id, "/1/");
  string page = url.download();

  if (!resuming) {
    feed.title = page.between("» <b>", "</b>");
    feed.link = url;
  }

  while (true) {
    logln("Process ", url);

    auto chapter = new RSSFile.Entry;
    // The chapter title is the selected option of the navigation drop-down.
    auto navigation = page.between("\"Chapter Navigation\"", "</table");
    chapter.title = navigation.between("selected>", "<");
    chapter.link = url.replace("://www.", "://m.");

    // New chapters go to the front, so entries[0] is the most recent one.
    if (!feed.has(chapter)) {
      feed.entries = chapter ~ feed.entries;
    }

    auto next = getNextLink(page);
    if (!next) break;

    url = url.followLink(next);
    page = url.download();
  }
  return feed;
}
/**
 * Produces a short single-line preview of `s` for use in error messages.
 *
 * Params:
 *   s = text to preview
 * Returns: at most the first 60 elements of `s`, with every newline
 *          replaced by a backslash
 */
string next_text(string s) {
  const int limit = 60;
  auto head = s.length > limit ? s[0 .. limit] : s;
  return head.replace("\n", "\\");
}
/**
 * Consumes one "<name>...</name>\n" element from the front of `data`.
 *
 * Params:
 *   data = input text; on success it is replaced by whatever follows the
 *          closing tag and its newline
 *   name = tag name expected at the very start of `data`
 *   dg   = callback that receives the text between the two tags
 * Throws: Exception("Malformed start: ...") if `data` does not begin with
 *         the opening tag, Exception("Missing end tag: ...") if no content
 *         terminated by the closing tag is found
 */
void expectTag(ref string data, string name, void delegate(string) dg) {
  auto post = data.startsWith("<"~name~">");
  if (!post) {
    throw new Exception("Malformed start: "~data.next_text()~", expected "~name);
  }

  auto closing = "</"~name~">\n";
  auto mid = post.between("", closing);
  if (!mid) {
    throw new Exception("Missing end tag: "~post.next_text());
  }

  dg(mid);
  data = post.between(closing, "");
}
/**
 * Convenience overload: consumes one "<name>...</name>\n" element from the
 * front of `data` and stores its content in `s`.
 *
 * Params:
 *   data = input text, advanced past the element on success
 *   name = tag name expected at the start of `data`
 *   s    = receives the text found between the tags
 */
void expectTag(ref string data, string name, ref string s) {
  void store(string content) {
    s = content;
  }
  expectTag(data, name, &store);
}
/**
 * Reads back a feed file in the format written by RSSFile.build().
 *
 * Params:
 *   fn = path of the file to read
 * Returns: a new RSSFile holding the channel's title, link and description
 *          and one entry per "item" element, in file order
 * Throws: whatever expectTag throws when the content does not match the
 *         expected tag sequence
 */
RSSFile parseOutput(string fn) {
  auto text = fn.read().castLike("");
  auto feed = new RSSFile;

  // Parses the inside of one "item" element and appends the entry.
  void readItem(string itemBody) {
    auto entry = new RSSFile.Entry;
    itemBody.expectTag("title", entry.title);
    itemBody.expectTag("description", entry.description);
    itemBody.expectTag("link", entry.link);
    feed.entries ~= entry;
  }

  // Parses the inside of the "channel" element: three header fields,
  // then items for as long as the remaining text starts with one.
  void readChannel(string channelBody) {
    channelBody.expectTag("title", feed.title);
    channelBody.expectTag("link", feed.link);
    channelBody.expectTag("description", feed.description);
    while (channelBody.startsWith("<item")) {
      channelBody.expectTag("item", &readItem);
    }
  }

  text.expectTag("channel", &readChannel);
  return feed;
}
import tools.log; | |
/**
 * Entry point: builds or refreshes the cached feed for one story.
 *
 * Expects exactly one argument besides the program name: the fanfiction.net
 * story id. If a cached feed for that id already exists it is loaded and
 * extended only when the last known chapter now has a "Next" link;
 * otherwise the story is crawled from its first chapter. The resulting feed
 * is written back to the cache file.
 *
 * Throws: Exception if the argument count is wrong or the id is not a
 *         non-empty run of decimal digits
 */
void main(string[] args) {
  agent_override = "Mozilla/5.0 (D/tools/rss_proxy downloader)";
  log_threads = false;

  // Drop the leading element so that only the story id remains in args.
  args.take();
  if (args.length != 1) {
    throw new Exception("fanfiction.net id expected! ");
  }

  // NOTE(review): the result of slice() is discarded, so this line relies on
  // slice() modifying args[0] in place - confirm against tools.base that what
  // stays in args[0] is the id and not the part after the "&".
  if (args[0].find("&") != -1) args[0].slice("&");

  // The id ends up in both a filesystem path and a URL, so refuse anything
  // that is not purely numeric (blocks "../" and similar path tricks).
  string id = args[0];
  bool numeric = id.length > 0;
  foreach (ch; id) {
    if (ch < '0' || ch > '9') {
      numeric = false;
      break;
    }
  }
  if (!numeric) {
    throw new Exception("fanfiction.net id must be numeric, got: " ~ id);
  }

  string tempPath = "/mnt/data/www/temp_rss/";
  auto fn = tempPath ~ id;

  RSSFile res;
  if (fn.exists()) {
    res = parseOutput(fn);
    // Only crawl again when the last known chapter has gained a successor.
    if (getLastURL(res).download().getNextLink()) {
      res = parseStory(id, res);
    }
  } else {
    res = parseStory(id);
  }
  fn.write(res.build());
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment