Skip to content

Instantly share code, notes, and snippets.

Created August 10, 2014 09:57
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/378e9522ffd8edb2e0e2 to your computer and use it in GitHub Desktop.
Save anonymous/378e9522ffd8edb2e0e2 to your computer and use it in GitHub Desktop.
rss_proxy.d
module rss_proxy;
import tools.base, std.file, tools.downloader;
/**
 * Fetches url with /usr/bin/wget into a temporary file and returns the
 * contents of that file.
 *
 * Params:
 *   url = address to fetch; handed to wget as one single-quoted shell argument.
 * Returns: whatever wget wrote to the temporary file, cast to a string.
 * Throws: Exception when no temporary file name is available or when the
 *         wget command line does not finish with status 0.
 */
string download(string url) {
  // Single-quote an argument for the shell; an embedded single quote is
  // written as '\'' (close quote, escaped quote, reopen quote), so no
  // character of the argument is ever interpreted by the shell.
  string shellQuote(string arg) {
    return "'" ~ arg.replace("'", "'\\''") ~ "'";
  }
  char* tmpfilp = tmpnam(null);
  if (!tmpfilp) throw new Exception("could not get temp file name");
  auto tmpfil = toString(tmpfilp);
  // Remove the temporary file on every exit path, including exceptions.
  scope(exit) unlink(toStringz(tmpfil));
  auto command = "/usr/bin/wget --timeout=60 -q -O " ~ shellQuote(tmpfil) ~ " " ~ shellQuote(url);
  // Any non-zero status is a failure: -1 means the command could not be
  // started at all, other values carry wget's own error exit code.
  if (system(toStringz(command)) != 0) {
    throw new Exception("wget failed");
  }
  return cast(string) read(tmpfil);
}
/**
 * Wraps content in an opening and a closing tag, followed by a newline.
 *
 * Params:
 *   tag = tag name, used verbatim in both the opening and the closing tag.
 *   dg  = lazily evaluated content; it is evaluated exactly once.
 * Returns: "<tag>" ~ content ~ "</tag>\n". No escaping is applied.
 */
string withTag(string tag, lazy string dg) {
  auto opening = "<" ~ tag ~ ">";
  auto closing = "</" ~ tag ~ ">\n";
  return opening ~ dg() ~ closing;
}
/**
 * In-memory model of a feed: channel metadata plus a list of entries.
 */
class RSSFile {
  /// Channel-level fields, written out by build().
  string title, link, description;
  /// One feed item.
  static class Entry {
    string title, link, description;
  }
  /// Items of the channel, in the order they are written out by build().
  Entry[] entries;
  /**
   * Tells whether an entry with the same title, link and description is
   * already stored.
   */
  bool has(Entry entry) {
    foreach (known; entries) {
      if (known.title != entry.title) continue;
      if (known.link != entry.link) continue;
      if (known.description != entry.description) continue;
      return true;
    }
    return false;
  }
  /**
   * Serializes the channel: title, link and description first, then one
   * "item" element per entry (title, description, link), all wrapped in a
   * single "channel" element.
   */
  string build() {
    string items;
    foreach (entry; entries) {
      auto fields =
        withTag("title", entry.title) ~
        withTag("description", entry.description) ~
        withTag("link", entry.link);
      items ~= withTag("item", fields);
    }
    auto header =
      withTag("title", title) ~
      withTag("link", link) ~
      withTag("description", description);
    return withTag("channel", header ~ items);
  }
}
/**
 * Returns the link of the first stored entry with "://m." rewritten to
 * "://www.".
 *
 * Params:
 *   rf = feed whose first entry is inspected.
 * Throws: Exception when rf is null or holds no entries, instead of indexing
 *         past the end of an empty array.
 */
string getLastURL(RSSFile rf) {
  if (!rf || !rf.entries.length)
    throw new Exception("RSS file has no entries to take the last URL from");
  return rf.entries[0].link.replace("://m.", "://www.");
}
/**
 * Extracts the target of the "Next >" button from a downloaded page.
 *
 * Params:
 *   data = page text to search.
 * Returns: the text found between the button's onClick prefix and the
 *          "Next &gt;" label, as reported by between(). Callers treat a
 *          false-ish result as "no next page".
 */
string getNextLink(string data) {
  // An earlier variant searched for "&nbsp;Next &gt;&nbsp;" followed by
  // "self.location='"; the button markup below is what is matched now.
  string prefix = "TYPE=BUTTON onClick=\"self.location='";
  string suffix = "'\">Next &gt;";
  // NOTE(review): the meaning of the trailing `true` is defined by
  // tools.base.between and is not visible here - confirm before changing.
  return data.between(prefix, suffix, true);
}
/// Exception carrying the fixed message "AbortEx".
class AbortException : Exception {
  this() {
    super("AbortEx");
  }
}
/**
 * Walks the chapters of a fanfiction.net story by following "Next" links and
 * records one entry per visited page.
 *
 * Params:
 *   id    = story id, used to build the first-chapter URL when no start feed
 *           is given.
 *   start = optional existing feed; when present, crawling resumes from the
 *           URL returned by getLastURL(start) and new entries are added to it.
 * Returns: the feed (start itself, or a new RSSFile) with new entries
 *          prepended, so the most recently visited page is entries[0].
 */
RSSFile parseStory(string id, RSSFile start = null) {
  RSSFile res = start;
  string cur;
  if (start) {
    cur = getLastURL(start);
  } else {
    res = new RSSFile;
    cur = Format("http://www.fanfiction.net/s/", id, "/1/");
  }
  string data = cur.download();
  if (!start) {
    // A fresh feed takes its title and link from the first chapter page.
    res.title = data.between("&#187; <b>", "</b>");
    res.link = cur;
  }
  for (;;) {
    logln("Process ", cur);
    auto entry = new RSSFile.Entry;
    // The chapter title is the selected option inside the navigation table.
    auto navigation = data.between("\"Chapter Navigation\"", "</table");
    entry.title = navigation.between("selected>", "<");
    // Entries store the "m." variant of the URL; getLastURL reverses this.
    entry.link = cur.replace("://www.", "://m.");
    if (!res.has(entry))
      res.entries = entry ~ res.entries;
    auto next = getNextLink(data);
    if (!next) break;
    cur = cur.followLink(next);
    data = cur.download();
  }
  return res;
}
/**
 * Produces a short single-line preview of s for error messages: at most the
 * first 60 elements of s, with every newline replaced by a backslash.
 */
string next_text(string s) {
  auto head = s.length > 60 ? s[0 .. 60] : s;
  return head.replace("\n", "\\");
}
/**
 * Consumes one "<name>...</name>\n" element from the front of data.
 *
 * Params:
 *   data = input text; on success it is replaced by whatever follows the
 *          closing tag.
 *   name = expected tag name.
 *   dg   = callback that receives the text between the opening tag and the
 *          first closing tag.
 * Throws: Exception("Malformed start: ...") when data does not begin with the
 *         opening tag, Exception("Missing end tag: ...") when no closing tag
 *         is found.
 */
void expectTag(ref string data, string name, void delegate(string) dg) {
  string opening = "<" ~ name ~ ">";
  string closing = "</" ~ name ~ ">\n";
  auto post = data.startsWith(opening);
  if (!post)
    throw new Exception("Malformed start: "~data.next_text()~", expected "~name);
  auto mid = post.between("", closing);
  if (!mid)
    throw new Exception("Missing end tag: "~post.next_text());
  dg(mid);
  data = post.between(closing, "");
}
/**
 * Convenience overload: consumes one "<name>...</name>\n" element from the
 * front of data and stores its content in s.
 */
void expectTag(ref string data, string name, ref string s) {
  void store(string text) { s = text; }
  expectTag(data, name, &store);
}
/**
 * Reads a feed file previously produced by RSSFile.build() back into an
 * RSSFile.
 *
 * Params:
 *   fn = path of the file to read.
 * Returns: a new RSSFile holding the channel title, link and description and
 *          one entry per "item" element, in file order.
 * Throws: whatever expectTag throws when the text does not have the expected
 *         shape.
 */
RSSFile parseOutput(string fn) {
  auto data = fn.read().castLike("");
  auto res = new RSSFile;
  // Parses the body of one "item" element and appends it to the result.
  void parseItem(string item) {
    auto entry = new RSSFile.Entry;
    item.expectTag("title", entry.title);
    item.expectTag("description", entry.description);
    item.expectTag("link", entry.link);
    res.entries ~= entry;
  }
  // Parses the body of the "channel" element: three header fields, then items.
  void parseChannel(string rest) {
    rest.expectTag("title", res.title);
    rest.expectTag("link", res.link);
    rest.expectTag("description", res.description);
    while (rest.startsWith("<item")) {
      rest.expectTag("item", &parseItem);
    }
  }
  data.expectTag("channel", &parseChannel);
  return res;
}
import tools.log;
/**
 * Entry point: builds or refreshes the cached feed for one fanfiction.net
 * story.
 *
 * Expects exactly one argument after the executable name: the story id. The
 * feed is stored under /mnt/data/www/temp_rss/ in a file named after the id.
 * An existing cache file is extended only when its newest page has a "Next"
 * link; otherwise it is rewritten unchanged.
 *
 * Throws: Exception when the id argument is missing or is not purely numeric.
 */
void main(string[] args) {
  agent_override = "Mozilla/5.0 (D/tools/rss_proxy downloader)";
  log_threads = false;
  // Presumably removes the executable name from the front of args; the
  // length check below relies on that.
  args.take();
  if (args.length != 1) {
    throw new Exception("fanfiction.net id expected! ");
  }
  // NOTE(review): the return value of slice is discarded; which side of the
  // "&" remains in args[0] depends on tools.base.slice - confirm that this
  // keeps the id.
  if (args[0].find("&") != -1) args[0].slice("&");
  auto id = args[0];
  // The id ends up in a file path, a URL and a shell command line, so accept
  // digits only. This rules out path traversal ("../") and shell or URL
  // metacharacters.
  if (!id.length) throw new Exception("fanfiction.net id expected! ");
  foreach (ch; id) {
    if (ch < '0' || ch > '9')
      throw new Exception("fanfiction.net id must be numeric: " ~ id);
  }
  string tempPath = "/mnt/data/www/temp_rss/";
  auto fn = tempPath ~ id;
  RSSFile res;
  if (fn.exists()) {
    res = parseOutput(fn);
    if (!res.entries.length) {
      // A cache without items has no "last URL" to resume from: start over.
      res = parseStory(id);
    } else if (getLastURL(res).download().getNextLink()) {
      res = parseStory(id, res);
    }
  } else res = parseStory(id);
  fn.write(res.build());
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment