Created
June 3, 2009 16:40
-
-
Save ku/123087 to your computer and use it in GitHub Desktop.
tombloo 2ch extractor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Registers the "Quote - 2ch" extractor, which turns a single post on a 2ch
 * thread page into a quote whose URL points at that post.
 *
 * The code relies on posts being marked up as a <dt> header (whose text starts
 * with the post number) followed by a <dd> holding the post body.
 *
 * `$x` and `resolveRelativePath` are helpers defined outside this file;
 * presumably `$x(xpath, node)` returns the first matching node (or a falsy
 * value when nothing matches) -- TODO confirm against the host library.
 */
Tombloo.Service.extractors.register([
  {
    name: 'Quote - 2ch',
    // NOTE(review): RE is not referenced by any method of this object; whether
    // the host framework consults it is not visible from here -- confirm.
    RE: /^[-a-z0-9]+\.2ch\.net\//,
    ICON: 'http://2ch.net/favicon.ico',

    /**
     * Finds the <dd> that belongs to the element the user acted on.
     *
     * Matches either a <dd> that is (or contains) `ctx.target`, or the first
     * <dd> following a <dt> that is (or contains) `ctx.target`.
     *
     * @param {Object} ctx - Extraction context; only `ctx.target` is read.
     * @returns {Node|*} Whatever `$x` yields for the query (falsy when no match).
     */
    _contextNode: function (ctx) {
      return $x('(ancestor-or-self::dd)[1] | (ancestor-or-self::dt)[1]/following-sibling::dd[1]', ctx.target);
    },

    /**
     * Reports whether this extractor applies to the given context.
     *
     * @param {Object} ctx - Extraction context.
     * @returns {boolean} true when a post body node was found for `ctx.target`.
     */
    check: function (ctx) {
      return !!this._contextNode(ctx);
    },

    /**
     * Builds the quote descriptor for the post under `ctx.target`.
     *
     * @param {Object} ctx - Extraction context.
     * @returns {Object|boolean} An object with `type`, `item`, `body`,
     *   `pageUrl` and `itemUrl`, or `false` when the post body, its <dt>
     *   header, or the leading post number cannot be found.
     */
    extract: function (ctx) {
      var context = this._contextNode(ctx);
      if (!context) {
        return false;
      }

      var doc = context.ownerDocument;
      var location = doc.location;

      // Nearest <dt> before the post body; it carries the post number.
      var header = $x('(./preceding-sibling::dt)[last()]', context);
      if (!header) {
        return false;
      }
      var dt = header.textContent;

      // Guard the match: a header that does not start with digits used to
      // raise a TypeError on `null[1]`.
      var postIdMatch = dt.match(/^\s*(\d+)/);
      if (!postIdMatch) {
        return false;
      }
      var postId = postIdMatch[1];

      var m = {
        type: 'quote',
        item: doc.title,
        body: [dt, $x('.', context).innerHTML].join('<br />')
        // Tagging by thread id was left disabled by the original author:
        // tags: "2ch 2ch." + <thread id>
      };

      // Locate the thread id: the first all-digit path component. The previous
      // version recorded the index inside a filter() callback, which runs for
      // every element, so it always ended up holding the last index and the
      // permalink was only right when the thread id was second to last.
      var pathComponents = location.pathname.split(/\//);
      var threadIdIndex = -1;
      for (var i = 0; i < pathComponents.length; i++) {
        if (/^\d+$/.test(pathComponents[i])) {
          threadIdIndex = i;
          break;
        }
      }

      if (threadIdIndex === -1) {
        // No thread id in the path: no per-post permalink can be derived, so
        // point at the page itself.
        m.pageUrl = m.itemUrl = location.href;
      } else {
        // Keep everything up to and including the thread id, then append the
        // post number.
        var path = pathComponents.slice(0, threadIdIndex + 1).concat(postId).join('/');
        m.pageUrl = m.itemUrl = resolveRelativePath(path, location);
      }

      return m;
    }
  }
]);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment