Skip to content

Instantly share code, notes, and snippets.

@chengyuhui
Created March 18, 2013 13:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chengyuhui/5186990 to your computer and use it in GitHub Desktop.
Save chengyuhui/5186990 to your computer and use it in GitHub Desktop.
fs = require 'fs'
jsdom = require 'jsdom'
flowless = require 'flowless'
mysql = require 'mysql'
jquery = fs.readFileSync('../jquery.js').toString()
_ = require 'underscore'
strip_html = require 'strip_html'
serialize = require 'serialize'
# Page number of the topic listing to scrape; start interpolates it into the listing URL.
cur_page = 1
# Scrape one listing page of share.dmhy.org (page number taken from cur_page),
# and for every listed topic that is not yet present in `mp_resources`, load its
# detail page and log the parsed resource record as JSON.
#
# callback - optional function(err), invoked once the whole run has finished and
#            the MySQL connection has been closed.
#
# Note: the parsed record is only printed; nothing in this function inserts it
# into the database.
start = (callback)->
  crypto = require 'crypto'
  connection = mysql.createConnection
    host:'127.0.0.1'
    user:'root'
    password:''
    database:'moefou'
  flowless.runSeq [
    # Start fetch: load the listing page with jQuery injected.
    (cb)->
      jsdom.env
        html:"http://share.dmhy.org/topics/list/page/#{cur_page}"
        src:[jquery]
        done:cb
    # Collect title, tags, url and an md5 of the url for every listed topic,
    # then open the database connection before moving on.
    ,(window,cb)->
      $ = window.$
      rows = []
      $('#topic_list td.title').each (i,v)->
        cell = $ v
        link = cell.children('a')
        title = link.text().replace(/\r\n\t\t\t\t/,'')
        tags = _.compact get_tags title
        url = 'http://share.dmhy.org' + link.attr('href')
        hash = crypto.createHash('md5').update(url).digest("hex")
        # push stays last: its return value is a number, so it can never be
        # the `false` that would stop jQuery's each loop.
        rows.push {title, tags, url, hash}
      connection.connect (err)->
        cb err,rows
    ,flowless.map (element,cb)->
      flowless.runSeq [
        # Check whether the resource already exists in the db.
        (cb)->
          query = "SELECT `rs_id` FROM `mp_resources` WHERE `rs_encode` = ?"
          connection.query query,[element.hash],cb
        ,(results,q,cb)->
          # Already stored: hand `false` to the next step instead of a window.
          unless results.length is 0
            cb null,false
            return 0
          jsdom.env
            html:element.url
            src:[jquery]
            # NOTE(review): proxy address is hard-coded; confirm it is still wanted.
            proxy:'http://192.168.11.2:8888'
            done:cb
          console.log 'Send.'
        ,(window,cb)->
          # Plain truthiness test on purpose: the previous step signals "skip"
          # with `false`, which the existential operator (`window?`) would let
          # through, ending in a TypeError on `window.$`.
          unless window
            cb(null,false)
            return 0
          $ = window.$
          magnet = $('#tabs-1').children('p').eq(1).children('a').attr('href')
          torrent = 'http://share.dmhy.org' + $('#tabs-1').children('p').eq(0).children('a').attr('href')
          rs_data = serialize {magnet,torrent}
          rs_about = strip_html($('.topic-nfo').children('strong').remove().end().html()).trim()
          rs_size = get_size $('.resource-info').children().children().eq(4).children('span').text()
          res = {
            rs_data
            rs_about
            rs_size
            rs_encode:element.hash
            rs_site:'dmhy'
            rs_type:'bt'
            rs_title:element.title
            rs_source:element.url
            # Unix timestamp in seconds.
            rs_date:Math.round new Date().getTime()/1000
            rs_img:''
          }
          console.log(JSON.stringify(res));
          cb()
      ],cb
  ],(err)->
    console.log err
    # Close the MySQL connection so it does not stay open after the run, then
    # report completion to the caller if a callback was supplied.
    connection.end (endErr)->
      console.log endErr if endErr?
      callback? err
# Expose start to modules that require this file.
exports.start = start
# Also kick off a run as soon as the file is loaded. get_tags and get_size are
# assigned further down, after this call; start only references them from
# inside callbacks handed to jsdom/flowless, which presumably fire after the
# file has finished loading.
start()
# Extract a flat list of tag strings from a release title.
#
# raw - the title string; null, undefined and '' all yield an empty list.
#
# Steps:
#   1. Decorative stars/moons are removed.
#   2. Every bracketed group ([..], 【..】, (..), 『..』, ~..~, ◆..◇) becomes a
#      tag with its one-character delimiters stripped.
#   3. The text left outside the brackets is split on whitespace.
#   4. Each tag is further split on separator characters.
#   5. A tag containing an episode/volume marker (第N话, 第N話, 第N集, CHn,
#      VOL.n) is split into the text before the marker, the marker itself and
#      the text after it.
#
# Returns an array of non-empty strings.
get_tags = (raw)->
  return [] unless raw? and raw isnt ''
  raw = raw.replace /☪|★|☆/g,''
  bracketed = /\[(.+?)\]|【(.+?)】|\((.+?)\)|『(.+?)』|~(.+?)~|◆(.+?)◇/g

  # Bracketed groups first; every delimiter is exactly one character wide.
  groups = raw.match(bracketed) or []
  candidates = (group[1...-1] for group in groups)

  # Then the loose words outside of any bracket.
  for word in raw.replace(bracketed,'').split(/\s+/)
    candidates.push word if word isnt ''

  # NOTE(review): the separator alternation in the previous revision was
  # garbled and did not form a valid regex; this set (whitespace, `&amp;`,
  # ASCII and full-width `&` and `/`, `_`, `|`) is a reconstruction - confirm.
  separators = /&amp;|[\s&＆\/／_|]/g
  pieces = []
  for candidate in candidates
    for piece in candidate.replace(separators,' ').split(/\s+/)
      pieces.push piece if piece isnt ''

  episode_markers = [
    /(第(\d+)话)/
    /(第(\d+)話)/
    /(第(\d+)集)/
    /(CH(\d+))/i
    /(VOL\.?(\d+-?\d*))/i
  ]
  tags = []
  for piece in pieces
    # First marker pattern that matches wins; hit stays null when none does.
    hit = null
    for marker in episode_markers
      hit = marker.exec piece
      break if hit?
    unless hit?
      tags.push piece
      continue
    # Keep everything after the marker, even if the marker occurs again there.
    head = piece.slice 0,hit.index
    tail = piece.slice hit.index + hit[0].length
    for part in [head,hit[0],tail]
      tags.push part if part isnt ''
  tags
# Convert a human-readable size such as "1.5GB" into a rounded number of
# `target` units (binary multiples: 1 KB = 1024 B).
#
# str    - text containing a number followed by TB, GB, MB, KB or B
#          (case-insensitive, optional whitespace before the unit).
# target - unit of the result: 'B', 'KB' (default), 'MB', 'GB' or 'TB'.
#
# Returns the size as an integer, or 0 when no size can be found in str.
# Throws an Error when target is not one of the supported units.
get_size = (str,target = 'KB')->
  bytes_per_unit =
    B: 1
    KB: 1024
    MB: 1024 * 1024
    GB: 1024 * 1024 * 1024
    TB: 1024 * 1024 * 1024 * 1024
  unless Object::hasOwnProperty.call bytes_per_unit,target
    throw new Error "get_size: unsupported target unit '#{target}'"
  # Scraped text may be missing or malformed; report 0 instead of crashing.
  found = String(str ? '').toUpperCase().match /([0-9]+\.?[0-9]*)\s*(TB|GB|MB|KB|B)/
  return 0 unless found?
  [matched,amount,unit] = found
  Math.round parseFloat(amount) * bytes_per_unit[unit] / bytes_per_unit[target]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment