Skip to content

Instantly share code, notes, and snippets.

@chengyuhui
Created March 17, 2013 05:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chengyuhui/5180224 to your computer and use it in GitHub Desktop.
Save chengyuhui/5180224 to your computer and use it in GitHub Desktop.
// Generated by CoffeeScript 1.6.1
(function() {
// Names of the two functions defined later in this wrapper.
var start, get_tags;

// Dependencies, loaded in the same order as before.
var fs = require('fs');
var jsdom = require('jsdom');
var flowless = require('flowless');

// jQuery source text, read synchronously at load time; it is handed to jsdom
// as an inline script by `start`. The relative path is resolved by `fs`.
var jquery = fs.readFileSync('../jquery.js').toString();

var _ = require('underscore');

// Page number appended to the listing URL that `start` requests.
var cur_page = 1;

// Known tag keywords. Nothing in this file reads this list yet.
var keywords = ["GB", "BIG5", "MP4", "PC", "PV", "PSP", "RMVB", "720P", "480P"];
/**
 * Load listing page `cur_page` from share.dmhy.org, read the link text of every
 * `#topic_list td.title` cell, and print the tags `get_tags` derives from it.
 *
 * The two steps run in sequence through `flowless.runSeq`: the first asks jsdom
 * to load the page with the jQuery source injected, the second walks the rows
 * using the `window.$` that injection provides.
 *
 * @param {function(*)} [callback] Optional. Called once the sequence has
 *   finished, with the error reported by the sequence (if any) as its only
 *   argument. Previously this parameter was accepted but never invoked.
 * @returns {*} Whatever `flowless.runSeq` returns.
 */
start = function(callback) {
  return flowless.runSeq([
    function(cb) {
      return jsdom.env({
        html: "http://share.dmhy.org/topics/list/page/" + cur_page,
        src: [jquery],
        done: cb
      });
    }, function(window, cb) {
      var $ = window.$;
      $('#topic_list td.title').each(function(index, cell) {
        var title = $(cell).children('a').text();
        console.log('====================================');
        console.log(_.flatten(get_tags(title)));
      });
      return cb();
    }
  ], function(err) {
    // Only report real failures; the old code also printed `undefined` or
    // `null` after every successful run.
    if (err != null) {
      console.log(err);
    }
    if (typeof callback === 'function') {
      return callback(err);
    }
  });
};
// Expose `start` to code that loads this file as a module.
exports.start = start;
// Kick off one run immediately at load time; no callback is passed.
start();
/**
 * Split a tag around the first episode/volume marker it contains
 * (第N话, 第N話, 第N集, CHn, VOLn / VOL.n; the last two case-insensitive).
 *
 * The marker is kept as a tag of its own, so "Ep第05話Final" becomes
 * ["Ep", "第05話", "Final"]. Empty leading/trailing parts are dropped.
 *
 * @param {string} tag A single, non-empty tag.
 * @returns {string[]} The tag's parts, or `[tag]` when no marker is present.
 */
function split_episode_marker(tag) {
  var episode_patterns, i, match, parts;
  episode_patterns = [/(第(\d+)话)/, /(第(\d+)話)/, /(第(\d+)集)/, /(CH(\d+))/i, /(VOL\.?(\d+))/i];
  for (i = 0; i < episode_patterns.length; i++) {
    match = episode_patterns[i].exec(tag);
    if (match) {
      parts = [
        tag.slice(0, match.index),
        match[1],
        tag.slice(match.index + match[0].length)
      ];
      return parts.filter(function(part) {
        return part !== '';
      });
    }
  }
  return [tag];
}

/**
 * Break a topic title into a flat list of tags.
 *
 * Steps:
 *   1. Strip the decorative characters ☪ ★ ☆.
 *   2. Take every delimited segment ([..], 【..】, (..), 『..』, ~..~, ◆..◇)
 *      without its delimiters.
 *   3. Append the whitespace-separated words left outside those segments.
 *   4. Split each entry on whitespace, `&amp;`, `&`, `/`, `_` and `|`.
 *   5. Split each piece around an episode/volume marker.
 *
 * Fixes over the previous version: the separator pattern contained an
 * unescaped `/` and did not parse; the last step threw away its results and
 * returned arrays of `undefined`; empty-string tags could leak through.
 *
 * Written as a function declaration so it is hoisted and already defined
 * wherever it is called within the enclosing scope.
 *
 * @param {?string} raw Title text.
 * @returns {(string[]|string)} Tags in order of discovery (delimited segments
 *   first, then the remaining words). For `null`, `undefined` or `''` the
 *   empty string is returned, as before.
 */
function get_tags(raw) {
  var bracket_pattern, cleaned, result, separator_pattern, tags;
  if (raw == null || raw === '') {
    return '';
  }
  cleaned = raw.replace(/☪|★|☆/g, '');
  bracket_pattern = /\[(.+?)\]|【(.+?)\】|\((.+?)\)|『(.+?)\』|~(.+?)\~|◆(.+?)◇/g;

  // Delimited segments, minus their one-character opening/closing delimiters.
  tags = (cleaned.match(bracket_pattern) || []).map(function(segment) {
    return segment.slice(1, -1);
  });

  // Words that were not inside any delimited segment.
  cleaned.replace(bracket_pattern, '').split(/\s+/).forEach(function(word) {
    if (word !== '') {
      tags.push(word);
    }
  });

  // `&amp;` is listed before the character class so it is consumed whole.
  // \uFF06 and \uFF0F are the full-width ampersand and slash.
  // NOTE(review): the old pattern listed `&` and `/` twice, presumably
  // full-width forms lost in transcoding - confirm.
  separator_pattern = /\s|&amp;|[&\uFF06\/\uFF0F_|]/g;

  result = [];
  tags.forEach(function(tag) {
    tag.replace(separator_pattern, ' ').split(/\s+/).forEach(function(piece) {
      if (piece !== '') {
        result.push.apply(result, split_episode_marker(piece));
      }
    });
  });
  return result;
}
/*
restore_english = (arr)->
arr = _.compact arr
return arr[0] if arr.length is 1
console.log '+=+=+='
tmp = []
for val in arr
if /^[A-Za-z0-9!]+$/.test val then tmp.push val else tmp.push false
tmp = _.compact tmp
console.log tmp.join '|'
arr
*/
}).call(this);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment