Skip to content

Instantly share code, notes, and snippets.

@sillykelvin
Last active August 9, 2016 08:37
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save sillykelvin/8630120 to your computer and use it in GitHub Desktop.
Save sillykelvin/8630120 to your computer and use it in GitHub Desktop.
A Node.js script for downloading Sohu videos
#!/usr/bin/env node
var fs = require('fs');
var http = require('http');
var request = require('request');
// Entry point: read the list of video page URLs (one per line) from
// `url.list` in the current working directory and start analysing each one.
var urlListFile = 'url.list';
if (!fs.existsSync(urlListFile)) {
    console.error('[ERROR] the url list file does not exist.');
} else {
    // Split on both LF and CRLF and trim every line, so that files saved on
    // Windows do not leave a stray '\r' at the end of each URL and
    // whitespace-only lines are dropped together with the empty ones.
    var videoUrls = fs.readFileSync(urlListFile).toString()
        .split(/\r?\n/)
        .map(function (line) { return line.trim(); })
        .filter(function (line) { return line.length > 0; });

    videoUrls.forEach(function (videoUrl) {
        console.log('[ INFO] Start to analyse the video url: ' + videoUrl + ' ...');
        analyzeVideoUrl(videoUrl);
    });
}
/**
 * Fetches a video page, extracts the numeric `vid` embedded in its HTML
 * (`var vid="..."`) and passes the resulting playlist URL to analyzeJsonUrl.
 *
 * Failures (request error, missing or non-numeric vid) are reported on
 * stderr and the URL is skipped; nothing is thrown.
 *
 * @param {string} url - Address of the video page to inspect.
 */
function analyzeVideoUrl(url) {
    request(url, function (err, res, body) {
        if (err) {
            console.error('[ERROR] Error opening video url: ' + err);
            return;
        }

        // Capture only up to the closing quote so a second quoted value on
        // the same line cannot leak into the match.
        var match = /var vid="([^"]*)"/i.exec(body);
        // Always parse as base 10; NaN marks a missing or non-numeric vid.
        var vid = match ? parseInt(match[1], 10) : NaN;
        if (isNaN(vid)) {
            console.error('[ERROR] Invalid html response.');
            return;
        }

        /***
         * The video quality is relevant to vid, the rule is as below:
         * 1. HD (high definition): vid (default)
         * 2. SD (standard definition): vid + 1
         * 3. Super HD: vid + 2
         * 4. Original quality: vid + 3
         ***/
        var highestQualityVid = vid + 3;
        var jsonUrl = 'http://hot.vrs.sohu.com/vrs_flash.action?vid=' + highestQualityVid;
        analyzeJsonUrl(jsonUrl);
    });
}
/**
 * Fetches the playlist JSON for a video and starts URL resolution for each
 * of its clips via analyzeNewUrl.
 *
 * Fields read from the response: `allot` (host), `prot`, and from `data`:
 * `tvName`, `clipsBytes`, `clipsURL` and `su`. The three clip arrays must
 * have the same length. All clips of one video share a single
 * `videoProperties` object (name, dir, finishedCount, totalCount).
 *
 * Any failure (request error, non-JSON body, unexpected structure) is
 * reported on stderr and the video is skipped; nothing is thrown.
 *
 * @param {string} url - Playlist URL to fetch.
 */
function analyzeJsonUrl(url) {
    request(url, function (err, res, body) {
        if (err) {
            console.error('[ERROR] Error fetching json data: ' + err);
            return;
        }

        // The body comes from the network: a parse failure must not escape
        // this callback as an uncaught exception and kill the other downloads.
        var json;
        try {
            json = JSON.parse(body);
        } catch (parseErr) {
            console.error('[ERROR] Invalid json response: ' + parseErr.message);
            return;
        }

        var data = json ? json['data'] : null;
        if (!data ||
                !Array.isArray(data['clipsBytes']) ||
                !Array.isArray(data['clipsURL']) ||
                !Array.isArray(data['su'])) {
            console.error('[ERROR] Unexpected json structure.');
            return;
        }

        var host = json['allot'];
        var prot = json['prot'];
        var title = data['tvName'];
        console.log('[ INFO] Url analysis finished, video: ' + title + ', start to analyze the downloading url...');

        var sizes = data['clipsBytes'];
        var clipUrls = data['clipsURL'];
        var sus = data['su'];
        if (clipUrls.length !== sus.length || clipUrls.length !== sizes.length) {
            console.error('[ERROR] Parameter length mismatch.');
            return;
        }

        var videoProperties = {
            name: title,
            finishedCount: 0,
            totalCount: clipUrls.length,
            // The title is remote data used as a directory name: replace path
            // separators and characters that are illegal in file names so it
            // always denotes a single directory below the working directory.
            dir: String(title).replace(/[\\\/:*?"<>|]/g, '_') + '/'
        };

        clipUrls.forEach(function (clipUrl, index) {
            var newUrl = 'http://' + host + '/?prot=' + prot + '&file=' + clipUrl + '&new=' + sus[index];
            analyzeNewUrl(newUrl, sus[index], index, videoProperties);
        });
    });
}
/**
 * Resolves the final download address of one clip and starts the download.
 *
 * The response body is read as a '|'-separated record: field 0 is the base
 * URL (its last character is dropped before `su` is appended) and field 3
 * is the access key. The clip is saved as `Episode<seq>.mp4`, where <seq> is
 * `episodeSequence` zero-padded to the number of digits of
 * `videoProperties.totalCount`.
 *
 * @param {string} url - Address that returns the '|'-separated record.
 * @param {string} su - Clip path appended to the base URL.
 * @param {number} episodeSequence - Zero-based index of the clip.
 * @param {Object} videoProperties - Shared per-video state (uses totalCount).
 */
function analyzeNewUrl(url, su, episodeSequence, videoProperties) {
    request(url, function (err, res, body) {
        if (err) {
            console.error('[ERROR] Error fetching downloading url: ' + err.message);
            return;
        }

        // Validate the record before using it: a short or error response
        // would otherwise yield a "...?key=undefined" URL that is then
        // downloaded (and retried) for nothing.
        var items = String(body).split('|');
        if (items.length < 4 || !items[0] || !items[3]) {
            console.error('[ERROR] Unexpected response while fetching downloading url: ' + body);
            return;
        }

        var downloadUrl = items[0].slice(0, -1) + su + '?key=' + items[3];
        console.log('[ INFO] The downloading url is retrieved: ' + downloadUrl + ', start to download...');

        // Left-pad with zeros so the files sort in episode order.
        var width = videoProperties.totalCount.toString().length;
        var seq = String(episodeSequence);
        while (seq.length < width) {
            seq = '0' + seq;
        }

        downloadVideo(downloadUrl, 'Episode' + seq + '.mp4', videoProperties);
    });
}
/**
 * Downloads one clip into memory and writes it to
 * `videoProperties.dir + filename`, creating the directory if needed.
 *
 * A failed request or a non-200 status is retried immediately, up to five
 * attempts in total; after that the clip is given up with an error message
 * instead of retrying forever. `videoProperties.finishedCount` is
 * incremented only after the file has been written successfully, so the
 * "All episodes ..." message is printed once, when the last write succeeds.
 *
 * @param {string} url - Final download address of the clip.
 * @param {string} filename - File name to create inside the video directory.
 * @param {Object} videoProperties - Shared per-video state (name, dir,
 *     finishedCount, totalCount); finishedCount is updated in place.
 * @param {number} [attempt=1] - 1-based attempt number; callers normally omit
 *     it, it is supplied by the internal retry.
 */
function downloadVideo(url, filename, videoProperties, attempt) {
    var MAX_ATTEMPTS = 5;
    var currentAttempt = attempt || 1;
    var fullpath = videoProperties.dir + filename;
    var reqOptions = {
        method: 'GET',
        url: url,
        encoding: null // keep the body as a Buffer
    };

    // Starts the next attempt, or reports the clip as failed once the
    // attempt budget is used up.
    function retryOrGiveUp() {
        if (currentAttempt >= MAX_ATTEMPTS) {
            console.error('[ERROR] Giving up on ' + url + ' after ' + MAX_ATTEMPTS + ' attempts.');
            return;
        }
        console.log('[ INFO] Failed to download ' + url + ', retrying...');
        downloadVideo(url, filename, videoProperties, currentAttempt + 1);
    }

    request(reqOptions, function (err, res, body) {
        if (err) {
            console.error('[ERROR] Error fetching video: ' + err);
            retryOrGiveUp();
            return;
        }
        if (res.statusCode !== 200) {
            console.error('[ERROR] Status code error: ' + res.statusCode + ', body: ' + body);
            retryOrGiveUp();
            return;
        }

        // mkdirSync throws on failure; report it instead of letting the
        // exception escape the callback and abort every other download.
        try {
            if (!fs.existsSync(videoProperties.dir)) {
                fs.mkdirSync(videoProperties.dir);
            }
        } catch (mkdirErr) {
            console.error('[ERROR] Error creating directory ' + videoProperties.dir + ': ' + mkdirErr);
            return;
        }

        console.log('[ INFO] One episode [' + filename + '] of ' + videoProperties.name + ' is finished, writing to hard drive...');
        fs.writeFile(fullpath, body, function (e) {
            if (e) {
                console.error('[ERROR] Error writing video to hard drive: ' + e);
                return;
            }
            // Count the clip only once it is safely on disk.
            ++videoProperties.finishedCount;
            console.log('[ INFO] Episode ' + fullpath + ' has been written to hard drive.');
            if (videoProperties.finishedCount >= videoProperties.totalCount) {
                console.log('[ INFO] All episodes of ' + videoProperties.name + ' are downloaded.');
            }
        });
    });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment