Last active
December 20, 2015 00:18
-
-
Save hpstuff/6040246 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "cinemacity.bg", | |
"preferGlobal": true, | |
"version": "0.1.0", | |
"author": "hpstuff <despario@gmail.com>", | |
"description": "cinemacity parser", | |
"dependencies" : { | |
"request" : "", | |
"express" : "", | |
"iconv" : "" | |
}, | |
"engines": { | |
"node": ">=0.6" | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var request = require("request"); | |
var express = require('express'); | |
var Iconv = require('iconv').Iconv; | |
var app = express(); | |
// Remote endpoints scraped by this service.
var urls = {
    cinemas: "http://cinemacity.bg/program_ad_new.php"
};

// Regular expressions used to pull data out of the fetched HTML.
var patterns = {
    // One cinema link. Groups: 1 = link URL, 2 = numeric c_id, 3 = link text.
    // The id group is `([0-9]+)` so the whole multi-digit id is captured; a
    // repeated single-digit group would only keep the last digit matched.
    cinemas: /(http\:\/\/cinemacity\.bg\/program_ad_new\.php\?c_id=([0-9]+))\"(?:\s+)?>(.*?)<\/a>/g,
    // Group 1 = value of a single-quoted src attribute that follows "<iframe".
    programURL: /<iframe.+src=\'(.*?)\'/,
    // Every <tr>...</tr> span (may span several lines).
    movies: /<tr[\s\S]*?<\/tr>/g,
    // Group 1 = text after the first ">" up to the last "</" on the line.
    movieName: /(?:.*?)>(.*)<\//,
    // Group 1 = text directly preceding "</a" (the anchor's content).
    time: /(?:.*)>(.*?)<\/a/
};

// Cinema list filled by getCinemas(); served as-is by GET /cinemas.
var cinemas = [];
// The same cinema objects, keyed by their id string.
var cinemasById = {};
// CORS middleware: for every request that carries an Origin header, add the
// Access-Control-* response headers and answer OPTIONS preflights directly.
app.all('*', function(req, res, next){
    // No Origin header: nothing to add, hand over to the next handler.
    if (!req.get('Origin')) return next();
    // use "*" here to accept any origin
    res.set('Access-Control-Allow-Origin', '*');
    res.set('Access-Control-Allow-Methods', 'GET, POST');
    res.set('Access-Control-Allow-Headers', 'X-Requested-With, Content-Type');
    // res.set('Access-Control-Allow-Max-Age', 3600);
    // Preflight: reply with an empty 200. res.status().end() is used because
    // passing a bare status code to res.send() is deprecated in Express 4.
    if (req.method === 'OPTIONS') return res.status(200).end();
    next();
});
// GET /cinemas - responds with the cinema list as JSON. When the in-memory
// list is empty, the listing page is fetched and parsed first.
app.get("/cinemas", function(req, res){
    // Writes the current cinema list to the response.
    function respond(){
        res.charset = "utf-8";
        res.set('Content-Type', 'application/json');
        res.send(JSON.stringify(cinemas));
    }

    if(cinemas.length > 0){
        respond();
        return;
    }

    requestUrl(urls.cinemas, function(html){
        getCinemas(html);
        respond();
    });
});
// GET /cinemas/:id/program/:date - responds with the parsed program of one
// cinema for the given date as JSON, or with a 404 JSON error for an id that
// is not in the cinema list.
app.get("/cinemas/:id/program/:date", function(req, res){
    var id = req.params.id;

    // Serialises `payload` as the JSON response body with the given status.
    function sendJSON(status, payload){
        res.charset = "utf-8";
        res.set('Content-Type', 'application/json');
        res.status(status).send(JSON.stringify(payload));
    }

    // Looks the cinema up and streams its program back to the client.
    function respond(){
        // Own-property check: cinemasById is a plain object, so keys such as
        // "constructor" would otherwise resolve to inherited members.
        if(!Object.prototype.hasOwnProperty.call(cinemasById, id)){
            sendJSON(404, { error: "Unknown cinema id: " + id });
            return;
        }
        getCinemaProgram(cinemasById[id], req.params.date, function(data){
            sendJSON(200, data);
        });
    }

    if(cinemas.length > 0){
        respond();
    }else{
        // The lookup table is only filled by getCinemas(); load it on demand
        // so this route also works before GET /cinemas was ever called.
        requestUrl(urls.cinemas, function(text){
            getCinemas(text);
            respond();
        });
    }
});
// Listen on the port given in VCAP_APP_PORT when it is set, otherwise on 3000.
var port = process.env.VCAP_APP_PORT || 3000;
app.listen(port);
/**
 * Rebuilds the shared cinema list from the HTML of the listing page.
 * Empties `cinemas`, then appends one { id, url, name } object per link
 * matched by `patterns.cinemas`, registering each object in `cinemasById`
 * under its id as well.
 *
 * @param {string} text HTML to scan for cinema links.
 */
function getCinemas(text){
    var linkPattern = patterns.cinemas;
    var found;
    var cinema;

    cinemas.length = 0;
    found = linkPattern.exec(text);
    while(found){
        cinema = {
            id: found[2],
            url: found[1],
            name: found[3]
        };
        cinemasById[found[2]] = cinema;
        cinemas.push(cinema);
        found = linkPattern.exec(text);
    }
}
//get cinemas info | |
//get program urls | |
/**
 * Resolves the program URL of every cinema in the list, in order, and logs
 * each cinema object to the console once getCinemaProgramURL reports it.
 *
 * @param {Array} cinemas Cinema objects to resolve.
 */
function getProgramURLs(cinemas){
    var logCinema = function(c){
        console.log(c);
    };
    var index = 0;

    while(index < cinemas.length){
        getCinemaProgramURL(cinemas[index], logCinema);
        index += 1;
    }
}
/**
 * Fetches the page at `cinema.url`, stores whatever getProgramURL() extracts
 * from it on `cinema.programURL`, then hands the same cinema object to
 * `callback`.
 *
 * @param {Object} cinema Cinema object with a `url` property; mutated in place.
 * @param {Function} callback Called with the updated cinema object.
 */
function getCinemaProgramURL(cinema, callback){
    var onPage = function(html){
        var programURL = getProgramURL(html);
        cinema.programURL = programURL;
        callback(cinema);
    };

    requestUrl(cinema.url, onPage);
}
/**
 * Parses a program page into a list of movies.
 *
 * Every <tr> after the first one (the first row is skipped) yields one
 * entry: the first <td> provides the name via `patterns.movieName`, each
 * following <td> that matches `patterns.time` provides one screening time.
 *
 * @param {string} text HTML of the program page.
 * @returns {Array} Objects of the form { name, hours }; `name` is null when
 *     the first cell does not match. Empty array when no rows are found.
 */
function getMovies(text){
    var movies = [];
    // String#match returns null when nothing matches; treat that as no rows.
    var rows = text.match(patterns.movies) || [];
    var i, k, cols, nameMatch, timeMatch, movie;

    for(i = 1; i < rows.length; i++){
        cols = rows[i].match(/<td[\s\S]*?<\/td>/g);
        // A row without any <td> cells carries no movie; skip it.
        if(!cols) continue;

        nameMatch = cols[0].match(patterns.movieName);
        movie = {
            name: nameMatch ? nameMatch[1] : null,
            hours: []
        };
        for(k = 1; k < cols.length; k++){
            timeMatch = cols[k].match(patterns.time);
            if(timeMatch) movie.hours.push(timeMatch[1]);
        }
        movies.push(movie);
    }
    return movies;
}
/**
 * Extracts the program URL from a cinema page using `patterns.programURL`.
 *
 * @param {string} text HTML of the cinema page.
 * @returns {string|undefined} First capture group of the pattern, or
 *     undefined when the pattern does not match.
 */
function getProgramURL(text){
    var found = text.match(patterns.programURL);
    return found ? found[1] : undefined;
}
/**
 * Downloads `url` and passes the page text to `callback`.
 *
 * The body is requested with 'binary' encoding, HTML comments are stripped,
 * and the first `charset=...` declaration found in the page decides whether
 * the bytes are converted to UTF-8 through Iconv. Pages without a charset
 * declaration are treated as UTF-8.
 *
 * On a transport error the failure is logged and `callback` receives an
 * empty string, so a failed download cannot throw from inside the async
 * response handler.
 *
 * @param {string} url Address to fetch.
 * @param {Function} callback Called once with the decoded page text.
 */
function requestUrl(url, callback){
    request({ url: url, encoding: 'binary' }, function(err, response, body) {
        var charsetMatch, charset, raw, text;

        if(err || typeof body !== 'string'){
            console.error("requestUrl: could not fetch " + url, err);
            callback("");
            return;
        }

        body = body.replace(/<!--[^>]*-->/g, "");
        charsetMatch = body.match(/(?:-->.*)?charset=(.*?)(?:'|")/);
        charset = charsetMatch ? charsetMatch[1] : "utf-8";

        // `body` is guaranteed to be a string here, so the string form of the
        // Buffer constructor is safe; kept for the old Node versions this
        // package declares support for.
        raw = new Buffer(body, 'binary');

        if(charset.toLowerCase() !== "utf-8"){
            try{
                text = new Iconv(charset, "utf8").convert(raw).toString();
            }catch(e){
                // Unknown charset name or undecodable bytes: fall back to the
                // undecoded text instead of crashing the process.
                console.error("requestUrl: cannot convert from " + charset, e);
                text = raw.toString();
            }
        }else{
            text = raw.toString();
        }
        callback(text);
    });
}
/**
 * Formats today's local date as "year-month-day"; month and day are not
 * zero-padded.
 *
 * @returns {string} For example "2015-12-5".
 */
function getNow(){
    var today = new Date();
    var year = today.getFullYear();
    var month = today.getMonth() + 1; // getMonth() is zero-based
    var day = today.getDate();
    return year + "-" + month + "-" + day;
}
/**
 * Loads the program of `cinema` for `date`. If the cinema has no
 * `programURL` yet, it is resolved first via getCinemaProgramURL (the
 * resolved cinema is logged) before getProgram is called.
 *
 * @param {Object} cinema Cinema object, possibly without `programURL`.
 * @param {string} date Date passed through to getProgram.
 * @param {Function} callback Passed through to getProgram.
 */
function getCinemaProgram(cinema, date, callback){
    if(cinema.programURL){
        getProgram(cinema, date, callback);
        return;
    }

    getCinemaProgramURL(cinema, function(resolved){
        console.log(resolved);
        getProgram(resolved, date, callback);
    });
}
/**
 * Fetches the program page of `cinema` for one date and passes the parsed
 * movies to `callback`.
 *
 * The request URL is `cinema.programURL` with its second "&"-separated
 * segment replaced by `c_data=<date>`. When the cinema has no program URL
 * (none could be extracted from its page), `callback` receives an empty list.
 *
 * @param {Object} cinema Cinema object carrying `programURL`.
 * @param {string} [date] Requested date; defaults to getNow().
 * @param {Function} callback Called with the array returned by getMovies().
 */
function getProgram(cinema, date, callback){
    var parts;

    // Without a program URL there is nothing to fetch; report "no movies"
    // rather than throwing on `.split` inside an async callback.
    if(!cinema || !cinema.programURL){
        callback([]);
        return;
    }

    parts = cinema.programURL.split("&");
    // `date` can come straight from the request path, so escape it before it
    // becomes part of the query string.
    parts[1] = "c_data=" + encodeURIComponent(date || getNow());

    requestUrl(parts.join("&"), function(text){
        callback(getMovies(text));
    });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment