Skip to content

Instantly share code, notes, and snippets.

@hpstuff
Last active December 20, 2015 00:18
Show Gist options
  • Save hpstuff/6040246 to your computer and use it in GitHub Desktop.
Save hpstuff/6040246 to your computer and use it in GitHub Desktop.
{
"name": "cinemacity.bg",
"preferGlobal": true,
"version": "0.1.0",
"author": "hpstuff <despario@gmail.com>",
"description": "cinemacity parser",
"dependencies" : {
"request" : "",
"express" : "",
"iconv" : ""
},
"engines": {
"node": ">=0.6"
}
}
// HTTP client used by requestUrl() to download pages.
var request = require("request");
// Web framework that serves the JSON routes registered below.
var express = require('express');
// Character-set converter; requestUrl() uses it to transcode pages to UTF-8.
var Iconv = require('iconv').Iconv;
// The Express application instance all routes and middleware attach to.
var app = express();
// Remote pages this service scrapes.
var urls = {
    // Listing page that links to every cinema's program page.
    cinemas: "http://cinemacity.bg/program_ad_new.php"
};

// Regular expressions used to pull data out of the downloaded HTML.
var patterns = {
    // One match per cinema link: [1] = cinema page URL, [2] = numeric c_id,
    // [3] = link text (cinema name).
    // The id is captured with ([0-9]*). A quantified capture group such as
    // ([0-9]?)* only keeps its final iteration, which truncated multi-digit
    // ids to their last digit.
    cinemas: /(http\:\/\/cinemacity\.bg\/program_ad_new\.php\?c_id=([0-9]*))\"(?:\s+)?>(.*?)<\/a>/g,
    // [1] = value of the single-quoted src attribute of an <iframe>.
    programURL: /<iframe.+src=\'(.*?)\'/,
    // Every <tr>...</tr> row of a page.
    movies: /<tr[\s\S]*?<\/tr>/g,
    // [1] = text between the first ">" and the last "</" of a cell.
    movieName: /(?:.*?)>(.*)<\//,
    // [1] = text of the last link (<a>) inside a cell.
    time: /(?:.*)>(.*?)<\/a/
};

// Cached cinema list, filled by getCinemas().
var cinemas = [];
// The same cinema objects, indexed by their id string.
var cinemasById = {};
// CORS middleware for every route and method.
// Requests without an Origin header pass through untouched; requests with
// one get permissive CORS headers, and OPTIONS preflights are answered
// immediately with a 200.
app.all('*', function(req, res, next){
    var hasOrigin = Boolean(req.get('Origin'));
    if (!hasOrigin) {
        return next();
    }

    // "*" accepts any origin.
    res.set({
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Methods': 'GET, POST',
        'Access-Control-Allow-Headers': 'X-Requested-With, Content-Type'
    });

    if (req.method === 'OPTIONS') {
        return res.send(200);
    }
    next();
});
// GET /cinemas
// Responds with the cinema list as JSON. The list is served from the
// in-memory cache when it is populated; otherwise the listing page is
// downloaded and parsed first.
app.get("/cinemas", function(req, res){
    // Serialises the current cache into the response.
    function sendCinemas(){
        res.charset = "utf-8";
        res.set('Content-Type', 'application/json');
        res.send(JSON.stringify(cinemas));
    }

    if(cinemas.length > 0){
        sendCinemas();
        return;
    }

    requestUrl(urls.cinemas, function(text){
        getCinemas(text);
        sendCinemas();
    });
});
/**
 * GET /cinemas/:id/program/:date
 *
 * Responds with the JSON program (as produced by getCinemaProgram) of the
 * cinema identified by :id for the given :date.
 *
 * If the cinema cache is still empty the cinema list is loaded first, so the
 * route also works before /cinemas has ever been requested. An id that is
 * not in the cache yields a 404 JSON error instead of passing `undefined`
 * on to getCinemaProgram.
 */
app.get("/cinemas/:id/program/:date", function(req, res){
    // Looks the cinema up in the cache and sends either its program or a 404.
    function sendProgram(){
        var id = req.params.id;
        // Own-property check: ids such as "constructor" must not resolve to
        // members inherited from Object.prototype.
        var cinema = Object.prototype.hasOwnProperty.call(cinemasById, id) ? cinemasById[id] : null;

        res.charset = "utf-8";
        res.set('Content-Type', 'application/json');

        if(!cinema){
            res.status(404);
            res.send(JSON.stringify({ error: "Unknown cinema id: " + id }));
            return;
        }

        getCinemaProgram(cinema, req.params.date, function(data){
            res.send(JSON.stringify(data));
        });
    }

    if(cinemas.length > 0){
        sendProgram();
        return;
    }

    // Cache not populated yet: fetch and parse the cinema list, then retry.
    requestUrl(urls.cinemas, function(text){
        getCinemas(text);
        sendProgram();
    });
});
// Listen on the port supplied through VCAP_APP_PORT, or on 3000 when unset.
var listenPort = process.env.VCAP_APP_PORT || 3000;
app.listen(listenPort);
/**
 * Rebuilds the cinema cache from the HTML of the cinema listing page.
 *
 * Empties the module-level `cinemas` array in place, then appends one
 * { id, url, name } object per match of `patterns.cinemas`, registering
 * each object in `cinemasById` under its id as well.
 *
 * @param {string} text HTML of the listing page.
 */
function getCinemas(text){
    var found;
    var entry;

    cinemas.splice(0, cinemas.length);

    for(found = patterns.cinemas.exec(text); found; found = patterns.cinemas.exec(text)){
        entry = {
            id: found[2],
            url: found[1],
            name: found[3]
        };
        cinemasById[entry.id] = entry;
        cinemas.push(entry);
    }
}
//get cinemas info
//get program urls
/**
 * Resolves the program URL of every cinema in the list, logging each cinema
 * to the console once its URL has been resolved.
 *
 * @param {Array} cinemas Cinema objects to resolve.
 */
function getProgramURLs(cinemas){
    function logResolved(resolved){
        console.log(resolved);
    }

    var index = 0;
    while(index < cinemas.length){
        getCinemaProgramURL(cinemas[index], logResolved);
        index += 1;
    }
}
/**
 * Downloads a cinema's page, extracts the program URL from it and stores
 * the result on the cinema object as `programURL`.
 *
 * @param {Object} cinema Cinema object; its `url` is downloaded.
 * @param {Function} callback Called with the same cinema object afterwards.
 */
function getCinemaProgramURL(cinema, callback){
    function onPageLoaded(html){
        var programURL = getProgramURL(html);
        cinema.programURL = programURL;
        callback(cinema);
    }

    requestUrl(cinema.url, onPageLoaded);
}
/**
 * Parses a program page into a list of movies with their screening hours.
 *
 * Every <tr> after the first one (the first row is skipped as a header) is
 * split into <td> cells: the first cell supplies the movie name, and each
 * remaining cell that contains a link contributes one hour.
 *
 * Pages without any <tr>, and rows without any <td>, are tolerated. The
 * former yields an empty list and the latter are skipped, where previously
 * both raised a TypeError on the null match result.
 *
 * @param {string} text HTML of the program page.
 * @returns {Array<{name: (string|null), hours: string[]}>} Parsed movies;
 *     `name` is null when the first cell does not match the name pattern.
 */
function getMovies(text){
    var movies = [];
    // String#match returns null (not an empty array) when nothing matches.
    var rows = text.match(patterns.movies) || [];
    var i, k, cols, movie, name, time;

    // Start at 1: the first row is the table header.
    for(i = 1; i < rows.length; i++){
        cols = rows[i].match(/<td[\s\S]*?<\/td>/g);
        if(!cols){
            // Row without data cells (e.g. only <th>): nothing to extract.
            continue;
        }

        name = cols[0].match(patterns.movieName);
        movie = {
            name: name ? name[1] : null,
            hours: []
        };

        for(k = 1; k < cols.length; k++){
            time = cols[k].match(patterns.time);
            if(time){
                movie.hours.push(time[1]);
            }
        }
        movies.push(movie);
    }
    return movies;
}
/**
 * Extracts the program URL from a cinema page using `patterns.programURL`.
 *
 * @param {string} text HTML of the cinema page.
 * @returns {(string|undefined)} First capture group of the pattern, or
 *     undefined when the pattern does not match.
 */
function getProgramURL(text){
    var found = text.match(patterns.programURL);
    return found ? found[1] : undefined;
}
/**
 * Downloads a page and hands its text, decoded to a JavaScript string, to
 * the callback.
 *
 * The body is fetched as a binary string, HTML comments are stripped, and
 * the charset declared in the markup decides how the bytes are decoded:
 * UTF-8 is decoded directly, anything else is first converted with Iconv.
 * When no charset is declared, UTF-8 is assumed.
 *
 * @param {string} url Address to download.
 * @param {Function} callback Called as callback(text). On a failed request
 *     it is called as callback("", err) so that the caller can still respond
 *     instead of the process dying on an undefined body.
 */
function requestUrl(url, callback){
    request({ url: url, encoding: 'binary' }, function(err, response, body) {
        if(err || typeof body !== "string"){
            err = err || new Error("Empty response body from " + url);
            console.error("Request to " + url + " failed:", err);
            return callback("", err);
        }

        body = body.replace(/<!--[^>]*-->/g, "");

        // Accepts both `content="text/html; charset=windows-1251"` and the
        // HTML5 form `<meta charset="utf-8">` (optional opening quote).
        var declared = body.match(/charset=\s*["']?([^"'\s;>]+)/i);
        var charset = declared ? declared[1] : "utf-8";

        // Buffer.from replaces the deprecated `new Buffer(string, encoding)`.
        var bytes = Buffer.from(body, 'binary');
        if(charset.toLowerCase() != "utf-8"){
            bytes = new Iconv(charset, "utf8").convert(bytes);
        }
        callback(bytes.toString());
    });
}
/**
 * Formats today's local date as "YYYY-M-D" (month and day are not
 * zero-padded).
 *
 * @returns {string} The current date.
 */
function getNow(){
    var today = new Date();
    var year = today.getFullYear();
    var month = today.getMonth() + 1; // getMonth() is zero-based
    var day = today.getDate();
    return year + "-" + month + "-" + day;
}
/**
 * Fetches the program of a cinema for a date, resolving the cinema's
 * program URL first when it is not known yet.
 *
 * @param {Object} cinema Cinema object; `programURL` is used when present.
 * @param {string} date Date passed through to getProgram.
 * @param {Function} callback Passed through to getProgram.
 */
function getCinemaProgram(cinema, date, callback){
    if(cinema.programURL){
        getProgram(cinema, date, callback);
        return;
    }

    getCinemaProgramURL(cinema, function(resolved){
        console.log(resolved);
        getProgram(resolved, date, callback);
    });
}
/**
 * Downloads and parses the program of a cinema for one date.
 *
 * The request URL is the cinema's `programURL` with its second
 * "&"-separated segment replaced by `c_data=<date>`.
 *
 * @param {Object} cinema Cinema object carrying `programURL`.
 * @param {string} [date] Requested date; defaults to getNow() when falsy.
 *     The value is URI-encoded because it originates from the request path
 *     and must not be able to inject extra query parameters.
 * @param {Function} callback Called with the array returned by getMovies,
 *     or with an empty array when the cinema has no program URL (e.g. the
 *     cinema page contained no program iframe).
 */
function getProgram(cinema, date, callback){
    if(!cinema || !cinema.programURL){
        console.error("No program URL available for cinema", cinema);
        return callback([]);
    }

    var parts = cinema.programURL.split("&");
    parts[1] = "c_data=" + encodeURIComponent(date || getNow());

    requestUrl(parts.join("&"), function(text){
        callback(getMovies(text));
    });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment