Last active
August 29, 2015 14:01
-
-
Save pedro-stanaka/9340b0be59514326046f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Rotation policy for the log files written by the Rio bus crawler.
/usr/local/crawler-rio/logs/*.log {
    # Rotate once a day and keep seven rotated generations per log file.
    daily
    rotate 7

    # Compress rotated logs.
    compress

    # Do not create a fresh empty log file after rotating.
    nocreate

    # A missing log file is not an error.
    missingok
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
######
# Daily driver for the Rio bus-position crawler.
#
# Setup: create the directory /usr/local/crawler-rio/ and copy this script,
# crawler.logrotate.conf and crawler_rio.php into it.
# Run this script as a daily cron job to collect the bus data.
# Example /etc/crontab entry (five time fields, then user and command):
#   03 00 * * * root /usr/local/crawler-rio/crawler_daily_rio.sh
######

# Every path below is relative to the directory this script lives in, and
# cron does not start jobs from there, so switch to it first.
cd "$(dirname "$0")" || exit 1

# The output redirection below fails if the log directory does not exist yet.
mkdir -p logs || exit 1

# Force a rotation of the existing logs, keeping the state file next to the script.
/usr/sbin/logrotate -f -s crawler.logrotate.state crawler.logrotate.conf

# Run the crawler and store its output in a log file named after today's date.
/usr/bin/php crawler_rio.php > "logs/log_crawler-$(date +%Y-%m-%d).log"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/php5 | |
<?php | |
/**
 * Downloads a URL with cURL and returns the response body.
 *
 * On failure (transfer error or empty response body) the cURL error message
 * is printed and the script is terminated with exit(-1), so the function only
 * ever returns on success.
 *
 * @param string      $url        Address to fetch, e.g. 'http://www.domain.com'.
 * @param string|null $proxy_ip   HTTP proxy host, e.g. '192.168.1.1'; no proxy is used when empty.
 * @param string|null $proxy_port Proxy port, e.g. '12345'; only applied together with $proxy_ip.
 * @param string|null $loginpassw Proxy credentials in the form 'username:password'.
 *
 * @return string The response body.
 */
function curlFile($url, $proxy_ip = NULL, $proxy_port = NULL, $loginpassw = NULL) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    if (!empty($proxy_ip)) {
        curl_setopt($ch, CURLOPT_PROXY, $proxy_ip);
        // CURLOPT_PROXYTYPE expects one of the CURLPROXY_* constants, not a string.
        curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
        // Only pass the optional proxy settings that were actually supplied.
        if (!empty($proxy_port)) {
            curl_setopt($ch, CURLOPT_PROXYPORT, (int) $proxy_port);
        }
        if (!empty($loginpassw)) {
            curl_setopt($ch, CURLOPT_PROXYUSERPWD, $loginpassw);
        }
    }

    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 360); // connect timeout in seconds
    curl_setopt($ch, CURLOPT_TIMEOUT, 900);        // overall timeout in seconds

    $data = curl_exec($ch);

    // curl_exec() returns false on a transfer error. An empty body is treated
    // as a failure too, but a body such as "0" is valid data, so empty() is
    // not used here.
    if ($data === false || $data === '') {
        echo "CURL ERROR: " . curl_error($ch) . "\n";
        curl_close($ch);
        exit(-1);
    }

    curl_close($ch);
    return $data;
}
// Main import routine: downloads the current bus positions from the Rio open
// data REST service and inserts into posicoes_onibus every position that is
// newer than the latest one already stored for the same bus.

// No execution time limit for this script.
set_time_limit(0);

// URL of the REST service that returns all positions.
$url = "http://dadosabertos.rio.rj.gov.br/apiTransporte/apresentacao/rest/index.cfm/obterTodasPosicoes";

echo date('H:i:s') . " - " . "Início requisição cURL... <br>\n";

// cURL request
$conteudo = curlFile($url);

if (!empty($conteudo)) {
    $json = json_decode($conteudo, false);
    echo date('H:i:s') . " - " . "Final requisição cURL...<br>\n";

    // Stop early when the response is not the expected {"DATA": [...]} document
    // instead of failing later on a null object.
    if (!is_object($json) || !isset($json->DATA) || !is_array($json->DATA)) {
        echo date('H:i:s') . " - " . "JSON ERROR: unexpected response (json_last_error=" . json_last_error() . ")\n";
        exit(-1);
    }

    // Connect to the database
    $host = "localhost";
    $port = "5432";
    $user = "postgres";
    $password = "secret";
    $dbname = "database";

    $string_dsn = "host=" . $host
        . " port=" . $port
        . " dbname=" . $dbname
        . " user=" . $user
        . " password=" . $password;

    $conexao = null;
    try {
        $conexao = new PDO("pgsql:" . $string_dsn, null, null, [PDO::ATTR_PERSISTENT => true]);
    } catch (PDOException $e) {
        print $e->getMessage();
    }

    // Everything below needs a live connection, so it all sits behind the guard.
    if ($conexao) {
        echo date('H:i:s') . " - " . "Set DATESTYLE<br>\n";
        // The feed's timestamps are month-day-year; make PostgreSQL parse them that way.
        $conexao->exec("SET DATESTYLE TO postgres, mdy;");

        echo "Preparing inserts<br>\n";
        $stmt = $conexao->prepare("INSERT INTO posicoes_onibus (onibus_ordem, linha, lat, long, velocidade, datahora) " .
            "VALUES (:ordem, :linha, :lat, :long, :velocidade, :data);");
        $checkSelect = $conexao->prepare("SELECT max(datahora) FROM posicoes_onibus WHERE onibus_ordem = :onibus_ordem;");

        // prepare() returns false on failure when PDO is not in exception mode.
        if (!$stmt || !$checkSelect) {
            print_r($conexao->errorInfo());
            exit(-1);
        }

        $total = count($json->DATA);
        echo date('H:i:s') . " - " . "Total de dados a ser processado: " . $total . "<br>\n";

        $it = 0;
        foreach ($json->DATA as $entrada) {
            // $entrada layout as used below:
            // [0] date/time, [1] bus id, [2] line, [3] lat, [4] long, [5] speed.
            $checkSelect->bindValue(':onibus_ordem', $entrada[1]);

            if ($checkSelect->execute()) {
                // Latest timestamp stored for this bus; false when there is none yet.
                $linhaBanco = $checkSelect->fetch(PDO::FETCH_ASSOC);
                $checkSelect->closeCursor();
                $ultimaNoBanco = ($linhaBanco && $linhaBanco['max'] !== null)
                    ? strtotime($linhaBanco['max'])
                    : false;

                // Timestamp of the incoming position. The feed uses m-d-Y, which
                // strtotime() does not read as month-day-year, so parse it explicitly.
                $dataEntrada = DateTime::createFromFormat('m-d-Y H:i:s', $entrada[0]);

                if ($dataEntrada === false) {
                    echo date('H:i:s') . " - " . "Skipping entry with unparsable date: " . $entrada[0] . "\n";
                } elseif ($ultimaNoBanco === false || $dataEntrada->getTimestamp() > $ultimaNoBanco) {
                    // Incoming position is newer than what is stored: insert it.
                    $stmt->bindValue(':data', $entrada[0]);
                    $stmt->bindValue(':ordem', $entrada[1]);
                    $stmt->bindValue(':linha', $entrada[2]);
                    $stmt->bindValue(':lat', $entrada[3]);
                    $stmt->bindValue(':long', $entrada[4]);
                    $stmt->bindValue(':velocidade', $entrada[5]);

                    if (!$stmt->execute()) {
                        print_r($stmt->errorInfo());
                    }
                }

                // Progress report every 100 processed entries.
                if ($it++ % 100 == 0) {
                    echo date('H:i:s') . " - " . "Processados " . ($it - 1) . "/" . $total . "<br>\n";
                }
            } else {
                print_r($checkSelect->errorInfo());
            }
        }

        // Release resources
        $stmt->closeCursor();
        $checkSelect->closeCursor();
        $stmt = null;
        $checkSelect = null;
        $conexao = null;
    }
}

echo date('H:i:s') . " - " . "FIM DO SCRIPT\n###\n";
?> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Bus positions collected by the crawler: one row per stored position report.
CREATE TABLE posicoes_onibus
(
  id serial NOT NULL,                   -- surrogate primary key
  onibus_ordem character varying(7),    -- bus identifier the crawler filters on
  linha character varying(7),           -- line the bus was reported on
  lat numeric,
  "long" numeric,                       -- quoted in the original definition
  velocidade numeric,                   -- reported speed (unit not stated here)
  datahora timestamp without time zone, -- date/time of the position report
  CONSTRAINT posicoes_onibus_pkey PRIMARY KEY (id)
);

-- The crawler runs "SELECT max(datahora) ... WHERE onibus_ordem = ?" once per
-- incoming position; this index lets that lookup avoid scanning the table.
CREATE INDEX posicoes_onibus_ordem_datahora_idx
  ON posicoes_onibus (onibus_ordem, datahora);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment