Skip to content

Instantly share code, notes, and snippets.

@abrahaj
Last active August 29, 2015 14:04
Show Gist options
  • Save abrahaj/981f2f8793c0c4027a6c to your computer and use it in GitHub Desktop.
Save abrahaj/981f2f8793c0c4027a6c to your computer and use it in GitHub Desktop.
phpDom
<?php
// The library is https://code.google.com/p/phpquery/
ini_set("display_errors", 1);
require 'phpQuery.php';

// For any database connection
require_once '../v1/sysconfig.php';

// The URL can also be passed in as a parameter
$url = "http://www.ata.gov.al/ekon-leku-i-qendrueshem-kundrejt-valutave-kryesore-59843.html";
$rssUrl = "http://www.ata.gov.al/rss";

// print_r(retrieveArticleLinks($rssUrl));
// Scrape the single article above and dump the extracted fields
print_r(fetchData($url));
/**
 * Fetch an article page and collect its fields into an array.
 *
 * The page is downloaded with connect(), parsed with phpQuery, and each
 * field is read from the document's <article> selection.
 *
 * @param string $url Address of the article page to scrape.
 * @return array Map with the keys "title", "author", "date", "body",
 *               "image", "category" and "url" ("url" echoes the argument).
 */
function fetchData($url) {
    $dom = phpQuery::newDocumentHTML ( connect ( $url ) );

    // Query the <article> selection once and reuse it for every field
    // instead of re-running the same selector on the whole document.
    $articleDom = $dom->find ( "article" );

    $article = array ();
    $article ["title"] = $articleDom->find ( "h1" )->html ();
    $article ["author"] = $articleDom->find ( "span" )->find ( "a" )->html ();
    $article ["date"] = $articleDom->find ( "time" )->html ();
    $article ["body"] = $articleDom->find ( "div.entry-content" )->html ();
    $article ["image"] = $articleDom->find ( "img" )->attr ( "src" );
    // NOTE(review): this selector runs against the whole document and reads a
    // "content" attribute, so it was presumably meant to target a meta tag
    // such as <meta property="article:tag">; as written it parses as an
    // <article> element with a ":tag" pseudo-class -- confirm it matches.
    $article ["category"] = $dom->find ( "article:tag" )->attr ( "content" );
    $article ["url"] = $url;

    return $article;
}
/**
 * Request a URL with cURL and return whatever the server sent back.
 *
 * Redirects are followed, compressed responses are accepted, and the
 * transfer is limited to 290 seconds.
 *
 * @param string $url Address to request.
 * @return string|false The response body, or false when curl_exec() fails.
 */
function connect($url) {
    $curl = curl_init ();

    // Optional request headers, modelled on the ones Firefox 2.0.0.6 sent.
    $header = array (
        "Accept: text/xml,application/xml,application/xhtml+xml,"
            . "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
        "Cache-Control: max-age=0",
        "Connection: keep-alive",
        "Keep-Alive: 900",
        "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7",
        "Accept-Language: en-us,en;q=0.5",
        "Pragma: " // browsers keep this blank.
    );

    curl_setopt ( $curl, CURLOPT_URL, $url );
    curl_setopt ( $curl, CURLOPT_USERAGENT, 'Quareos/2.1 (+http://agent.quareos.com/)' );
    curl_setopt ( $curl, CURLOPT_COOKIEFILE, '/var/www/vhosts/infoarkiva.com/httpdocs/v1/config/korriericookie.txt' );
    curl_setopt ( $curl, CURLOPT_HTTPHEADER, $header );
    curl_setopt ( $curl, CURLOPT_REFERER, 'http://www.quareos.com' );
    curl_setopt ( $curl, CURLOPT_ENCODING, 'gzip,deflate' );
    curl_setopt ( $curl, CURLOPT_AUTOREFERER, true );
    curl_setopt ( $curl, CURLOPT_FOLLOWLOCATION, true );
    // Return the body from curl_exec() instead of printing it.
    curl_setopt ( $curl, CURLOPT_RETURNTRANSFER, 1 );
    curl_setopt ( $curl, CURLOPT_TIMEOUT, 290 );

    $html = curl_exec ( $curl );

    // For debugging, curl_getinfo($curl) exposes the response metadata
    // (e.g. "http_code"); call it here, before the handle is released.

    // Release the handle so repeated calls do not leak connections. From
    // PHP 8.0 the handle is an object that is freed automatically and
    // curl_close() no longer has any effect, so it is only called on older
    // runtimes.
    if (PHP_VERSION_ID < 80000) {
        curl_close ( $curl );
    }

    return $html;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment