Skip to content

Instantly share code, notes, and snippets.

@karrikas
Created October 1, 2015 14:52
Show Gist options
  • Save karrikas/011a7a108d86dd026715 to your computer and use it in GitHub Desktop.
Save karrikas/011a7a108d86dd026715 to your computer and use it in GitHub Desktop.
Find all URLs across an entire website
<?php
/**
 * Recursive same-site link collector.
 *
 * Starting from one page, fetches its HTML, extracts every <a href="...">
 * target, keeps the ones that belong to the configured domain, counts how
 * often each one is linked, and then crawls each of them in turn.
 *
 * Known limitations:
 *  - Relative links are resolved against the domain root, not against the
 *    directory of the page they were found on.
 *  - Crawling is recursive with no depth limit.
 */
class urlfinder {
    /**
     * Collected in-scope links, in discovery order.
     * Each entry is array('url' => string, 'count' => int).
     */
    public $urls = array();

    /** List of URLs already passed to get(); used to avoid refetching. */
    public $visited = array();

    /** Base URL (scheme + host, e.g. "http://example.com") limiting the crawl. */
    public $domain;

    /**
     * Crawl $url and, recursively, every in-scope link found on it.
     *
     * @param string $url    Page to fetch (anything file_get_contents() accepts).
     * @param string $domain Base URL of the site to stay within. When omitted on
     *                       the first call it is derived from $url if $url is an
     *                       http(s) URL; later (recursive) calls reuse the stored one.
     * @return array The accumulated list of array('url' => ..., 'count' => ...)
     *               entries. Always an array, even when nothing was found.
     */
    public function get ($url, $domain = '') {
        if (in_array($url, $this->visited, true)) {
            return $this->urls;
        }
        $this->visited[] = $url;

        if (!empty($domain)) {
            $this->domain = $domain;
        } elseif (empty($this->domain)) {
            // Without a domain the scope check would accept every http(s) URL
            // and the crawl would wander off-site, so try to infer one.
            $guessed = $this->guessDomain($url);
            if ($guessed !== '') {
                $this->domain = $guessed;
            }
        }

        // Normalise locally (the public property is left as the caller set it)
        // so "http://host" and "http://host/" behave the same.
        $base = rtrim((string) $this->domain, '/');
        if ($base === '') {
            // No scope to crawl within; nothing can be followed safely.
            return $this->urls;
        }

        $doc = file_get_contents($url);
        if ($doc === false || $doc === '') {
            return $this->urls;
        }

        // Group 1 is the opening quote, group 2 the href value; \1 forces the
        // closing quote to match the opening one so single- and double-quoted
        // attributes are both captured correctly.
        if (!preg_match_all('/<a\s[^>]*?href\s*=\s*(["\'])(.*?)\1/i', $doc, $res)) {
            return $this->urls;
        }

        $scheme = preg_match('/^(https?):\/\//i', $base, $m) ? $m[1] : 'http';
        $host = preg_replace('/^https?:\/\//i', '', $base);
        // The lookahead stops "example.com" from matching "example.com.evil.org".
        $scope = '/^https?:\/\/' . preg_quote($host, '/') . '(?=[\/?#]|$)/i';

        foreach ($res[2] as $href) {
            // Attribute values are HTML-encoded (e.g. "&amp;" in query strings).
            $href = trim(html_entity_decode($href, ENT_QUOTES, 'UTF-8'));

            if ($href === '' || $href[0] === '#') {
                // Empty or same-page anchor: not a distinct page.
                continue;
            }

            if (strncmp($href, '//', 2) === 0) {
                // Protocol-relative link: borrow the site's scheme.
                $href = $scheme . ':' . $href;
            } elseif (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $href)) {
                // Has an explicit scheme; only http(s) pages can be crawled
                // (skips mailto:, javascript:, tel:, ...).
                if (!preg_match('/^https?:\/\//i', $href)) {
                    continue;
                }
            } else {
                // Relative link: attach to the site root with exactly one slash.
                $href = $base . ($href[0] === '/' ? '' : '/') . $href;
            }

            if (preg_match($scope, $href)) {
                $this->addUrl($href);
                $this->get($href);
            }
        }

        return $this->urls;
    }

    /**
     * Derive "scheme://host[:port]" from an http(s) URL.
     *
     * @param string $url
     * @return string The base URL, or '' when $url is not an absolute http(s) URL.
     */
    private function guessDomain($url) {
        $parts = parse_url($url);
        if ($parts === false || empty($parts['scheme']) || empty($parts['host'])) {
            return '';
        }
        if (!preg_match('/^https?$/i', $parts['scheme'])) {
            return '';
        }
        $domain = $parts['scheme'] . '://' . $parts['host'];
        if (isset($parts['port'])) {
            $domain .= ':' . $parts['port'];
        }
        return $domain;
    }

    /**
     * Record one occurrence of $url: bump its counter if it is already
     * listed, otherwise append it with a count of 1.
     *
     * @param string $url
     * @return void
     */
    private function addUrl($url) {
        foreach ($this->urls as $id => $entry) {
            if ($entry['url'] === $url) {
                $this->urls[$id]['count']++;
                return;
            }
        }
        $this->urls[] = array(
            'url' => $url,
            'count' => 1,
        );
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment