Last active
August 29, 2015 14:12
-
-
Save fivefilters/0a758b6d64ce4fb5728c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
libraries/humble-http-agent/CookieJar.php | 2 +- | |
libraries/humble-http-agent/HumbleHttpAgent.php | 47 +++++++++++++++++-------- | |
2 files changed, 33 insertions(+), 16 deletions(-) | |
diff --git a/libraries/humble-http-agent/CookieJar.php b/libraries/humble-http-agent/CookieJar.php | |
index ac346b5..350b706 100644 | |
--- a/libraries/humble-http-agent/CookieJar.php | |
+++ b/libraries/humble-http-agent/CookieJar.php | |
@@ -229,7 +229,7 @@ class CookieJar | |
} | |
// return array of set-cookie values extracted from HTTP response headers (string $h) | |
- public function extractCookies($h) { | |
+ public static function extractCookies($h) { | |
$x = 0; | |
$lines = 0; | |
$headers = array(); | |
diff --git a/libraries/humble-http-agent/HumbleHttpAgent.php b/libraries/humble-http-agent/HumbleHttpAgent.php | |
index 11b30b5..f972891 100644 | |
--- a/libraries/humble-http-agent/HumbleHttpAgent.php | |
+++ b/libraries/humble-http-agent/HumbleHttpAgent.php | |
@@ -34,7 +34,7 @@ class HumbleHttpAgent | |
protected $curlOptions; | |
protected $minimiseMemoryUse = false; //TODO | |
protected $method; | |
- protected $cookieJar; | |
+ protected $cookieJar = array(); | |
public $debug = false; | |
public $debugVerbose = false; | |
public $rewriteHashbangFragment = true; // see http://code.google.com/web/ajaxcrawling/docs/specification.html | |
@@ -79,7 +79,6 @@ class HumbleHttpAgent | |
require_once(dirname(__FILE__).'/RollingCurl.php'); | |
} | |
// create cookie jar | |
- $this->cookieJar = new CookieJar(); | |
// set request options (redirect must be 0) | |
// HTTP PECL (http://php.net/manual/en/http.request.options.php) | |
$this->requestOptions = array( | |
@@ -284,6 +283,7 @@ class HumbleHttpAgent | |
$this->debug("Following redirects #$redirects..."); | |
$this->fetchAllOnce($this->redirectQueue, $isRedirect=true); | |
} | |
+ $this->deleteCookies(); | |
} | |
// fetch all URLs without following redirects | |
@@ -326,7 +326,7 @@ class HumbleHttpAgent | |
} | |
$httpRequest = new HttpRequest($req_url, $_meth, $this->requestOptions); | |
// send cookies, if we have any | |
- if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) { | |
+ if ($cookies = $this->getCookies($orig, $req_url)) { | |
$this->debug("......sending cookies: $cookies"); | |
$httpRequest->addHeaders(array('Cookie' => $cookies)); | |
} | |
@@ -374,10 +374,7 @@ class HumbleHttpAgent | |
} | |
if ($this->validateURL($redirectURL)) { | |
$this->debug('Redirect detected. Valid URL: '.$redirectURL); | |
- // store any cookies | |
- $cookies = $request->getResponseHeader('set-cookie'); | |
- if ($cookies && !is_array($cookies)) $cookies = array($cookies); | |
- if ($cookies) $this->cookieJar->storeCookies($url, $cookies); | |
+ $this->storeCookies($orig, $url); | |
$this->redirectQueue[$orig] = $redirectURL; | |
} else { | |
$this->debug('Redirect detected. Invalid URL: '.$redirectURL); | |
@@ -459,7 +456,7 @@ class HumbleHttpAgent | |
// add referer for picky sites | |
$headers[] = 'Referer: '.$this->referer; | |
// send cookies, if we have any | |
- if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) { | |
+ if ($cookies = $this->getCookies($orig, $req_url)) { | |
$this->debug("......sending cookies: $cookies"); | |
$headers[] = 'Cookie: '.$cookies; | |
} | |
@@ -496,9 +493,7 @@ class HumbleHttpAgent | |
} | |
if ($this->validateURL($redirectURL)) { | |
$this->debug('Redirect detected. Valid URL: '.$redirectURL); | |
- // store any cookies | |
- $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']); | |
- if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies); | |
+ $this->storeCookies($orig, $url); | |
$this->redirectQueue[$orig] = $redirectURL; | |
} else { | |
$this->debug('Redirect detected. Invalid URL: '.$redirectURL); | |
@@ -557,7 +552,7 @@ class HumbleHttpAgent | |
$httpContext['http']['header'] .= $this->getUserAgent($req_url)."\r\n"; | |
// add referer for picky sites | |
$httpContext['http']['header'] .= 'Referer: '.$this->referer."\r\n"; | |
- if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) { | |
+ if ($cookies = $this->getCookies($orig, $req_url)) { | |
$this->debug("......sending cookies: $cookies"); | |
$httpContext['http']['header'] .= 'Cookie: '.$cookies."\r\n"; | |
} | |
@@ -589,9 +584,7 @@ class HumbleHttpAgent | |
} | |
if ($this->validateURL($redirectURL)) { | |
$this->debug('Redirect detected. Valid URL: '.$redirectURL); | |
- // store any cookies | |
- $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']); | |
- if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies); | |
+ $this->storeCookies($orig, $url); | |
$this->redirectQueue[$orig] = $redirectURL; | |
} else { | |
$this->debug('Redirect detected. Invalid URL: '.$redirectURL); | |
@@ -709,6 +702,30 @@ class HumbleHttpAgent | |
} | |
return false; | |
} | |
+ | |
+ // Return the cookies to send with a request to $req_url, taken only from the | |
+ // jar stored under $orig (presumably the originally requested URL that the | |
+ // current request chain belongs to - confirm against callers). | |
+ // Returns null when no jar has been created for $orig yet; otherwise returns | |
+ // whatever CookieJar::getMatchingCookies() yields for $req_url. | |
+ protected function getCookies($orig, $req_url) { | |
+     // Test the array element itself with isset(): copying a missing key into | |
+     // a local first raises an "undefined index" notice (a warning on PHP 8) | |
+     // on every request that has no jar yet. | |
+     if (!isset($this->cookieJar[$orig])) { | |
+         return null; | |
+     } | |
+     return $this->cookieJar[$orig]->getMatchingCookies($req_url); | |
+ } | |
+ | |
+ // Extract any set-cookie values from the response headers recorded for $orig | |
+ // and store them, against $url, in the jar kept under $orig. The jar is | |
+ // created lazily, so requests that never receive cookies never get one. | |
+ // NOTE(review): assumes $this->requests[$orig]['headers'] is already populated | |
+ // whenever this is called - confirm for the HttpRequest (PECL) code path. | |
+ protected function storeCookies($orig, $url) { | |
+     $setCookies = CookieJar::extractCookies($this->requests[$orig]['headers']); | |
+     if (!empty($setCookies)) { | |
+         // first cookie seen for this $orig: give it its own jar | |
+         if (!isset($this->cookieJar[$orig])) { | |
+             $this->cookieJar[$orig] = new CookieJar(); | |
+         } | |
+         $this->cookieJar[$orig]->storeCookies($url, $setCookies); | |
+     } | |
+ } | |
+ | |
+ // Discard every cookie jar by resetting $this->cookieJar to an empty array, | |
+ // so no cookies collected so far are available to later getCookies() calls. | |
+ protected function deleteCookies() { | |
+     $this->cookieJar = array(); | |
+ } | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment