Skip to content

Instantly share code, notes, and snippets.

@fivefilters
Last active August 29, 2015 14:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fivefilters/0a758b6d64ce4fb5728c to your computer and use it in GitHub Desktop.
Save fivefilters/0a758b6d64ce4fb5728c to your computer and use it in GitHub Desktop.
---
libraries/humble-http-agent/CookieJar.php | 2 +-
libraries/humble-http-agent/HumbleHttpAgent.php | 47 +++++++++++++++++--------
2 files changed, 33 insertions(+), 16 deletions(-)
diff --git a/libraries/humble-http-agent/CookieJar.php b/libraries/humble-http-agent/CookieJar.php
index ac346b5..350b706 100644
--- a/libraries/humble-http-agent/CookieJar.php
+++ b/libraries/humble-http-agent/CookieJar.php
@@ -229,7 +229,7 @@ class CookieJar
}
// return array of set-cookie values extracted from HTTP response headers (string $h)
- public function extractCookies($h) {
+ public static function extractCookies($h) {
$x = 0;
$lines = 0;
$headers = array();
diff --git a/libraries/humble-http-agent/HumbleHttpAgent.php b/libraries/humble-http-agent/HumbleHttpAgent.php
index 11b30b5..f972891 100644
--- a/libraries/humble-http-agent/HumbleHttpAgent.php
+++ b/libraries/humble-http-agent/HumbleHttpAgent.php
@@ -34,7 +34,7 @@ class HumbleHttpAgent
protected $curlOptions;
protected $minimiseMemoryUse = false; //TODO
protected $method;
- protected $cookieJar;
+ protected $cookieJar = array();
public $debug = false;
public $debugVerbose = false;
public $rewriteHashbangFragment = true; // see http://code.google.com/web/ajaxcrawling/docs/specification.html
@@ -79,7 +79,6 @@ class HumbleHttpAgent
require_once(dirname(__FILE__).'/RollingCurl.php');
}
// create cookie jar
- $this->cookieJar = new CookieJar();
// set request options (redirect must be 0)
// HTTP PECL (http://php.net/manual/en/http.request.options.php)
$this->requestOptions = array(
@@ -284,6 +283,7 @@ class HumbleHttpAgent
$this->debug("Following redirects #$redirects...");
$this->fetchAllOnce($this->redirectQueue, $isRedirect=true);
}
+ $this->deleteCookies();
}
// fetch all URLs without following redirects
@@ -326,7 +326,7 @@ class HumbleHttpAgent
}
$httpRequest = new HttpRequest($req_url, $_meth, $this->requestOptions);
// send cookies, if we have any
- if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
+ if ($cookies = $this->getCookies($orig, $req_url)) {
$this->debug("......sending cookies: $cookies");
$httpRequest->addHeaders(array('Cookie' => $cookies));
}
@@ -374,10 +374,7 @@ class HumbleHttpAgent
}
if ($this->validateURL($redirectURL)) {
$this->debug('Redirect detected. Valid URL: '.$redirectURL);
- // store any cookies
- $cookies = $request->getResponseHeader('set-cookie');
- if ($cookies && !is_array($cookies)) $cookies = array($cookies);
- if ($cookies) $this->cookieJar->storeCookies($url, $cookies);
+ $this->storeCookies($orig, $url);
$this->redirectQueue[$orig] = $redirectURL;
} else {
$this->debug('Redirect detected. Invalid URL: '.$redirectURL);
@@ -459,7 +456,7 @@ class HumbleHttpAgent
// add referer for picky sites
$headers[] = 'Referer: '.$this->referer;
// send cookies, if we have any
- if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
+ if ($cookies = $this->getCookies($orig, $req_url)) {
$this->debug("......sending cookies: $cookies");
$headers[] = 'Cookie: '.$cookies;
}
@@ -496,9 +493,7 @@ class HumbleHttpAgent
}
if ($this->validateURL($redirectURL)) {
$this->debug('Redirect detected. Valid URL: '.$redirectURL);
- // store any cookies
- $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
- if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);
+ $this->storeCookies($orig, $url);
$this->redirectQueue[$orig] = $redirectURL;
} else {
$this->debug('Redirect detected. Invalid URL: '.$redirectURL);
@@ -557,7 +552,7 @@ class HumbleHttpAgent
$httpContext['http']['header'] .= $this->getUserAgent($req_url)."\r\n";
// add referer for picky sites
$httpContext['http']['header'] .= 'Referer: '.$this->referer."\r\n";
- if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
+ if ($cookies = $this->getCookies($orig, $req_url)) {
$this->debug("......sending cookies: $cookies");
$httpContext['http']['header'] .= 'Cookie: '.$cookies."\r\n";
}
@@ -589,9 +584,7 @@ class HumbleHttpAgent
}
if ($this->validateURL($redirectURL)) {
$this->debug('Redirect detected. Valid URL: '.$redirectURL);
- // store any cookies
- $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
- if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);
+ $this->storeCookies($orig, $url);
$this->redirectQueue[$orig] = $redirectURL;
} else {
$this->debug('Redirect detected. Invalid URL: '.$redirectURL);
@@ -709,6 +702,30 @@ class HumbleHttpAgent
}
return false;
}
+
+ // Return the matching cookies to send with a request to $req_url, taken from
+ // the CookieJar stored under $orig, or null when no jar exists for $orig.
+ // Callers place the returned string in the request's Cookie header.
+ protected function getCookies($orig, $req_url) {
+     // Test the array element itself: copying $this->cookieJar[$orig] into a
+     // local first raises an undefined-index notice whenever no jar has been
+     // created for $orig yet (i.e. before any cookie was stored for it).
+     if (!isset($this->cookieJar[$orig])) {
+         return null;
+     }
+     return $this->cookieJar[$orig]->getMatchingCookies($req_url);
+ }
+
+ // Extract the set-cookie values from the response headers recorded for $orig
+ // and store them, against $url, in the CookieJar kept under $orig. The jar is
+ // created on first use; nothing is stored when no cookies are found.
+ protected function storeCookies($orig, $url) {
+     // No headers recorded for this request: nothing to extract. Checking first
+     // avoids an undefined-index notice and passing null to extractCookies().
+     // NOTE(review): the PECL HTTP fetch path previously read set-cookie from
+     // the response object - confirm it populates requests[$orig]['headers'].
+     if (!isset($this->requests[$orig]['headers'])) {
+         return;
+     }
+     $cookies = CookieJar::extractCookies($this->requests[$orig]['headers']);
+     if (empty($cookies)) {
+         return;
+     }
+     if (!isset($this->cookieJar[$orig])) {
+         $this->cookieJar[$orig] = new CookieJar();
+     }
+     $this->cookieJar[$orig]->storeCookies($url, $cookies);
+ }
+
+ // Discard every stored cookie jar by resetting $this->cookieJar to an
+ // empty array.
+ protected function deleteCookies() {
+     $this->cookieJar = array();
+ }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment