Created
October 14, 2015 19:31
-
-
Save justinlittman/0b3d76ca0465a9d914ed to your computer and use it in GitHub Desktop.
Recording Flickr API calls to WARC using warcprox
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2015-10-14 15:24:45,556 90037 INFO MainThread warcprox.dedup.DedupDb.__init__(dedup.py:25) creating new deduplication database ./warcprox-dedup.db | |
2015-10-14 15:24:45,563 90037 INFO MainThread warcprox.warcprox.WarcProxy.server_activate(warcprox.py:265) WarcProxy listening on 127.0.0.1:8000 | |
2015-10-14 15:24:45,564 90037 INFO MainThread warcprox.warcwriter.WarcWriter.__init__(warcwriter.py:50) warc destination directory ./warcs doesn't exist, creating it | |
2015-10-14 15:24:45,564 90037 INFO MainThread warcprox.controller.WarcproxController.run_until_shutdown(controller.py:58) SIGTERM will initiate graceful shutdown | |
2015-10-14 15:24:45,565 90037 INFO WarcWriterThread warcprox.warcwriter.WarcWriterThread.run(warcwriter.py:273) WarcWriterThread starting, directory=/Users/justinlittman/Data/sfm3/blog_examples/warcs gzip=False rollover_size=1000000000 rollover_idle_time=None prefix=WARCPROX port=8000 | |
2015-10-14 15:24:50,601 90037 INFO Thread-1 warcprox.warcprox.WarcProxyHandler.log_message(mitmproxy.py:140) WarcProxyHandler 1.0.0.127.in-addr.arpa - - [14/Oct/2015 15:24:50] "CONNECT api.flickr.com:443 HTTP/1.0" 200 - | |
/Users/justinlittman/Data/sfm3/blog_examples/ENV/lib/python2.7/site-packages/requests/packages/urllib3/connection.py:264: SubjectAltNameWarning: Certificate for api.flickr.com has no `subjectAltName`, falling back to check for a `commonName` for now. This feature is being removed by major browsers and deprecated by RFC 2818. (See https://github.com/shazow/urllib3/issues/497 for details.) | |
SubjectAltNameWarning | |
2015-10-14 15:24:50,983 90037 INFO Thread-1 warcprox.warcprox.WarcProxyHandler.log_message(mitmproxy.py:140) WarcProxyHandler 1.0.0.127.in-addr.arpa - - [14/Oct/2015 15:24:50] "POST /services/rest/?photo_id=16610484049&secret=ee80d9ecdc&nojsoncallback=1&method=flickr.photos.getInfo&format=json HTTP/1.1" 200 1171 | |
{"photo":{"id":"16610484049","secret":"ee80d9ecdc","server":"8751","farm":9,"dateuploaded":"1426191780","isfavorite":0,"license":"0","safety_level":"0","rotation":0,"originalsecret":"d205cb161b","originalformat":"jpg","owner":{"nsid":"131866249@N02","username":"justin.littman","realname":"Justin Littman","location":"","iconserver":"0","iconfarm":0,"path_alias":null},"title":{"_content":"IMG_7599"},"description":{"_content":""},"visibility":{"ispublic":1,"isfriend":0,"isfamily":0},"dates":{"posted":"1426191780","taken":"2013-03-29 15:20:10","takengranularity":"0","takenunknown":"0","lastupdate":"1426251618"},"views":"0","editability":{"cancomment":0,"canaddmeta":0},"publiceditability":{"cancomment":1,"canaddmeta":0},"usage":{"candownload":1,"canblog":0,"canprint":0,"canshare":1},"comments":{"_content":"0"},"notes":{"note":[]},"people":{"haspeople":0},"tags":{"tag":[]},"urls":{"url":[{"type":"photopage","_content":"https:\/\/www.flickr.com\/photos\/131866249@N02\/16610484049\/"}]},"media":"photo"},"stat":"ok"} | |
Shutting down warcprox | |
2015-10-14 15:24:50,995 90037 INFO WarcWriterThread warcprox.warcwriter.WarcWriterThread.run(warcwriter.py:280) recorded_url.warcprox_meta={} for https://api.flickr.com/services/rest/?photo_id=16610484049&secret=ee80d9ecdc&nojsoncallback=1&method=flickr.photos.getInfo&format=json | |
2015-10-14 15:24:50,997 90037 INFO WarcWriterThread warcprox.warcwriter.WarcWriterThread.run(warcwriter.py:298) WarcWriterThread shutting down | |
2015-10-14 15:24:50,997 90037 INFO WarcWriterThread warcprox.warcwriter.WarcWriter.close_writer(warcwriter.py:167) closing WARCPROX-20151014192450996-00000-90037-GLSS-F0G5RP-8000.warc | |
2015-10-14 15:24:51,426 90037 INFO MainThread warcprox.warcprox.WarcProxy.server_close(warcprox.py:268) WarcProxy shutting down | |
Done | |
Shutting down warcprox |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WARC/1.0 | |
WARC-Record-ID: <urn:uuid:18a965a6-438b-4af9-80b7-25fc61c7270b> | |
WARC-Type: warcinfo | |
WARC-Filename: WARCPROX-20151014192450996-00000-90037-GLSS-F0G5RP-8000.warc | |
WARC-Date: 2015-10-14T19:24:50Z | |
Content-Type: application/warc-fields | |
Content-Length: 92 | |
software: warcprox 1.4 | |
hostname: GLSS-F0G5RP | |
ip: 127.0.0.1 | |
format: WARC File Format 1.0 | |
WARC/1.0 | |
WARC-Type: response | |
WARC-Record-ID: <urn:uuid:40ca0384-39f8-4d12-9163-2e600d558447> | |
WARC-Date: 2015-10-14T19:24:50Z | |
WARC-Target-URI: https://api.flickr.com/services/rest/?photo_id=16610484049&secret=ee80d9ecdc&nojsoncallback=1&method=flickr.photos.getInfo&format=json | |
WARC-IP-Address: 98.138.81.72 | |
Content-Type: application/http;msgtype=response | |
Content-Length: 1171 | |
WARC-Block-Digest: sha1:7f36a82704bfa7e446468926fec1f3f75c18069c | |
WARC-Payload-Digest: sha1:61816dae6b8100826ed57733e398420011ad6e81 | |
HTTP/1.1 200 OK | |
Date: Wed, 14 Oct 2015 19:24:51 GMT | |
Content-Type: application/json | |
Content-Length: 542 | |
P3P: policyref="https://policies.yahoo.com/w3c/p3p.xml", CP="CAO DSP COR CUR ADM DEV TAI PSA PSD IVAi IVDi CONi TELo OTPi OUR DELi SAMi OTRi UNRi PUBi IND PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA POL HEA PRE LOC GOV" | |
Cache-Control: private | |
X-Served-By: bm-www930.flickr.bf1.yahoo.com | |
Vary: Accept-Encoding | |
Content-Encoding: gzip | |
Age: 2 | |
Via: http/1.1 fts107.flickr.bf1.yahoo.com (ApacheTrafficServer [cMsSf ]), http/1.1 r04.ycpi.ne1.yahoo.net (ApacheTrafficServer [cMsSf ]) | |
Server: ATS | |
Connection: keep-alive | |
}Sю?0????9????U???@??/??c[?B??? л???xf73;?;?hY}eJ????|?^?E?&,`?1????F6?'?WKA?=???ՄI??;mAb?֢(E%VkNE*??d???j>aZ5h??D?c?l5?P?????&WS_? | |
?Y?e??ر'????ľ?6?g?\^? ?hs?.?bQ}?? | |
?{??@?<??!*3?*?LRG?#?=s/????茥?k??!??a | |
ZA`?鵾MXTQc???&????v??*F???x??wU?=??v??^?ʂ?w%?EN?+42 | |
?<BB?]%?h=??s?P?!0???<#9?#H?a???0?z??>@?c?$闼?m?BΫ?j8??&A??Ç?4??c??տ?$?hݰ???!???Zz?????'!?\\J+?B?\O?U<??B??mf??y?? ?~J?6??6?w5??w3c???Qvp?H??o?=??_?7?7? | |
WARC/1.0 | |
WARC-Type: request | |
WARC-Record-ID: <urn:uuid:0d28721f-f641-4c6a-bb9f-2c302de419c4> | |
WARC-Date: 2015-10-14T19:24:50Z | |
WARC-Target-URI: https://api.flickr.com/services/rest/?photo_id=16610484049&secret=ee80d9ecdc&nojsoncallback=1&method=flickr.photos.getInfo&format=json | |
WARC-Concurrent-To: <urn:uuid:40ca0384-39f8-4d12-9163-2e600d558447> | |
WARC-Block-Digest: sha1:2d547a81539dd826c05c74f86ce8278c6e8b69c9 | |
Content-Type: application/http;msgtype=request | |
Content-Length: 509 | |
POST /services/rest/?photo_id=16610484049&secret=ee80d9ecdc&nojsoncallback=1&method=flickr.photos.getInfo&format=json HTTP/1.1 | |
content-length: 0 | |
accept-encoding: gzip, deflate | |
host: api.flickr.com | |
accept: */* | |
user-agent: python-requests/2.8.1 | |
authorization: OAuth oauth_nonce="95082556483776065821444850690", oauth_timestamp="1444850690", oauth_version="1.0", oauth_signature_method="HMAC-SHA1", oauth_consumer_key="abddfe6fb8bba36e8ef0278ec65dbbc8", oauth_signature="k69OTYLbqaZVTsXhWyVZ9qgBZzw%3D" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import flickrapi | |
import subprocess | |
import atexit | |
import sys | |
import os | |
from time import sleep | |
#Borrowed from https://github.com/ikreymer/pywb-webrecorder/blob/master/pywb-webrecorder.py | |
class SubProcess(object):
    """
    Track a subprocess launched from a command line.

    The process is started on construction and an ``atexit`` callback is
    registered so that it is terminated when the interpreter exits.
    ``cleanup`` may also be called explicitly; it is safe to call it more
    than once (only the first call does any work).
    """

    def __init__(self, cl):
        """
        Launch the subprocess.

        :param cl: the command line as a single string. It is split on
            whitespace (there is no quoting support); the first token is
            stored as ``self.name``.
        :raises ValueError: if ``cl`` contains no tokens.
        """
        # split() rather than split(' '): repeated, leading or trailing
        # spaces must not turn into empty argv entries.
        args = cl.split()
        if not args:
            raise ValueError("empty command line")
        self.name = args[0]
        # The child writes to the same stdout as this script.
        self.proc = subprocess.Popen(args, stdout=sys.stdout)
        atexit.register(self.cleanup)

    def cleanup(self):
        """
        Terminate the subprocess and wait for it to finish.

        Idempotent: the process handle is cleared on the first call, so a
        later call (for example the atexit callback firing after an
        explicit cleanup) neither prints again nor signals a process that
        has already been reaped.
        """
        proc = self.proc
        if proc is None:
            return
        # Clear the handle first so a re-entrant or repeated call is a no-op.
        self.proc = None
        # Two spaces are intentional: the original ``print a, b`` statement
        # emitted a separator after the literal's trailing space.
        print('Shutting down  %s' % self.name)
        try:
            proc.terminate()
            proc.wait()
        except Exception:
            # Best effort: fall back to a hard kill and never let shutdown
            # handling raise.
            try:
                proc.kill()
                proc.wait()
            except Exception:
                pass
def warced(func):
    """
    A decorator that creates and tears down an instance of warcprox
    around each call of the wrapped function.

    For every call, the wrapper launches ``warcprox -c warcprox-ca.pem``
    as a subprocess, sleeps five seconds, calls ``func`` with the given
    arguments and returns its result. The subprocess is cleaned up and
    "Done" is printed whether ``func`` returns or raises.

    :param func: the callable to run while warcprox is up.
    :returns: a wrapper with the same call signature as ``func``.
    """
    # Local import: functools is not among the file's module-level imports.
    import functools

    @functools.wraps(func)
    def inner(*args, **kwargs):
        # Launch outside the try block: if the launch itself fails there is
        # nothing to clean up, and the finally clause must not hit an
        # unbound name and mask the real error with a NameError.
        warcprox = SubProcess("warcprox -c warcprox-ca.pem")
        try:
            # Fixed delay after launch -- presumably to let warcprox start
            # listening before the first request is sent.
            sleep(5)
            return func(*args, **kwargs)
        finally:
            warcprox.cleanup()
            print("Done")
    return inner
@warced
def run():
    """
    Make one Flickr API call through the local proxy and print the result.

    FlickrApi uses requests, so requests is pointed at the proxy through
    environment variables. The variables are set in this process's
    environment and are not restored afterwards.
    """
    # requests documents that proxy URLs must include the scheme. The same
    # plain-HTTP proxy endpoint is used for both http and https traffic.
    proxy_url = "http://localhost:8000"
    os.environ["HTTP_PROXY"] = proxy_url
    os.environ["HTTPS_PROXY"] = proxy_url
    # CA bundle file expected next to this script; it has the same file
    # name that is passed to warcprox with -c.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    os.environ["REQUESTS_CA_BUNDLE"] = os.path.join(script_dir, "warcprox-ca.pem")

    # Construct an API
    api = flickrapi.FlickrAPI("<YOUR KEY HERE>", "<YOUR SECRET HERE>", store_token=False)

    # Call the API. A single parenthesized argument prints identically
    # under Python 2 and Python 3.
    print(api.photos.getInfo(photo_id="16610484049", secret="ee80d9ecdc", format='json'))


run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment