Last active
October 26, 2016 10:37
-
-
Save shyal/19fb6661f9cf02682eb2d05efb681f67 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import requests | |
from argparse import ArgumentParser | |
import pickle | |
# Number of projects requested from the API in one call; get_urls()
# substitutes it into the ``limit`` query parameter.
limit = 50
# getting urls and dumping them into file
def get_urls(timeout=10):
    """Download one page of project listings and pickle their API URLs.

    Requests ``limit`` projects (module-level constant) from the
    readthedocs.org v1 project API, prefixes each entry's ``resource_uri``
    with the host name and writes the resulting list to ``links.p`` in the
    current working directory.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx HTTP status.
    """
    sites = requests.get(
        "http://readthedocs.org/api/v1/project/?limit=%s&offset=0&format=json" % limit,
        timeout=timeout,
    )
    # Fail loudly on an HTTP error rather than with a confusing JSON decode
    # error or KeyError on the lines below.
    sites.raise_for_status()
    objects = sites.json()['objects']
    links = ["http://readthedocs.org" + x['resource_uri'] for x in objects]
    with open("links.p", "wb") as outfile:
        pickle.dump(links, outfile)
def fetch_links(timeout=10):
    """Fetch every URL stored in ``links.p`` and report status and timing.

    Loads the pickled URL list from ``links.p``, requests each URL in turn,
    prints its HTTP status code and the first 50 characters of the response
    body, then prints the elapsed time in seconds.

    Args:
        timeout: Seconds to wait for each response before giving up. Without
            a timeout a single stalled server would hang the whole run.

    Raises:
        requests.RequestException: On a connection error or timeout for any
            URL; the remaining URLs are then not fetched.
    """
    import time

    # SECURITY: unpickling can execute arbitrary code. Only load a links.p
    # that comes from a trusted source (e.g. written by get_urls()).
    with open("links.p", "rb") as infile:
        links = pickle.load(infile)

    # perf_counter is monotonic, so the measurement is unaffected by system
    # clock adjustments during the run.
    start = time.perf_counter()
    for link in links:
        response = requests.get(link, timeout=timeout)
        print("url: %s, status code: %s" % (link, response.status_code))
        print("url: %s, content: %s" % (link, response.text[0:50]))
    # NOTE(review): assumed to be the total for the whole batch, printed once
    # after the loop -- confirm this was not meant as a running per-URL total.
    print(time.perf_counter() - start)
# main function
def main():
    """Parse the command line and run the selected mode.

    With ``--urls`` the project URLs are downloaded and saved via
    ``get_urls()``; without it the previously saved URLs are fetched via
    ``fetch_links()``.
    """
    parser = ArgumentParser(description="Perform proxy testing/URL list creation")
    # nargs="?" with const=True lets --urls be used as a bare switch, as the
    # help text implies. A plain add_argument("--urls") demands a value and
    # rejects a bare "--urls" with "expected one argument". The older
    # "--urls VALUE" form is still accepted.
    parser.add_argument(
        "--urls",
        nargs="?",
        const=True,
        default=None,
        help="download and save urls ",
    )
    args = parser.parse_args()
    # get urls
    if args.urls:
        get_urls()
    else:
        fetch_links()
# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment