Created
February 12, 2017 06:55
-
-
Save AngshumanGhosh/58bc64b2d28354893712f576781bf7f8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import requests,re,json | |
# Path of the file that main() writes, one JSON document per line.
output_file = 'foodnetwork_data'

# Regex capturing the href of the anchor that immediately follows the
# "m-PromoList__a-ListItem" class marker in an index page's HTML.
recipe_link_pattern = 'm-PromoList__a-ListItem\"><a href="(.*?)">'
rlp = re.compile(recipe_link_pattern)

# Empty placeholder; not referenced by any of the visible code.
pagination_link_pattern = ''

# Base URL of the A-Z recipe index; a section name from `pages` and
# "/p/<page number>" are appended to it when crawling.
index_link = "http://www.foodnetwork.com/recipes/a-z/"

# Index sections to crawl: "123", the single letters a-w, and "xyz".
pages = "123 a b c d e f g h i j k l m n o p q r s t u v w xyz".split()
def get_page(url, timeout=30):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. It is
            forwarded to ``requests.get``; without it a stalled connection
            would block the whole crawl indefinitely.

    Returns:
        The decoded response body (``response.text``). The HTTP status code
        is not checked, so the body of an error response is returned as-is.

    Raises:
        requests.exceptions.RequestException: If the request fails, for
            example because the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    return response.text
def get_recipe_links(page):
    """Return every recipe link found in the HTML text *page*.

    The module-level compiled pattern ``rlp`` is applied to the text and the
    content of its single capture group (the href value) is collected for
    each match, in document order. The list is empty when nothing matches.
    """
    hrefs = []
    for match in rlp.finditer(page):
        hrefs.append(match.group(1))
    return hrefs
def get_all_recipe_links():
    """Crawl every section of the A-Z index and collect all recipe links.

    For each entry of the module-level ``pages`` list, index pages are
    requested at ``index_link + section + "/p/" + N`` for N = 1, 2, 3, ...
    until a page yields no recipe links; the crawl then moves on to the next
    section. One progress line is printed per page that produced links.

    Returns:
        A list with the links of all pages, in crawl order. Duplicates are
        not removed.
    """
    links = []
    for starting_letters in pages:
        page_number = 1
        while True:
            link = index_link + starting_letters + "/p/" + str(page_number)
            links_here = get_recipe_links(get_page(link))
            if not links_here:
                # A page without links marks the end of this section.
                break
            links.extend(links_here)
            # Joining into one string and printing a single argument yields
            # exactly what the Python 2 print statement emitted, while also
            # being valid syntax on Python 3.
            print(" ".join([
                "Starting Letter: ",
                starting_letters,
                ", Page Number: ",
                str(page_number),
            ]))
            page_number += 1
    return links
def get_page_data(url):
    """Download a recipe page and return its embedded JSON-LD data.

    The page is searched for the first
    ``<script type="application/ld+json">`` element, and the text between
    that opening tag and the next ``</script>`` is parsed as JSON.

    Args:
        url: Address of the recipe page.

    Returns:
        The object produced by ``json.loads`` for the script's content.

    Raises:
        ValueError: If the page has no complete JSON-LD script element, or
            if its content is not valid JSON.
    """
    page = get_page(url)
    opening_tag = '<script type="application/ld+json">'
    tag_at = page.find(opening_tag)
    if tag_at == -1:
        # Without this check the -1 from find() would silently turn into a
        # bogus start offset.
        raise ValueError("no JSON-LD script block found at %s" % url)
    start = tag_at + len(opening_tag)
    # Offset trace kept from the original; the single-argument call form
    # prints the same text on Python 2 and Python 3.
    print(start)
    # Search from `start` so that `end` is an absolute index into `page`.
    # The original searched page[start:] and then used the relative result
    # as an absolute slice bound, producing an empty or truncated slice.
    end = page.find("</script>", start)
    if end == -1:
        raise ValueError("unterminated JSON-LD script block at %s" % url)
    return json.loads(page[start:end])
def main():
    """Scrape every recipe and write the results to ``output_file``.

    All recipe links are collected first; each recipe page is then fetched
    in turn and its data is written as one JSON document per line.
    """
    recipe_urls = get_all_recipe_links()
    with open(output_file, "w") as sink:
        for recipe_url in recipe_urls:
            record = get_page_data(recipe_url)
            serialized = json.dumps(record)
            sink.write(serialized + '\n')


# Run the scraper only when the file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi Madam, Sir
The objective of the development ,the state beninois offers an opportunity for investors to investire in benin on the occasion of the construction of the road fishing and instalation of industries on near 13870km an investment demand, which had expired by the 31/08/2017
our agency selling the stamps for the deposit of your file at the office of the government beninois is available at the following address: armel1536@gmail.com