Skip to content

Instantly share code, notes, and snippets.

Created July 9, 2017 11:58
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save anonymous/39e07cb64427c849ce6a41b1999a8d9c to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import feedparser
import requests
from bs4 import BeautifulSoup
from subprocess import call
def get_urls(urlgot):
    """Fetch *urlgot*, pull every hyperlink out of its HTML, and run the
    external ``image-scraper`` command on each link found.

    Parameters
    ----------
    urlgot : str
        URL of the page to fetch and scan for ``<a href=...>`` links.

    Side effects: spawns one ``image-scraper`` subprocess per link and
    prints each link. Returns ``None``.
    """
    response = requests.get(urlgot)
    # Walk the parsed tree instead of string-searching str(soup) by hand.
    # The original raw-text scan broke when find() returned -1 (it could
    # grab a quote *before* the match, yielding garbage and risking an
    # infinite loop because the page slice never advanced past the anchor)
    # and it missed anchors not written literally as 'a href'
    # (e.g. <a class="x" href="...">).
    soup = BeautifulSoup(response.content, 'lxml')
    for anchor in soup.find_all("a", href=True):
        link = anchor["href"]
        if link:  # skip empty href="" attributes, as the original's truthiness check did
            call(["image-scraper", link])
            print(link)
# Feed URLs to process. Empty in the published gist (it was anonymized),
# so the loop below is a no-op until RSS/Atom feed URLs are added here.
urls = []

for urlx in urls:
    # feedparser.parse returns a parsed feed object, not a URL —
    # name it accordingly (the original rebound the name 'url').
    feed = feedparser.parse(urlx)
    # enumerate replaces the hand-rolled 'mm = 0; mm = mm + 1' counter;
    # numbering starts at 0, matching the original output.
    for mm, entry in enumerate(feed.entries):
        print("current entry is" + str(mm))
        print(str(entry.link))
        # Scrape images from every page the feed entry links to.
        get_urls(str(entry.link))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment