Skip to content

Instantly share code, notes, and snippets.

@szabadkai
Created March 3, 2014 10:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save szabadkai/9322238 to your computer and use it in GitHub Desktop.
Save szabadkai/9322238 to your computer and use it in GitHub Desktop.
import re
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# Captures the href of the anchor whose link text begins with "View Sequence".
p = re.compile(r'a href="(.+?)">View Sequence')
# Captures the text between "pre>" and the next "<".
# MULTILINE/DOTALL are kept from the original; the pattern contains no ".",
# "^" or "$", so they do not affect matching.
p2 = re.compile(r"pre>([^<]+)<", re.MULTILINE | re.DOTALL)

# Prefix prepended to the href captured by `p`.
BASE_URL = "http://www.jcvi.org/"


def sequence_url(page):
    """Return the absolute URL of the first "View Sequence" link in *page*.

    Args:
        page: HTML source of a listing page, as a string.

    Returns:
        ``BASE_URL`` concatenated with the captured href.

    Raises:
        IndexError: if *page* contains no "View Sequence" link.
    """
    match = p.search(page)
    if match is None:
        # Same exception type the original `findall(...)[0]` raised, but
        # with a message that says what was missing.
        raise IndexError("no 'View Sequence' link found in page")
    return BASE_URL + match.group(1)


def extract_sequence(page):
    """Return the text of the first ``<pre>`` element in *page*.

    ``&gt;`` entities are replaced with ``>``; no other entity is decoded.

    Args:
        page: HTML source of a sequence page, as a string.

    Raises:
        IndexError: if *page* contains no matching ``pre>...<`` span.
    """
    match = p2.search(page)
    if match is None:
        raise IndexError("no <pre> block found in page")
    return match.group(1).replace('&gt;', '>')


def fetch_page_source(url):
    """Load *url* in a fresh Firefox instance and return its page source.

    The browser is shut down even when loading fails, so a failed request
    does not leave a stray Firefox process behind.
    """
    browser = webdriver.Firefox()
    try:
        browser.get(url)
        return browser.page_source
    finally:
        browser.quit()


def main(url_file='sample_url', delay=10):
    """Print the sequence behind every URL listed in *url_file*.

    For each non-blank line of *url_file*, the listing page is fetched, its
    "View Sequence" link is followed, and the sequence text found there is
    printed. A summary with the number of URLs fully processed is printed
    at the end, including when processing stops on an error.

    Args:
        url_file: path of a text file with one URL per line.
        delay: seconds to sleep after each processed URL.

    Returns:
        The number of URLs fully processed.
    """
    count = 0
    try:
        with open(url_file) as urls:
            for line in urls:
                # Lines read from the file carry their trailing newline.
                link = line.strip()
                if not link:
                    continue
                listing_page = fetch_page_source(link)
                sequence_page = fetch_page_source(sequence_url(listing_page))
                print(extract_sequence(sequence_page))
                time.sleep(delay)
                count += 1
    finally:
        # `count` is an int: convert it before concatenating, otherwise the
        # TypeError raised here would hide any exception from the loop.
        print(str(count) + ' lines processed')
    return count


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment