Last active
June 10, 2016 08:50
-
-
Save talha252/be1bc97d46c3aeaa8f0833b63d7ccf43 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: UTF-8 -*-
# Downloads every URL listed in the file "links" (one URL per line) and
# writes the text of each element whose CSS class is "kmsgtext" to
# "sonuc.txt", one element per output line, UTF-8 encoded.
import urllib2
from bs4 import BeautifulSoup as bs

# Load the URL list up front. The context manager closes the file handle,
# and blank / whitespace-only lines are dropped so they are never turned
# into a request (an empty URL makes urllib2 raise ValueError).
with open("links") as links_file:
    links = [line.strip() for line in links_file if line.strip()]

# Headers sent with every request. The User-Agent is a desktop Chrome
# string -- presumably so the target site serves the normal HTML page
# instead of rejecting the default urllib2 agent (NOTE(review): confirm).
hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
       'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
       'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
       'Accept-Encoding': 'none',
       'Accept-Language': 'en-US,en;q=0.8',
       'Connection': 'keep-alive'}

# Loop-invariant; kept as a float so the progress percentage below uses
# true division under Python 2.
total = float(len(links))

with open("sonuc.txt", "w") as out:
    for i, link in enumerate(links):
        # Progress line; the Turkish text means "now being processed..".
        print("%{:.1f} - '{}' şimdi işleniyor..".format((i + 1) / total * 100, link))
        req = urllib2.Request(link, headers=hdr)
        # urllib2 responses are not context managers on Python 2, so close
        # the connection explicitly even if read() fails.
        response = urllib2.urlopen(req)
        try:
            html = response.read()
        finally:
            response.close()
        soup = bs(html, "html.parser")
        for element in soup.find_all(class_="kmsgtext"):
            # element.text is unicode; encode once at the output boundary.
            out.write(element.text.encode("utf8").strip() + "\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment