Created
January 15, 2016 22:02
-
-
Save RobertMatkulcik/9c8dac0549a3ba526eeb to your computer and use it in GitHub Desktop.
data scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from bs4 import BeautifulSoup | |
from requests import Session | |
import csv | |
def main(username, password):
    """Log in to titulky.com and save the listed links to ``filename.txt``.

    Posts the credentials to the site root, fetches the listing page with the
    same session, and for every ``<tr class="r1">`` row that contains a link
    writes one ``"<href> - <name>"`` line to ``filename.txt``. Each line is
    also echoed to stdout.

    Args:
        username: Value sent as the ``Login`` form field.
        password: Value sent as the ``Password`` form field.
    """
    login_url = "http://www.titulky.com/"
    my_account_url = "http://www.titulky.com/?orderby=3&OrderDate=2"
    post_data = {
        "Login": username,
        "Password": password,
    }
    output_path = "filename.txt"

    # One session for both requests so the cookies set by the login POST are
    # sent with the follow-up GET; the context manager closes it afterwards.
    with Session() as session:
        session.post(login_url, post_data)
        content = session.get(my_account_url).content

    soup = BeautifulSoup(content, "html.parser")
    rows = soup.find_all("tr", class_="r1")

    # Open the output only after the page was fetched, so a failed request
    # does not truncate a previous result. The explicit encoding keeps
    # non-ASCII names from depending on the platform default.
    with open(output_path, "w", encoding="utf-8") as out_file:
        for row in rows:
            link = row.find("a")
            if link is None:
                # Row without a link: nothing to record.
                continue
            text = "{} - {}\n".format(link.get("href"), link.text)
            out_file.write(text)
            print(text)
if __name__ == '__main__':
    # Script-local imports: only the command-line entry point needs them.
    import getpass
    import sys

    username = "E.T.Bong"
    # Do not echo the password on an interactive terminal. When stdin is
    # piped, keep reading a plain line so non-interactive use is unchanged.
    if sys.stdin.isatty():
        password = getpass.getpass()
    else:
        password = input()
    main(username, password)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment