Skip to content

Instantly share code, notes, and snippets.

@renefs
Last active April 2, 2024 17:24
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save renefs/a10a3e9f17b30edf431619ddcc629f2e to your computer and use it in GitHub Desktop.
Save renefs/a10a3e9f17b30edf431619ddcc629f2e to your computer and use it in GitHub Desktop.
Genera un CSV para exportar de FilmAffinity
#! /usr/bin/python3
# Author: Pablo Baeyens
# Usage:
# ./faScrap.py -h
# for usage info and options
import time
import argparse
import requests
import csv
import bs4
from datetime import datetime
import platform
import locale
def set_locale(lang):
    """Set the process-wide locale (``LC_ALL``) for the given language.

    Several candidate locale names are tried in order and the first one the
    C library accepts is kept.

    Args:
        lang: ``"es"`` selects a Spanish (Spain) locale; any other value
            selects a US English locale.

    Raises:
        locale.Error: If the platform is not Linux, macOS or Windows, or if
            none of the candidate locale names is accepted.
    """
    spanish = lang == "es"
    system = platform.system()
    if system in {"Linux", "Darwin"}:
        # The codeset is spelled "utf8" or "UTF-8" depending on the system
        # (macOS typically only knows the latter), so try both.
        if spanish:
            candidates = ("es_ES.utf8", "es_ES.UTF-8")
        else:
            candidates = ("en_US.utf8", "en_US.UTF-8")
    elif system == "Windows":
        # Hyphenated names such as "en-US" are accepted by setlocale() but
        # cannot be parsed back by locale.getlocale(), which
        # datetime.strptime() calls; that shows up later as
        # "ValueError: unknown locale: en-US". Use names that round-trip.
        if spanish:
            candidates = ("es_ES", "Spanish_Spain")
        else:
            candidates = ("en_US", "English_United States")
    else:
        raise locale.Error()

    for name in candidates:
        try:
            locale.setlocale(locale.LC_ALL, name)
        except locale.Error:
            # A rejected name leaves the current locale untouched; try next.
            continue
        return
    raise locale.Error(
        "unsupported locale setting for language {!r}".format(lang))
# Month-name lookup tables, so that date parsing does not depend on which
# locale the process happens to have active.
_MONTHS_EN = {
    name: number
    for number, name in enumerate(
        ("january", "february", "march", "april", "may", "june", "july",
         "august", "september", "october", "november", "december"),
        start=1)
}
_MONTHS_ES = {
    name: number
    for number, name in enumerate(
        ("enero", "febrero", "marzo", "abril", "mayo", "junio", "julio",
         "agosto", "septiembre", "octubre", "noviembre", "diciembre"),
        start=1)
}
_MONTHS_ES["setiembre"] = 9  # accepted alternative spelling


def get_date(tag, lang):
    """Return the rating date found in a header tag as ``YYYY-MM-DD``.

    Args:
        tag: Object whose ``string`` attribute holds the header text, e.g.
            ``"Rated on April 28, 2019"`` (English) or
            ``"Votada el día: 12 de marzo de 2006"`` (Spanish).
        lang: ``"es"`` to parse the Spanish layout; anything else parses the
            English layout.

    Returns:
        The date formatted as ``YYYY-MM-DD``.

    Raises:
        ValueError: If the tag has no text or the text does not contain a
            date in the expected layout.
    """
    text = tag.string
    if text is None:
        raise ValueError("rating header has no text to read a date from")

    if lang == "es":
        date_str = text[len("Votada el día: "):].strip()
        # Expected layout: "<day> de <month> de <year>".
        parts = date_str.split()
        if (len(parts) != 5 or parts[1].lower() != "de"
                or parts[3].lower() != "de"):
            raise ValueError("unrecognised date: {!r}".format(date_str))
        day, month_name, year = parts[0], parts[2], parts[4]
        months = _MONTHS_ES
    else:
        date_str = text[len("Rated on "):].strip()
        # Expected layout: "<month> <day>, <year>".
        parts = date_str.replace(",", " ").split()
        if len(parts) != 3:
            raise ValueError("unrecognised date: {!r}".format(date_str))
        month_name, day, year = parts
        months = _MONTHS_EN

    month = months.get(month_name.lower())
    if month is None:
        raise ValueError("unrecognised month in date: {!r}".format(date_str))
    # int() and datetime() both raise ValueError on malformed or out-of-range
    # components, which is the error type callers already handle.
    return datetime(int(year), month, int(day)).date().isoformat()
def get_directors(tag):
    """Return the directors of a film entry as a comma-separated string.

    Names are read from the ``title`` attribute of the link inside each
    ``nb`` element of the first ``mc-director`` element. A trailing
    ``"(Creator)"`` marker is removed from each name.

    Args:
        tag: Parsed film entry supporting ``find_all(class_=...)``.

    Returns:
        The names joined with ``", "``, or an empty string when the entry
        has no ``mc-director`` element.
    """
    director_boxes = tag.find_all(class_="mc-director")
    if not director_boxes:
        return ""

    suffix = "(Creator)"
    names = []
    for entry in director_boxes[0].find_all(class_="nb"):
        name = entry.a["title"]
        if name.endswith(suffix):
            # Drop the marker and the whitespace that separated it from
            # the name.
            name = name[:-len(suffix)].rstrip()
        names.append(name)
    return ", ".join(names)
def is_film(tag, lang):
    """Tell whether a rating entry should be treated as a film.

    The entry's title is taken from the link inside its first ``mc-title``
    element. The entry is rejected when that title ends with one of the
    language-specific markers listed below.
    """
    if lang == "es":
        excluded_suffixes = ("(Serie de TV)", "(Miniserie de TV)", "(TV)", "(C)")
    else:
        excluded_suffixes = ("(TV Series)", "(TV Miniseries)", "(TV)", "(S)")

    title_link = tag.find_all(class_="mc-title")[0].a
    name = title_link.string.strip()
    # str.endswith accepts a tuple and is true if any suffix matches.
    return not name.endswith(excluded_suffixes)
def _parse_film(tag, watched_date):
    """Build the export row for one ``user-ratings-movie`` tag."""
    title = tag.find_all(class_="mc-title")[0].a
    rating10 = tag.find_all(class_="ur-mr-rat")[0].string
    return {
        "Title": title.string.strip(),
        # The text right after the title link is stripped and its first and
        # last characters dropped (presumably the parentheses around the year).
        "Year": title.next_sibling.strip()[1:-1],
        "Directors": get_directors(tag),
        "WatchedDate": watched_date,
        # The site rating is halved for the "Rating" column; the raw value
        # is kept in "Rating10".
        "Rating": int(rating10) / 2,
        "Rating10": rating10,
    }


def get_data(user_id, lang):
    """Download every rated film of a FilmAffinity user.

    Pages of the user's rating list are requested one after another until
    the site answers with a non-200 status or a page contains no rating
    entries.

    Args:
        user_id: FilmAffinity user id, inserted into the request URL.
        lang: Site language segment of the URL (``"es"`` or ``"en"``); also
            selects how dates and title markers are interpreted.

    Returns:
        A list of dictionaries with the keys ``Title``, ``Year``,
        ``Directors``, ``WatchedDate``, ``Rating`` and ``Rating10``.

    Raises:
        ValueError: Propagated from ``get_date`` when a date header cannot
            be parsed.
    """
    url_template = ("https://www.filmaffinity.com/" + lang +
                    "/userratings.php?user_id={id}&p={n}&orderby=4")
    data = []
    # Kept across pages so that entries at the top of a page that are not
    # preceded by a date header still get the most recent date seen.
    cur_date = None
    n = 1
    while True:
        # A timeout keeps a stalled connection from hanging the script.
        response = requests.get(
            url_template.format(id=user_id, n=n), timeout=30)
        if response.status_code != 200:
            # Check the status before parsing: an error page is not data.
            break
        response.encoding = "utf-8"
        page = bs4.BeautifulSoup(response.text, "lxml")
        tags = page.find_all(
            class_=["user-ratings-header", "user-ratings-movie"])
        if not tags:
            # Guards against looping forever if an out-of-range page is
            # served with status 200 but without any entries.
            break
        for tag in tags:
            if "user-ratings-header" in tag["class"]:
                cur_date = get_date(tag, lang)
            elif is_film(tag, lang):
                data.append(_parse_film(tag, cur_date))
        print("Página {n}".format(n=n), end="\r")
        n += 1
    # n is the first page that yielded nothing, so n - 1 was the last one.
    print("Página {n}. Download complete!".format(n=n - 1))
    return data
def save_to_csv(data, filename):
    """Write a list of dictionaries to a CSV file.

    The keys of the first dictionary become the header row and define the
    column order for every following row.

    Args:
        data: List of dictionaries, one per row. When it is empty the file
            is created (or truncated) and left empty.
        filename: Path of the file to write.
    """
    # An explicit UTF-8 encoding avoids UnicodeEncodeError on systems whose
    # default encoding cannot represent every character in a title.
    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
        if not data:
            return
        writer = csv.DictWriter(
            csvfile, fieldnames=list(data[0]), extrasaction="ignore")
        writer.writeheader()
        writer.writerows(data)
def main():
    """Command-line entry point.

    Parses the arguments, configures the locale for the chosen language,
    downloads the user's ratings and writes them to a CSV file. Exits with
    status 1 when the locale entered by hand is rejected, when the ratings
    cannot be parsed, or when no ratings were found.
    """
    import sys

    parser = argparse.ArgumentParser(
        description=
        "Generates csv compatible with LetterBoxd from Filmaffinity user's id.")
    parser.add_argument("id", help="User's id")
    parser.add_argument("--csv", help="Name of export FILE", metavar="FILE")
    parser.add_argument(
        "--lang",
        help="Language for exporting",
        metavar="LANG",
        default="en",
        choices=("es", "en"))
    args = parser.parse_args()

    export_file = args.csv or "filmAffinity_{lang}_{id}.csv".format(
        id=args.id, lang=args.lang)

    try:
        set_locale(args.lang)
    except locale.Error:
        print(
            "Could not set locale for \'{lang}\' and UTF-8 encoding.".format(
                lang=args.lang))
        # Let the user name a locale by hand; empty input keeps the
        # current locale.
        manual_locale = input("locale (empty for default): ").strip()
        if manual_locale:
            try:
                locale.setlocale(locale.LC_ALL, manual_locale)
            except locale.Error as e:
                print(e)
                sys.exit(1)

    try:
        # Use the language chosen on the command line rather than a
        # hard-coded "en", so that --lang actually takes effect.
        data = get_data(args.id, args.lang)
    except ValueError as v:
        print("Error:", v)
        sys.exit(1)

    if not data:
        # Nothing to export; avoid writing a header-less file.
        print("Error: no ratings found for user", args.id)
        sys.exit(1)

    save_to_csv(data, export_file)


if __name__ == "__main__":
    main()
beautifulsoup4==4.7.1
bs4==0.0.1
certifi==2019.3.9
chardet==3.0.4
idna==2.8
lxml==4.3.3
requests==2.21.0
soupsieve==1.9.1
urllib3==1.24.2
Title Year Directors WatchedDate Rating Rating10
Avengers: Infinity War 2018 Anthony Russo, Joe Russo 2019-04-28 4.5 9
Avengers: Endgame 2019 Anthony Russo, Joe Russo 2019-04-28 4.5 9
Saw 2 2005 Darren Lynn Bousman 2006-03-12 3.5 7
Munich 2005 Steven Spielberg 2006-03-12 2.0 4
Jarhead 2005 Sam Mendes 2006-03-12 4.0 8
The Chronicles of Narnia: The Lion, The Witch and the Wardrobe 2005 Andrew Adamson 2006-03-12 3.0 6
Torrente 3 2005 Santiago Segura 2006-03-12 2.0 4
War of the Worlds 2005 Steven Spielberg 2006-03-12 2.5 5
Star Wars: Episode III Revenge of the Sith 2005 George Lucas 2006-03-12 4.5 9
King Kong 2005 Peter Jackson 2006-03-12 1.5 3
Mars Attacks! 1996 Tim Burton 2006-03-12 4.5 9
The Matrix Revolutions 2003 Lilly Wachowski, Lana Wachowski 2006-03-12 4.0 8
Night Watch 2004 Timur Bekmambetov 2006-03-12 1.0 2
Kill Bill: Volume 1 2003 Quentin Tarantino 2006-03-12 3.5 7
The Faculty 1998 Robert Rodriguez 2006-03-12 3.0 6
The Village 2004 M. Night Shyamalan 2006-03-12 2.5 5
There's Something About Mary 1998 Peter Farrelly, Bobby Farrelly 2006-03-12 3.5 7
Twelve Monkeys 1995 Terry Gilliam 2006-03-12 3.0 6
Dumb and Dumber (Dumb & Dumber) 1994 Peter Farrelly, Bobby Farrelly 2006-03-12 3.0 6
A Clockwork Orange 1971 Stanley Kubrick 2006-03-12 5.0 10
Elephant 2003 Gus Van Sant 2006-03-12 2.0 4
The Blair Witch Project 1999 Daniel Myrick, Eduardo Sánchez 2006-03-12 4.0 8
Moulin Rouge 2001 Baz Luhrmann 2006-03-12 1.5 3
@renefs
Copy link
Author

renefs commented Apr 29, 2019

Run with python main.py <FILM_AFFINITY_ID> --csv result.csv

@pasalomoco
Copy link

It throws me this error:
Error: unknown locale: en-US

:(

Do you know what is the problem ?

Thanks for this code :)

@renefs
Copy link
Author

renefs commented Dec 9, 2020

Which line? Can you print the stack trace?

@pasalomoco
Copy link

pasalomoco commented Dec 9, 2020

    try:
           data = get_data(args.id, "en")
           print(data)
     except ValueError as v:
167     print("Error:", v)
           exit()

Line 167

@pasalomoco
Copy link

pasalomoco commented Dec 9, 2020

@pasalomoco
Copy link

pasalomoco commented Dec 9, 2020

hi, i just fixed this error but i have another one:

UnicodeEncodeError: 'charmap' codec can't encode character '\u014d' in position 36: character maps to <undefined>

UnicodeEncodeError: 'charmap' codec can't encode character '\u014c' in position 21: character maps to <undefined>

i dont know why but this line "locale.setlocale(locale.LC_ALL, loc)" on line 28 wasn't working properly.

@pasalomoco
Copy link

pasalomoco commented Dec 9, 2020

hi, i just fixed the other error. Try to change line 123 to this:

with open(filename, 'w', newline='', encoding='utf8') as csvfile:

Now I have the .csv file, but it doesn't have a table format.

Thx for your code men :)

@mrdonado
Copy link

mrdonado commented Aug 6, 2023

Thanks for the script! It worked like a charm.

I had only an issue when I tried to install the requirements.txt that you specified. After installing the dependencies manually, though, it worked just fine. I guess that some of the versions are not compatible with my system (I'm on an M1 Pro Mac).

@simonbcn
Copy link

What is the point of asking for language in the parameters if it is then not used?

    try:
        data = get_data(args.id, "en")

@TiggerElPro
Copy link

The code only runs these 2 lines and stops at line 167: https://www.filmaffinity.com/en/userratings.php?user_id={id}&p={n}&orderby=4 https://www.filmaffinity.com/en/userratings.php?user_id=731957&p=1&orderby=4 Error: unknown locale: en-US

I am getting the exact same error

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment