Last active
September 24, 2023 12:42
-
-
Save JohanAR/fb67f10e3f8ea84c7d18e1fd77f4a98c to your computer and use it in GitHub Desktop.
Convert filmtipset.se exported csv to imdb format (with some fake data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Put the CSV exported from filmtipset.se next to this script and rename it to film.csv.
import re | |
# Pattern for one data row of the export. Capture groups, in order:
#   1. a date written as YYYY-MM-DD, followed by a comma
#   2. the title (greedy, so it may itself contain ';')
#   3. a run of digits (later turned into the IMDb id)
#   4. a single digit (later turned into the rating)
IMPORT = re.compile(r"(\d{4}-\d{2}-\d{2}),(.+);(\d+);(\d)")
def to_imdb(num):
    """Build an IMDb-style title id from a numeric identifier.

    ``num`` is converted with ``int()``, zero-padded to at least seven
    digits and prefixed with ``tt`` (for example ``123`` -> ``tt0000123``).
    Values with more than seven digits are kept in full.
    """
    number = int(num)
    return f'tt{number:07d}'
def parse(ft):
    """Split one line of the export into its captured fields.

    Returns the tuple of groups captured by ``IMPORT`` when the start of
    ``ft`` matches the pattern. Otherwise a diagnostic containing the
    offending line is printed and ``None`` is returned.
    """
    found = IMPORT.match(ft)
    if found is None:
        # Report the line that could not be understood and let the caller skip it.
        print("Failed to parse: {}".format(ft))
        return None
    return found.groups()
def emit(t):
    """Format one parsed record as a line of the IMDb-style CSV output.

    Args:
        t: Sequence of strings as returned by ``parse()``; ``t[1]`` is the
            title, ``t[2]`` the IMDb number and ``t[3]`` the single-digit
            grade. ``t[0]`` (the date) is not used in the output.

    Returns:
        One comma-separated line without a trailing newline. Besides the
        id, rating, title and URL, the remaining columns are fixed
        placeholder values.
    """
    imdb = to_imdb(t[2])
    # Commas are dropped rather than quoted so the title cannot split the row.
    name = t[1].replace(',', '')
    # Map grade g to 2*g - 1 (e.g. 1 -> 1, 3 -> 5, 5 -> 9).
    rate = int(t[3]) * 2 - 1
    # NOTE(review): the parsed date is never written out; if the consumer
    # expects a "date rated" column it would have to be added here - confirm
    # against the target import format.
    return (
        f'{imdb},{rate},{name},https://www.imdb.com/title/{imdb}/,'
        f'movie,{rate},123,1979,"",123,1979-01-01,Anonymous'
    )
# Convert film.csv into imdb.csv: every input line that parse() accepts
# becomes one output line produced by emit(); lines that parse() rejects are
# reported by parse() and skipped.
with open('film.csv', 'r') as src, open('imdb.csv', 'w') as dst:
    # The first line is discarded unparsed (presumably a header row - confirm
    # against an actual export).
    src.readline()
    for line in src:
        p = parse(line)
        if p:
            dst.write(emit(p) + '\n')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment