Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Scan folders, match file names against IMDb titles, and retrieve details for each item
import json
import os
import re
from pprint import pprint
from typing import Optional
from urllib.parse import quote

import requests
# Folders whose entries are listed and turned into candidate titles.
PATH_MOVIES = "/run/media/yannick/my-hard-drive/my-movie-folder"
PATH_TV_SHOWS = "/run/media/yannick/my-hard-drive/my-tv-folder"
# JSON cache files used between the pipeline steps:
# sorted list of titles derived from the folder entries.
ITEM_TITLES_FILE = ".item_titles.json"
# Mapping of title -> matched id.
ITEM_IDS_BY_TITLE_FILE = ".item_ids_by_title.json"
# Mapping of id -> details object returned by the API.
ITEM_DETAILS_BY_ID_FILE = ".item_imdb_details.json"
# Base URL of the API; request URLs are built as
# "<IMDB_API_URL>/<endpoint>/<IMDB_API_KEY>/<argument>".
IMDB_API_URL = "https://imdb-api.com/API"
# Placeholder: replace with a real API key before running.
IMDB_API_KEY = "YOUR IMDB-API-KEY"
# Endpoint path segments used for title search and for detail lookup.
IMDB_API_SEARCH_TITLE = "SearchTitle"
IMDB_API_TITLE = "Title"
def _create_file_if_not_exists(filename: str, content: Optional[str] = None) -> None:
if not os.path.exists(filename):
file = open(filename, "w+")
if content:
file.write(json.dump(content))
file.close()
def _read_json_from_file(filename: str, default_obj: Optional[object] = None):
    """Load and return the JSON content of ``filename``.

    The file is created first if it does not exist. If it cannot be opened
    or does not contain valid JSON, ``default_obj`` is returned instead.

    Args:
        filename: Path of the JSON file to read.
        default_obj: Fallback value. When omitted, a fresh empty dict is used
            for every call, so callers that mutate the returned object never
            share state with one another.

    Returns:
        The parsed JSON value, or ``default_obj`` on failure.
    """
    if default_obj is None:
        default_obj = {}
    _create_file_if_not_exists(filename, default_obj)
    try:
        with open(filename, "r") as file:
            return json.load(file)
    except (OSError, ValueError):
        # json.JSONDecodeError is a ValueError; an empty or corrupt file
        # falls back to the default rather than aborting the run.
        return default_obj
def _write_json_to_file(filename: str, content) -> None:
_create_file_if_not_exists(filename)
file = open(filename, "w")
file.seek(0)
file.write(json.dumps(content))
file.close()
def api_search_title(title: str, timeout: float = 10.0) -> dict:
    """Query the title-search endpoint and return the decoded JSON response.

    Args:
        title: Free-text title to search for.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.RequestException: On connection problems or timeout.
        ValueError: If the response body is not valid JSON.
    """
    # The title becomes a URL path segment, so characters such as "/", "?"
    # or "#" must be percent-encoded or they would change the request path.
    encoded_title = quote(title, safe="")
    url = f"{IMDB_API_URL}/{IMDB_API_SEARCH_TITLE}/{IMDB_API_KEY}/{encoded_title}"
    # requests has no default timeout; without one a stalled connection
    # would block the script forever.
    response = requests.get(url, timeout=timeout)
    return response.json()
def api_get_details(title_id: str, timeout: float = 10.0) -> dict:
    """Query the title-details endpoint and return the decoded JSON response.

    Args:
        title_id: Identifier of the title to look up.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.RequestException: On connection problems or timeout.
        ValueError: If the response body is not valid JSON.
    """
    url = f"{IMDB_API_URL}/{IMDB_API_TITLE}/{IMDB_API_KEY}/{title_id}"
    # requests has no default timeout; without one a stalled connection
    # would block the script forever.
    response = requests.get(url, timeout=timeout)
    return response.json()
def split_by_camel_case(title: str) -> list[str]:
    """Split ``title`` into its camel-case components.

    A split happens between a lowercase letter and a following uppercase
    letter, and before the last capital of an uppercase run that is followed
    by a lowercase letter (so ``"HTTPServer"`` becomes ``["HTTP", "Server"]``).
    Text without such boundaries is returned as a single element.

    Args:
        title: The string to split.

    Returns:
        The list of parts in order; an empty list for an empty string.
    """
    pattern = r".+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)"
    # The pattern has no capturing groups, so findall yields whole matches.
    return re.findall(pattern, title)
def get_folder_content_names(path: str) -> list[str]:
    """Return the entries of ``path`` with camel-case names spaced out.

    Each directory entry is split at its camel-case boundaries and the
    parts are joined back together with single spaces.
    """
    return [" ".join(split_by_camel_case(entry)) for entry in os.listdir(path)]
def match_title_first_result(title: str) -> Optional[str]:
    """Return the id of the first search result for ``title``.

    Args:
        title: Title to search for.

    Returns:
        The ``id`` of the first result, or ``None`` when the response holds
        no results (missing, null or empty ``results`` entry).
    """
    results = api_search_title(title).get("results")
    if not results:
        return None
    return results[0]["id"]
def match_title_interactive(title: str) -> Optional[str]:
    """Let the user pick the matching search result for ``title``.

    The search results are printed as a 1-based numbered list and the user
    is prompted until a valid number is entered.

    Args:
        title: Title to search for.

    Returns:
        The ``id`` of the chosen result, or ``None`` when the search
        returned no results.
    """
    results = api_search_title(title).get("results")
    print(f"Original title: {title}")
    if not results:
        print("No results found.")
        return None
    for number, result in enumerate(results, start=1):
        print(f"[{number}] {result['title']} {result['description']}")
    while True:
        try:
            choice = int(input("Choice: "))
        except ValueError:
            choice = 0  # treated as out of range below, so we re-prompt
        # Reject 0 and negatives explicitly: results[choice - 1] would
        # otherwise wrap around and silently pick an entry from the end.
        if 1 <= choice <= len(results):
            return results[choice - 1]["id"]
        print(f"Please enter a number between 1 and {len(results)}.")
def get_unmatched_items() -> list[str]:
    """Return, sorted, the stored titles that have no matched id yet."""
    known_titles = _read_json_from_file(ITEM_TITLES_FILE, [])
    already_matched = _read_json_from_file(ITEM_IDS_BY_TITLE_FILE).keys()
    return sorted(set(known_titles) - set(already_matched))
def get_detail_less_items() -> list[str]:
    """Return the matched ids for which no details have been stored yet.

    The result is built from a set, so its order is unspecified.
    """
    matched_ids = _read_json_from_file(ITEM_IDS_BY_TITLE_FILE).values()
    detailed_ids = _read_json_from_file(ITEM_DETAILS_BY_ID_FILE).keys()
    missing = set(matched_ids) - set(detailed_ids)
    return list(missing)
# Public entry points: these are the functions you want to call.
def scan_hard_drive() -> None:
    """Collect the titles from the movie and TV-show folders and store them.

    The names from both folders are merged, sorted and written to the
    titles file as a JSON list.
    """
    titles = sorted(
        get_folder_content_names(PATH_MOVIES)
        + get_folder_content_names(PATH_TV_SHOWS)
    )
    _write_json_to_file(ITEM_TITLES_FILE, titles)
def persist_items_with_id(upper_bound: Optional[int] = None) -> None:
    """Match still-unmatched titles to ids and store the title -> id mapping.

    Each title is looked up with ``match_title_first_result``; titles
    without a result are skipped and stay unmatched.

    Args:
        upper_bound: Maximum number of titles to look up in this run (one
            API request each). ``None`` or a negative value means no limit;
            ``0`` performs no lookups.
    """
    ids_dict = _read_json_from_file(ITEM_IDS_BY_TITLE_FILE)
    pending = get_unmatched_items()
    if upper_bound is not None and upper_bound >= 0:
        pending = pending[:upper_bound]
    try:
        for item_name in pending:
            item_id = match_title_first_result(item_name)
            if item_id:
                print(f"Matched {item_name} with https://imdb.com/title/{item_id}")
                ids_dict[item_name] = item_id
    finally:
        # Save in every case so matches found before a failed request (or
        # Ctrl-C) are not thrown away.
        _write_json_to_file(ITEM_IDS_BY_TITLE_FILE, ids_dict)
def persist_ids_with_details(upper_bound: Optional[int] = None) -> None:
    """Fetch details for ids that have none yet and store the id -> details mapping.

    Args:
        upper_bound: Maximum number of ids to fetch in this run (one API
            request each). ``None`` or a negative value means no limit;
            ``0`` performs no requests.
    """
    details_dict = _read_json_from_file(ITEM_DETAILS_BY_ID_FILE)
    pending = get_detail_less_items()
    if upper_bound is not None and upper_bound >= 0:
        pending = pending[:upper_bound]
    try:
        for item_id in pending:
            details_dict[item_id] = api_get_details(item_id)
    finally:
        # Save in every case so details fetched before a failed request (or
        # Ctrl-C) are not thrown away.
        _write_json_to_file(ITEM_DETAILS_BY_ID_FILE, details_dict)
def generate_final_item_list(filename: str):
    """Write all stored detail objects to ``filename`` as a JSON list."""
    details_by_id = _read_json_from_file(ITEM_DETAILS_BY_ID_FILE)
    all_details = [*details_by_id.values()]
    _write_json_to_file(filename, all_details)
# Run the whole pipeline: scan the folders, match titles to ids, fetch the
# details for each id, then export the collected details.
scan_hard_drive()
print(f"Found titles: {len(_read_json_from_file(ITEM_TITLES_FILE))}")
# upper_bound must be passed explicitly; a negative value means "no limit".
# Use a positive number instead to cap the API requests made per run.
persist_items_with_id(upper_bound=-1)
persist_ids_with_details(upper_bound=-1)
generate_final_item_list("my-awesome-list.json")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment