Last active
December 1, 2023 02:30
-
-
Save rakinishraq/5e1d1768ea46a9e9aae6157c226ad99d to your computer and use it in GitHub Desktop.
Organize YouTube subscriptions into categories
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import tkinter as tk | |
from tkinter import filedialog | |
from webbrowser import open | |
import pandas as pd | |
# Usage text printed once at start-up, before the first prompt.
# The ' | ' in the "category name | note here" example is the literal
# separator the user types between a category and its optional note.
info = """
Youtube Subscription Organizer
0. Go to Google Takeout and export your Youtube data (change HTML option to CSV)
1. Open your subscriptions.csv file (or place this script near it)
2. Use the following instructions to categorize
Enter blank to open channel in browser
Enter x or X or 0 to archive
Enter any words to create new category and add this channel to it
Enter valid category number/exact name to add channel to existing category
Enter "category name | note here" to add optional note about the channel
Tip: You can close this program and reopen to resume, it'll skip categorized items
"""
def select_file():
    """Return the path of the subscriptions CSV to process.

    If a file named 'subscriptions.csv' exists in the current working
    directory it is returned without any user interaction. Otherwise a
    file-open dialog restricted to ``*.csv`` is shown.

    Returns:
        The chosen path, or an empty string when the dialog is cancelled.
    """
    file_path = "subscriptions.csv"
    if os.path.isfile(file_path):
        return file_path

    # A Tk root is required to host the dialog; keep it hidden so that only
    # the dialog itself appears on screen.
    root = tk.Tk()
    root.withdraw()
    try:
        chosen = filedialog.askopenfilename(filetypes=[("CSV files", "*.csv")])
    finally:
        # Tear the hidden window down instead of leaking it for the rest of
        # the (console-only) session.
        root.destroy()
    # Some Tk builds report a cancelled dialog as an empty tuple; normalise
    # every "nothing chosen" result to an empty string.
    return chosen or ""
def get_output(output_file):
    """Load previously saved categorisations as a DataFrame.

    Args:
        output_file: Path of the output CSV.

    Returns:
        The parsed file, or an empty DataFrame with the columns
        'Channel Title', 'Channel Url', 'Category' and 'Note' when the file
        does not exist or contains no data at all.
    """
    try:
        return pd.read_csv(output_file)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # A zero-byte file (e.g. left behind by an interrupted write) makes
        # read_csv raise EmptyDataError; treat it like "nothing saved yet"
        # rather than aborting the resume.
        return pd.DataFrame(columns=['Channel Title', 'Channel Url', 'Category', 'Note'])
def show_categories(cat):
    """Print all categories on a single line, each prefixed by its selection key.

    The key is the item's position in ``cat``; position 0 is shown as 'X'.
    """
    labels = []
    for position, name in enumerate(cat):
        key = 'X' if position == 0 else position
        labels.append(f"{key}: {name}")
    joined = " | ".join(labels)
    print("Categories -\t" + joined)
def categorize(categories, channel_id, channel_url, channel_title):
    """Prompt until the user assigns the channel to a category.

    Accepted answers (any of them may be followed by an optional note,
    written as ``<answer> | <note>``):

    * blank -- open ``channel_url`` in the browser and ask again
    * ``x`` / ``X`` -- choose ``categories[0]`` (the archive slot)
    * an exact existing category name -- choose that category
    * an integer -- choose ``categories[n]``; negative numbers count from
      the end, an out-of-range number prints an error and asks again
    * anything else -- append the text to ``categories`` as a new category
      and re-display the category list

    Args:
        categories: Mutable list of category names; extended in place when
            a new category is created.
        channel_id: Channel identifier, shown in the prompt.
        channel_url: URL opened in the browser on a blank answer.
        channel_title: Channel title, shown in the prompt.

    Returns:
        ``"<category>"``, or ``"<category> | <note>"`` when a note was given.
        The result never contains more than one ``' | '`` separator.
    """
    # Imported locally: the module-level name ``open`` may or may not be the
    # browser opener depending on the file's imports, so be explicit.
    import webbrowser

    separator = ' | '
    prompt = f"{channel_title} ({channel_id}) ~> "

    # Loop instead of recursing so repeated retries cannot exhaust the stack.
    while True:
        answer = input(prompt)

        if not answer.strip():  # empty = open channel, then ask again
            webbrowser.open(channel_url)
            continue

        if answer.count(separator) > 1:
            # The caller splits the result on the separator, so more than
            # one occurrence would be ambiguous.
            print("Use at most one ' | ' to separate the category from the note.")
            continue

        # Split off the note first so it can accompany any kind of answer
        # (archive, number, existing name or new category).
        name, _, note = answer.partition(separator)
        name, note = name.strip(), note.strip()
        if not name:
            print("Category name cannot be empty.")
            continue

        if name.lower() == 'x':  # X/x = archive
            chosen = categories[0]
        elif name in categories:  # exact existing name
            chosen = name
        else:
            # 0 = Archive, -1 = last, -2 = penultimate, etc.
            try:
                chosen = categories[int(name)]
            except ValueError:  # not a number = new category
                categories.append(name)
                show_categories(categories)
                chosen = name
            except IndexError:  # number outside the list = retry
                print("Incorrect number input.")
                continue

        return chosen + separator + note if note else chosen
def add_entry(output_file, channel_title, channel_url, category, note):
    """Append one categorised channel to the output CSV.

    Only the new row is written. Earlier rows are never re-read and
    re-serialised, so their text is left untouched (a title such as "007"
    or "NA" is not reinterpreted by dtype inference) and the cost per entry
    does not grow with the size of the file.

    Assumes an existing, non-empty output file already has the column order
    'Channel Title', 'Channel Url', 'Category', 'Note' and ends with a
    newline, which is what this function itself produces.

    Args:
        output_file: Path of the output CSV; created when missing.
        channel_title: Value for the 'Channel Title' column.
        channel_url: Value for the 'Channel Url' column.
        category: Value for the 'Category' column.
        note: Value for the 'Note' column (may be empty).
    """
    new_entry = pd.DataFrame(
        {'Channel Title': [channel_title], 'Channel Url': [channel_url], 'Category': [category], 'Note': [note]})
    # The header row is needed only when starting a fresh (or zero-byte) file.
    needs_header = not os.path.isfile(output_file) or os.path.getsize(output_file) == 0
    new_entry.to_csv(output_file, mode='a', header=needs_header, index=False)
def main():
    """Walk through the subscriptions file and categorise every new channel.

    The source CSV is expected to provide the columns 'Channel Id',
    'Channel Url' and 'Channel Title'. Results are stored next to it in
    '<source name>_output.csv', one row per channel, written as soon as the
    channel is categorised so the session can be interrupted and resumed.
    Channels whose URL is already present in the output file are skipped.
    """
    source_file = select_file()
    if not source_file:
        # The file dialog was cancelled; there is nothing to process.
        print("No file selected.")
        return
    data = pd.read_csv(source_file)

    # Swap only the final extension. A plain str.replace would also rewrite
    # any '.csv' inside directory names and would return the source path
    # itself for names such as 'subs.CSV'.
    stem, _extension = os.path.splitext(source_file)
    output_file = stem + '_output.csv'
    output_data = get_output(output_file)

    # Slot 0 is always the archive; restore categories created in earlier
    # sessions so their names and numbers keep working after a resume.
    categories = ["Archive"]
    for earlier in output_data['Category'].dropna().astype(str):
        if earlier not in categories:
            categories.append(earlier)
    show_categories(categories)

    # Build the lookup once instead of scanning the output column per row.
    done_urls = set(output_data['Channel Url'])

    for _, row in data.iterrows():
        channel_id = row['Channel Id']
        channel_url = row['Channel Url']
        channel_title = row['Channel Title']
        if channel_url in done_urls:
            continue
        answer = categorize(categories, channel_id, channel_url, channel_title)
        # partition never raises, even if the note itself contains ' | ';
        # note is '' when no separator is present.
        category, _, note = answer.partition(' | ')
        add_entry(output_file, channel_title, channel_url, category, note)
        done_urls.add(channel_url)
# Script entry point: show the usage text (followed by a blank line), then
# start the interactive session.
if __name__ == "__main__":
    print(f"{info}\n")
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment