Skip to content

Instantly share code, notes, and snippets.

@rakinishraq
Last active December 1, 2023 02:30
Show Gist options
  • Save rakinishraq/5e1d1768ea46a9e9aae6157c226ad99d to your computer and use it in GitHub Desktop.
Save rakinishraq/5e1d1768ea46a9e9aae6157c226ad99d to your computer and use it in GitHub Desktop.
Organize Youtube subscriptions into categories
import os
import tkinter as tk
from tkinter import filedialog
from webbrowser import open
import pandas as pd
# Usage instructions shown to the user; printed once by the script entry
# point before main() starts prompting.
info = """
Youtube Subscription Organizer
0. Go to Google Takeout and export your Youtube data (change HTML option to CSV)
1. Open your subscriptions.csv file (or place this script near it)
2. Use the following instructions to categorize
Enter blank to open channel in browser
Enter x or X or 0 to archive
Enter any words to create new category and add this channel to it
Enter valid category number/exact name to add channel to existing category
Enter "category name | note here" to add optional note about the channel
Tip: You can close this program and reopen to resume, it'll skip categorized items
"""
def select_file():
    """Return the path of the subscriptions CSV to process.

    Uses 'subscriptions.csv' from the current working directory when it
    exists; otherwise asks the user to pick a CSV file in a file dialog.

    Returns:
        The chosen path. The dialog result is returned as-is, so it is empty
        when the user cancels the dialog.
    """
    default_path = "subscriptions.csv"
    if os.path.isfile(default_path):
        return default_path

    # The dialog needs a Tk root to attach to; keep that root window hidden.
    root = tk.Tk()
    root.withdraw()
    try:
        return filedialog.askopenfilename(filetypes=[("CSV files", "*.csv")])
    finally:
        # Destroy the hidden root so it does not linger (and hold Tk
        # resources) for the rest of the console session.
        root.destroy()
def get_output(output_file):
    """Load previously saved results from *output_file*.

    Returns the file's contents as a DataFrame. When the file does not exist
    yet, returns an empty DataFrame that already has the result columns.
    """
    result_columns = ['Channel Title', 'Channel Url', 'Category', 'Note']
    try:
        saved = pd.read_csv(output_file)
    except FileNotFoundError:
        # Nothing saved so far: start from an empty table.
        return pd.DataFrame(columns=result_columns)
    return saved
def show_categories(cat):
    """Print all categories on one line, each prefixed with its number.

    The first category (index 0) is labelled 'X' instead of '0'.
    """
    labels = []
    for position, name in enumerate(cat):
        key = 'X' if position == 0 else position
        labels.append(f"{key}: {name}")
    print("Categories -\t" + " | ".join(labels))
def categorize(categories, channel_id, channel_url, channel_title):
    """Ask the user which category a channel belongs to.

    Keeps prompting until a usable answer is given:

    * blank input opens the channel in the browser and asks again;
    * 'x' / 'X' selects the first category (archive);
    * an exact existing category name selects that category;
    * a number selects ``categories[number]`` (0 = Archive, -1 = last,
      -2 = penultimate, etc.); an out-of-range number asks again;
    * any other text creates a new category, which is appended to
      *categories* (the list is mutated in place).

    Any answer may be followed by ``' | note'``. The note is split off at the
    first ``' | '`` only, so the note text itself may contain that separator.

    Returns:
        The category name, or ``'<category> | <note>'`` when a non-empty note
        was given.
    """
    while True:
        answer = input(f"{channel_title} ({channel_id}) ~> ")
        if answer == '':  # empty = open channel, then ask again
            # `open` is webbrowser.open (imported at file level), not the builtin.
            open(channel_url)
            continue

        # Separate the optional note first so it can accompany every kind of
        # answer (archive, existing name, number or new category).
        name, _, note = answer.partition(' | ')
        if name == '':
            print("Category name cannot be empty.")
            continue

        if name.lower() == 'x':  # X/x = archive
            chosen = categories[0]
        elif name in categories:  # exact name of an existing category
            chosen = name
        else:
            try:  # Num = categories[N]
                chosen = categories[int(name)]
            except ValueError:  # Str = new category
                categories.append(name)
                show_categories(categories)
                chosen = name
            except IndexError:  # invalid Num = retry
                print("Incorrect number input.")
                continue

        return f"{chosen} | {note}" if note else chosen
def add_entry(output_file, channel_title, channel_url, category, note):
    """Append one categorized channel to *output_file*.

    The current contents are loaded through get_output(), the new row is
    added at the end, and the whole table is written back without the index.
    """
    fields = {
        'Channel Title': channel_title,
        'Channel Url': channel_url,
        'Category': category,
        'Note': note,
    }
    # One-row table holding just the new entry.
    row = pd.DataFrame({column: [value] for column, value in fields.items()})
    existing = get_output(output_file)
    combined = pd.concat([existing, row], ignore_index=True)
    combined.to_csv(output_file, index=False)
def main():
    """Walk through every subscription and record the category chosen for it.

    Reads the source CSV, skips channels whose URL is already present in the
    output CSV, prompts for a category for each remaining channel and saves
    every answer immediately, so the session can be interrupted and resumed.
    """
    source_file = select_file()
    if not source_file:  # nothing was chosen in the file dialog
        print("No file selected.")
        return
    data = pd.read_csv(source_file)

    # Replace only the real file extension. A plain str.replace('.csv', ...)
    # would also rewrite '.csv' elsewhere in the path, and would return the
    # source path unchanged (overwriting it) for names such as 'subs.CSV'.
    base_path, _extension = os.path.splitext(source_file)
    output_file = base_path + '_output.csv'
    output_data = get_output(output_file)

    # Restore the categories used in earlier sessions so that their names and
    # numbers are available again after a resume.
    categories = ["Archive"]
    for previous in output_data['Category'].dropna().astype(str):
        if previous not in categories:
            categories.append(previous)
    show_categories(categories)

    # Build the lookup of already-categorized channels once, not per row.
    done_urls = set(output_data['Channel Url'])
    for _, row in data.iterrows():
        channel_id = row['Channel Id']
        channel_url = row['Channel Url']
        channel_title = row['Channel Title']
        if channel_url in done_urls:
            continue
        answer = categorize(categories, channel_id, channel_url, channel_title)
        # Split at the first separator only, so a note may contain ' | '.
        category, _, note = answer.partition(' | ')
        add_entry(output_file, channel_title, channel_url, category, note)
        done_urls.add(channel_url)
# Script entry point: show the usage instructions, then start categorizing.
if __name__ == "__main__":
    print(f"{info}\n")
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment