Skip to content

Instantly share code, notes, and snippets.

@JoJoseph25
JoJoseph25 / channel2excel.py
Last active June 29, 2021 08:28
Parse multiple specified YouTube video URLs at the same time using parallel processing and save the results to an Excel file.
from multiprocessing import Process, Manager
def channel2excel(url_csv, from_date, to_date):
print(url_csv,' checking started')
save_name = url_csv.split('_reverse')[0]
save_name = save_name+'_checked.xlsx'
df = pd.read_csv(url_csv)
n_threads = 18
df_split = np.array_split(df['Video URL'].values, n_threads)
@JoJoseph25
JoJoseph25 / video_within_range.py
Created June 29, 2021 08:19
Check if a video was published within a specified date range
from tqdm import tqdm
def check_url(urls, from_date, to_date, add_list, i):
    """Collect metadata for the videos uploaded inside a date window.

    Each URL is passed to ``parse_video_info``. When that returns a truthy
    result whose ``'upload_date'`` value (parsed with ``'%Y-%m-%d'``)
    satisfies ``from_date <= upload_date < to_date``, the metadata is
    appended to ``add_list``. URLs that yield a falsy result are reported
    on stdout and skipped.

    Args:
        urls: Iterable of video URLs to check.
        from_date: Inclusive lower bound, compared against a ``datetime``.
        to_date: Exclusive upper bound, compared against a ``datetime``.
        add_list: Object with an ``append`` method that receives the
            matching metadata in place.
        i: Not used by this function; kept so existing callers keep
            working.
    """
    # NOTE(review): the original snippet had lost its indentation; the
    # `else` branch is assumed to belong to the metadata check (the
    # 'Non-Url' message refers to a URL that produced no metadata) — confirm.
    for url in tqdm(urls):
        meta_data = parse_video_info(url)
        if not meta_data:
            print('Non-Url', url)
            continue
        cur_date = datetime.strptime(meta_data['upload_date'], '%Y-%m-%d')
        if from_date <= cur_date < to_date:
            add_list.append(meta_data)
@JoJoseph25
JoJoseph25 / parse_video_info.py
Last active June 28, 2021 04:15
Fetch YouTube video details from a URL using BeautifulSoup
import requests
from requests.exceptions import ConnectionError
from bs4 import BeautifulSoup as bs
import re
import pandas as pd
import time
## modified from https://github.com/sachinrai27/Youtube_video_detail_extraction ##
def parse_video_info(url):
import time
@JoJoseph25
JoJoseph25 / yt_channel_videos.py
Last active June 25, 2021 13:17
Multi-threaded program to fetch video lists simultaneously from the channels listed in a text file
import time
import threading #standard python multi-threading library
#YouTube bot to make a YouTube videos list (no API token required)
from yt_videos_list import ListCreator #https://pypi.org/project/yt-videos-list/
my_driver = 'firefox' #Selenium driver to use
#increase scroll time if program ends prematurely or slow internet
lc = ListCreator(driver=my_driver, scroll_pause_time=2)
number_of_threads = 3 #number of channels to parse simultaneously