This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from multiprocessing import Process, Manager | |
def channel2excel(url_csv, from_date, to_date): | |
print(url_csv,' checking started') | |
save_name = url_csv.split('_reverse')[0] | |
save_name = save_name+'_checked.xlsx' | |
df = pd.read_csv(url_csv) | |
n_threads = 18 | |
df_split = np.array_split(df['Video URL'].values, n_threads) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tqdm import tqdm | |
def check_url(urls, from_date, to_date, add_list, i):
    """Collect metadata for videos uploaded inside a date window.

    Every URL is handed to ``parse_video_info``. When that call returns a
    truthy result, its ``'upload_date'`` entry is parsed with the
    ``%Y-%m-%d`` format and the metadata is appended to ``add_list`` if
    ``from_date <= upload date < to_date``. URLs whose metadata is falsy
    are reported on stdout and skipped.

    Args:
        urls: Iterable of video URLs to check (wrapped in ``tqdm`` for a
            progress bar).
        from_date: Inclusive lower bound, compared against a ``datetime``.
        to_date: Exclusive upper bound, compared against a ``datetime``.
        add_list: Collector that matching metadata is appended to in place.
        i: Not used in the visible body -- presumably a worker index;
            confirm against the caller.
    """
    for url in tqdm(urls):
        meta_data = parse_video_info(url)
        # NOTE(review): the flattened original left the ``else`` ambiguous;
        # the 'Non-Url' message is paired with the empty-metadata case here.
        if not meta_data:
            print('Non-Url', url)
            continue
        cur_date = datetime.strptime(meta_data['upload_date'], '%Y-%m-%d')
        if from_date <= cur_date < to_date:
            add_list.append(meta_data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from requests.exceptions import ConnectionError | |
from bs4 import BeautifulSoup as bs | |
import re | |
import pandas as pd | |
import time | |
## modified from https://github.com/sachinrai27/Youtube_video_detail_extraction ## | |
def parse_video_info(url): | |
import time |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time
import threading  # standard Python multi-threading library

# YouTube bot that makes a list of YouTube videos (no API token required)
from yt_videos_list import ListCreator  # https://pypi.org/project/yt-videos-list/

my_driver = 'firefox'  # Selenium driver to use
# Increase scroll_pause_time if the program ends prematurely or the
# internet connection is slow.
lc = ListCreator(driver=my_driver, scroll_pause_time=2)
number_of_threads = 3  # number of channels to parse simultaneously