Last active
June 9, 2022 20:42
-
-
Save jeffehobbs/512bfb7b9a426a2496491053a8c6197b to your computer and use it in GitHub Desktop.
hotgrepper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# hotgrepper.py | jeffehobbs@gmail.com
import requests, json, boto3, random, os, videogrep, configparser | |
from collections import Counter | |
from yt_dlp import YoutubeDL | |
from tweepy import OAuthHandler | |
from tweepy import API | |
# Words that auto_supercut() ignores when ranking the most frequent words
# of a transcript. Kept as a list, in the original order.
STOPWORDS = [
    "i", "we're", "you're", "that's", "it's", "us", "i'm", "me", "my",
    "myself", "we", "our", "ours", "ourselves", "you", "your", "yours",
    "yourself", "yourselves", "he", "him", "his", "himself", "she", "her",
    "hers", "herself", "it", "its", "itself", "they", "them", "their",
    "theirs", "themselves", "what", "which", "who", "whom", "this", "that",
    "these", "those", "am", "is", "are", "was", "were", "be", "been",
    "being", "have", "has", "had", "having", "do", "does", "did", "doing",
    "a", "an", "the", "and", "but", "if", "or", "because", "as", "until",
    "while", "of", "at", "by", "for", "with", "about", "against", "between",
    "into", "through", "during", "before", "after", "above", "below", "to",
    "from", "up", "down", "in", "out", "on", "off", "over", "under", "again",
    "further", "then", "once", "here", "there", "when", "where", "why",
    "how", "all", "any", "both", "each", "few", "more", "most", "other",
    "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than",
    "too", "very", "s", "t", "can", "will", "just", "don", "should", "now",
]
# Index into the "items" list of the YouTube chart response that main()
# tries next; main() increments it after a failed attempt.
VIDEO_NUM = 0

# S3 bucket in which the log.json object is read and written.
BUCKET_NAME = 'hotgrepper'
s3 = boto3.resource("s3").Bucket(BUCKET_NAME)


def _load_s3(f):
    """Parse the body of the bucket object stored under key *f* as JSON."""
    return json.load(s3.Object(key=f).get()["Body"])


def _dump_s3(obj, f):
    """Serialize *obj* to JSON and store it in the bucket under key *f*."""
    return s3.Object(key=f).put(Body=json.dumps(obj))


# The rest of the script calls these helpers as attributes of the json module.
json.load_s3 = _load_s3
json.dump_s3 = _dump_s3
# Load API credentials from the external apikeys.txt file (INI format with
# an [apikeys] section and a [twitter] section).
config = configparser.ConfigParser()
config.read('apikeys.txt')

YOUTUBE_API_KEY = config.get('apikeys', 'youtube_apikey')

# The four Twitter credentials all live in the [twitter] section.
(
    TWITTER_CONSUMER_KEY,
    TWITTER_CONSUMER_SECRET,
    TWITTER_ACCESS_TOKEN,
    TWITTER_ACCESS_TOKEN_SECRET,
) = [
    config.get('twitter', option)
    for option in (
        'consumer_key',
        'consumer_secret',
        'access_token',
        'access_token_secret',
    )
]
def get_logs():
    """Return the parsed contents of log.json from the S3 bucket.

    main() treats the result as the collection of video ids that have
    already been tweeted.
    """
    return json.load_s3("log.json")
def save_logs(previous, new):
    """Append *new* to *previous* and write the result to log.json on S3.

    Note that *previous* is modified in place: the caller's list gains
    the new entry as well.
    """
    log_key = "log.json"
    previous.append(new)
    json.dump_s3(previous, log_key)
def clear_logs():
    """Overwrite log.json on S3 with an empty list."""
    json.dump_s3([], "log.json")
def get_youtube_most_popular():
    """Fetch the US "most popular" video chart from the YouTube Data API v3.

    Returns:
        The decoded JSON body of the response. The HTTP status is not
        checked, so an API error payload is returned unchanged and the
        caller must cope with a missing "items" key.

    Raises:
        requests.exceptions.RequestException: on connection failure or
            when the request exceeds the timeout.
    """
    # The query string is built by requests from a dict. The previous
    # hand-written URL contained a corrupted "&regionCode" separator
    # ("®ionCode"), so the region filter was never sent and the "chart"
    # value was polluted.
    params = {
        'part': 'snippet',
        'chart': 'mostPopular',
        'regionCode': 'US',
        'key': YOUTUBE_API_KEY,
    }
    # A timeout keeps a stalled connection from hanging the bot forever.
    response = requests.get(
        'https://www.googleapis.com/youtube/v3/videos',
        params=params,
        timeout=30,
    )
    return response.json()
def download_video(id):
    """Download a YouTube video to /tmp/<id>.mp4 with yt-dlp.

    Format '22' is requested first; if that attempt raises, the download
    is retried with format '18'. Automatic subtitles are requested on
    both attempts.

    Args:
        id: YouTube video id (the value of the ``v=`` query parameter).

    Raises:
        Exception: whatever yt-dlp raised on the final attempt, if every
            format failed.
    """
    url = f'https://www.youtube.com/watch?v={id}'
    print('URL: ' + str(url))

    # Preferred format first, fallback second.
    fallback_formats = ('22', '18')
    for position, fmt in enumerate(fallback_formats):
        ydl_opts = {
            'outtmpl': f'/tmp/{id}.mp4',
            # This option is an on/off flag, not a path.
            'writeautomaticsub': True,
            'format': fmt,
        }
        try:
            with YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
        except Exception:
            # Exception (not a bare except) lets Ctrl-C / SystemExit
            # through. Only the last format's failure is propagated.
            if position == len(fallback_formats) - 1:
                raise
        else:
            return
def auto_supercut(video_id, total_words=3):
    """Build a supercut of /tmp/<video_id>.mp4 around its most frequent words.

    The n-grams reported by videogrep are filtered against STOPWORDS, the
    ten most frequent survivors are shuffled, and the first *total_words*
    of them are joined with "|" into a search query. videogrep then writes
    the matching fragments to /tmp/<video_id>_supercut.mp4.

    Args:
        video_id: id used to locate the downloaded video under /tmp.
        total_words: maximum number of words placed in the query.

    Returns:
        The query string handed to videogrep.
    """
    source_path = f'/tmp/{video_id}.mp4'

    # Each n-gram is indexable; its first element is the word that is
    # compared against the stopword list.
    ngrams = videogrep.get_ngrams(source_path)
    frequency = Counter(gram for gram in ngrams if gram[0] not in STOPWORDS)
    most_common = frequency.most_common(10)
    print(most_common)

    candidates = [gram[0] for gram, _count in most_common]
    random.shuffle(candidates)
    query = "|".join(candidates[:total_words])
    print(query)

    videogrep.videogrep(
        source_path,
        query,
        search_type="fragment",
        output=f"/tmp/{video_id}_supercut.mp4",
    )
    return query
def tweet_video(tweet_text, video_id):
    """Tweet *tweet_text* with the supercut of *video_id* attached.

    The file /tmp/<video_id>_supercut.mp4 is uploaded as media and then
    referenced from a new status update. Returns None.
    """
    auth = OAuthHandler(TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET)
    auth.set_access_token(TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_TOKEN_SECRET)
    api = API(auth)

    supercut_path = f'/tmp/{video_id}_supercut.mp4'
    uploaded = api.media_upload(supercut_path)
    api.update_status(status=tweet_text, media_ids=[uploaded.media_id])
def main():
    """Tweet a supercut of the current most popular YouTube video.

    Starting at chart position VIDEO_NUM, the video is downloaded, cut,
    tweeted and logged. If any step raises, VIDEO_NUM is advanced and the
    next chart entry is tried, until one attempt completes or the chart
    is exhausted. If the selected video has already been logged, nothing
    is tweeted and the function returns.

    Side effects:
        Increments the module-level VIDEO_NUM once per failed attempt.
    """
    global VIDEO_NUM
    previous = get_logs()
    print('PREVIOUS: ' + str(previous))
    data = get_youtube_most_popular()

    # An API error payload has no "items"; treat it as an empty chart
    # rather than failing with a KeyError.
    items = data.get('items', [])
    if not items:
        print('...no videos returned.')
        return

    # Iterate instead of recursing: the logs and the chart are fetched once,
    # and the strict "<" bound never indexes past the end of the chart.
    while VIDEO_NUM < len(items):
        try:
            video_id = items[VIDEO_NUM]['id']
            print('VIDEO_ID: ' + str(video_id))
            if video_id not in previous:
                download_video(video_id)
                query = auto_supercut(video_id, total_words=3)
                tweet_video(query, video_id)
                save_logs(previous, video_id)
                print('...done.')
            else:
                print('...video already tweeted.')
            return
        except Exception as err:
            # Report the failure instead of swallowing it silently, then
            # fall through to the next chart entry.
            print('...failed on video ' + str(VIDEO_NUM) + ': ' + repr(err))
            VIDEO_NUM = VIDEO_NUM + 1
# Run the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

#fin
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment