Skip to content

Instantly share code, notes, and snippets.

@sudcha23
Last active August 23, 2023 15:46
Show Gist options
  • Star 8 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save sudcha23/2fb16778e97962d26150753533e0107f to your computer and use it in GitHub Desktop.
Save sudcha23/2fb16778e97962d26150753533e0107f to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
# export GOOGLE_APPLICATION_CREDENTIALS="mysecretcodefilehere.json"
# author: sudcha23
import wikipedia
import re
import codecs
import subprocess
import math
import subprocess
import os
import json
import requests
import shutil
import shlex
from PIL import Image, ImageDraw, ImageFont
from datetime import datetime
from google.cloud import translate
from argparse import Namespace
import upload_video
import GCS_texttovoice
import image_search
import pdb
import sqlite3
def transliterate(string):
    """Return the ``unidecode`` transliteration of ``string``.

    A ``bytes`` value is handed to ``unidecode`` unchanged; anything else is
    first joined into a single unicode string (for a text string this is a
    no-op, for an iterable of fragments it concatenates them).
    """
    from unidecode import unidecode

    text = string if isinstance(string, bytes) else u''.join(string)
    return unidecode(text)
def get_wiki_data(query, lang, lang_country):
    """Fetch the summary, URL and title of a Wikipedia article.

    ``lang`` selects the Wikipedia language edition. Underscores in ``query``
    are replaced by spaces before the lookup. ``lang_country`` is accepted
    but not used by this function.

    Returns a 4-tuple ``(summary, url, title, err_status)``. ``err_status``
    is always ``False`` here: nothing in this function catches lookup
    errors, so any exception raised by the ``wikipedia`` calls propagates
    to the caller.
    """
    wikipedia.set_lang(lang)
    article_name = query.replace("_", " ")
    summary_text = wikipedia.summary(article_name)
    article = wikipedia.page(article_name)
    return summary_text, article.url, article.title, False
def cleanup_content(summary):
    """Return ``summary`` with parenthesised and bracketed spans removed.

    Each shortest span that opens with "(" or "[" and closes with ")" or "]"
    is deleted together with its delimiters. Spans never cross a line break,
    because "." in the pattern does not match a newline. Surrounding
    whitespace is left untouched, so a removed span can leave a double space.
    """
    # A raw string keeps the backslashes literal for the regex engine; in a
    # normal string literal these escapes are invalid and trigger warnings
    # on newer Python versions.
    result = re.sub(r"[\(\[].*?[\)\]]", "", summary)
    print("content cleaned")
    return result
def create_image_file(query, filename):
    """Create the fallback title card used when no images were downloaded.

    Draws ``query`` (underscores shown as spaces) on a dark 1280x720 canvas
    and saves it as ``downloads/<filename>/default.jpg`` under the current
    working directory, creating that directory if it is missing.

    Returns 0.
    """
    caption = query.replace("_", " ")

    target_dir = os.path.join(os.getcwd(), "downloads", filename)
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)

    img = Image.new('RGB', (1280, 720), color=(30, 30, 30))
    # NOTE(review): Pillow documents font encodings such as "unic"; confirm
    # that "utf-8" has the intended effect here.
    fnt = ImageFont.truetype(
        '/Library/Fonts/Arial Unicode.ttf', 80, encoding="utf-8")
    d = ImageDraw.Draw(img)
    # Colour channels are 0-255; the red component was written as 755.
    d.text((240, 160), caption, font=fnt, fill=(255, 20, 0))
    img.save(os.path.join(target_dir, "default.jpg"))
    print("image file created")
    return 0
def get_language_code():
    """Return the fixed ``(language, locale)`` pair ``('en', 'en-US')``."""
    return 'en', 'en-US'
def trending_queries(lang, country, date="2019/06/20"):
    """Fetch the most-viewed Wikipedia articles for one language edition.

    Args:
        lang: Wikipedia language code used to build the project name
            ``<lang>.wikipedia``.
        country: Accepted for the caller's convenience; not used here.
        date: Day to query as ``YYYY/MM/DD``. Defaults to the date that was
            previously hard-coded in the URL.

    Returns:
        The decoded JSON response. Callers read the article list from
        ``result['items'][0]['articles']``.

    Raises:
        requests.exceptions.HTTPError: if the response status is not 200.
    """
    wiki_string = ("https://wikimedia.org/api/rest_v1/metrics/pageviews/top/"
                   + lang + ".wikipedia/all-access/" + date)
    print(wiki_string)
    resp = requests.get(wiki_string)
    if resp.status_code != 200:
        # The previous code raised an undefined name here, which surfaced as
        # a NameError instead of a meaningful HTTP error.
        raise requests.exceptions.HTTPError(
            'GET {} returned {}'.format(wiki_string, resp.status_code),
            response=resp)
    z = resp.json()
    articles = z['items'][0]['articles']
    # Debug output only; guard the sample index so a short list cannot crash.
    if len(articles) > 24:
        print(articles[24])
    print(len(articles))
    return z
def rename_files(query):
    """Renumber every entry of ``downloads/<query>/`` as ``1.jpg``, ``2.jpg``, ...

    The directory is resolved relative to the current working directory.
    Entries are processed in sorted name order so the numbering is
    deterministic.

    Renaming happens in two passes: every entry is first moved to a
    temporary name and only then to its final ``N.jpg`` name. Renaming
    directly could overwrite an entry that already carried one of the
    target names before it had been processed, silently losing a file.

    Returns the number of entries renamed.
    """
    folder = os.path.join(os.getcwd(), "downloads", query)
    names = sorted(os.listdir(folder))

    staged = []
    for index, name in enumerate(names, 1):
        temporary = os.path.join(
            folder, ".rename_tmp_%d_%d" % (os.getpid(), index))
        os.rename(os.path.join(folder, name), temporary)
        staged.append(temporary)

    for index, temporary in enumerate(staged, 1):
        os.rename(temporary, os.path.join(folder, "%d.jpg" % index))

    count = len(staged)
    print("number of files!!" + str(count))
    return count
def init():
    """Return ``('en', 'en-US', <today>)`` with today formatted as YYYY-MM-DD."""
    today = datetime.today()
    return 'en', 'en-US', today.strftime('%Y-%m-%d')
def translate_text_to_en(query):
    """Translate ``query`` into English via the Google Cloud Translation client.

    The text is sent UTF-8 encoded. Plain-text output is requested
    explicitly: with the API's default HTML format, characters such as
    apostrophes come back as HTML entities, which would then leak into
    search terms and titles.

    Returns the translated text.
    """
    translate_client = translate.Client()
    translation = translate_client.translate(
        query.encode('utf8'), target_language='en', format_='text')
    translated = translation['translatedText']
    print(u'Translation: {}'.format(translated))
    return translated
def translate_text_to_any(query, lang):
    """Translate ``query`` into ``lang`` via the Google Cloud Translation client.

    The text is sent UTF-8 encoded. Plain-text output is requested
    explicitly: with the API's default HTML format, characters such as
    apostrophes come back as HTML entities, which would then leak into
    video titles and descriptions.

    Returns the translated text.
    """
    translate_client = translate.Client()
    translation = translate_client.translate(
        query.encode('utf8'), target_language=lang, format_='text')
    translated = translation['translatedText']
    print(u'Translation: {}'.format(translated))
    return translated
def _run_command(args):
    """Run the external command ``args`` (a list); return True on exit status 0."""
    try:
        return subprocess.call(args) == 0
    except OSError:
        # The executable could not be started at all (e.g. it is not installed).
        print("could not start " + args[0])
        return False


def _ensure_dir(path):
    """Create directory ``path`` (including parents) unless it already exists."""
    if not os.path.isdir(path):
        os.makedirs(path)


def magic_func(query, lang, lang_country, day):
    """Build the narrated slideshow video for one Wikipedia article.

    Steps, all relative to the current working directory:
      1. Fetch the article summary and write it to
         ``text/<lang_country>/<day>/<name>.txt``.
      2. Download up to 7 images with ``googleimagesdownload`` into
         ``downloads/<name>/``; if none arrive, render a default title card.
      3. Synthesize speech via ``GCS_texttovoice.textToSpeech1`` and move the
         resulting ``<name>.mp3`` to ``audio/<lang_country>/<day>/``.
      4. Use ffmpeg to build the slideshow, overlay ``logo.png`` and (unless
         the default title card is used) draw the article name on top.
      5. Move the finished ``<name>.mp4`` to ``video/<lang_country>/<day>/``.

    ``<name>`` is ``query`` with spaces replaced by underscores.

    Returns:
        ``("success", summary, page_url, page_title)`` on success, or
        ``("fail", 0, 0, 0)`` on any failure. The result is always a
        4-tuple, so callers can unpack it unconditionally.
    """
    failure = ("fail", 0, 0, 0)
    filename = query.replace(" ", "_")
    audio_name = filename + '.mp3'
    final_file = filename + '.mp4'
    print("QUERY + " + query)
    cwd = os.getcwd()

    # --- 1. Article text -------------------------------------------------
    try:
        wiki_summary, page_url, page_title, err_status = get_wiki_data(
            query, lang, lang_country)
    except wikipedia.exceptions.WikipediaException:
        # Missing or ambiguous pages are an expected outcome for trending
        # titles; report them as a failed query instead of crashing the run.
        print("Wikipedia lookup failed for " + query)
        return failure
    if err_status:
        return failure

    text_dir = os.path.join(cwd, "text", lang_country, day)
    _ensure_dir(text_dir)
    with codecs.open(os.path.join(text_dir, filename + '.txt'), 'w',
                     encoding='utf8') as f:
        f.write(wiki_summary)

    # --- 2. Images -------------------------------------------------------
    print("\n\n DOWNLOADING IMAGES NOW\n\n")
    search_terms = query.replace("_", " ")
    for unwanted in "():":
        search_terms = search_terms.replace(unwanted, " ")
    query_en = translate_text_to_en(search_terms)

    # Passing an argument list (instead of splitting a concatenated shell
    # string) keeps quotes inside the search terms from breaking the command.
    download_cmd = ["googleimagesdownload", "-k", query_en,
                    "-i", filename, "-l", "7"]
    print(" ".join(download_cmd))
    # A failed download is not fatal: the default title card covers it.
    _run_command(download_cmd)

    image_dir = os.path.join(cwd, "downloads", filename)
    _ensure_dir(image_dir)
    print(image_dir)
    # Count exactly what the ffmpeg glob below ("*.jpg") will pick up.
    num_images_downloaded = sum(
        1 for name in os.listdir(image_dir) if name.endswith(".jpg"))
    default_template = num_images_downloaded == 0
    if default_template:
        create_image_file(query, filename)
        num_images_downloaded = 1

    # --- 3. Narration ----------------------------------------------------
    resp = GCS_texttovoice.textToSpeech1(filename, lang, lang_country, day)
    if resp == "fail":
        return failure
    audio_dir = os.path.join(cwd, "audio", lang_country, day)
    _ensure_dir(audio_dir)
    audio_file = os.path.join(audio_dir, audio_name)
    shutil.move(os.path.join(cwd, audio_name), audio_file)

    # communicate() alone both drains the pipes and waits for the process;
    # calling wait() first can deadlock once a pipe buffer fills up.
    proc = subprocess.Popen(["bash", "duration_of_mp3.sh", audio_file],
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    raw_length, _ = proc.communicate()
    raw_length = raw_length.decode('utf-8').strip()
    print("AUDIO LENGTH = " + raw_length)
    try:
        audio_length = float(raw_length)
    except ValueError:
        print("could not determine audio length for " + audio_file)
        return failure

    # Seconds each image stays on screen (used as "-framerate 1/<value>").
    seconds_per_image = audio_length / num_images_downloaded
    print("Number of images: " + str(num_images_downloaded))
    print(seconds_per_image)
    if seconds_per_image <= 0:
        seconds_per_image = 1

    # --- 4. Video --------------------------------------------------------
    slideshow_file = "output_t.mp4"
    branded_file = "output.mp4"

    slideshow_cmd = [
        "ffmpeg", "-y", "-hide_banner",
        "-framerate", "1/" + str(seconds_per_image),
        "-pix_fmt", "yuvj420p",
        "-pattern_type", "glob", "-i", os.path.join(image_dir, "*.jpg"),
        "-i", audio_file,
        "-c:v", "libx264",
        "-vf", " scale=1280:720:force_original_aspect_ratio=decrease,"
               "pad=1280:720:(ow-iw)/2:(oh-ih)/2",
        "-c:a", "aac", "-shortest", slideshow_file,
    ]
    # Run once and check the exit status; a failed encode must not let a
    # stale intermediate file from an earlier query be published.
    if not _run_command(slideshow_cmd):
        print("\n\nVideo Gneration FAILED!!\n\n\n")
        return failure

    logo_cmd = [
        "ffmpeg", "-y", "-hide_banner",
        "-i", slideshow_file, "-i", "logo.png",
        "-max_muxing_queue_size", "9999",
        "-filter_complex", "overlay=main_w-overlay_w-5:main_h-overlay_h-5",
        "-codec:a", "copy", branded_file,
    ]
    if not _run_command(logo_cmd):
        print("\n\nVideo Gneration FAILED!!\n\n\n")
        return failure

    if default_template:
        # The title card already shows the article name; no overlay needed.
        print("\n\n\n Default template is being used!!!\n\n\n")
        shutil.copyfile(os.path.join(cwd, branded_file),
                        os.path.join(cwd, final_file))
    else:
        print("\n\n\n Default template not used as there are images!!!\n\n\n")
        # NOTE(review): the caption is embedded unescaped; characters that are
        # special in ffmpeg filter syntax (quote, colon, backslash, percent)
        # presumably still break the overlay - confirm and escape if needed.
        draw_text = (
            "drawtext=fontfile=/Library/Fonts/Arial\\ Unicode.ttf:text='"
            + query.replace("_", " ")
            + "': fontcolor=white: fontsize=55: box=1: boxcolor=black@0.5:"
              " boxborderw=5: x=(w-text_w)/10: y=(h-text_h)/10")
        caption_cmd = [
            "ffmpeg", "-y", "-hide_banner", "-i", branded_file,
            "-max_muxing_queue_size", "9999",
            "-vf", draw_text,
            "-codec:a", "copy", final_file,
        ]
        if not _run_command(caption_cmd):
            print("\n\n\nTEXT WRITING FAILED!!\n\n\n")
            return failure

    # --- 5. Publish ------------------------------------------------------
    final_file_new_path = cwd + "/video/" + lang_country + "/" + day + "/"
    print(final_file_new_path)
    print(final_file)
    _ensure_dir(final_file_new_path)
    if not os.path.exists(os.path.join(cwd, final_file)):
        return failure
    shutil.move(os.path.join(cwd, final_file),
                os.path.join(final_file_new_path, final_file))
    return "success", wiki_summary, page_url, page_title
def _query_already_done(query, log_path):
    """Return True if ``log_path`` records a successful run for ``query``.

    Log lines have the form ``<lang_country> <query> result = <status>``.
    Matching the whole ``" <query> result = success"`` fragment avoids
    treating a query as done merely because it is a substring of another
    logged title.
    """
    if not os.path.exists(log_path):
        return False
    marker = " " + query + " result = success"
    with codecs.open(log_path, 'r', encoding='utf8') as log:
        for entry in log:
            if marker in entry:
                print(entry)
                return True
    return False


def _store_video_record(row):
    """Insert one 10-column ``row`` into ``queries_content.db``.

    The table is created on first use and the connection is always closed,
    even when the insert fails.
    """
    conn = sqlite3.connect('queries_content.db')
    try:
        conn.execute('PRAGMA encoding="UTF-8";')
        conn.execute(
            "CREATE TABLE IF NOT EXISTS queries_content ("
            "date text, title text, description text, keywords text, "
            "channel_id text, video_id text, lang text, lang_country text, "
            "upload_status text, metadata_status text)")
        conn.execute(
            "INSERT INTO queries_content VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            row)
        conn.commit()
    finally:
        conn.close()


def main():
    """Generate videos for trending Wikipedia articles of each configured locale.

    Reads one locale per line (e.g. ``en-US``) from ``languages.txt``; the
    first two characters are the Wikipedia language code. For each locale the
    selected trending article is skipped if ``queries_done.txt`` already
    records a success for it; otherwise the video is built with
    ``magic_func``, its title/description/keywords are assembled, stored in
    ``queries_content.db`` and appended to ``queries_done_content.txt``, and
    the outcome is appended to ``queries_done.txt``. Failed queries are not
    logged, so they are retried on the next run.
    """
    day = datetime.today().strftime('%Y-%m-%d')
    done_log = "queries_done.txt"

    with open("languages.txt", "r") as nf:
        locales = [line.strip() for line in nf]

    for lang_country in locales:
        if not lang_country:
            continue  # tolerate blank lines in languages.txt
        lang = lang_country[:2]
        z = trending_queries(lang, lang_country)
        articles = z['items'][0]['articles']
        # The three fixed phrases are translated at most once per language.
        phrases = None

        for x in range(288, 289):
            if x >= len(articles):
                break  # fewer trending articles than the selected index
            print(" VALUE OF X : " + str(x))
            query = articles[x]['article']
            print(query)

            if _query_already_done(query, done_log):
                print(query + "already exists!!")
                continue

            # Accept either a bare status string or a
            # (status, summary, url, title) tuple from magic_func.
            outcome = magic_func(query, lang, lang_country, day)
            if isinstance(outcome, tuple):
                result, wiki_summary, page_url, page_title = outcome
            else:
                result, wiki_summary, page_url, page_title = outcome, 0, 0, 0

            magic_result = (lang_country + " " + query + " result = "
                            + result + "\n")
            print("\n\n\n Magic Function result:" + magic_result + "\n\n\n")
            if result != "success":
                continue

            query1 = query.replace("_", " ")
            print("\n\n\n\n\n\nin last phase query 1 = " + query1)
            file_path = os.getcwd() + "/video/" + lang_country + "/" + \
                day + "/" + query.replace(" ", "_") + ".mp4"

            if phrases is None:
                phrases = [
                    translate_text_to_any(phrase, lang) for phrase in (
                        "Who is",
                        "What is",
                        "Please click SUBSCRIBE button for more useful videos!",
                    )
                ]
            translated_title_pre1, translated_title_pre2, \
                translated_title_pre3 = phrases

            title_query = translated_title_pre1 + " " + page_title + "? | " + \
                translated_title_pre2 + " " + page_title + "?" + \
                "| SoKnow - " + lang
            description_query = title_query + "\n " + translated_title_pre3 + \
                "\n\n" + wiki_summary + "\n\nSource: " + page_url + \
                "\nCreative Commons License: https://creativecommons.org/licenses/by-sa/3.0/ \n\n"

            keywords = query
            for separator in "_)(:,":
                keywords = keywords.replace(separator, " ")
            k = keywords + ", " + translated_title_pre1 + " " + \
                keywords + ", " + translated_title_pre2 + " " + keywords

            # Upload arguments are prepared, but the upload call itself is
            # currently disabled.
            args = Namespace(auth_host_name='localhost',
                             auth_host_port=[8080, 8090], category='22',
                             description=description_query, file=file_path,
                             keywords=k, logging_level='ERROR',
                             noauth_local_webserver=False,
                             privacyStatus='public', title=title_query)
            # upload_video.yt_up(args)
            print("\n\n\n\n UPLOAD code Ran for " + query +
                  "\n\n\n\n Continuing the loop... \n")

            # Columns: date, title, description, keywords, channel_id,
            # video_id, lang, lang_country, upload_status, metadata_status.
            _store_video_record((day, title_query, description_query, k,
                                 '0', '0', lang, lang_country, "yes", "no"))

            with codecs.open("queries_done_content.txt", 'a',
                             encoding='utf8') as qc:
                qc.write("title_query: \n" + title_query + "\n\n")
                qc.write("description_query:\n" +
                         description_query + "\n\nEOD\n\n")
            with codecs.open(done_log, 'a', encoding='utf8') as f:
                f.write(magic_result)
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment