-
-
Save sudcha23/2fb16778e97962d26150753533e0107f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# export GOOGLE_APPLICATION_CREDENTIALS="mysecretcodefilehere.json" | |
# author: sudcha23 | |
import wikipedia | |
import re | |
import codecs | |
import subprocess | |
import math | |
import subprocess | |
import os | |
import json | |
import requests | |
import shutil | |
import shlex | |
from PIL import Image, ImageDraw, ImageFont | |
from datetime import datetime | |
from google.cloud import translate | |
from argparse import Namespace | |
import upload_video | |
import GCS_texttovoice | |
import image_search | |
import pdb | |
import sqlite3 | |
def transliterate(string):
    """Return an ASCII transliteration of *string* using unidecode.

    Bytes input is passed straight through to unidecode; any other
    input is first flattened to a single unicode string.
    """
    from unidecode import unidecode
    if isinstance(string, bytes):
        return unidecode(string)
    return unidecode(u''.join(string))
def get_wiki_data(query, lang, lang_country):
    """Look up *query* on Wikipedia in language *lang*.

    Underscores in the query are treated as spaces.  *lang_country* is
    accepted for signature compatibility but not used here.

    Returns (summary, page_url, page_title, err_status); err_status is
    always False — errors from the wikipedia library simply propagate.
    """
    wikipedia.set_lang(lang)
    normalized = query.replace("_", " ")
    summary = wikipedia.summary(normalized)
    page = wikipedia.page(normalized)
    return summary, page.url, page.title, False
def cleanup_content(summary):
    """Strip parenthesised/bracketed asides from a Wikipedia summary.

    Non-greedily removes every span opened by '(' or '[' and closed by
    ')' or ']' (mixed pairs like "(x]" are removed too — unchanged
    legacy behavior).  Fix: the pattern is now a raw string; the old
    plain string relied on invalid escape sequences like "\\(", which
    modern Python flags with a SyntaxWarning.
    """
    result = re.sub(r"[\(\[].*?[\)\]]", "", summary)  # Removes content in ()
    print("content cleaned")
    return result
def create_image_file(query, filename):
    """Render a 1280x720 fallback title card showing *query*.

    Saves the card as downloads/<filename>/default.jpg and returns 0.
    Fixes: the red channel was 755, which is not a valid 8-bit RGB
    component (clearly a typo for 255); the Py2 print statement is now a
    function call; the output path is built with os.path.join.
    NOTE(review): the font path is macOS-specific — confirm the
    deployment host actually has /Library/Fonts/Arial Unicode.ttf.
    """
    img = Image.new('RGB', (1280, 720), color=(30, 30, 30))
    fnt = ImageFont.truetype(
        '/Library/Fonts/Arial Unicode.ttf', 80, encoding="utf-8")
    d = ImageDraw.Draw(img)
    query = query.replace("_", " ")
    d.text((240, 160), query, font=fnt, fill=(255, 20, 0))
    img.save(os.path.join(os.getcwd(), "downloads", filename, "default.jpg"))
    print("image file created")
    return 0
def get_language_code():
    """Return the pipeline's default (language, locale) pair."""
    return 'en', 'en-US'
def trending_queries(lang, country, date="2019/06/20"):
    """Fetch the Wikimedia top-pageviews listing for *lang*.

    *country* is unused but kept for caller compatibility.  *date* is a
    "YYYY/MM/DD" path segment; it defaults to the value that was
    previously hard-coded, so existing callers are unaffected.

    Returns the decoded JSON payload.  Raises RuntimeError on a non-200
    response — the original raised the undefined name `ApiError`, which
    would surface as a NameError and mask the real HTTP failure.
    """
    wiki_string = "https://wikimedia.org/api/rest_v1/metrics/pageviews/top/" + \
        lang + ".wikipedia/all-access/" + date
    print(wiki_string)
    resp = requests.get(wiki_string)
    if resp.status_code != 200:
        # This means something went wrong.
        raise RuntimeError('GET /tasks/ {}'.format(resp.status_code))
    z = resp.json()
    y_length = len(z['items'][0]['articles'])
    print(z['items'][0]['articles'][24])
    print(y_length)
    return z
def rename_files(query):
    """Rename every file in downloads/<query>/ to 1.jpg, 2.jpg, ...

    Returns the number of files renamed.  Fixes: the original renamed
    files directly to their numeric targets while walking the listing,
    so a file already named e.g. "1.jpg" could be clobbered by a
    not-yet-processed sibling; a two-phase rename (unique temp names
    first, final names second) makes this safe.  Also avoids shadowing
    the builtin `dir` and uses os.path.join instead of concatenation.
    """
    folder = os.path.join(os.getcwd(), "downloads", query)
    names = os.listdir(folder)
    # Phase 1: park everything under collision-free temporary names.
    for idx, name in enumerate(names):
        os.rename(os.path.join(folder, name),
                  os.path.join(folder, "__tmp_%d" % idx))
    # Phase 2: hand out the final sequential .jpg names.
    for idx in range(len(names)):
        os.rename(os.path.join(folder, "__tmp_%d" % idx),
                  os.path.join(folder, str(idx + 1) + ".jpg"))
    # Preserves the original log line, which printed count + 1.
    print("number of files!!" + str(len(names) + 1))
    return len(names)
def init():
    """Return the default language, locale, and today's date (YYYY-MM-DD)."""
    today = datetime.today().strftime('%Y-%m-%d')
    return 'en', 'en-US', today
def translate_text_to_en(query):
    """Translate *query* into English via Google Cloud Translate.

    Fix: the body was a verbatim copy of translate_text_to_any with the
    target hard-coded; delegating keeps the two in sync (same client,
    same call, same debug print, same return value).
    """
    return translate_text_to_any(query, 'en')
def translate_text_to_any(query, lang):
    """Translate *query* into target language *lang* using the
    Google Cloud Translate client; print and return the result."""
    client = translate.Client()
    response = client.translate(
        query.encode('utf8'), target_language=lang)
    translated = response['translatedText']
    print(u'Translation: {}'.format(translated))
    return translated
def magic_func(query, lang, lang_country, day): | |
filename = query.replace(" ", "_") | |
text_file = filename + '.txt' | |
audio_file = filename + '.mp3' | |
final_file = filename + '.mp4' | |
print "QUERY + " + query | |
cwd = os.getcwd() | |
wiki_summary, page_url, page_title, err_status = get_wiki_data( | |
query, lang, lang_country) | |
if err_status == True: | |
return "fail" | |
with codecs.open(text_file, 'w', encoding='utf8') as f: | |
f.write(wiki_summary) | |
text_file_new_path = "text/"+lang_country+"/"+day+"/" | |
if not os.path.exists(text_file_new_path): | |
os.makedirs(text_file_new_path) | |
shutil.move(os.path.join(cwd, text_file), os.path.join( | |
cwd, text_file_new_path, text_file)) | |
text_file = os.path.join(cwd, text_file_new_path, text_file) | |
print "\n\n DOWNLOADING IMAGES NOW\n\n" | |
# DOWNLOADING IMAGES | |
query1 = query.replace("_", " ") | |
#query = re.sub('\W+',' ', query) | |
query1 = query1.replace("(", " ") | |
query1 = query1.replace(")", " ") | |
query1 = query1.replace(":", " ") | |
query_en = translate_text_to_en(query1) | |
#query_transliterated_en = transliterate(query) | |
shell_command = "googleimagesdownload -k \"" + query_en + \ | |
"\" -i \"" + query.replace(" ", "_") + "\" -l 7" | |
print shell_command | |
subprocess.call(shlex.split(shell_command)) | |
dir = os.getcwd() + "/downloads/" + query.replace(" ", "_") + "/" | |
if not os.path.exists(dir): | |
os.makedirs(dir) | |
num_images_downloaded = 0 | |
default_template = False | |
print dir | |
for counter in os.listdir(dir.decode('utf-8')): | |
if counter.find(".jpg") != -1: | |
num_images_downloaded += 1 | |
# If no images were downloaded, let's create the default template! | |
if num_images_downloaded == 0: | |
create_image_file(query, filename) | |
default_template = True | |
num_images_downloaded = 1 | |
# Create text to speech! | |
resp = "success" | |
resp = GCS_texttovoice.textToSpeech1(filename, lang, lang_country, day) | |
if resp == "fail": | |
return "fail" | |
audio_file_new_path = "audio/"+lang_country+"/"+day+"/" | |
if not os.path.exists(audio_file_new_path): | |
os.makedirs(audio_file_new_path) | |
shutil.move(os.path.join(cwd, audio_file), os.path.join( | |
cwd, audio_file_new_path, audio_file)) | |
audio_file = os.path.join(cwd, audio_file_new_path, audio_file) | |
proc = subprocess.Popen(["bash", "duration_of_mp3.sh", audio_file], | |
stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
proc.wait() | |
audio_length, err = proc.communicate() | |
print "AUDIO LENGTH = " + audio_length | |
audio_length = float(audio_length) | |
frame_rate = audio_length / num_images_downloaded | |
print "Number of images: " + str(num_images_downloaded) | |
print frame_rate | |
if frame_rate == 0: | |
frame_rate = 1 | |
images_path = os.getcwd() + "/downloads/" + filename + "/*.jpg" | |
tmp_file1 = "output_t.mp4" | |
tmp_file = "output.mp4" | |
# Create Video File! | |
command_line = "ffmpeg -y -hide_banner -framerate 1/" + str(frame_rate) + " -pix_fmt yuvj420p -pattern_type glob -i \"" + str(images_path) + "\" -i \"" + str( | |
audio_file) + "\" -c:v libx264 -vf \" scale=1280:720:force_original_aspect_ratio=decrease,pad=1280:720:(ow-iw)/2:(oh-ih)/2\" -c:a aac -shortest \"" + str(tmp_file1) + "\"" | |
args = shlex.split(command_line) | |
subprocess.call(args) | |
try: | |
subprocess.call(args) | |
except: | |
print "\n\nVideo Gneration FAILED!!\n\n\n" | |
return "fail" | |
# Add logo to the video | |
command_line = "ffmpeg -y -hide_banner -i " + \ | |
str(tmp_file1) + " -i logo.png -max_muxing_queue_size 9999 -filter_complex \"overlay=main_w-overlay_w-5:main_h-overlay_h-5\" -codec:a copy " + str(tmp_file) | |
args = shlex.split(command_line) | |
subprocess.call(args) | |
# Add Text to the Video File! | |
if default_template: | |
print "\n\n\n Default template is being used!!!\n\n\n" | |
shutil.copyfile(os.path.join(os.getcwd(), tmp_file), | |
os.path.join(os.getcwd(), final_file)) | |
else: | |
print "\n\n\n Default template not used as there are images!!!\n\n\n" | |
draw_text = "\"fontfile=/Library/Fonts/Arial\ Unicode.ttf:text='" + query.replace("_", " ").encode( | |
"utf8") + "': fontcolor=white: fontsize=55: box=1: boxcolor=black@0.5: boxborderw=5: x=(w-text_w)/10: y=(h-text_h)/10\"" | |
command_line = "ffmpeg -y -hide_banner -i " + tmp_file + \ | |
" -max_muxing_queue_size 9999 -vf drawtext=" + \ | |
draw_text + " -codec:a copy " + final_file | |
args = shlex.split(command_line) | |
try: | |
subprocess.call(args) | |
except: | |
print "\n\n\nTEXT WRITING FAILED!!\n\n\n" | |
return "fail" | |
final_file_new_path = cwd + "/video/"+lang_country+"/"+day+"/" | |
print final_file_new_path | |
print final_file | |
if not os.path.exists(final_file_new_path): | |
os.makedirs(final_file_new_path) | |
if os.path.exists(os.path.join(os.getcwd(), final_file)): | |
shutil.move(os.path.join(cwd, final_file), | |
os.path.join(final_file_new_path, final_file)) | |
return "success", wiki_summary, page_url, page_title | |
else: | |
return "fail", 0, 0, 0 | |
def main():
    """Drive the whole pipeline: for each configured locale, pick a
    trending Wikipedia article, build a video via magic_func, and record
    the upload metadata in SQLite and in log files."""
    day = datetime.today().strftime('%Y-%m-%d')
    # NOTE(review): handle is never closed — consider a with-block.
    nf = open("languages.txt", "r")
    lines = nf.readlines()
    for line in lines:
        # Each languages.txt line is a locale such as "en-US"; the
        # first two characters are the bare language code.
        lang = line[:2]
        lang_country = line.strip('\n')
        z = trending_queries(lang, lang_country)
        text_file = "queries_done.txt"
        # NOTE(review): only article index 288 is processed — presumably
        # a manual resume/debug range; confirm before relying on it.
        for x in range(288, 289):
            print " VALUE OF X : " + str(x)
            query = z['items'][0]['articles'][x]['article']
            print query
            # Skip queries already recorded as successful.
            # NOTE(review): handle is never closed.
            qd = open("queries_done.txt", "r")
            queries_done = qd.readlines()
            flag = 0
            for l in queries_done:
                if query in l and 'success' in l:
                    flag = 1
                    print l
                    break
            if flag == 1:
                print query + "already exists!!"
                continue
            result = "result"
            # try:
            result, wiki_summary, page_url, page_title = magic_func(
                query, lang, lang_country, day)
            # except: continue
            magic_result = lang_country + " " + query + " result = " + result + "\n"
            print "\n\n\n Magic Function result:" + magic_result + "\n\n\n"
            if result == "fail":
                continue
            query1 = query.replace("_", " ")
            print "\n\n\n\n\n\nin last phase query 1 = " + str(query1)
            file_path = os.getcwd() + "/video/" + lang_country + "/" + \
                day + "/" + query.replace(" ", "_") + ".mp4"
            # Localise the title/description boilerplate into the
            # target language.
            title_query_pre1 = "Who is"
            title_query_pre2 = "What is"
            title_query_pre3 = "Please click SUBSCRIBE button for more useful videos!"
            translated_title_pre1 = translate_text_to_any(
                title_query_pre1, lang)
            translated_title_pre2 = translate_text_to_any(
                title_query_pre2, lang)
            translated_title_pre3 = translate_text_to_any(
                title_query_pre3, lang)
            title_query = translated_title_pre1 + " " + page_title + "? | " + \
                translated_title_pre2 + " " + page_title + "?" + "| SoKnow - " + lang
            description_query = title_query + "\n " + translated_title_pre3 + "\n\n" + wiki_summary + "\n\nSource: " + \
                page_url + "\nCreative Commons License: https://creativecommons.org/licenses/by-sa/3.0/ \n\n"
            # Build the keyword list from the punctuation-stripped query.
            keywords = query
            keywords = keywords.replace("_", " ")
            keywords = keywords.replace(")", " ")
            keywords = keywords.replace("(", " ")
            keywords = keywords.replace(":", " ")
            keywords = keywords.replace(",", " ")
            k = keywords + ", " + translated_title_pre1 + " " + \
                keywords + ", " + translated_title_pre2 + " " + keywords
            # Namespace mimics the argparse result upload_video.yt_up
            # expects; the actual upload call is currently disabled.
            args = Namespace(auth_host_name='localhost', auth_host_port=[8080, 8090], category='22', description=description_query,
                             file=file_path, keywords=k, logging_level='ERROR', noauth_local_webserver=False, privacyStatus='public', title=title_query)
            # print args
            # upload_video.yt_up(args)
            print "\n\n\n\n UPLOAD code Ran for " + query + "\n\n\n\n Continuing the loop... \n"
            # CREATE TABLE queries_content (date text, title text, description text, keywords text, channel_id text, video_id text, lang text, lang_country text, upload_status text, metadata_status text);
            #queries_content.db : ['date', 'title', 'description', 'keywords', 'channel_id', 'video_id', 'lang', 'lang_country']
            # Record the generated metadata in SQLite (channel/video ids
            # are placeholders until the upload step is re-enabled).
            conn = sqlite3.connect('queries_content.db')
            c = conn.cursor()
            sql_query = """INSERT INTO queries_content VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"""
            c.execute('PRAGMA encoding="UTF-8";')
            c.execute(sql_query, (day, title_query, description_query,
                                  k, '0', '0', lang, lang_country, "yes", "no"))
            conn.commit()
            conn.close()
            # Append a human-readable record of what was produced.
            with codecs.open("queries_done_content.txt", 'a', encoding='utf8') as qc:
                qc.write("title_query: \n"+title_query+"\n\n")
                qc.write("description_query:\n" +
                         description_query+"\n\nEOD\n\n")
            with codecs.open(text_file, 'a', encoding='utf8') as f:
                f.write(magic_result)
# Script entry point.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment