Skip to content

Instantly share code, notes, and snippets.

@GabrielSGoncalves
Last active September 24, 2019 14:29
Show Gist options
  • Save GabrielSGoncalves/2997b36db55763de8ff6d0c292c1f268 to your computer and use it in GitHub Desktop.
Save GabrielSGoncalves/2997b36db55763de8ff6d0c292c1f268 to your computer and use it in GitHub Desktop.
First part of the NLP analysis for the Medium article on AWS ML/AI tools
from __future__ import print_function

import json
import logging
import os
import subprocess
import time
from datetime import date

import boto3
import matplotlib as plt
import pandas as pd
import seaborn as sns
import spacy
import wmd
from botocore.exceptions import ClientError
# 1) Map a short label for each speaker to the YouTube URL of the speech.
#    The labels double as the row index of the DataFrame built in step 3.
dict_urls_youtube = {
    'churchill': 'https://www.youtube.com/watch?v=s_LncVnecLA',
    'reagan': 'https://www.youtube.com/watch?v=5MDFX-dNtsM&t=6s',
    'luther_king': 'https://www.youtube.com/watch?v=I47Y6VHc3Ms',
    'macarthur': 'https://www.youtube.com/watch?v=_42_aLGkRpg&t=19s',
    'kennedy': 'https://www.youtube.com/watch?v=QAmHcdwKgtQ',
}
# 2) Download each speech in mp3 format using youtube-dl.
#    The command is passed as an argument list (no shell) on purpose: some of
#    the URLs contain '&' and '?', which a shell would interpret (backgrounding
#    the download and globbing) instead of handing them to youtube-dl verbatim.
#    Raises FileNotFoundError if the youtube-dl executable is not on PATH.
for url in dict_urls_youtube.values():
    print(f'Downloading audio file from the link: {url}')
    download = subprocess.run(
        ['youtube-dl', '-x', '--audio-format', 'mp3', url],
        check=False,
    )
    # Keep going with the remaining speeches, but do not hide a failure.
    if download.returncode != 0:
        print(f'youtube-dl exited with status {download.returncode} '
              f'for the link: {url}')
# 3) Create a DataFrame to store the information through the analysis:
#    one row per speaker label, with the speech URL in 'youtube_urls'.
df_audio = pd.DataFrame(
    list(dict_urls_youtube.values()),
    index=list(dict_urls_youtube.keys()),
    columns=['youtube_urls'],
)
# 4) Link the name of each audio file to its speaker.
#    Each entry is (substring searched in the lower-cased file name, row label
#    in df_audio). Order matters: the first matching keyword wins for a file.
speaker_keywords = (
    ('churchill', 'churchill'),
    ('reagan', 'reagan'),
    ('king', 'luther_king'),
    ('macarthur', 'macarthur'),
    ('kennedy', 'kennedy'),
)
for audio_file in os.listdir('.'):
    lowered_name = audio_file.lower()
    for keyword, speaker in speaker_keywords:
        if keyword in lowered_name:
            # Store the file name exactly as it appears on disk.
            df_audio.at[speaker, 'filename'] = audio_file
            break
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment