GabrielSGoncalves 🏀 Data Engineer @ Big Data

Public gists:
GabrielSGoncalves / lambda_function.py
Last active May 8, 2022 20:16
Amazon Lambda function used on medium article

import json
import os
from io import StringIO

import boto3
import pandas as pd


def write_dataframe_to_csv_on_s3(dataframe, filename, bucket):
    """Write a dataframe to a CSV on S3."""
    # Serialize the dataframe to an in-memory buffer, then upload it
    csv_buffer = StringIO()
    dataframe.to_csv(csv_buffer)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(bucket, filename).put(Body=csv_buffer.getvalue())
GabrielSGoncalves / invoke_lambda.py
Last active May 8, 2022 20:16
Python script to invoke Amazon Lambda using boto3
import boto3
import json
import sys
BUCKET = sys.argv[1]
KEY = sys.argv[2]
OUTPUT = sys.argv[3]
GROUP = sys.argv[4]
COLUMN = sys.argv[5]
CREDENTIALS = sys.argv[6]
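The preview cuts off before the actual invocation. A minimal sketch of how the collected arguments might be sent to the Lambda function (the payload keys and the function name `lambda-medium` are assumptions, not taken from the gist; `client.invoke` itself is the real boto3 call):

```python
import json


def build_payload(bucket, key, output, group, column):
    """Assemble the JSON payload for the Lambda invocation.

    The key names here are illustrative assumptions.
    """
    return json.dumps({
        'bucket': bucket,
        'key': key,
        'output': output,
        'group': group,
        'column': column,
    })


def invoke_lambda(payload, function_name='lambda-medium'):
    """Synchronously invoke the Lambda function and decode its response."""
    import boto3  # imported here so the helper above works without AWS deps
    client = boto3.client('lambda')
    response = client.invoke(
        FunctionName=function_name,
        InvocationType='RequestResponse',
        Payload=payload,
    )
    return json.loads(response['Payload'].read())
```

With the script's arguments, the call would be `invoke_lambda(build_payload(BUCKET, KEY, OUTPUT, GROUP, COLUMN))`.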
GabrielSGoncalves / fifa19_kaggle.csv
Last active August 6, 2019 16:27
CSV file with player information from FIFA 19 provided by Kaggle (https://www.kaggle.com/karangadiya/fifa19)
,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,Value,Wage,Special,Preferred Foot,International Reputation,Weak Foot,Skill Moves,Work Rate,Body Type,Real Face,Position,Jersey Number,Joined,Loaned From,Contract Valid Until,Height,Weight,LS,ST,RS,LW,LF,CF,RF,RW,LAM,CAM,RAM,LM,LCM,CM,RCM,RM,LWB,LDM,CDM,RDM,RWB,LB,LCB,CB,RCB,RB,Crossing,Finishing,HeadingAccuracy,ShortPassing,Volleys,Dribbling,Curve,FKAccuracy,LongPassing,BallControl,Acceleration,SprintSpeed,Agility,Reactions,Balance,ShotPower,Jumping,Stamina,Strength,LongShots,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Release Clause
0,158023,L. Messi,31,https://cdn.sofifa.org/players/4/19/158023.png,Argentina,https://cdn.sofifa.org/flags/52.png,94,94,FC Barcelona,https://cdn.sofifa.org/teams/2/light/241.png,€110.5M,€565K,2202,Left,5,4,4,Medium/ Medium,Messi,Yes,RF,10,"Jul 1, 2004",,2021,5'7,159lbs,88+2,88+2,88+2,92+2,93+2,93+2,93+
GabrielSGoncalves / fifa19_output.csv
Last active August 6, 2019 16:25
Resulting CSV file from the group-by analysis using pandas
Club Overall
Juventus 82.28
Napoli 80.0
Inter 79.75
Real Madrid 78.24
Milan 78.07
FC Barcelona 78.03
Paris Saint-Germain 77.43
Roma 77.42
Manchester United 77.24
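Output like the table above can be produced with a pandas group-by on the FIFA 19 data; a minimal sketch with a toy stand-in for the Kaggle dataset (only the `Club` and `Overall` columns are assumed, and the two-decimal rounding matches the table):

```python
import pandas as pd

# Toy stand-in for the Kaggle FIFA 19 dataset
df = pd.DataFrame({
    'Club': ['Juventus', 'Juventus', 'Napoli', 'Napoli'],
    'Overall': [94, 90, 85, 75],
})

# Mean overall rating per club, highest first
club_overall = (
    df.groupby('Club')['Overall']
    .mean()
    .round(2)
    .sort_values(ascending=False)
)
print(club_overall)
```

Writing `club_overall` out with `to_csv` would yield a file shaped like `fifa19_output.csv` above.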
GabrielSGoncalves / nlp_aws_medium_part1.py
Last active September 24, 2019 14:29
First part of the NLP analysis for the Medium article on AWS ML/AI tools

from __future__ import print_function
import json
import logging
import os
import time
from datetime import date

import boto3
import matplotlib.pyplot as plt
import pandas as pd
from botocore.exceptions import ClientError
GabrielSGoncalves / nlp_aws_medium_part2.py
Last active September 18, 2019 16:41
Second part of the NLP analysis for the Medium article on AWS ML/AI tools
# 5) Creating a new S3 bucket to upload the audio files
bucket_name = 'medium-nlp-aws'
client_s3 = boto3.client('s3')
client_s3.create_bucket(Bucket=bucket_name)
# 6) Uploading the files to the created bucket
for audio_file in df_audio.filename.values:
    print(audio_file)
    client_s3.upload_file(audio_file, bucket_name, audio_file)
GabrielSGoncalves / nlp_aws_medium_part3.py
Last active September 24, 2019 14:31
Third part of the NLP analysis for the Medium article on AWS ML/AI tools
# 10) Function to get text from the JSON file generated using Amazon Transcribe
def get_text_from_json(bucket, key):
    """Get the transcript text from a Transcribe JSON result on S3."""
    s3 = boto3.client('s3')
    obj = s3.get_object(Bucket=bucket, Key=key)
    serialized_object = obj['Body'].read()
    data = json.loads(serialized_object)
    return data.get('results').get('transcripts')[0].get('transcript')

# 11) Reading the original transcription from the JSON file
with open('original_transcripts.json', 'r') as f:
    original_transcriptions = json.load(f)
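The nesting that `get_text_from_json` unpacks follows the shape of an Amazon Transcribe result document; a self-contained sketch with a hand-built sample (the `jobName` value is illustrative):

```python
import json

# Minimal sample mirroring the shape of an Amazon Transcribe result
sample = json.dumps({
    'jobName': 'demo-job',
    'results': {
        'transcripts': [{'transcript': 'hello world'}],
    },
})

# Same lookup chain used in get_text_from_json above
data = json.loads(sample)
text = data.get('results').get('transcripts')[0].get('transcript')
print(text)  # hello world
```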
GabrielSGoncalves / nlp_aws_medium_part5.py
Last active September 24, 2019 16:26
Fifth part of the NLP analysis for the Medium article on AWS ML/AI tools for NLP.

# 16) Function to call Amazon Comprehend service using boto3
def start_comprehend_job(text):
    """
    Execute sentiment analysis of a text using Amazon Comprehend.
    The text can be larger than 5000 bytes (the limit for a single request),
    as the function will split it into multiple requests and return an
    averaged value for each sentiment.
    Parameter
    - text (str): The text to be analyzed
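The gist preview cuts off before the function body, but its docstring describes splitting long text and averaging the per-chunk scores. A sketch of that approach (the chunking helper, its word-boundary strategy, and the `'en'` language code are assumptions; `detect_sentiment` and its `SentimentScore` response field are the real boto3 Comprehend API):

```python
def chunk_text(text, max_bytes=5000):
    """Split text on word boundaries into pieces of at most max_bytes (UTF-8)."""
    chunks, current = [], ''
    for word in text.split(' '):
        candidate = (current + ' ' + word).strip()
        if len(candidate.encode('utf-8')) > max_bytes and current:
            chunks.append(current)
            current = word
        else:
            current = candidate
    if current:
        chunks.append(current)
    return chunks


def average_sentiment(text, language_code='en'):
    """Run Comprehend sentiment analysis per chunk and average the scores."""
    import boto3  # imported here so chunk_text works without AWS deps
    client = boto3.client('comprehend')
    totals = {'Positive': 0.0, 'Negative': 0.0, 'Neutral': 0.0, 'Mixed': 0.0}
    chunks = chunk_text(text)
    for chunk in chunks:
        score = client.detect_sentiment(
            Text=chunk, LanguageCode=language_code)['SentimentScore']
        for key in totals:
            totals[key] += score[key]
    return {key: value / len(chunks) for key, value in totals.items()}
```

One caveat of this word-boundary strategy: a single word longer than `max_bytes` would still produce an oversized chunk, so real input should be sanitized first.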
GabrielSGoncalves / nlp_aws_medium_part4.py
Created September 24, 2019 14:48
Fourth part of the NLP analysis for the Medium article on AWS ML/AI tools for NLP.
# Load the spaCy model once; reloading it on every iteration would be wasteful
nlp = spacy.load('en_core_web_lg')

# 15) Iterate over the speakers and apply spaCy visualizer on each speech
for index, row in df_audio.iterrows():
    print(f"Rendering {index}'s texts")
    original_transcription = nlp(original_transcriptions.get(index))
    transcribe_transcription = nlp(get_text_from_json(bucket_name, row.json_transcription))
    svg_original = spacy.displacy.render(original_transcription, style="ent", jupyter=False)
    svg_transcribe = spacy.displacy.render(transcribe_transcription, style="ent", jupyter=False)