This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# USING PANDAS
import pandas as pd
import os
import logging

# NOTE(review): logging.basicConfig configures the process-wide root logger;
# fine for a standalone script, undesirable inside a library module.
logging.basicConfig(level=logging.INFO)

# Datasets are resolved relative to the process working directory.
current_dir = os.getcwd()
# Lazy %-style args (not an f-string) so formatting is skipped if INFO is off.
logging.info('Datasets will be read from current directory %s', current_dir)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql.functions import * | |
from pyspark.sql import functions as f | |
from pyspark.sql import Window | |
# Read the 5M-row sales CSV, treating the first row as the header.
# NOTE(review): `spark` is not defined here — presumably the implicit
# Databricks/spark-shell session; confirm the execution environment.
dataframe =(spark.read.option("header","true").csv("/FileStore/sales_5000000.csv")) | |
#dataframe.show() | |
# Drop the columns not needed downstream, then parse 'Order Date' from its
# "M/dd/yyyy" string form into a date column.
# NOTE(review): the statement is truncated at the trailing backslash — the
# rest of the transformation chain is not visible in this chunk.
df = dataframe.drop("Country", "Sales Channel", "Order ID", 'Ship Date', 'Units Sold', 'Unit Price', 'Unit Cost', 'Total Cost', 'Total Profit' ,'Order Priority')\ | |
.withColumn('Order Date', to_date(col('Order Date'),'M/dd/yyyy'))\ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3 | |
import json | |
import random | |
import time | |
import uuid | |
from faker import Faker | |
from dataclasses import dataclass, field | |
# S3 handle using the default credential chain (env vars / shared config);
# no explicit region or credentials are passed here.
s3_resource = boto3.resource('s3') | |
# One-time bucket creation in eu-west-2, kept commented out after first run.
#s3_resource.create_bucket(Bucket='data-stream-dump',CreateBucketConfiguration={'LocationConstraint': 'eu-west-2'}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#APPLY MANIPULATION TO CREATE NEW DF | |
# Rename the CSV's title-cased columns to snake_case, cast the numeric ones
# to float, drop everything else, and de-duplicate rows.
# NOTE(review): each withColumn/drop pair is effectively a column rename;
# `withColumnRenamed` would express the same intent more directly — confirm
# before changing.
# NOTE(review): the expression is truncated here (parenthesis never closed
# in this chunk) — the chain continues past the visible lines.
df_final = (df.withColumn("order_id", df["Order ID"]).drop("Order ID") | |
.withColumn("order_date", to_date(col("Order Date"), "M/d/yyyy")).drop("Order Date") | |
.withColumn("item_type", df["Item Type"]).drop("Item Type") | |
.withColumn("sales_channel", df["Sales Channel"]).drop("Sales Channel") | |
.withColumn("units_sold", df["Units Sold"].cast('float')).drop("Units Sold") | |
.withColumn("unit_price", df["Unit Price"].cast('float')).drop("Unit Price") | |
.withColumn("total_revenue", df["Total Revenue"].cast("float")).drop("Total Revenue") | |
.drop("Region", "Country", "Order Priority", "Ship Date", "Total Profit", "Total Cost", "Unit Cost") | |
.distinct() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example
# Input: nums = [-6, 1, 0, 3, 18]
# Output: [0, 1, 9, 36, 324]
# Explanation: Only -6, 0, 3 and 18 are divisible by 3 and are squared.
# After squaring and sorting, the list becomes [0, 1, 9, 36, 324].
# (The original comment claimed [-1, 0, 9, 36, 324], which does not match
# this input: 1 is in the list and -1 is not.)
nums = [-6, -1, 0, 3, 8, 12]


def solution(nums):
    """Square every element divisible by 3, keep the rest, return sorted.

    Args:
        nums: iterable of ints.

    Returns:
        A new sorted list; multiples of 3 appear squared, others unchanged.
    """
    # Iterate values directly instead of indexing via range(len(...)).
    return sorted(n * n if n % 3 == 0 else n for n in nums)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%%time #outputs the execution time of the python statement | |
# Bulk-load the dataframe into Redshift, replacing the table if it exists;
# the DataFrame index is not written as a column.
fx_revenues.to_sql('table_name', con = engine, if_exists = 'replace', index= False, schema = "schema_name") | |
# Grant read access on the freshly (re)created table.
# NOTE(review): `connection` is not defined in this snippet — presumably
# obtained from `engine` elsewhere; confirm.
connection.execute('grant select on schema_name.table_name to your_username;') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#### INPUT YOUR CREDENTIALS ###
# NOTE(review): real credentials belong in environment variables or a secrets
# manager, never in source; these are tutorial placeholders.
DATABASE = "database_name"
USER = "your_user_name"
PASSWORD = "your_pw"
HOST = "company_name.eu-west-1.redshift.amazonaws.com"
PORT = "5439"

### CREATE A CONNECTION TO REDSHIFT DB
# SQLAlchemy URL for Redshift via the psycopg2 PostgreSQL driver.
# (PORT is already a str, so the original str(PORT) call was redundant.)
# NOTE(review): special characters in PASSWORD would need URL-escaping
# (urllib.parse.quote_plus) — confirm if real passwords can contain them.
connection_string = f"postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}:{PORT}/{DATABASE}"
engine = sa.create_engine(connection_string)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def gsheet2df(spreadsheet_name, sheet_num):
    """Read one worksheet of a Google Sheet into a pandas DataFrame.

    Authenticates with a service-account JSON keyfile, opens the named
    spreadsheet, and pulls all records from worksheet `sheet_num`
    (0-based index, per gspread's get_worksheet).

    Args:
        spreadsheet_name: title of the spreadsheet as shared with the
            service account.
        sheet_num: worksheet index within the spreadsheet.

    Returns:
        pandas.DataFrame built from the worksheet's records.
    """
    scope = [
        'https://spreadsheets.google.com/feeds',
        'https://www.googleapis.com/auth/drive',
    ]
    # TODO(review): hard-coded keyfile path — consider promoting it to a
    # parameter with this value as the default.
    credentials_path = 'tutorial-sa-b04b423afd77.json'
    credentials = sac.from_json_keyfile_name(credentials_path, scope)
    client = gspread.authorize(credentials)
    sheet = client.open(spreadsheet_name).get_worksheet(sheet_num).get_all_records()
    df = pd.DataFrame.from_dict(sheet)
    # BUG FIX: the original built `df` but never returned it, so every call
    # discarded the result and returned None.
    return df
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gspread | |
from oauth2client.service_account import ServiceAccountCredentials as sac | |
import pandas as pd | |
import sqlalchemy as sa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ git checkout master | |
Switched to branch 'master' | |
$ git merge project1/add_commands | |
Updating b3088e3..cdb97d4 | |
Fast-forward | |
git_commands.md | 8 +++++++- | |
1 file changed, 7 insertions(+), 1 deletion(-) |
NewerOlder