Skip to content

Instantly share code, notes, and snippets.

View sanjeevai's full-sized avatar
🎧
➕ 💻

Sanjeev Yadav sanjeevai

🎧
➕ 💻
View GitHub Profile
@sanjeevai
sanjeevai / cassandra_etl_test3.py
Created June 20, 2019 13:21
Check if ETL process works correctly
query = "SELECT firstName, lastName FROM user_info WHERE song = 'All Hands \
Against His Own'"
rows = session.execute(query)
for row in rows:
print(row.firstname, row.lastname)
### OUTPUT
### firstName, lastName
Jacqueline, Lynch
Tegan, Levine
@sanjeevai
sanjeevai / cassandra_etl3.py
Created June 20, 2019 13:17
Transformation and loading part for the third query
# transformation and loading part of the ETL
file = 'event_datafile_new.csv'
with open(file, encoding='utf8') as f:
csvreader = csv.reader(f)
next(csvreader) # skip header
for line in csvreader:
# Assign the INSERT statements into the `query` variable
@sanjeevai
sanjeevai / cassandra_query3.py
Created June 20, 2019 13:08
Create user_info table
query3 = "CREATE TABLE IF NOT EXISTS user_info "
query3 = query3 + "(userId int, firstName text, lastName text, song text, \
PRIMARY KEY (song, userId))"
session.execute(query3)
@sanjeevai
sanjeevai / cassandra_etl_test2.py
Created June 20, 2019 13:02
Check if ETL process works correctly
query = "SELECT * FROM user_artist_relation WHERE userId = 10 AND sessionId = \
182"
rows = session.execute(query)
for row in rows:
print(row.artist, row.song, row.firstname, row.lastname, row.iteminsession)
### OUTPUT
### artist, song, firstname, .lastname, iteminsession
>>>
Down To The Bone, Keep On Keepin' On, Sylvie, Cruz, 0
@sanjeevai
sanjeevai / cassandra_etl2.py
Created June 20, 2019 12:52
Transformation and loading part for the second query
file = 'event_datafile_new.csv'
with open(file, encoding='utf8') as f:
csvreader = csv.reader(f)
next(csvreader) # skip header
for line in csvreader:
# Assign the INSERT statements into the `query` variable
@sanjeevai
sanjeevai / cassandra_query2.py
Created June 20, 2019 12:36
Create user_artist_relation table
query2 = "CREATE TABLE IF NOT EXISTS user_artist_relation "
query2 = query2 + "(artist text, song text, firstName text, lastName text, \
userId int, sessionId int, itemInSession int, \
PRIMARY KEY ((userId, sessionId), itemInSession))"
session.execute(query2)
@sanjeevai
sanjeevai / cassandra_etl_test1.py
Last active June 20, 2019 13:06
Check if ETL process works correctly
query = "SELECT * FROM discography WHERE sessionId = 338 AND itemInSession = 4"
rows = session.execute(query)
for row in rows:
print(row.artist, row.song, row.length, row.sessionid, row.iteminsession)
### OUTPUT
### artist, song, length, sessionId, itemInSession
>>> Faithless, Music Matters (Mark Knight Dub), 495.30731201171875, 338 4
@sanjeevai
sanjeevai / cassandra_etl1.py
Last active June 20, 2019 07:07
Transformation and loading part for the first query
file = 'event_datafile_new.csv'
with open(file, encoding = 'utf8') as f:
csvreader = csv.reader(f)
next(csvreader) # skip header
for line in csvreader:
# Assign the INSERT statements into the `query` variable
query = "INSERT INTO discography (artist, song, length, sessionId, \
@sanjeevai
sanjeevai / cassandra_query1.py
Created June 20, 2019 06:38
Create discography table
query1 = "CREATE TABLE IF NOT EXISTS discography "
query1 = query1 + "(artist text, song text, length float, sessionId int, \
itemInSession int, PRIMARY KEY (sessionId, itemInSession))"
session.execute(query1)
@sanjeevai
sanjeevai / postgres_connect.py
Created June 20, 2019 05:40
Connect to Postgres database
conn = psycopg2.connect("host=127.0.0.1 dbname=studentdb user=#### password=####")
cur = conn.cursor()