Skip to content

Instantly share code, notes, and snippets.

View jamesthomson's full-sized avatar

James Thomson jamesthomson

View GitHub Profile
%pyspark
import csv


def _parse_csv_lines(lines):
    """Wrap an iterable of raw text lines in a csv.reader, which yields one list of fields per row."""
    return csv.reader(lines)


# read the raw data file from S3 as text
data = sc.textFile('s3://bucket/clustering/zep_tracks.csv')
# parse the text as CSV, one partition at a time, giving an RDD of field lists
rdd = data.mapPartitions(_parse_csv_lines)
#convert to dataframe
dataframe = rdd.toDF(['artist','artist_id','album','album_id','track','track_id','track_number','track_length',
'preview_url','danceability','energy','key','loudness','mode','speechiness','acousticness','instrumentalness',
library(httr)
# Parameters for the Spotify Web API requests.
# NOTE(review): country and albumType are not used in the visible lines;
# presumably later requests in this script consume them - confirm.
country <- "GB"
albumType <- "album"
artist <- "Led Zeppelin"

# Get artist id: query the search endpoint with the artist name.
# URLencode(reserved = TRUE) percent-encodes every space and every reserved
# character (&, ?, /, ...). The previous sub(" ", "%20", artist) replaced only
# the first space, so names with several spaces or an ampersand produced a
# malformed query string.
url <- paste0(
  "https://api.spotify.com/v1/search?q=",
  utils::URLencode(artist, reserved = TRUE),
  "&type=artist"
)
search <- content(GET(url))

# Show the first item returned by the artist search.
search$artists$items[[1]]
library(plotly)
# Read the headerless original track listing.
original <- read.csv("zep_tracks.csv", header = FALSE)

# Read the headerless output file and discard its first two columns.
final <- read.csv("outputFile.txt", header = FALSE)
final <- final[, -c(1, 2)]

# Join the two tables: column V5 of the originals against column V7 of the output.
merged <- merge(original, final, by.x = "V5", by.y = "V7")
#grab key vars and fix labels
@jamesthomson
jamesthomson / twitter_following_cleanup.R
Created March 17, 2017 12:21
clean up twitter following
library(twitteR)
# Twitter authorisation: credentials are blank placeholders in this script.
consumer_key <- ''
consumer_secret <- ''
access_token <- ''
access_secret <- ''

# Register the OAuth credentials with twitteR for the rest of the session.
setup_twitter_oauth(
  consumer_key = consumer_key,
  consumer_secret = consumer_secret,
  access_token = access_token,
  access_secret = access_secret
)

# Look up the user object for the account being worked on.
user <- getUser("inspirationinf")
@jamesthomson
jamesthomson / audio_signal_processing.py
Last active October 25, 2021 14:52
Audio signal processing
#required libraries
import urllib
import scipy.io.wavfile
import pydub
# Temporary folder used for downloads.
temp_folder = "/Users/home/Desktop/"
#spotify mp3 sample file