import luigi
from luigi.contrib.s3 import S3Target, S3Client
import spotipy
import spotipy.util as util
import csv
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from datetime import date
from time import strftime
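# The imports above point to a Luigi pipeline that writes Spotify data to S3
# as CSV and sends a notification email. A minimal, hypothetical sketch of one
# such task follows; the task name, bucket, key, and row content are
# illustrative assumptions, not the author's code.
class SaveTracksToS3(luigi.Task):
    date = luigi.DateParameter(default=date.today())

    def output(self):
        # assumed bucket/key layout
        return S3Target('s3://my-bucket/tracks_{}.csv'.format(self.date))

    def run(self):
        with self.output().open('w') as out:
            out.write('artist,track\n')  # header only; Spotify API calls omitted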
# import libraries
import requests
import numpy as np
import sys

# retrieve the API host (IP address) from the command line arguments
host = sys.argv[1]
url = 'http://{}:8080'.format(host)

# set a query text and send it to the API
params = {'query': "Hmm. Human Music. I like it."}
response = requests.get(url, params)
print(response.json())
import requests
url = 'http://[external_IP_address_of_your_app]:8080'
params = {'query': """Listen Morty, I hate to break it to you,
but what people call ‘love’ is just a
chemical reaction that compels animals to breed."""}
response = requests.get(url, params)
print(response.json())
# import libraries needed for the code to run
import re
import pyspark as ps
from pyspark.ml import PipelineModel
from pyspark.sql import functions as f
from pyspark.sql import types as t
from flask import Flask
from flask_restful import reqparse, abort, Api, Resource
# define regex pattern for preprocessing
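# The snippet breaks off at the comment above. The sketch below is illustrative
# only: the mention/URL pattern, resource name, model path, and port are
# assumptions (the port matches the client examples earlier), not the author's
# code. It shows how flask-restful would expose a fitted PipelineModel.
combined_pat = r'@[A-Za-z0-9_]+|https?://[^ ]+'  # assumed mention/URL pattern

app = Flask(__name__)
api = Api(app)
parser = reqparse.RequestParser()
parser.add_argument('query')

class PredictSentiment(Resource):  # hypothetical resource name
    def get(self):
        args = parser.parse_args()
        clean = re.sub(combined_pat, '', args['query']).lower().strip()
        # model = PipelineModel.load('path/to/fitted_pipeline')  # fitted elsewhere
        return {'query': clean}

api.add_resource(PredictSentiment, '/')

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)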
#!/bin/bash
# update the package list
sudo apt-get update
# install JDK8 and pip
# sudo apt-get install -y openjdk-8-jdk python-pip python-dev build-essential
sudo apt-get install -y openjdk-8-jdk python-pip
# install the required Python packages with pip;
# --no-cache-dir prevents a memory error caused by pyspark's large package size
pip install --no-cache-dir Flask==0.12.2 pyspark==2.3.0 flask-restful==0.3.7 numpy==1.15.3
# import libraries
import sys
import pyspark as ps
import warnings
import re
from pyspark.sql import functions as f
from pyspark.sql import types as t
from pyspark.sql.types import StringType
from pyspark.ml.feature import Tokenizer, NGram, CountVectorizer, IDF, StringIndexer, VectorAssembler
from pyspark.ml import Pipeline
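# The imports above are the pieces of an n-gram TF-IDF feature pipeline. A
# minimal sketch of how they could be assembled follows; the column names,
# the 1-3 gram range, and minDocFreq are illustrative assumptions.
spark = ps.sql.SparkSession.builder.appName('ngram-tfidf').getOrCreate()

stages = [Tokenizer(inputCol='text', outputCol='words')]
tfidf_cols = []
for n in [1, 2, 3]:
    stages.append(NGram(n=n, inputCol='words', outputCol='{}_gram'.format(n)))
    stages.append(CountVectorizer(inputCol='{}_gram'.format(n), outputCol='{}_tf'.format(n)))
    stages.append(IDF(inputCol='{}_tf'.format(n), outputCol='{}_tfidf'.format(n), minDocFreq=5))
    tfidf_cols.append('{}_tfidf'.format(n))
stages.append(VectorAssembler(inputCols=tfidf_cols, outputCol='features'))
stages.append(StringIndexer(inputCol='sentiment', outputCol='label'))
pipeline = Pipeline(stages=stages)
# fitted = pipeline.fit(train_df); features = fitted.transform(val_df)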
# import libraries
import pandas as pd
import numpy as np

# set the names for each column
cols = ['sentiment', 'id', 'date', 'query_string', 'user', 'text']

def main():
    # read training data with ISO-8859-1 encoding and the column names set above
    df = pd.read_csv('temp/training.1600000.processed.noemoticon.csv',
                     encoding='ISO-8859-1', names=cols)
    # shuffle the data (one common way, using the numpy import above)
    df = df.reindex(np.random.permutation(df.index))
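    # (sketch) Sentiment140 labels 'sentiment' as 0 = negative, 4 = positive;
    # a typical next step, assumed here for illustration rather than taken
    # from the source, keeps only the label and text and remaps 4 -> 1
    df = df[['sentiment', 'text']]
    df['sentiment'] = df['sentiment'].map({0: 0, 4: 1})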
#!/bin/bash
# install wget with Homebrew
brew install wget
# make a temporary directory for the data,
# cd into it, then download and unzip
mkdir temp
cd temp
wget http://cs.stanford.edu/people/alecmgo/trainingandtestdata.zip
unzip trainingandtestdata.zip
# delete the zip file and the test data
rm trainingandtestdata.zip
rm testdata.manual.2009.06.14.csv
# import libraries
import pandas as pd
import numpy as np
import os
import struct
from scipy.io import wavfile as wav
import matplotlib.pyplot as plt
import IPython.display as ipd

# read the UrbanSound8K metadata
data = pd.read_csv("UrbanSound8K/metadata/UrbanSound8K.csv")
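# A minimal sketch of inspecting one clip, assuming the standard UrbanSound8K
# layout (audio/fold<fold>/<slice_file_name>); row 0 is an arbitrary choice.
row = data.iloc[0]
path = os.path.join('UrbanSound8K/audio',
                    'fold{}'.format(row['fold']), row['slice_file_name'])
rate, wave = wav.read(path)  # sample rate (Hz) and raw samples
plt.plot(wave)
plt.title(row['class'])
plt.show()
ipd.Audio(path)  # inline audio player in a notebook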