import luigi
from luigi.contrib.s3 import S3Target, S3Client
import spotipy
import spotipy.util as util
import csv
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from datetime import date
from time import strftime
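# The imports above point to a Luigi pipeline that writes Spotify data to S3
# as CSV and sends a notification email. A minimal, hypothetical sketch of one
# such task follows; the task name, bucket, key, and row content are
# illustrative assumptions, not the author's code.
class SaveTracksToS3(luigi.Task):
    date = luigi.DateParameter(default=date.today())

    def output(self):
        # assumed bucket/key layout
        return S3Target('s3://my-bucket/tracks_{}.csv'.format(self.date))

    def run(self):
        with self.output().open('w') as out:
            out.write('artist,track\n')  # header only; Spotify API calls omitted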
# import libraries
import requests
import numpy as np
import sys

# retrieve the API host (IP address) from the command line arguments
host = sys.argv[1]
url = 'http://{}:8080'.format(host)

# set a query text and send it to the API
params = {'query': "Hmm. Human Music. I like it."}
response = requests.get(url, params)
print(response.json())
import requests
url = 'http://[external_IP_address_of_your_app]:8080'
params = {'query': """Listen Morty, I hate to break it to you,
but what people call ‘love’ is just a
chemical reaction that compels animals to breed."""}
response = requests.get(url, params)
print(response.json())
# import libraries needed for the code to run
import re
import pyspark as ps
from pyspark.ml import PipelineModel
from pyspark.sql import functions as f
from pyspark.sql import types as t
from flask import Flask
from flask_restful import reqparse, abort, Api, Resource
# define regex pattern for preprocessing
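# The snippet breaks off at the comment above. The sketch below is illustrative
# only: the mention/URL pattern, resource name, model path, and port are
# assumptions (the port matches the client examples earlier), not the author's
# code. It shows how flask-restful would expose a fitted PipelineModel.
combined_pat = r'@[A-Za-z0-9_]+|https?://[^ ]+'  # assumed mention/URL pattern

app = Flask(__name__)
api = Api(app)
parser = reqparse.RequestParser()
parser.add_argument('query')

class PredictSentiment(Resource):  # hypothetical resource name
    def get(self):
        args = parser.parse_args()
        clean = re.sub(combined_pat, '', args['query']).lower().strip()
        # model = PipelineModel.load('path/to/fitted_pipeline')  # fitted elsewhere
        return {'query': clean}

api.add_resource(PredictSentiment, '/')

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)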
#!/bin/bash
# update the package list
sudo apt-get update
# install JDK8 and pip
# sudo apt-get install -y openjdk-8-jdk python-pip python-dev build-essential
sudo apt-get install -y openjdk-8-jdk python-pip
# install the required Python packages with pip;
# --no-cache-dir prevents a memory error caused by pyspark's large package size
pip install --no-cache-dir Flask==0.12.2 pyspark==2.3.0 flask-restful==0.3.7 numpy==1.15.3
# import libraries
import sys
import pyspark as ps
import warnings
import re
from pyspark.sql import functions as f
from pyspark.sql import types as t
from pyspark.sql.types import StringType
from pyspark.ml.feature import Tokenizer, NGram, CountVectorizer, IDF, StringIndexer, VectorAssembler
from pyspark.ml import Pipeline
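# The imports above are the pieces of an n-gram TF-IDF feature pipeline. A
# minimal sketch of how they could be assembled follows; the column names,
# the 1-3 gram range, and minDocFreq are illustrative assumptions.
spark = ps.sql.SparkSession.builder.appName('ngram-tfidf').getOrCreate()

stages = [Tokenizer(inputCol='text', outputCol='words')]
tfidf_cols = []
for n in [1, 2, 3]:
    stages.append(NGram(n=n, inputCol='words', outputCol='{}_gram'.format(n)))
    stages.append(CountVectorizer(inputCol='{}_gram'.format(n), outputCol='{}_tf'.format(n)))
    stages.append(IDF(inputCol='{}_tf'.format(n), outputCol='{}_tfidf'.format(n), minDocFreq=5))
    tfidf_cols.append('{}_tfidf'.format(n))
stages.append(VectorAssembler(inputCols=tfidf_cols, outputCol='features'))
stages.append(StringIndexer(inputCol='sentiment', outputCol='label'))
pipeline = Pipeline(stages=stages)
# fitted = pipeline.fit(train_df); features = fitted.transform(val_df)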
# import libraries
import pandas as pd
import numpy as np

# set the names for each column
cols = ['sentiment', 'id', 'date', 'query_string', 'user', 'text']

def main():
    # read training data with ISO-8859-1 encoding and the column names set above
    df = pd.read_csv('temp/training.1600000.processed.noemoticon.csv',
                     encoding='ISO-8859-1', names=cols)
    # shuffle the data (one common way, using the numpy import above)
    df = df.reindex(np.random.permutation(df.index))
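    # (sketch) Sentiment140 labels 'sentiment' as 0 = negative, 4 = positive;
    # a typical next step, assumed here for illustration rather than taken
    # from the source, keeps only the label and text and remaps 4 -> 1
    df = df[['sentiment', 'text']]
    df['sentiment'] = df['sentiment'].map({0: 0, 4: 1})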
#!/bin/bash
# install wget with Homebrew
brew install wget
# make a temporary directory for the data,
# cd into it, then download and unzip
mkdir temp
cd temp
wget http://cs.stanford.edu/people/alecmgo/trainingandtestdata.zip
unzip trainingandtestdata.zip
# delete the zip file and the test data
rm trainingandtestdata.zip
rm testdata.manual.2009.06.14.csv
# import libraries
import pandas as pd
import numpy as np
import os
import struct
from scipy.io import wavfile as wav
import matplotlib.pyplot as plt
import IPython.display as ipd

# read the UrbanSound8K metadata
data = pd.read_csv("UrbanSound8K/metadata/UrbanSound8K.csv")
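# A minimal sketch of inspecting one clip, assuming the standard UrbanSound8K
# layout (audio/fold<fold>/<slice_file_name>); row 0 is an arbitrary choice.
row = data.iloc[0]
path = os.path.join('UrbanSound8K/audio',
                    'fold{}'.format(row['fold']), row['slice_file_name'])
rate, wave = wav.read(path)  # sample rate (Hz) and raw samples
plt.plot(wave)
plt.title(row['class'])
plt.show()
ipd.Audio(path)  # inline audio player in a notebook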