Skip to content

Instantly share code, notes, and snippets.

View weizhou2273's full-sized avatar
🎯
Focusing

William Zhou weizhou2273

🎯
Focusing
  • NYC Data Science Academy
View GitHub Profile
# Submit the capstone Spark analysis job.
# --packages pulls in hadoop-aws; the MySQL Connector/J jar is placed on the
# driver class path and also shipped with the job via --jars.
~/spark-2.1.1-bin-hadoop2.7/bin/spark-submit \
    --packages org.apache.hadoop:hadoop-aws:2.7.1 \
    --driver-class-path ~/spark-2.1.1-bin-hadoop2.7/mysql-connector-java-5.1.42/mysql-connector-java-5.1.42-bin.jar \
    --jars ~/spark-2.1.1-bin-hadoop2.7/mysql-connector-java-5.1.42/mysql-connector-java-5.1.42-bin.jar \
    ~/capstone/spark_analysis.py
# Install cron
sudo apt-get install cron
# Open the crontab for editing (with sudo this is root's crontab)
sudo crontab -e
# Add the entry below to run the task every 2 minutes.
# cron runs commands with /bin/sh by default, and on Debian/Ubuntu that shell
# (dash) has no `source` builtin, so the script is started with bash instead.
# NOTE(review): the relative path is presumably resolved from the crontab
# owner's home directory - confirm, or switch to an absolute path.
*/2 * * * * bash ./capstone/run.sh
# Save the crontab file and leave the editor
# Check your crontab tasks
sudo crontab -l
from __future__ import print_function
import sys
import re
from operator import add
import pandas as pd
from pyspark.sql.types import StructField, StructType, StringType
from pyspark.sql import Row
from pyspark.sql.types import *
from pyspark.sql import SQLContext
import json
# Enter the unpacked Spark distribution, then launch the interactive
# PySpark shell from its bin/ directory.
cd spark-2.1.1-bin-hadoop2.7/; ./bin/pyspark
# Download Spark 2.1.1 built for Hadoop 2.7.
# The archive name must match the tar/rm steps below; the previous URL fetched
# spark-2.2.0, so the extraction step could not find its input file.
# NOTE(review): if this mirror is no longer reachable, the same file should be
# available under https://archive.apache.org/dist/spark/spark-2.1.1/ - confirm.
wget https://d3kbcqa49mib13.cloudfront.net/spark-2.1.1-bin-hadoop2.7.tgz
# Unpack Spark
tar -xvzf spark-2.1.1-bin-hadoop2.7.tgz
# Remove the downloaded tgz file
rm spark-2.1.1-bin-hadoop2.7.tgz
# Download the MySQL JDBC driver (Connector/J 5.1.42)
wget https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-5.1.42.tar.gz
# Unpack the MySQL driver
tar -xvzf mysql-connector-java-5.1.42.tar.gz
# Remove the downloaded tar.gz
rm mysql-connector-java-5.1.42.tar.gz
import tweepy
from tweepy import Stream
from tweepy.streaming import StreamListener
import time
import numpy as np
import pandas as pd
from google.cloud import language
import json
import boto3
import os
import boto3
from moto import mock_kinesis
import credentials
import sys
from botocore.exceptions import ClientError
# Read the AWS access key id and key from the `aws` mapping of the local
# `credentials` module.
aws_key_id, aws_key = credentials.aws['key_id'], credentials.aws['key']
# Install Python and pip first; the pip commands below need both.
sudo apt-get install python
# The apt package is not called "pip".
# NOTE(review): python-pip is the Python 2 package name; on a Python 3 setup
# use python3-pip instead - confirm against the target distribution.
sudo apt-get install python-pip
# Third-party packages
sudo pip install boto3
sudo pip install tweepy
# time, json, os and uuid are part of Python's standard library and must not
# be installed with pip: the install either fails or fetches an unrelated
# package that happens to share the name.