This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_tweets():
    """Open a streaming request to Twitter's ``statuses/filter`` endpoint.

    The request filters on ``language=en`` and a ``locations`` value of
    ``-130,-20,100,50``, authenticates with the module-level ``my_auth``
    object, and is issued with ``stream=True``.

    Returns:
        The response object returned by ``requests.get``.
    """
    url = 'https://stream.twitter.com/1.1/statuses/filter.json'
    query_data = [('language', 'en'), ('locations', '-130,-20,100,50')]
    # Hand the pairs to requests so it builds and percent-encodes the query
    # string; concatenating "key=value" by hand performs no escaping.
    response = requests.get(url, params=query_data, auth=my_auth, stream=True)
    return response
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Walk the HTTP response line by line; for each line, parse it as JSON, take
# the first 20 characters of 'created_at' and the 'text' field, and print both.
#
# NOTE(review): this listing is mangled and appears truncated. Indentation is
# gone, every line ends in "|" residue, the `try:` has no visible
# except/finally clause, and `tcp_connection` is never used in the lines
# shown. Restore the block from the original file before running it.
# NOTE(review): the local name `datetime` would shadow the standard-library
# module of the same name if that module were imported in this file.
def send_tweets_to_spark(http_resp, tcp_connection): | |
for line in http_resp.iter_lines(): | |
try: | |
full_tweet = json.loads(line) | |
datetime = full_tweet['created_at'][:20] | |
tweet = full_tweet['text'] | |
print (f"---------------{datetime}--------------------------") | |
print(tweet) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def main():
    """Listen on localhost:9009 and block until one TCP client connects.

    Progress is reported on stdout before and after the blocking
    ``accept()`` call.

    NOTE(review): the visible listing ends right after the connection is
    accepted; nothing shown here uses ``conn`` or ``addr`` afterwards, and
    neither socket is closed.
    """
    TCP_IP = "localhost"
    TCP_PORT = 9009
    conn = None
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.bind((TCP_IP, TCP_PORT))
    s.listen(1)
    print("Waiting for TCP connection...")
    # accept() blocks until a client connects to the listening socket.
    conn, addr = s.accept()
    print("Connected... Starting getting tweets.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark import SparkContext | |
from pyspark.streaming import StreamingContext | |
def updateCount(newCounts, state):
    """Fold a batch of new counts into a running total.

    Args:
        newCounts: Iterable of numbers to add for the current batch.
        state: The running total so far, or ``None`` when there is none yet.

    Returns:
        ``sum(newCounts)`` when ``state`` is ``None``; otherwise
        ``state + sum(newCounts)``.
    """
    batch_total = sum(newCounts)
    # Identity check: only a genuinely missing state starts a fresh total.
    # `== None` would defer to whatever __eq__ the state object defines.
    if state is None:
        return batch_total
    return state + batch_total
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql import SparkSession | |
from pyspark.sql.functions import explode | |
from pyspark.sql.functions import split | |
def main():
    """Get or create the SparkSession named "StructuredNetworkWordCount".

    NOTE(review): the visible listing ends after the session is obtained;
    nothing shown here uses ``spark`` afterwards.
    """
    # Parenthesised chain instead of backslash continuations: a stray
    # character after a backslash breaks the whole statement.
    spark = (
        SparkSession.builder
        .appName("StructuredNetworkWordCount")
        .getOrCreate()
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import requests_oauthlib | |
def secure():
    """Read Twitter API credentials from ``secrets.json``.

    The file is looked up under the hard-coded ``path`` prefix and must
    contain at least the keys ``API_key``, ``API_secret_key`` and
    ``access_token``.

    Raises:
        OSError: If the secrets file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If one of the expected keys is missing.

    NOTE(review): the visible listing ends after ``access_token`` is read;
    no return statement or further use of these values is shown here.
    NOTE(review): ``'.../secret/'`` looks like a placeholder path - confirm
    the real location before running.
    """
    path = '.../secret/'
    # Load Twitter API secrets from a json file. The context manager closes
    # the handle even if parsing fails; a bare open(...).read() leaves it open.
    with open(path + 'secrets.json') as secrets_file:
        secrets = json.load(secrets_file)
    api_key = secrets['API_key']
    api_secret_key = secrets['API_secret_key']
    access_token = secrets['access_token']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import requests | |
import socket | |
import sys | |
import twitter_secure as ts | |
# Build the auth object once, at import time, via twitter_secure.secure();
# get_tweets() passes it to requests.get() as `auth` when it calls the
# Twitter API URL for a stream of tweets.
my_auth = ts.secure()
host = "localhost"