Ryan Wesslen wesslen

## min-char-rnn.py
"""
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)
BSD License
"""
import numpy as np

# data I/O
# Read the whole training corpus into memory. The context manager closes the
# file handle as soon as the read finishes instead of leaving it open until
# garbage collection.
with open('input.txt', 'r') as input_file:
    data = input_file.read() # should be simple plain text file
# The distinct characters of the corpus form the vocabulary; the iteration
# order of a set is not guaranteed, so the order of `chars` may vary.
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)

## TwitterConvertEST.R
# from http://stackoverflow.com/questions/4696842/convert-twitter-timestamp-in-r
# assume original field is postedTime
# Re-expresses the GMT timestamps held in `postedTime` as character strings
# rendered in the "EST" time zone.

# Parse the timestamp text (year-month-day "T" hour:minute:second) as GMT.
# NOTE: `str` is a data object here that shares its name with base::str().
str <- strptime(postedTime, "%Y-%m-%dT%H:%M:%S", tz = "GMT")
# Convert the parsed value to POSIXct, still interpreted as GMT.
dt.gmt <- as.POSIXct(str, tz = "GMT")
# Overwrite postedTime with text formatted in "EST"; usetz = TRUE appends the
# time-zone abbreviation to each string.
# NOTE(review): "EST" is usually a fixed UTC-5 zone with no daylight saving --
# confirm whether "America/New_York" is what is actually wanted.
postedTime <- format(dt.gmt, tz = "EST", usetz = TRUE)

## CombineMultipleJsonFilesPySpark.py
from pyspark.sql import SQLContext
from pyspark.sql.functions import substring

# Build a SQLContext on top of `sc`, which is not defined in this snippet
# (presumably the SparkContext supplied by the PySpark shell -- confirm).
sqlContext = SQLContext(sc)

# jobDir is the directory holding the input JSON files; the combined output is
# saved under the path jobDir + jobName.
jobDir = "/user/rwesslen/tweets/Pres_Tweets/"
jobName = "presTweet"

# Load every path matching <jobDir>*.json into one DataFrame.
tweets = sqlContext.read.format('json').load([jobDir + "*.json"])
# Reduce to a single partition (presumably so that one output part file is
# produced -- confirm), serialise the rows back to JSON text and save them.
tweets.coalesce(1).toJSON().saveAsTextFile(jobDir + jobName)

## hashtag-filter.py
# Keep only the tweets in the DataFrame `tweets` whose hashtag list contains
# "lovetrumpshate", i.e. tweets that mention the hashtag #lovetrumpshate.
# array_contains is imported here because this snippet calls it but no import
# of it is visible elsewhere in the file.
from pyspark.sql.functions import array_contains

# NOTE(review): the match is an exact string comparison, so differently-cased
# variants of the hashtag are presumably not matched -- confirm if that matters.
activities = tweets.filter(array_contains(tweets.twitter_entities.hashtags.text, "lovetrumpshate"))
# Number of matching tweets.
activities.count()

## TwitterGnipParseJson.R
# install the streamR package the first time -- no need if you already have it installed
#install.packages("streamR")
library(streamR)

# functions
readGnipTweets <- function(tweets, verbose=TRUE){
  ## checking input is correct
  if (is.null(tweets)){
    stop("Error: you need to specify file or object where tweets text was stored.")
  }

## r-shiny.R
# Step 1: Go to http://play-with-docker.com and create an instance
# Step 2: Run "docker run -d -p 3838:3838 -p 8787:8787 -e ADD=shiny rocker/rstudio"
#   The 2nd part of Step 2 runs silently and takes about 3 minutes to install shiny-server
# Step 3: Click the 8787 link to open RStudio in the browser. Copy token and press ok. (username/pwd rstudio/rstudio)

shiny::runExample()

## bumper-sticker-experiment.R
# install required packages (only need to run once)
# devtools::install_github("pablobarbera/twitter_ideology/pkg/tweetscores")
# install.packages("twitteR")
library(tweetscores); library(twitteR)

# take one full day 1% streaming data for Sept 28, 2017 -- 2.58MM unique users for 3,423,287 tweets
# for an example of how to pull 1% streaming using streamR package,
# see https://github.com/wesslen/summer2017-socialmedia/blob/master/day1/twitter-streaming.Rmd
# the id file only needs to include the user profile (actor.id) of the users you want to ping
id <- readr::read_csv("./data/userid20170928.csv")

## mongodb-load.py
import glob
import json
from pymongo import MongoClient

# fill in hostname and port
# HOST is a placeholder string to be replaced with the MongoDB server's
# hostname; PORT is the port the client connects to.
HOST = "hostname"
PORT = 27017

# Create the MongoDB client for the server at HOST:PORT.
client = MongoClient(HOST, PORT)
# fill in dbname and colname

## tidyquant-demo.R
# install tidyverse if you don't have it
# install.packages("tidyverse")
library(tidyverse)

## Read the csv from a URL
url <- "http://assets.datacamp.com/course/compfin/sbuxPrices.csv"
df <- read_csv(url)

## lubridate package to format the date
# if you get an error below, are you sure you have lubridate?

## twitter-trolls.R
library(tidyverse); library(lubridate)

url <- "http://nodeassets.nbcnews.com/russian-twitter-trolls/tweets.csv"
tweets <- read_csv(url)

user.url <- "http://nodeassets.nbcnews.com/russian-twitter-trolls/users.csv"
users <- read_csv(user.url)

tweets %>%
  count(Date = as.Date(created_str)) %>%
	"""
	Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)
	BSD License
	"""
	import numpy as np

	# data I/O
	data = open('input.txt', 'r').read() # should be simple plain text file
	chars = list(set(data))
	data_size, vocab_size = len(data), len(chars)
	# from http://stackoverflow.com/questions/4696842/convert-twitter-timestamp-in-r
	# assume original field is postedTime

	str <- strptime(postedTime, "%Y-%m-%dT%H:%M:%S", tz = "GMT")
	dt.gmt <- as.POSIXct(str, tz = "GMT")
	postedTime <- format(dt.gmt, tz = "EST", usetz = TRUE)
	from pyspark.sql import SQLContext
	from pyspark.sql.functions import substring

	sqlContext = SQLContext(sc)

	jobDir = "/user/rwesslen/tweets/Pres_Tweets/"
	jobName = "presTweet"

	tweets = sqlContext.read.format('json').load([jobDir + "*.json"])
	tweets.coalesce(1).toJSON().saveAsTextFile(jobDir + jobName)
	# filters all tweets that mention the hashtag #lovetrumpshate from the data frame tweets
	activities = tweets.filter((array_contains(tweets.twitter_entities.hashtags.text,"lovetrumpshate")))
	activities.count()
	# install the streamR package the first time -- no need if you already have it installed
	#install.packages("streamR")
	library(streamR)

	# functions
	readGnipTweets <- function(tweets, verbose=TRUE){
	## checking input is correct
	if (is.null(tweets)){
	stop("Error: you need to specify file or object where tweets text was stored.")
	}
	# Step 1: Go to http://play-with-docker.com and create an instance
	# Step 2: Run "docker run -d -p 3838:3838 -p 8787:8787 -e ADD=shiny rocker/rstudio"
	# The 2nd part of Step 2 runs silently and takes about 3 minutes to install shiny-server
	# Step 3: Click the 8787 link to open RStudio in the browser. Copy token and press ok. (username/pwd rstudio/rstudio)

	shiny::runExample()
	# install required packages (only need to run once)
	# devtools::install_github("pablobarbera/twitter_ideology/pkg/tweetscores")
	# install.packages("twitteR")
	library(tweetscores); library(twitteR)

	# take one full day 1% streaming data for Sept 28, 2017 -- 2.58MM unique users for 3,423,287 tweets
	# for an example of how to pull 1% streaming using streamR package,
	# see https://github.com/wesslen/summer2017-socialmedia/blob/master/day1/twitter-streaming.Rmd
	# the id file only needs to include the user profile (actor.id) of the users you want to ping
	id <- readr::read_csv("./data/userid20170928.csv")
	import glob
	import json
	from pymongo import MongoClient

	# fill in hostname and port
	HOST = "hostname"
	PORT = 27017

	client = MongoClient(HOST, PORT)
	# fill in dbname and colname
	# install tidyverse if you don't have it
	# install.packages("tidyverse")
	library(tidyverse)

	## Read the csv from a URL
	url <- "http://assets.datacamp.com/course/compfin/sbuxPrices.csv"
	df <- read_csv(url)

	## lubridate package to format the date
	# if you get an error below, are you sure you have lubridate?
	library(tidyverse); library(lubridate)

	url <- "http://nodeassets.nbcnews.com/russian-twitter-trolls/tweets.csv"
	tweets <- read_csv(url)

	user.url <- "http://nodeassets.nbcnews.com/russian-twitter-trolls/users.csv"
	users <- read_csv(user.url)

	tweets %>%
	count(Date = as.Date(created_str)) %>%