GitHub Gists for Ryan Wesslen (wesslen)
@wesslen
wesslen / min-char-rnn.py
Created January 28, 2016 22:28 — forked from karpathy/min-char-rnn.py
Minimal character-level language model with a Vanilla Recurrent Neural Network, in Python/numpy
"""
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)
BSD License
"""
import numpy as np
# data I/O
data = open('input.txt', 'r').read() # should be simple plain text file
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
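The preview stops after computing the data and vocabulary sizes; the full gist continues by building character-to-index lookups. A minimal sketch of that next step (using an inline string as a stand-in for the contents of input.txt):

```python
# stand-in for data = open('input.txt', 'r').read()
data = "hello"
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)

# map each unique character to an integer index, and back again;
# these dictionaries are what the rest of the RNN uses to encode text
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}
```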
@wesslen
wesslen / TwitterConvertEST.R
Created July 11, 2016 14:48
Convert (Gnip) Twitter Date-Time GMT Character to Eastern Standard Time
# from http://stackoverflow.com/questions/4696842/convert-twitter-timestamp-in-r
# assume original field is postedTime
str <- strptime(postedTime, "%Y-%m-%dT%H:%M:%S", tz = "GMT")
dt.gmt <- as.POSIXct(str, tz = "GMT")
postedTime <- format(dt.gmt, tz = "EST", usetz = TRUE)
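For readers working outside R, a hypothetical Python equivalent of the same conversion, using only the standard library: parse a Gnip postedTime string as GMT, then render it in a fixed-offset EST (UTC-5), mirroring R's `tz = "EST"`:

```python
from datetime import datetime, timedelta, timezone

# fixed-offset EST, matching R's tz = "EST" (no daylight-saving logic)
EST = timezone(timedelta(hours=-5), "EST")

def posted_time_to_est(posted_time: str) -> str:
    # parse the Gnip timestamp and mark it as UTC/GMT
    dt_gmt = datetime.strptime(posted_time, "%Y-%m-%dT%H:%M:%S").replace(tzinfo=timezone.utc)
    # shift to EST and format with the timezone abbreviation
    return dt_gmt.astimezone(EST).strftime("%Y-%m-%d %H:%M:%S %Z")
```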
@wesslen
wesslen / LDAvisJsonQuanteda.R
Created July 22, 2016 22:32
Convert LDA (topicmodels) output in R to json using R quanteda package for LDAvis
topicmodels_json_ldavis <- function(fitted, dfm, dtm){
  # Required packages
  library(topicmodels)
  library(dplyr)
  library(stringi)
  library(quanteda)
  library(LDAvis)
  # Find required quantities
from pyspark.sql import SQLContext
from pyspark.sql.functions import substring
sqlContext = SQLContext(sc)
jobDir = "/user/rwesslen/tweets/Pres_Tweets/"
jobName = "presTweet"
tweets = sqlContext.read.format('json').load([jobDir + "*.json"])
tweets.coalesce(1).toJSON().saveAsTextFile(jobDir + jobName)
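The Spark job above reads every *.json file in a directory and writes the records back out as a single JSON-lines output. A plain-Python sketch of the same idea, for reference (directory and job names are illustrative, not part of the original gist):

```python
import glob
import json
import os

def combine_json(job_dir: str, job_name: str) -> str:
    """Read all *.json files in job_dir and write one combined JSON-lines file."""
    out_path = os.path.join(job_dir, job_name + ".jsonl")
    with open(out_path, "w") as out:
        for path in sorted(glob.glob(os.path.join(job_dir, "*.json"))):
            with open(path) as f:
                for line in f:
                    if line.strip():
                        # re-serialize each record on its own line,
                        # as coalesce(1).toJSON().saveAsTextFile would
                        out.write(json.dumps(json.loads(line)) + "\n")
    return out_path
```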
@wesslen
wesslen / hashtag-filter.py
Created March 16, 2017 17:05
PySpark Gnip Filter by Hashtag
# filters all tweets that mention the hashtag #lovetrumpshate from the data frame tweets
from pyspark.sql.functions import array_contains
activities = tweets.filter(array_contains(tweets.twitter_entities.hashtags.text, "lovetrumpshate"))
activities.count()
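What `array_contains` does above can be mirrored on plain Python dicts shaped like Gnip activities; a small sketch (field names follow the Spark snippet, the sample tweets are made up):

```python
def has_hashtag(tweet: dict, tag: str) -> bool:
    # twitter_entities.hashtags is a list of {"text": ...} objects in Gnip JSON
    hashtags = tweet.get("twitter_entities", {}).get("hashtags", [])
    return any(h.get("text") == tag for h in hashtags)

tweets = [
    {"twitter_entities": {"hashtags": [{"text": "lovetrumpshate"}]}},
    {"twitter_entities": {"hashtags": [{"text": "maga"}]}},
]

# keep only tweets mentioning the target hashtag
activities = [t for t in tweets if has_hashtag(t, "lovetrumpshate")]
```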
@wesslen
wesslen / TwitterGnipParseJson.R
Created June 3, 2017 18:26
Code to parse Gnip JSON to R Dataframe using streamR functions
# install the streamR package the first time -- no need if you already have it installed
#install.packages("streamR")
library(streamR)
# functions
readGnipTweets <- function(tweets, verbose=TRUE){
  ## checking input is correct
  if (is.null(tweets)){
    stop("Error: you need to specify file or object where tweets text was stored.")
  }
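The streamR-based parser above flattens Gnip activity-stream JSON into a data frame. A minimal Python sketch of the same idea, for comparison (the field names `actor.id`, `postedTime`, and `body` assume the standard Gnip activity-stream format):

```python
import json

def parse_gnip_lines(lines):
    """Parse newline-delimited Gnip activity JSON into a list of flat records."""
    rows = []
    for line in lines:
        if not line.strip():
            continue
        activity = json.loads(line)
        rows.append({
            "actor_id": activity.get("actor", {}).get("id"),
            "postedTime": activity.get("postedTime"),
            "body": activity.get("body"),
        })
    return rows
```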
@wesslen
wesslen / r-twitter-example.R
Last active September 1, 2018 02:49
RStudio Tidyverse Docker Demo
# Step 1: Go to http://play-with-docker.com and create an instance (you will need to sign up for a Docker username/pwd)
# Step 2: Run "docker run -e PASSWORD=<YOUR_PASS> -p 8787:8787 rocker/tidyverse". NOTE: <YOUR_PASS> equals a unique password you set.
# Step 3: Click 8787 link to open in browser. Copy token and press ok. (username/pwd rstudio/<YOUR_PASS>)
# Step 4: Download this file by running:
# download.file("https://gist.githubusercontent.com/wesslen/ae9aca04b491a064764b13239fb17489/raw/8c35e746585f719c62e0437ec095a23c21c44ccb/r-docker.R", destfile = "r-docker.R")
# call tidyverse -- if you get an error, check that the tidyverse package is installed
library(tidyverse)
# load tweets
@wesslen
wesslen / r-shiny.R
Last active August 13, 2017 20:24
Shiny demos on Docker
# Step 1: Go to http://play-with-docker.com and create an instance
# Step 2: Run "docker run -d -p 3838:3838 -p 8787:8787 -e ADD=shiny rocker/rstudio"
# The 2nd part of Step 2 runs silently and takes about 3 minutes to install shiny-server
# Step 3: Click 8787 link to open in browser. Copy token and press ok. (username/pwd rstudio/rstudio)
shiny::runExample()
@wesslen
wesslen / bumper-sticker-experiment.R
Last active January 10, 2018 19:04
Code to run Twitter Bumper Sticker Experiment using twitteR and tweetscores
# install required packages (only need to run once)
# devtools::install_github("pablobarbera/twitter_ideology/pkg/tweetscores")
# install.packages("twitteR")
library(tweetscores); library(twitteR)
# take one full day 1% streaming data for Sept 28, 2017 -- 2.58MM unique users for 3,423,287 tweets
# for an example of how to pull 1% streaming using streamR package,
# see https://github.com/wesslen/summer2017-socialmedia/blob/master/day1/twitter-streaming.Rmd
# the id file only needs to include the user profile (actor.id) of the users you want to ping
id <- readr::read_csv("./data/userid20170928.csv")
@wesslen
wesslen / mongolite-datalake.R
Created February 6, 2018 17:27
R Code to connect to UNCC Vis DataLake (MongoDB) via mongolite
# install mongolite if you do not have it
# see https://github.com/jeroen/mongolite
#install.packages("mongolite")
library(mongolite)
# replace with name of database -- change last folder to database (e.g., "gab")
mongoUrl <- "mongodb://datalake:27017/gab"
# change col to your collection
col <- "id"