This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy) | |
BSD License | |
""" | |
import numpy as np | |
# data I/O | |
data = open('input.txt', 'r').read() # should be simple plain text file | |
chars = list(set(data)) | |
data_size, vocab_size = len(data), len(chars) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# from http://stackoverflow.com/questions/4696842/convert-twitter-timestamp-in-r | |
# assume original field is postedTime | |
str <- strptime(postedTime, "%Y-%m-%dT%H:%M:%S", tz = "GMT") | |
dt.gmt <- as.POSIXct(str, tz = "GMT") | |
postedTime <- format(dt.gmt, tz = "EST", usetz = TRUE) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql import SQLContext | |
from pyspark.sql.functions import substring | |
sqlContext = SQLContext(sc) | |
jobDir = "/user/rwesslen/tweets/Pres_Tweets/" | |
jobName = "presTweet" | |
tweets = sqlContext.read.format('json').load([jobDir + "*.json"]) | |
tweets.coalesce(1).toJSON().saveAsTextFile(jobDir + jobName) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# keep only the tweets in the data frame `tweets` whose hashtags include "lovetrumpshate" | |
activities = tweets.filter((array_contains(tweets.twitter_entities.hashtags.text,"lovetrumpshate"))) | |
activities.count() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# install the streamR package the first time -- no need if you already have it installed | |
#install.packages("streamR") | |
library(streamR) | |
# functions | |
readGnipTweets <- function(tweets, verbose=TRUE){ | |
## checking input is correct | |
if (is.null(tweets)){ | |
stop("Error: you need to specify file or object where tweets text was stored.") | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Step 1: Go to http://play-with-docker.com and create an instance | |
# Step 2: Run "docker run -d -p 3838:3838 -p 8787:8787 -e ADD=shiny rocker/rstudio" | |
# The 2nd part of Step 2 runs silently and takes about 3 minutes to install shiny-server | |
# Step 3: Click the 8787 link to open RStudio in the browser. Copy token and press ok. (username/pwd rstudio/rstudio) | |
shiny::runExample() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# install required packages (only need to run once) | |
# devtools::install_github("pablobarbera/twitter_ideology/pkg/tweetscores") | |
# install.packages("twitteR") | |
library(tweetscores); library(twitteR) | |
# take one full day 1% streaming data for Sept 28, 2017 -- 2.58MM unique users for 3,423,287 tweets | |
# for an example of how to pull 1% streaming using streamR package, | |
# see https://github.com/wesslen/summer2017-socialmedia/blob/master/day1/twitter-streaming.Rmd | |
# the id file only needs to include the user profile (actor.id) of the users you want to ping | |
id <- readr::read_csv("./data/userid20170928.csv") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
import json | |
from pymongo import MongoClient | |
# fill in hostname and port | |
HOST = "hostname" | |
PORT = 27017 | |
client = MongoClient(HOST, PORT) | |
# fill in dbname and colname |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# install tidyverse if you don't have it | |
# install.packages("tidyverse") | |
library(tidyverse) | |
## Read the csv from a URL | |
url <- "http://assets.datacamp.com/course/compfin/sbuxPrices.csv" | |
df <- read_csv(url) | |
## lubridate package to format the date | |
# if you get an error below, are you sure you have lubridate? |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse); library(lubridate) | |
url <- "http://nodeassets.nbcnews.com/russian-twitter-trolls/tweets.csv" | |
tweets <- read_csv(url) | |
user.url <- "http://nodeassets.nbcnews.com/russian-twitter-trolls/users.csv" | |
users <- read_csv(user.url) | |
tweets %>% | |
count(Date = as.Date(created_str)) %>% |
OlderNewer