Skip to content

Instantly share code, notes, and snippets.

import numpy as np
import pandas as pd
import os
# Work inside the data directory so the CSV can be opened by its bare filename.
os.chdir("/Users/shuozhang/Desktop/data")
df = pd.read_csv('nycmodeldata.csv', sep='\t', encoding='utf-8')

#### create the prediction data set
# Collapse each column to its distinct values; the ordering of a set is arbitrary.
zipcode, Weekday = (list(set(df[column])) for column in ('zipcode', 'Weekday'))
#### linear regression
import numpy as np
import pandas as pd
import os
# Work inside the data directory so the CSV can be opened by its bare filename.
os.chdir("/Users/shuozhang/Desktop/data")
# zipcode is requested with an explicit string dtype -- presumably so ZIP codes
# are not parsed as numbers (TODO confirm against the data file).
nycmodel=pd.read_csv('nycmodeldata.csv', sep='\t', index_col=False, dtype={'zipcode':'S10'})
import statsmodels.api as sm
# One indicator column per distinct zipcode value.
add_dummies = pd.get_dummies(nycmodel['zipcode'])
# Cast the indicators to plain integers. The `np.int` alias used here before was
# deprecated in NumPy 1.20 and removed in 1.24 (it was just the builtin `int`),
# and a whole-frame `astype` avoids the element-wise `applymap` call.
add_dummies = add_dummies.astype(int)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
#### yellow taxi: feature selection
os.chdir("/Users/shuozhang/Desktop/capstone/yellow taxi")
yellow_raw = pd.read_csv("yellow_tripdata_2014-05.csv", index_col=False)
# Strip every space character out of the column names (literal match, not a regex).
yellow_raw.columns = yellow_raw.columns.str.replace(" ", "", regex=False)
# Resolve relative file names against the web-scraping folder.
setwd("~/Desktop/web scraping")
library(dplyr)
library(ggplot2)
library(dygraphs)
library(plotly)
# Read the comma-separated file with a header row, keeping text columns as character.
event <- read.csv('gender2.txt', header = TRUE, stringsAsFactors = FALSE, sep = ',')
# Quick inspection: row count, then the interactive data viewer.
nrow(event)
View(event)
#length(event$Event)
# Resolve relative file names against the web-scraping folder.
setwd("~/Desktop/web scraping")
library(ggplot2)
library(data.table)
library(dygraphs)
library(dplyr)
# Read the file with a header row, keeping text columns as character.
swimming <- read.csv('swimming.txt', header = TRUE, stringsAsFactors = FALSE)
# Quick inspection: row count, interactive viewer, per-column summary.
nrow(swimming)
View(swimming)
summary(swimming)
ss=swimming%>%
setwd("~/Desktop/web scraping")
library(dplyr)
library(dygraphs)
library(reshape2)
library(ggplot2)
library(plotly)
library(car)
library(TSA)
library(forecast)
library(xts)
from bs4 import BeautifulSoup
try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 has no `urllib2`; Request/urlopen live in urllib.request.
    # Bind it to the old name so every `urllib2.` reference keeps working.
    import urllib.request as urllib2
# Page to scrape (URL path points at the 2012 summer games).
web='http://www.sports-reference.com/olympics/summer/2012/'
req = urllib2.Request(web)
page = urllib2.urlopen(req)
# Parse the HTTP response with the lxml parser.
soup = BeautifulSoup(page, "lxml")
# First <div class="table_container"> in the document; `find` gives None if there is none.
table = soup.find("div", { "class" : "table_container" })
# Starts out empty.
cells=[]
for row in table.findAll("tr"):
result = row.findAll("td")
library(shiny)
library(dplyr)
library(leaflet)
library(ggplot2)
library(plotly)
library(googleVis)
library(wordcloud)
library(RColorBrewer)
library(shinythemes)
library(shiny)
library(shinydashboard)
library(dplyr)
library(leaflet)
library(googleVis)
library(plotly)
library(wordcloud)
library(RColorBrewer)
library(shinythemes)
library(dplyr)
library(wordcloud)
library(RColorBrewer)
library(shinythemes)
# Read the phone data with a header row, keeping text columns as character.
device_data <- read.csv('./data/phonedata.csv', header = TRUE, stringsAsFactors = FALSE)
#str(device_data)
# Keep rows that have longitude, latitude and group, then clip to the
# longitude [73, 136) x latitude [4, 54) rectangle.
# NOTE(review): the bounds look like a country-level bounding box -- confirm which region is intended.
device_map <- device_data %>%
  dplyr::filter(!is.na(longitude), !is.na(latitude), !is.na(group)) %>%
  dplyr::filter(longitude >= 73, longitude < 136, latitude >= 4, latitude < 54)
#str(device_map)