Skip to content

Instantly share code, notes, and snippets.

@yogsp
yogsp / fwiki.R
Last active May 17, 2018 11:28
forbes, wikipedia, ggpubr
# Attach pacman, then load the remaining packages through p_load().
# plyr is listed before dplyr in the p_load() call.
library(pacman)
pacman::p_load(data.table, plyr, dplyr, ggpubr)
# Read the two CSV files (relative paths) with fread().
forbes <- fread("forbes_celebrity_100_w_title.csv")
wiki <- fread("wikipedia_edits_forbes_celebrity_100.csv")
# Show the first rows of each table in the data viewer.
View(head(forbes))
View(head(wiki))
# List the distinct values of the Title and Career columns of forbes.
unique(forbes$Title)
unique(forbes$Career)
#Who has ranked number 1 the most number of times
aggforbes <-
@yogsp
yogsp / Frequency Tables.R
Created May 1, 2018 02:20
Create frequency tables using base R, prop.table, round, sort
# Calculating frequencies for a categorical variable
# Build the sample vector from a label vector and a parallel count vector:
# each label is repeated the matching number of times, in the order given.
groups <- rep(
  c("blue", "red", "orange", "green", "purple"),
  times = c(3990, 4140, 1890, 3770, 855)
)

# Create frequency tables
groups.t1 <- table(groups) # counts per label, as a table object
print(groups.t1)
groupsdf <- as.data.frame(groups.t1) # the same counts as a data frame
@yogsp
yogsp / barplot, histogram, boxplot.R
Last active April 30, 2018 15:07
barplot, table, order, par, hist, boxplot, lines, curve, rug
#BAR CHARTS for categorical variables
data(chickwts)
head(chickwts)
# plot() accepts the factor column directly.
plot(chickwts$feed)
# barplot() requires a numeric vector or matrix, so passing the factor
# raises an error. The call is kept as a demonstration, but the error is
# caught so the script keeps running under source() or Rscript.
#This doesn't work
tryCatch(
  barplot(chickwts$feed),
  error = function(e) {
    message("barplot(chickwts$feed) failed: ", conditionMessage(e))
  }
)
#Create a summary table so R can make a barplot
feeds <- table(chickwts$feed)
feeds
barplot(feeds)
#Arrange in descending order
@yogsp
yogsp / Working with Colors.R
Created April 30, 2018 06:02
RColorBrewer, barplot, palette
#Working with Colors
# Sample bar heights used by every barplot() call below.
x <- c(12,4,21,17,13,9)
barplot(x) #all gray and basic
# Open the linked PDF in a browser (presumably a color reference chart — confirm).
browseURL("http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf")
colors() #list of colors
# A color can be selected by name ...
barplot(x, col="moccasin")
# ... or by its position in the vector returned by colors().
barplot(x, col= colors()[633]) #index number of tomato3 from colors()
#RGB Hexcodes
#Can also use shortcut hexcodes (base 16), which are equivalent to
#RGB on the 0-255 scale, as FF in hex equals 255 in decimal
@yogsp
yogsp / mtcars regression.R
Created April 10, 2018 13:38
mtcars, regression, gvlma,rmse, vif, vic, stepwise, aic, bic
> df <- mtcars
> dimnames(mtcars)
[[1]]
[1] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive"
[5] "Hornet Sportabout" "Valiant" "Duster 360" "Merc 240D"
[9] "Merc 230" "Merc 280" "Merc 280C" "Merc 450SE"
[13] "Merc 450SL" "Merc 450SLC" "Cadillac Fleetwood" "Lincoln Continental"
[17] "Chrysler Imperial" "Fiat 128" "Honda Civic" "Toyota Corolla"
[21] "Toyota Corona" "Dodge Challenger" "AMC Javelin" "Camaro Z28"
[25] "Pontiac Firebird" "Fiat X1-9" "Porsche 914-2" "Lotus Europa"
@yogsp
yogsp / Reshaping Data Using plyr, reshape, and reshape2.r
Created March 11, 2018 17:25
dcast, melt, na.omit, stack, unstack, plyr, reshape, reshape2
> #RESHAPING DATA
> #similar to creating a contingency table which enables the user to aggregate data
> library(reshape)
> library(reshape2)
> #use "dcast" function to transform data from long to wide
> #Using dcast, we pass the data to transform (salaries) as the first parameter
> wide_salaries <-
+ dcast(salaries, emp_no ~ year(ymd(from_date)),
+ value.var = "salary")
> wide_salaries[1:3, 1:7]
@yogsp
yogsp / Guess Game.py
Last active January 22, 2018 12:47
Python, for, print, input(), random, for, elif, else
# The random module supplies the number the player has to guess.
import random

# Greet the player, then read their name from standard input and keep it.
print("Hello! What's your name?")
name = input()
print("Hello " + name + ", can you guess the number i'm thinking of? It's between 1-20")

# Pick the secret integer; randint includes both endpoints (1 and 20).
secret = random.randint(1, 20)
print("It's between 1-20.")
#We only want to give a specific number (6) of tries to guess the number so use for loop
@yogsp
yogsp / Cleaning NAs.r
Created January 4, 2018 14:26
mean, complete.cases, attach, lm
# Choose a CSV file interactively and load it into a data frame.
# NOTE(review): the variable reuses the name of the base function class();
# consider a more specific name.
class <- read.csv(file.choose())
# NOTE(review): attach() is what lets the code below refer to Final by bare
# name (presumably a column of the loaded file — confirm). It is generally
# discouraged; class$Final or with(class, ...) avoids the search-path changes.
attach(class)
head(class)
# Mean without NA handling: the result is NA if any value is missing.
mean(Final)
#Calculate mean by ignoring NA values
mean(Final, na.rm = TRUE)
#Check which columns have NAs
summary(class)
#TakeHome and Final have NAs
# Print the Final values wrapped in a one-element list.
list(Final)
@yogsp
yogsp / postgres dvdrental.sql
Last active December 21, 2017 09:02
postgres, dvdrental, join
-- Preview a few rows from each table in the cd schema.
SELECT * FROM cd.bookings LIMIT 5;
SELECT * FROM cd.facilities LIMIT 5;
SELECT * FROM cd.members LIMIT 5;
-- Total slots booked per facility, smallest total first.
-- The date filter is the half-open range [2012-09-01, 2012-10-01), so a
-- booking that starts at exactly midnight on 2012-10-01 is not counted.
SELECT facid, SUM(slots) AS total_slots
FROM cd.bookings
WHERE starttime >= '2012-09-01' AND starttime < '2012-10-01'
GROUP BY facid
ORDER BY SUM(slots);
SELECT facid, SUM(slots) AS "total_slots"
FROM cd.bookings
> library(ggplot2)
> summary(diamonds)
carat cut color clarity depth table price
Min. :0.2000 Fair : 1610 D: 6775 SI1 :13065 Min. :43.00 Min. :43.00 Min. : 326
1st Qu.:0.4000 Good : 4906 E: 9797 VS2 :12258 1st Qu.:61.00 1st Qu.:56.00 1st Qu.: 950
Median :0.7000 Very Good:12082 F: 9542 SI2 : 9194 Median :61.80 Median :57.00 Median : 2401
Mean :0.7979 Premium :13791 G:11292 VS1 : 8171 Mean :61.75 Mean :57.46 Mean : 3933
3rd Qu.:1.0400 Ideal :21551 H: 8304 VVS2 : 5066 3rd Qu.:62.50 3rd Qu.:59.00 3rd Qu.: 5324
Max. :5.0100 I: 5422 VVS1 : 3655 Max. :79.00 Max. :95.00 Max. :18823
J: 2808 (Other): 2531