View egypt_pop_2006.csv
gov population
Alexandria 4716000
Aswan 1395000
Asyout 4123000
Beheira 5647000
Beni-Suef 2771000
Cairo 9102000
Dakahlia 5818000
Damietta 1301000
El Wadi Elgedid 220000
View regex_snippets.R
# Removes the table name from the column names of a Hive-exported data frame
gsub('.*?\\.(.*)' , '\\1', 'tablename.columnaname.suffix') # => columnaname.suffix
# Strip the leading "<table>." qualifier from every column name of `df`.
#
# Everything up to and including the first dot of each name is dropped;
# names that contain no dot are left untouched.
#
# df: a data frame whose names may carry a table-name prefix.
# Returns `df` with its names rewritten.
remove_table_name <- function(df) {
  qualifier_pattern <- '.*?\\.(.*)'
  # e.g. "tablename.columnaname.suffix" -> "columnaname.suffix"
  stripped_names <- gsub(qualifier_pattern, '\\1', names(df))
  names(df) <- stripped_names
  df
}
# GTEST of two vectors (range 0, 1)
View sunburst.R
library(ggplot2)
library(dplyr)
library(magrittr)
library(scales)
# Read the population table from egypt_pop_2006.csv.
pop.eg <- read.csv("egypt_pop_2006.csv")
# Grand total of the `population` column as a plain number ...
sum_total_pop <- sum(pop.eg$population)
# ... and the same total as a one-row summary with column `total_pop`.
firstLevel <- summarize(pop.eg, total_pop = sum(population))
View jolly_plots.R
library(ggplot2)
library(dplyr)
library(tidyr)
# Read the raw data, then reshape it to long form: the `skill` and
# `challenge` columns are stacked into a `key` column (which of the two the
# value came from) and a `flow` column (the value itself).
jolly <- read.csv("jolly.csv")
jolly <- gather(jolly, key, flow, skill, challenge)
# Resulting columns: game, level, key, flow
# game level key flow up
View perf_sub_two.csv
tx_merchant_clean month_number year(Post_date) key value
A/D POLICE(FINES DEPT) 1 2014 Sum 946460
A/D POLICE(FINES DEPT) 2 2014 Sum 645740
A/D POLICE(FINES DEPT) 3 2014 Sum 830610
A/D POLICE(FINES DEPT) 4 2014 Sum 422565
A/D POLICE(FINES DEPT) 9 2013 Sum 671820
A/D POLICE(FINES DEPT) 10 2013 Sum 200645
A/D POLICE(FINES DEPT) 11 2013 Sum 323660
A/D POLICE(FINES DEPT) 12 2013 Sum 457225.55
ACE 1 2014 Sum 410792.24
View march.html
<html>
<style>
/* Full-width block: 20px top/bottom margin, automatic left/right margin. */
.group {
  width: 100%;
  margin: 20px auto;
  vertical-align: middle;
}
.btn{
background: white;
View vis_tut.R
library(ggplot2)
# 1- gentle qplot
# Argument list, for reference:
# qplot(x, y, data=, color=, shape=, size=, alpha=, geom=, method=, formula=, facets=, xlim=, ylim=, xlab=, ylab=, main=, sub=)

# Base graphics: sepal length vs. petal length, coloured by species.
# The column is spelled out as `Species`; the previous `iris$Specie` only
# resolved through `$` partial matching on the data frame.
plot(iris$Sepal.Length, iris$Petal.Length, col = iris$Species)
# The same scatter plot through qplot, with Species passed as `color=`.
qplot(Sepal.Length, Petal.Length, data = iris, color = Species)
#
View boys_and_girls
library(ggplot2)
# Simulate families that keep having children until their first boy (capped
# at 10 children per family) and record the boys-to-girls ratio.
# Encoding: boy = 1, girl = 0.

# `num_sim` (families per replicate) is used below but is not assigned
# anywhere above this point; fall back to a default only when it has not
# already been set, so an externally chosen value is respected.
if (!exists("num_sim")) {
  num_sim <- 1000L
}

# b: numeric vector of 100 ratios, one per replicate of `num_sim` families.
b <- vapply(seq_len(100), function(replicate_id) {
  max_children <- 10L
  births <- lapply(seq_len(num_sim), function(family_id) {
    rbinom(max_children, 1, 0.5)
  })
  # Position of the first boy in each family; NA when every child is a girl.
  first_boy <- vapply(births, function(birth) match(1L, birth), integer(1))
  had_boy <- !is.na(first_boy)
  # A family stops at its first boy, so it contributes at most one boy ...
  num_boys <- sum(had_boy)
  # ... and every child before him is a girl. A family with no boy counts as
  # `max_children` girls instead of turning the whole sum into NA.
  num_girls <- sum(ifelse(had_boy, first_boy - 1L, max_children))
  num_boys / num_girls
}, numeric(1))
View Penny's game simulation
# returns % of losses of the first player
play = function(first, second, num_sim=200){
games = lapply(1:num_sim, function(x){rbinom(1000, 1, 0.5)})
games = sapply(games, function(x) {do.call(paste0, as.list(x))})
#first = paste(paste0, as.list(first))
#second = paste(paste0, as.list(second))
first_m = sapply(games, function(game){ regexpr(first, game)})
first_m[first_m == -1] = Inf
View cluster_visual
require('ggplot2')
require("LICORS") #library that implements kmeans++
plotClusters = function(df, cluster, name){
num_centers = length(cluster$centers)
gr = ggplot(df, aes(x=X, y=Y))
before_cluster = gr + geom_point(size=3, alpha=0.7) + labs(title='Before Clustering'); before_cluster
after_cluster= gr + geom_point(aes(col=factor(cluster$cluster)), size=3, alpha=0.7) + guides(colour=F)
after_cluster = after_cluster + scale_color_hue(l=50)