Skip to content

Instantly share code, notes, and snippets.

View gonzalezgouveia's full-sized avatar

Rafael Gonzalez Gouveia gonzalezgouveia

View GitHub Profile
@gonzalezgouveia
gonzalezgouveia / wine_cleaning.R
Created February 5, 2019 19:47
cleaning data with dplyr
# Build the analysis table from `winedata`:
#   1. keep only the country, points and price columns;
#   2. drop every row that has a missing value in any of them;
#   3. keep only countries with more than 2000 remaining rows;
#   4. add the natural log of price as `log_price`.
clean_data <- winedata %>%
  select(country, points, price) %>%
  drop_na() %>% # remove rows with missing values
  group_by(country) %>%
  filter(n() > 2000) %>% # n() is the per-country row count here
  ungroup() %>% # return an ungrouped table for later steps
  mutate(log_price = log(price)) # log-transformed price
@gonzalezgouveia
gonzalezgouveia / wine_clean_plot.R
Last active February 5, 2019 19:53
cleaned plot
# Scatter plot of points against log(price) for a random sample of 1000 rows
# of `clean_data`, coloured by country. Both axes are jittered.
# The seed is fixed first so that the sample (and the jitter) are repeatable.
set.seed(12321)
ggplot(
  sample_n(clean_data, size = 1000),
  aes(
    x = jitter(log_price, factor = 3),
    y = jitter(points, factor = 3),
    color = country
  )
) +
  geom_point(size = 2) +
  labs(
    x = "log(Precio)",
    y = "Puntuación",
    title = "Revisiones de vinos por país"
  )
# No plot object is passed, so ggsave() falls back to its default
# (the most recently created/modified ggplot).
ggsave("./path/to/data/clean_wine_country.png")
# One row per country: the number of observations plus the coefficients of
# the linear fit points ~ log(price) computed within that country.
#   b = first coefficient (the intercept term of the formula)
#   m = second coefficient (the log(price) term)
all_models <- clean_data %>%
  group_by(country) %>%
  summarise(
    n_obs = n(),
    b = coef(lm(points ~ log(price)))[1],
    m = coef(lm(points ~ log(price)))[2]
  )
@gonzalezgouveia
gonzalezgouveia / wine_trelliscope.R
Last active February 5, 2019 20:16
trelliscope dashboard
# Output folder handed to facet_trelliscope() below.
path_trelliscope <- "./path/to/folder/trelliscope/"

# One panel per country: points against log(price) with a straight-line
# (lm) fit and no confidence band. Panels are laid out 1 row x 3 columns.
ggplot(clean_data, aes(x = log(price), y = points)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_trelliscope(
    ~ country,
    nrow = 1,
    ncol = 3,
    path = path_trelliscope,
    self_contained = TRUE
  )
@gonzalezgouveia
gonzalezgouveia / mediumStoriesStats.js
Created February 14, 2019 19:17 — forked from epintos/mediumStoriesStats.js
Export Medium Stories Stats to CSV
// Run Inspector Console in chrome and copy and paste the following code in the /stats/stories view
function download(filename, text) {
var pom = document.createElement('a');
pom.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(text));
pom.setAttribute('download', filename);
if (document.createEvent) {
var event = document.createEvent('MouseEvents');
event.initEvent('click', true, true);
library(tidyverse)
# Load the two input tables from CSV.
# Each call previously ended with an extra, unmatched ")" which is a syntax
# error; the parentheses are now balanced.
# NOTE(review): the "income_..._adjusted.csv" file name looks abbreviated —
# confirm the real file name before running.
gdpPercap <- read_csv('/path/to/data/income_..._adjusted.csv')
lifeExp <- read_csv('/path/to/data/life_expectancy_years.csv')
# Country names that make up the "HispAm" subset used for filtering.
# NOTE(review): Brazil is included in this list — confirm that is intended.
hispam_vec <- c(
  "Argentina",
  "Brazil",
  "Bolivia",
  "Chile",
  "Colombia",
  "Costa Rica",
  "Cuba",
  "Dominican Republic",
  "Ecuador",
  "El Salvador",
  "Guatemala",
  "Honduras",
  "Mexico",
  "Nicaragua",
  "Panama",
  "Paraguay",
  "Peru",
  "Uruguay",
  "Spain",
  "Puerto Rico",
  "Venezuela"
)
# dplyr::filter countries in hispam
######
# install.packages("gganimate")
library(gganimate)
# devtools::install_github('rensa/ggflags')
library(ggflags)
# install.packages("gifski")
library(gifski)
# define animation object
anim <- gapminder_hispam %>%
filter(year >= 1900) %>%
import pandas as pd
import numpy as np
# Name tables come from the repo https://github.com/marcboquet/spanish-names.
# Each CSV is read into a DataFrame, and its 'nombre' column is then
# flattened into a plain Python list.
name_men = pd.read_csv("./hombres.csv")
name_men_list = name_men["nombre"].to_list()

name_women = pd.read_csv("./mujeres.csv")
name_women_list = name_women["nombre"].to_list()
# Simulation parameters.
# Presumably the age/height pairs are lower and upper bounds for generated
# values and `decimals` is a rounding precision — confirm against the code
# that consumes them.
low_age, high_age = 18, 60
low_height, high_height = 1.5, 2
decimals = 2
genders = ["M", "F"]