Skip to content

Instantly share code, notes, and snippets.

View psobczyk's full-sized avatar

Piotr Sobczyk psobczyk

View GitHub Profile
@psobczyk
psobczyk / scraping_ncn.R
Created May 25, 2017 09:40
Ściąganie danych - wyniki konkursów NCN
library(httr)
library(XML)
#this is copied from my browser - thus Mozilla and Mac OS
headers <- ('{"Host":"www.ncn.gov.pl","User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:54.0) Gecko/20100101 Firefox/54.0","Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8","Accept-Language":"en-US,en;q=0.5","Accept-Encoding":"gzip, deflate, br","Content-Type":"application/x-www-form-urlencoded","Content-Length":"156","Referer":"https://www.ncn.gov.pl/statystyki/","Cookie":"has_js=1; __utma=198221488.1697332473.1495532703.1495532703.1495532703.1; __utmc=198221488; __utmz=198221488.1495532703.1.1.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided)","Connection":"keep-alive","Upgrade-Insecure-Requests":"1"}')
# Named list of form fields. Presumably this is the payload sent to the NCN
# statistics form (the request code is not visible here -- confirm).
body <- list(
  zestawienie = "kwota",
  podzial = "wojewodztwo",
  zakres = "rok",
  konkursid = "",
  panelid = "",
  typid = "",
  rokid = "1",
  wojewodztwoid = "",
  plecid = "",
  tytulid = "",
  rodzajejednostekid = "1"
)

# Pre-allocated list with six empty (NULL) slots for the results.
granty <- vector(mode = "list", length = 6)
@psobczyk
psobczyk / plot_ncn_grants.R
Created May 25, 2017 09:53
Dofinansowanie przypadające na województwa
library(dplyr)
library(ggplot2)
library(ggthemes)
top_percent <- granty %>%
group_by(rok) %>%
mutate(procent = kwota/sum(kwota)) %>%
arrange(desc(procent)) %>%
top_n(7, procent) %>%
mutate(pozycja = row_number(),
@psobczyk
psobczyk / analiza_wynikow_polmaraton_wroclaw_2017.R
Created June 18, 2017 10:23
Analiza wyników 5. PKO Wrocław Półmaratonu
library(XML)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggthemes)
# Path to the results table, saved locally from
# http://wyniki.datasport.pl/results2200/
url_address <- "wyniki.txt"

# Parse the saved HTML as UTF-8 and keep the result as an internal-node
# document. `useInternalNodes` is spelled out in full instead of relying on
# partial argument matching of `useInternal`.
doc.html <- htmlTreeParse(
  url_address,
  useInternalNodes = TRUE,
  encoding = "UTF-8"
)
# data can be downloaded from http://ghdx.healthdata.org/gbd-2015
obesity <- read.csv("IHME_GBD_2015_OBESITY_PREVALENCE_1980_2015_1/IHME_GBD_2015_OBESITY_PREVALENCE_1980_2015_Y2017M06D12.CSV")
obesity %>%
filter(location_name %in% c("United States", "Poland", "Sweden"),
metric == "Percent",
sex == "Both",
age_group_name %in% c('15 to 19', '20 to 24',
'30 to 34', '35 to 39',
@psobczyk
psobczyk / plot_temperature_hdi.R
Created August 24, 2017 15:08
Code for visualizations in blog post http://szychtawdanych.pl/?p=1704
library(rvest)
library(ggplot2)
library(ggthemes)
library(highcharter)
# Download the Wikipedia page listing average yearly temperature by country.
avg_temp <- read_html('https://en.wikipedia.org/wiki/List_of_countries_by_average_yearly_temperature')
# Keep the second table found on the page. `TRUE` is used instead of `T`,
# because `T` is an ordinary variable that can be reassigned.
# NOTE(review): newer rvest releases reportedly deprecate `fill` -- confirm
# against the installed version before removing it.
avg_temp <- html_table(avg_temp, fill = TRUE)[[2]]
# Download the Wikipedia page listing GDP (PPP) per capita by country.
gdp <- read_html('https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(PPP)_per_capita')
@psobczyk
psobczyk / scraping_timeanddata.R
Created June 7, 2018 07:44
scraping data from timeanddate.com
library(rvest)
library(dplyr)
# Fetch the holidays index page from timeanddate.com.
main_page <- read_html("https://www.timeanddate.com/holidays/")

# Take the `href` attribute of every <a> nested in an <li> under a
# <div class="row">.
all_countries <- html_attr(
  html_nodes(main_page, xpath = '//div[@class="row"]//li//a'),
  name = "href"
)

# Starts out empty; presumably filled in by later code that is not shown here.
holidays <- NULL
@psobczyk
psobczyk / knock_off_fixtures.csv
Created June 19, 2018 06:35
from data scraping to World Cup winner predictions
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
match;group.x;position.x;group.y;position.y
49;A;1;B;2
50;C;1;D;2
51;B;1;A;2
52;D;1;C;2
53;E;1;F;2
54;G;1;H;2
55;F;1;E;2
56;H;1;G;2
57;;49;;50
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
Województwo;Ogółem;Mężczyźni;Kobiety;PIS;PO;Nowoczesna;Kukiz;SLD;PSL
MAZOWIECKIE;5 349 114;2 559 241;2 789 873;41.7;20.2;7.2;8.1;2.8;4.9
ŚLĄSKIE;4 570 849;2 204 972;2 365 877;37.4;20.0;6.1;10.1;4.3;1.8
WIELKOPOLSKIE;3 475 323;1 690 930;1 784 393;34.3;27.4;6.6;6.7;4.4;5.0
MAŁOPOLSKIE;3 372 618;1 636 707;1 735 911;50.1;12.0;4.3;6.8;3.0;1.7
DOLNOŚLĄSKIE;2 904 207;1 396 318;1 507 889;32.0;26.9;8.4;10.3;7.0;3.3
ŁÓDZKIE;2 493 603;1 188 118;1 305 485;36.4;19.1;6.6;6.9;5.6;3.7
POMORSKIE;2 307 710;1 124 377;1 183 333;30.1;28.5;9.9;5.5;4.1;3.0
LUBELSKIE;2 139 726;1 037 052;1 102 674;45.3;13.2;4.6;7.2;3.2;8.0
PODKARPACKIE;2 127 657;1 041 779;1 085 878;53.5;14.3;3.2;8.5;2.3;4.7
@psobczyk
psobczyk / parliament_seats.R
Created October 10, 2018 19:46
analysing number of parliament members worldwide
# Load hard dependencies with library(), so that a missing package stops the
# script immediately. require() would only return FALSE with a warning and
# let the script fail later with a less helpful error.
library(dplyr)
library(rvest)
library(ggplot2)
library(ggthemes)

# Wikipedia page listing legislatures by number of members.
url <- "https://en.wikipedia.org/wiki/List_of_legislatures_by_number_of_members"
# Download and parse the page once.
html_downloaded <- read_html(url)
tables<- url %>% read_html %>%
@psobczyk
psobczyk / wedding_vignette.R
Last active September 14, 2019 13:15
customised, automatised wedding vignettes
library(png)
#' Print Table Vignette
#'
#' @param out_dir
#' @param img
#' @param name
#' @param file_name
#' @param two_sided
#'