Skip to content

Instantly share code, notes, and snippets.

View psobczyk's full-sized avatar

Piotr Sobczyk psobczyk

View GitHub Profile
@psobczyk
psobczyk / url-mapping-disqus.R
Created September 19, 2020 12:57
Creating a URL mapping when migrating from WordPress to Hugo
library(xml2)
library(rvest)
# Parse the export file (the path is a placeholder to be filled in).
doc <- read_html(x = "path_to_xml_file")
# Every <thread> node found below the document root.
threads <- doc %>%
  xml_find_all(".//thread")
# Text of each thread's <id> node, reduced to the part starting at "http"
# that follows the last space matched by the greedy ".* " prefix.
old_links <- threads %>%
  xml_find_all(xpath = ".//id") %>%
  xml_text() %>%
  gsub(".* (http.*)", "\\1", .)
# Text of each thread's <title> node.
old_titles <- threads %>%
  xml_find_all(xpath = ".//title") %>%
  xml_text()
@psobczyk
psobczyk / sejm.R
Created October 22, 2019 22:04
Analysis of Polish general elections 2019
# Files with data come from https://wybory.gov.pl/sejmsenat2019/pl/dane_w_arkuszach
# Dependencies are attached with library() so that a missing package stops
# the script right here; require() would only warn and return FALSE,
# deferring the failure to the first call that needs the package.
library(dplyr)
library(magrittr)
library(tidyr)
library(broom)
library(ggparliament)
# Sejm elections. Could void votes affect results?
@psobczyk
psobczyk / wedding_vignette.R
Last active September 14, 2019 13:15
customised, automatised wedding vignettes
library(png)
#' Print Table Vignette
#'
#' @param out_dir
#' @param img
#' @param name
#' @param file_name
#' @param two_sided
#'
@psobczyk
psobczyk / parliament_seats.R
Created October 10, 2018 19:46
analysing number of parliament members worldwide
# Dependencies are attached with library() so that a missing package stops
# the script immediately; require() would only warn and return FALSE.
library(dplyr)
library(rvest)
library(ggplot2)
library(ggthemes)
# Wikipedia page listing legislatures by their number of members.
url <- "https://en.wikipedia.org/wiki/List_of_legislatures_by_number_of_members"
# Parsed HTML document of that page.
html_downloaded <- read_html(url)
tables<- url %>% read_html %>%
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
Województwo;Ogółem;Mężczyźni;Kobiety;PIS;PO;Nowoczesna;Kukiz;SLD;PSL
MAZOWIECKIE;5 349 114;2 559 241;2 789 873;41.7;20.2;7.2;8.1;2.8;4.9
ŚLĄSKIE;4 570 849;2 204 972;2 365 877;37.4;20.0;6.1;10.1;4.3;1.8
WIELKOPOLSKIE;3 475 323;1 690 930;1 784 393;34.3;27.4;6.6;6.7;4.4;5.0
MAŁOPOLSKIE;3 372 618;1 636 707;1 735 911;50.1;12.0;4.3;6.8;3.0;1.7
DOLNOŚLĄSKIE;2 904 207;1 396 318;1 507 889;32.0;26.9;8.4;10.3;7.0;3.3
ŁÓDZKIE;2 493 603;1 188 118;1 305 485;36.4;19.1;6.6;6.9;5.6;3.7
POMORSKIE;2 307 710;1 124 377;1 183 333;30.1;28.5;9.9;5.5;4.1;3.0
LUBELSKIE;2 139 726;1 037 052;1 102 674;45.3;13.2;4.6;7.2;3.2;8.0
PODKARPACKIE;2 127 657;1 041 779;1 085 878;53.5;14.3;3.2;8.5;2.3;4.7
@psobczyk
psobczyk / knock_off_fixtures.csv
Created June 19, 2018 06:35
from data scraping to World Cup winner predictions
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
match;group.x;position.x;group.y;position.y
49;A;1;B;2
50;C;1;D;2
51;B;1;A;2
52;D;1;C;2
53;E;1;F;2
54;G;1;H;2
55;F;1;E;2
56;H;1;G;2
57;;49;;50
@psobczyk
psobczyk / scraping_timeanddata.R
Created June 7, 2018 07:44
scraping data from timeanddate.com
library(rvest)
library(dplyr)
# Fetch and parse the holidays index page.
main_page <- read_html('https://www.timeanddate.com/holidays/')
# Select every <a> nested in an <li> below a <div> whose class attribute is
# exactly "row", and keep only the href attribute of each link.
all_countries <- html_attr(
  html_nodes(main_page, xpath = '//div[@class="row"]//li//a'),
  name = 'href'
)
# Starts out as NULL; nothing is stored in it at this point.
holidays <- NULL
@psobczyk
psobczyk / plot_temperature_hdi.R
Created August 24, 2017 15:08
Code for visualizations in blog post http://szychtawdanych.pl/?p=1704
library(rvest)
library(ggplot2)
library(ggthemes)
library(highcharter)
# Download the Wikipedia page with average yearly temperatures by country.
avg_temp <- read_html('https://en.wikipedia.org/wiki/List_of_countries_by_average_yearly_temperature')
# Replace the parsed page with the second table found on it.
# TRUE is written out because T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
avg_temp <- html_table(avg_temp, fill = TRUE)[[2]]
# Download the Wikipedia page with GDP (PPP) per capita by country.
gdp <- read_html('https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(PPP)_per_capita')
# data can be downloaded from http://ghdx.healthdata.org/gbd-2015
# The path is relative to the current working directory.
obesity <- read.csv("IHME_GBD_2015_OBESITY_PREVALENCE_1980_2015_1/IHME_GBD_2015_OBESITY_PREVALENCE_1980_2015_Y2017M06D12.CSV")
obesity %>%
filter(location_name %in% c("United States", "Poland", "Sweden"),
metric == "Percent",
sex == "Both",
age_group_name %in% c('15 to 19', '20 to 24',
'30 to 34', '35 to 39',
@psobczyk
psobczyk / analiza_wynikow_polmaraton_wroclaw_2017.R
Created June 18, 2017 10:23
Analiza wyników 5. PKO Wrocław Półmaratonu
library(XML)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggthemes)
# Table downloaded from http://wyniki.datasport.pl/results2200/ and saved
# locally; the path is relative to the current working directory.
url_address <- 'wyniki.txt'
# Parse the saved page as UTF-8. The argument name useInternalNodes is
# spelled out in full instead of relying on partial matching of
# `useInternal`, and `<-` is used for assignment.
doc.html <- htmlTreeParse(
  url_address,
  useInternalNodes = TRUE,
  encoding = "UTF-8"
)