Moritz Zajonz enigmoe

## analyse_large_text_file_chunked.R
library(chunked)
library(tidyverse)

# I want to look at the daily page views of Wikipedia articles
# before 2015. Zipped log files are available
# from here: https://dumps.wikimedia.org/other/pagecounts-ez/merged/2012/2012-12/
# Downloading the bz file and unzipping it gives this:

# Relative path to the unzipped page-count log (2012-12-14, per the file name).
my_file <- 'pagecounts-2012-12-14/pagecounts-2012-12-14'

## sparkbar.R
# Takes an ordered vector of numeric values and returns a small bar chart made
# out of Unicode block elements. Works well inside dplyr mutate() or summarise()
# calls on grouped data frames.

sparkbar <- function(values) {
  span <- max(values) - min(values)
  if(span > 0 & !is.na(span)) {
    steps <- round(values / (span /  7))
    blocks <- c('▁', '▂', '▃', '▄', '▅', '▆', '▇', '█')
    paste(sapply(steps - (min(steps) - 1), function(i) blocks[i]), collapse = '')

## populationCurves.R
# Data is the UN's Medium-variant population projections, available at https://population.un.org/wpp/

data %>%
  filter(Sex != "Both" & A3 %in% c("GBR", "RUS", "IND", "CHN", "RWA", "GRC") & Year %in% 2018:2060) %>%
  as.tibble %>%
  mutate(
    group = paste0(Year, Sex), AgeGrp = as.numeric(AgeGrp),
    Location = Location %>% gsub("n Federation","",.)
    ) %>%
  ggplot(aes(AgeGrp, Value, col=Sex, group=group)) +

## federer-ATP-100.R
# Load the packages we’re going to be using:
# Alongside the usual stuff like tidyverse and magrittr, we’ll be using rvest for some web-scraping, jsonlite to parse some JSON, and extrafont to load some nice custom fonts
needs(tidyverse, magrittr, rvest, jsonlite, extrafont)

# Before we go on, two things to note:

# First, on web scraping:
# You should always check the terms of the site you are extracting data from, to make sure scraping (often referred to as `crawling`) is not prohibited. One way to do this is to visit the website’s `robots.txt` page, and ensure that a) there is nothing explicitly stating that crawlers are not permitted, and b) ideally, the site simply states that all user agents are permitted (indicated by a line saying `User-agent: *`). Both of those are the case for our use-case today (see https://www.ultimatetennisstatistics.com/robots.txt).

# And second, about those custom fonts:

## README.md

      
              2 files
            
          
              2 forks
            
          
              2 comments
            
          
              5 stars
            
          
                richpauloo
                / README.md
            
            
              Last active
              December 24, 2019 01:43
            
              
                Cumulative Variable Importance for Random Forest Models
              
          
    Cumulative Variable Importance for Random Forest (RF) 🌲🌳 Models

Motivation

What does an interpretable RF visualization look like? Out-of-the-box 📦 RF implementations in R and Python compute variable importance over all trees, but how do we get there?
In other words, what would a cumulative variable importance for a RF look like?
Approach


## twitterbot.py
import tweepy
import random
import pandas as pd
import time

# Get your codes from https://apps.twitter.com/ and replace each
# 'your_code_here' placeholder below with the matching value.
consumer_key = 'your_code_here'
consumer_secret = 'your_code_here'
access_token = 'your_code_here'
access_token_secret = 'your_code_here'

## animate_labels.R
library(ggplot2) # requires 2.3.0
library(purrr)

make_plot <- function(frame) {
  ggplot(mtcars, aes(mpg, hp, color = factor(cyl))) +
    geom_point() +
    scale_color_brewer(
      palette = 2, type = "qual", name = "cyl",
      guide = guide_legend(
        direction = "horizontal",

## life_expect_world_map_gif.md

      
              1 file
            
          
              0 forks
            
          
              5 comments
            
          
              9 stars
            
          
                rafapereirabr
                / life_expect_world_map_gif.md
            
            
              Last active
              November 15, 2020 06:31
            
              
                creating an animated (gif) world map of life expectancy using ggplot2
              
          
    This gist shows how to create an animated world map of life expectancy using R. The data comes from the UN World Population Prospects, 2015 Revision, and it includes life expectancy data from 1950 until 2015 and projected data up to 2100. Thanks to Topi Tjukanov, who reminded me of the UN DESA data portal, where you can find this dataset and many others.
The idea is to use open data to create a GIF, much like the ones created by Aron Strandberg but his maps look much nicer. The output of this script is a map like this one:
[![enter image description here][1]][1]
Now diving into the code. First, let's load the necessary libraries and get the data. As of this writing, the current version of gganimate has a bug that messes up the aesthetics of the .gif file. As a temporary solution, I've installed an older version of the package, [as recommended by the author of gganimate, David Ro

  
## gist:0710d51c5ba51e0aeb2d905f1e3a38f3
# Use xinput to find your keyboards.

$ xinput
⎡ Virtual core pointer                    	id=2	[master pointer  (3)]
⎜   ↳ Virtual core XTEST pointer              	id=4	[slave  pointer  (2)]
⎜   ↳ SynPS/2 Synaptics TouchPad              	id=11	[slave  pointer  (2)]
⎜   ↳ TPPS/2 IBM TrackPoint                   	id=12	[slave  pointer  (2)]
⎜   ↳ Logitech Performance MX                 	id=14	[slave  pointer  (2)]
⎜   ↳ E-Signal USB Gaming Keyboard            	id=16	[slave  pointer  (2)]
⎣ Virtual core keyboard                   	id=3	[master keyboard (2)]

## disease_outbreaks_camelot.ipynb

      
              1 file
            
          
              7 forks
            
          
              2 comments
            
          
              18 stars
            
          
                vinayak-mehta
                / disease_outbreaks_camelot.ipynb
            
            
              Last active
              November 5, 2023 18:54
            
              
                A jupyter notebook showing how Camelot can be used to extract tables from PDFs scraped from the IDSP website.
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	library(chunked)
	library(tidyverse)

	# I want to look at the daily page views of Wikipedia articles
	# before 2015... I can get zipped log files
	# from here: https://dumps.wikimedia.org/other/pagecounts-ez/merged/2012/2012-12/
	# I get bz file, unzip to get this:

	my_file <- 'pagecounts-2012-12-14/pagecounts-2012-12-14'
	# Takes an ordered vector of numeric values and returns a small bar chart made
	# out of Unicode block elements. Works well inside dplyr mutate() or summarise()
	# calls on grouped data frames.

	sparkbar <- function(values) {
	span <- max(values) - min(values)
	if(span > 0 & !is.na(span)) {
	steps <- round(values / (span / 7))
	blocks <- c('▁', '▂', '▃', '▄', '▅', '▆', '▇', '█')
	paste(sapply(steps - (min(steps) - 1), function(i) blocks[i]), collapse = '')
	# Data is the UN's Medium-variant population projections, available at https://population.un.org/wpp/

	data %>%
	filter(Sex != "Both" & A3 %in% c("GBR", "RUS", "IND", "CHN", "RWA", "GRC") & Year %in% 2018:2060) %>%
	as.tibble %>%
	mutate(
	group = paste0(Year, Sex), AgeGrp = as.numeric(AgeGrp),
	Location = Location %>% gsub("n Federation","",.)
	) %>%
	ggplot(aes(AgeGrp, Value, col=Sex, group=group)) +
	# Load the packages we’re going to be using:
	# Alongside the usual stuff like tidyverse and magrittr, we’ll be using rvest for some web-scraping, jsonlite to parse some JSON, and extrafont to load some nice custom fonts
	needs(tidyverse, magrittr, rvest, jsonlite, extrafont)

	# Before we go on, two things to note:

	# First, on web scraping:
	# You should always check the terms of the site you are extracting data from, to make sure scraping (often referred to as `crawling`) is not prohibited. One way to do this is to visit the website’s `robots.txt` page, and ensure that a) there is nothing explicitly stating that crawlers are not permitted, and b) ideally, the site simply states that all user agents are permitted (indicated by a line saying `User-agent: *`). Both of those are the case for our use-case today (see https://www.ultimatetennisstatistics.com/robots.txt).

	# And second, about those custom fonts:
	import tweepy
	import random
	import pandas as pd
	import time

	#get your codes from https://apps.twitter.com/
	consumer_key = 'your_code_here'
	consumer_secret = 'your_code_here'
	access_token = 'your_code_here'
	access_token_secret = 'your_code_here'
	library(ggplot2) # requires 2.3.0
	library(purrr)

	make_plot <- function(frame) {
	ggplot(mtcars, aes(mpg, hp, color = factor(cyl))) +
	geom_point() +
	scale_color_brewer(
	palette = 2, type = "qual", name = "cyl",
	guide = guide_legend(
	direction = "horizontal",
	# Use xinput to find your keyboards.

	$ xinput
	⎡ Virtual core pointer id=2 [master pointer (3)]
	⎜ ↳ Virtual core XTEST pointer id=4 [slave pointer (2)]
	⎜ ↳ SynPS/2 Synaptics TouchPad id=11 [slave pointer (2)]
	⎜ ↳ TPPS/2 IBM TrackPoint id=12 [slave pointer (2)]
	⎜ ↳ Logitech Performance MX id=14 [slave pointer (2)]
	⎜ ↳ E-Signal USB Gaming Keyboard id=16 [slave pointer (2)]
	⎣ Virtual core keyboard id=3 [master keyboard (2)]