patternproject

## code.R
library(maps)

# Read the unemployment table. The file has no header row, so the nine
# column names are supplied explicitly.
unemp <- read.csv(
  "unemployment09.csv",
  header = FALSE,
  stringsAsFactors = FALSE,
  col.names = c(
    "blsid", "stfips", "cofips", "name", "year",
    "pop1", "pop2", "unempraw", "unemppct"
  )
)

# Build a lower-case "state,area" key from `name`:
#   * the trailing two capital letters are looked up in state.abb to obtain
#     the full state name (NA when the abbreviation is not one of the 50
#     entries in state.abb);
#   * a " County" / " city" / " City" / " Parish" suffix followed by
#     ", XX" is stripped from the area name.
# NOTE(review): presumably this key is meant to match region names used by
# the maps package -- confirm against the plotting code.
unemp$mpname <- tolower(
  paste(
    state.name[
      match(
        sub("^.*([A-Z][A-Z])$", "\\1", unemp$name, fixed = FALSE),
        state.abb
      )
    ],
    sub("^(.*) (County|[Cc]ity|Parish), ..$", "\\1", unemp$name),
    sep = ","
  )
)

# Bucket the unemployment rate into integer codes 1..6:
#   1 = [0,2], 2 = (2,4], 3 = (4,6], 4 = (6,8], 5 = (8,10], 6 = above 10.
# The open-ended top bin uses Inf rather than max(unemppct): with the
# observed maximum as the last break, cut() fails when the maximum is <= 10
# (non-unique / unsorted breaks) or when any value is NA. include.lowest
# keeps a rate of exactly 0 in bin 1 instead of turning it into NA.
unemp$ri <- as.numeric(
  cut(unemp$unemppct, c(seq(0, 10, by = 2), Inf), include.lowest = TRUE)
)

## Heatmap.R
# Package setup for the heatmap script.
doInstall <- TRUE  # Change to FALSE if you don't want packages installed.
toInstall <- c("ggplot2", "reshape2", "RColorBrewer")
if (doInstall) {
  # Install only the packages that are not already present, so that
  # re-running the script does not download everything again. local() keeps
  # the helper variable out of the global environment.
  local({
    missing_pkgs <- setdiff(toInstall, rownames(installed.packages()))
    if (length(missing_pkgs) > 0) {
      install.packages(missing_pkgs, repos = "http://cran.us.r-project.org")
    }
  })
}
# Attach every package; invisible() suppresses the list that lapply() would
# otherwise print at top level.
invisible(lapply(toInstall, library, character.only = TRUE))

# Generate a random matrix
# This can be any type of numeric matrix,
# though we often see heatmaps of square correlation matrices.
nRow <- 9   # number of rows
nCol <- 16  # number of columns

## lda.R
# Brian Abelson @brianabelson
# Harmony Institute
# December 5, 2012

# lda is a wrapper for lda.collapsed.gibbs.sampler in the "lda" package
# it fits topic models using latent dirichlet allocation
# it provides arguments for cleaning the input text and tuning the parameters of the model
# it also returns a lot of useful information about the topics/documents in a format that you can easily join back to your original data
# this allows you to easily model outcomes based on the distribution of topics within a collection of texts

## gist:5711584
# load the package and data set "Teams"
# Install Lahman only when it is not already available, so that re-running
# the script does not trigger a fresh download every time.
if (!requireNamespace("Lahman", quietly = TRUE)) {
  install.packages("Lahman")
}
library("Lahman")
# Load the Teams data set into the workspace.
data(Teams)
#
#
# CREATE LEAGUE SUMMARY TABLES
# ============================
#
# select a sub-set of teams from 1901 [the establishment of the American League] forward to 2012

## README.md

      
              1 file
            
          
              1315 forks
            
          
              105 comments
            
          
              2892 stars
            
          
                hofmannsven
                / README.md
            
            
              Last active
              May 3, 2024 15:30
            
              
                Git CLI Cheatsheet
              
          
    Git

Global Settings


Related Setup: https://gist.github.com/hofmannsven/6814278
Related Pro Tips: https://ochronus.com/git-tips-from-the-trenches/
Interactive Beginners Tutorial: http://try.github.io/
Git Cheatsheet by GitHub: https://services.github.com/on-demand/downloads/github-git-cheat-sheet/

Reminder

Press minus + shift + s and return to chop/fold long lines!

  
## poynter-income-choropleth-facets.R
library(rgdal)
library(dplyr)
library(readr)
library(stringi)
library(stringr)
library(tidyr)
library(grid)
library(scales)
library(ggplot2)
library(ggthemes)

## data_.csv

          
            seasonStart
            group
            total
            share
            barTops
            barBases

            
              1
              1992
              Other EU
              22536
              0.024715160408366
              0.024715160408366
              0

            
              2
              1992
              non-EU
              41531
              0.0455469172399649
              0.070262077648331
              0.024715160408366

            
              3
              1992
              Ireland
              63597
              0.0697466301247273
              0.140008707773058
              0.070262077648331

            
              4
              1992
              Rest of UK
              146838
              0.16103677334237
              0.301045481115428
              0.140008707773058

            
              5
              1992
              England
              637327
              0.698954518884572
              1
              0.301045481115428

            
              6
              1993
              Other EU
              27526
              0.0301765248869175
              0.0301765248869175
              0

            
              7
              1993
              non-EU
              62492
              0.0685094599009391
              0.0986859847878566
              0.0301765248869175

            
              8
              1993
              Ireland
              54388
              0.0596251120958247
              0.158311096883681
              0.0986859847878566

            
              9
              1993
              Rest of UK
              124047
              0.135991694494204
              0.294302791377885
              0.158311096883681

## DF string split and concatenate.r
lut_match <- function(LUT, code_field, desc_col = 2, is_NA = "NA", no_code = "N/A"){
require(stringr)
require(splitstackshape)
  # this loops through all unique codes in code_field.  Potentially slow
  code_field[which(code_field == "")] <- NA
  unique_code <- unique(code_field)
  coded <- code_field
  for(i in seq_along(unique_code)){
    if(is.na(unique_code[i])){
      coded[is.na(coded)] <- is_NA # set this to whatever works

## badmail.Rmd
---
title: "Visualizing the Clinton Email Network in R"
author: "hrbrmstr"
date: "`r Sys.Date()`"
output: html_document
---
```{r include=FALSE}
knitr::opts_chunk$set(
  collapse=TRUE,
  comment="#>",

## README.md

      
              1 file
            
          
              1 fork
            
          
              0 comments
            
          
              6 stars
            
          
                timelyportfolio
                / README.md
            
            
              Last active
              December 9, 2017 18:45
            
              
                nesting and summarizing in R and d3.js
              
          
    Built with blockbuilder.org
Nesting and summarizing data is a very common task for data analysis.  I thought it would be nice to view parallel ways of nesting and summarizing with both

R | tidyr and dplyr
JavaScript | d3.js ...v4 for fun

To avoid context switching, I'll take advantage of the R package V8.  If you are an R user, then these d3.js gists might be helpful: d3 nest examples and Mister Nester.
	library(maps)

	# Read the unemployment table. The file has no header row, so the nine
	# column names are supplied explicitly.
	unemp <- read.csv(
	  "unemployment09.csv",
	  header = FALSE,
	  stringsAsFactors = FALSE,
	  col.names = c(
	    "blsid", "stfips", "cofips", "name", "year",
	    "pop1", "pop2", "unempraw", "unemppct"
	  )
	)

	# Build a lower-case "state,area" key from `name`:
	#   * the trailing two capital letters are looked up in state.abb to obtain
	#     the full state name (NA when the abbreviation is not in state.abb);
	#   * a " County" / " city" / " City" / " Parish" suffix followed by
	#     ", XX" is stripped from the area name.
	# The alternation uses a plain "|": a backslash-escaped pipe is not a valid
	# escape sequence in an R string literal and stops the script from parsing.
	unemp$mpname <- tolower(
	  paste(
	    state.name[
	      match(
	        sub("^.*([A-Z][A-Z])$", "\\1", unemp$name, fixed = FALSE),
	        state.abb
	      )
	    ],
	    sub("^(.*) (County|[Cc]ity|Parish), ..$", "\\1", unemp$name),
	    sep = ","
	  )
	)

	# Bucket the unemployment rate into integer codes 1..6:
	#   1 = [0,2], 2 = (2,4], 3 = (4,6], 4 = (6,8], 5 = (8,10], 6 = above 10.
	# The open-ended top bin uses Inf rather than max(unemppct): with the
	# observed maximum as the last break, cut() fails when the maximum is <= 10
	# or when any value is NA. include.lowest keeps a rate of exactly 0 in
	# bin 1 instead of turning it into NA.
	unemp$ri <- as.numeric(
	  cut(unemp$unemppct, c(seq(0, 10, by = 2), Inf), include.lowest = TRUE)
	)
	# Package setup for the heatmap script.
	doInstall <- TRUE # Change to FALSE if you don't want packages installed.
	toInstall <- c("ggplot2", "reshape2", "RColorBrewer")
	if (doInstall) {
	  # Install only the packages that are not already present, so that
	  # re-running the script does not download everything again. local() keeps
	  # the helper variable out of the global environment.
	  local({
	    missing_pkgs <- setdiff(toInstall, rownames(installed.packages()))
	    if (length(missing_pkgs) > 0) {
	      install.packages(missing_pkgs, repos = "http://cran.us.r-project.org")
	    }
	  })
	}
	# Attach every package; invisible() suppresses the list that lapply() would
	# otherwise print at top level.
	invisible(lapply(toInstall, library, character.only = TRUE))

	# Generate a random matrix
	# This can be any type of numeric matrix,
	# though we often see heatmaps of square correlation matrices.
	nRow <- 9   # number of rows
	nCol <- 16  # number of columns
	# Brian Abelson @brianabelson
	# Harmony Institute
	# December 5, 2012

	# lda is a wrapper for lda.collapsed.gibbs.sampler in the "lda" package
	# it fits topic models using latent dirichlet allocation
	# it provides arguments for cleaning the input text and tuning the parameters of the model
	# it also returns a lot of useful information about the topics/documents in a format that you can easily join back to your original data
	# this allows you to easily model outcomes based on the distribution of topics within a collection of texts
	# load the package and data set "Teams"
	# Install Lahman only when it is not already available, so that re-running
	# the script does not trigger a fresh download every time.
	if (!requireNamespace("Lahman", quietly = TRUE)) {
	  install.packages("Lahman")
	}
	library("Lahman")
	# Load the Teams data set into the workspace.
	data(Teams)
	#
	#
	# CREATE LEAGUE SUMMARY TABLES
	# ============================
	#
	# select a sub-set of teams from 1901 [the establishment of the American League] forward to 2012
	library(rgdal)
	library(dplyr)
	library(readr)
	library(stringi)
	library(stringr)
	library(tidyr)
	library(grid)
	library(scales)
	library(ggplot2)
	library(ggthemes)
	seasonStart	group	total	share	barTops	barBases
1	1992	Other EU	22536	0.024715160408366	0.024715160408366	0
2	1992	non-EU	41531	0.0455469172399649	0.070262077648331	0.024715160408366
3	1992	Ireland	63597	0.0697466301247273	0.140008707773058	0.070262077648331
4	1992	Rest of UK	146838	0.16103677334237	0.301045481115428	0.140008707773058
5	1992	England	637327	0.698954518884572	1	0.301045481115428
6	1993	Other EU	27526	0.0301765248869175	0.0301765248869175	0
7	1993	non-EU	62492	0.0685094599009391	0.0986859847878566	0.0301765248869175
8	1993	Ireland	54388	0.0596251120958247	0.158311096883681	0.0986859847878566
9	1993	Rest of UK	124047	0.135991694494204	0.294302791377885	0.158311096883681
	lut_match <- function(LUT, code_field, desc_col = 2, is_NA = "NA", no_code = "N/A"){
	require(stringr)
	require(splitstackshape)
	# this loops through all unique codes in code_field. Potentially slow
	code_field[which(code_field == "")] <- NA
	unique_code <- unique(code_field)
	coded <- code_field
	for(i in seq_along(unique_code)){
	if(is.na(unique_code[i])){
	coded[is.na(coded)] <- is_NA # set this to whatever works
	---
	title: "Visualizing the Clinton Email Network in R"
	author: "hrbrmstr"
	date: "`r Sys.Date()`"
	output: html_document
	---
	```{r include=FALSE}
	knitr::opts_chunk$set(
	collapse=TRUE,
	comment="#>",