@vapniks
vapniks / fixef_of_time_invariant_vars.R
Created August 24, 2023 14:47
Extract fixed effects and match them with the values of time-invariant variables, for testing time-invariant effects.
## Function to extract fixed effects from plm or fixest model, and match with values of time invariant variables.
## Arguments:
## model = a plm or fixest fixed effects model object
## factors = either a data.frame/pdata.frame/data.table object containing time-invariant variables and a panel index variable,
## or, if the model arg is a plm object, a vector of names of time-invariant variables
## to be extracted from the model object (along with the panel index).
## idvar = the name of the panel index variable; by default this is the name of the model index if model is a plm
## object, or the name of the factors index if factors is a pdata.frame. Otherwise, if model is a fixest object,
## idvar must name a variable in factors (a data.frame object).
## For an example of usage, see the code after this function.
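The usage example referred to above is not included in the truncated preview. As a rough illustration, calling such a function with a plm model might look like the sketch below; the function name fixef_time_invariant and the use of plm's Produc data are assumptions for illustration, not taken from the gist.
## Minimal usage sketch: fixef_time_invariant() is an assumed name for the
## function defined in this gist, and Produc is just a convenient example panel.
library(plm)
data("Produc", package = "plm")
## Fit a fixed-effects (within) model on the Produc panel.
mod <- plm(log(gsp) ~ log(pcap) + log(emp) + unemp,
           data = Produc, index = c("state", "year"), model = "within")
## A time-invariant state-level variable: the region each state belongs to.
regions <- unique(Produc[, c("state", "region")])
## Match each state's estimated fixed effect with its region; the extracted
## effects can then be regressed on the time-invariant variables.
fe <- fixef_time_invariant(mod, factors = regions, idvar = "state")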
@vapniks
vapniks / reshape_world-bank_data.do
Created April 11, 2018 04:14
This file reshapes World Bank data that has been downloaded into Stata using the wbopendata command.
/* This file reshapes World Bank data that has been downloaded into Stata using the wbopendata command. */
/* It reshapes the data from wide to long format so that it can be used in panel data models. */
/* First read the file into Stata, e.g: */
/* (replace this command if necessary) */
/* wbopendata, topics(2 - Aid Effectiveness) clear */
use
/* and then issue the following commands: */
drop indicatorcode /* replace this with the name of the appropriate variable */
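For readers who would rather do the same wide-to-long reshape in R, a hedged sketch using base R's reshape() follows; the file name and the yr#### / countrycode / indicatorname column names are assumptions about the wbopendata layout, not taken from the gist.
## Hedged R analogue of the Stata reshape above (not the gist's own code).
## Assumes a wbopendata-style wide extract with one column per year
## (yr1990, yr1991, ...); file and column names are illustrative assumptions.
wide <- read.csv("world_bank_extract.csv")
yearcols <- grep("^yr[0-9]{4}$", names(wide), value = TRUE)
long <- reshape(wide,
                varying   = yearcols,
                v.names   = "value",
                times     = as.integer(sub("^yr", "", yearcols)),
                timevar   = "year",
                idvar     = c("countrycode", "indicatorname"),
                direction = "long")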
@vapniks
vapniks / csv_load_db.pl
Created April 11, 2018 03:59
Perl script for loading data from .csv files into a PostgreSQL database
#!/usr/bin/perl
use DBI;
# Example invocation:
#   csv_load_db.pl Changed.csv pat_regan_combined create_table.sql
$host = "localhost";     # database host
$port = 5433;            # PostgreSQL port
$db   = "geopolitical";  # database name
@vapniks
vapniks / reshape_freedom-house_data.R
Created April 11, 2018 03:54
convert wide-form Freedom House Good Governance data to long-form
# convert wide-form Freedom House Good Governance data to long-form
# Read the data
wide <- read.csv("freedom_house_good_governance.csv")
# Get the columns corresponding to each wide-form variable that will be converted to long form.
PRvars <- names(wide)[(1:40)*3-1]
CLvars <- names(wide)[(1:40)*3]
Statusvars <- names(wide)[(1:40)*3+1]
# Get the times associated with the wide-form variables
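The preview cuts off before the reshape itself; a hedged sketch of how it could be completed with base R's reshape() follows. The 40-year range and the assumption that the first column of wide identifies the country are guesses about the data layout, not taken from the gist.
## Minimal sketch, assuming 40 yearly triplets of PR/CL/Status columns and
## that the first column of `wide` identifies the country; the year range
## 1972:2011 is an assumption.
years <- 1972:2011
long <- reshape(wide,
                varying   = list(PRvars, CLvars, Statusvars),
                v.names   = c("PR", "CL", "Status"),
                times     = years,
                timevar   = "year",
                idvar     = names(wide)[1],
                direction = "long")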
@vapniks
vapniks / check_csv.awk
Created April 11, 2018 03:51
A simple awk script to check a .csv file
#!/usr/bin/awk -f
# Simple sanity checks for a .csv file: each rule prints a message when the
# corresponding field does not match the expected pattern.
BEGIN {FS=","}
$1 !~ /^[0-9]+$/ {print "Line "NR": Field 1 invalid"}              # digits only
$2 !~ /^"?[a-zA-Z][^,]+"?$/ {print "Line "NR": Field 2 invalid"}   # optionally quoted text starting with a letter
$3 !~ /^[0-9.]+$/ {print "Line "NR": Field 3 invalid"}             # digits and dots only
$4 !~ /[0-9]+/ {print "Line "NR": Field 4 invalid"}                # must contain at least one digit
$5 !~ /[0-9](\.[0-9])? - [0-9](\.[0-9])?/ {print "Line "NR": Field 5 invalid"}  # numeric range such as "1.5 - 2.0"
/^$/ { print "Line "NR" is empty" }
@vapniks
vapniks / geocode_examples.R
Last active April 11, 2018 04:01
Some examples of how to use geocode data in different formats with R.
# Some examples of how to use geocode data in different formats.
## load libraries
library("magrittr")
library("eurostat")
library("eurostat")
library("ggplot2")
library("countrycode")
library("rgdal")
library("colorbrewer")
## plotting NUTS shape files
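The preview ends at this heading. A hedged sketch of one way to plot NUTS boundaries with rgdal and ggplot2 follows; the shapefile layer name is an assumption (NUTS shapefiles can be downloaded from the Eurostat GISCO site), not taken from the gist.
## Minimal sketch (not the gist's code): read a downloaded NUTS shapefile and
## plot the polygons. The layer name is an illustrative assumption.
nuts <- rgdal::readOGR(dsn = ".", layer = "NUTS_RG_60M_2016_4326")
## Convert the SpatialPolygonsDataFrame into a data.frame for ggplot2.
nuts_df <- ggplot2::fortify(nuts)
ggplot2::ggplot(nuts_df, ggplot2::aes(x = long, y = lat, group = group)) +
  ggplot2::geom_polygon(fill = "grey90", colour = "grey40") +
  ggplot2::coord_quickmap()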
@vapniks
vapniks / eurovision_voting_analysis.R
Created April 11, 2018 03:45
A simple cluster analysis of Eurovision song contest voting patterns
## Analysis of voting patterns for 2016 Eurovision song contest
library(clusterfly)
library(igraph)
library(magrittr)
## load the voting data
votedata <- read.csv("eurovision-votes_2016.csv")
## remove total votes column
votedata$total_votes <- NULL
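The preview stops after the data is loaded. A hedged sketch of one way the cluster analysis might continue (hierarchical clustering of the voting matrix) follows; the assumption that the remaining columns are numeric points given to each receiving country, with the first column naming the voting country, is not taken from the gist.
## Minimal sketch, not the gist's own analysis. Assumes `votedata` now has one
## row per voting country, a first column naming that country, and numeric
## columns of points given to each receiving country.
votemat <- as.matrix(votedata[, sapply(votedata, is.numeric)])
rownames(votemat) <- as.character(votedata[[1]])
## Countries with similar voting patterns end up in the same branches.
hc <- hclust(dist(votemat), method = "ward.D2")
plot(hc, main = "Eurovision 2016: clustering of voting patterns")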
@vapniks
vapniks / extract_law-enforcement_data.el
Last active April 11, 2018 04:03
Elisp script to extract law enforcement data from pdf files downloaded from the FBI website.
;; This file contains an example of how to extract data from pdf files using `extract-text-from-files'
;; It extracts state-by-state data on total number of law enforcement employees from pdf files
;; downloaded from the FBI website.
;; There is a lot more data available in these files, but I only need total employees for now.
;; PDF files must first be downloaded from these URLs:
;; https://www.fbi.gov/about-us/cjis/ucr/crime-in-the-u.s/1995/95sec6.pdf
;; https://www.fbi.gov/about-us/cjis/ucr/crime-in-the-u.s/1996/96sec6.pdf
;; https://www.fbi.gov/about-us/cjis/ucr/crime-in-the-u.s/1997/97sec6.pdf
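Since the elisp preview is cut off, here is a hedged sketch of an alternative route in R using the pdftools package (a different technique from the gist's extract-text-from-files); the file name and the pattern for spotting state totals are illustrative assumptions.
## Hedged sketch of an alternative approach (not the gist's elisp method):
## extract the raw text of one of the FBI PDFs with pdftools, then look for
## lines ending in a number, which is roughly where the state totals sit.
library(pdftools)
pages <- pdf_text("95sec6.pdf")
lines <- unlist(strsplit(pages, "\n"))
candidates <- grep("[A-Za-z].*[0-9][0-9,]*[[:space:]]*$", lines, value = TRUE)
head(candidates)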
@vapniks
vapniks / reshape_UK_crime_data.awk
Created April 11, 2018 03:38
An awk script to reshape UK crime data .csv files. It is at least 10 times faster than R or Python (R ran out of memory when I tried).
#!/usr/bin/awk -f
# Create variables containing counts of the number of different incident types within each area,
# where an area is defined as a unique longitude/latitude pair to the nearest 2 decimal places
BEGIN {
  # FPAT and PROCINFO["sorted_in"] are gawk extensions, so run this with gawk.
  # Define csv fields: either quoted strings or unquoted text without commas.
  FPAT = "\"[^\"]*\"|[^\",]*";
  # Iterate over arrays in ascending string order of their indices.
  PROCINFO["sorted_in"] = "@ind_str_asc";
}