Nick Doiron mapmeld

mapmeld /
Last active Mar 25, 2020 — forked from W4ngatang/
Script for downloading data of the GLUE benchmark
''' Script for downloading all GLUE data.
Note: for legal reasons, we are unable to host MRPC.
You can either use the version hosted by the SentEval team, which is already tokenized,
or you can download the original data and extract the files from it manually.
On Windows you can run the .msi installer directly; on Mac and Linux, use an external tool such as 'cabextract' (see below for an example).
You should then rename specific extracted files and place them in a folder (see below for an example).
mkdir MRPC
cabextract MSRParaphraseCorpus.msi -d MRPC
mapmeld /
Last active Feb 28, 2020
Nevada delegate issues

Assuming the final delegate counts and viability number are correct


  • Carson City 107: extra delegate, Biden's 2nd
  • Carson City 407: delegate should have been added to Biden, not Klobuchar
  • Clark 1621: needs to add 1 leftover delegate each to Buttigieg and Sanders
  • Clark 1642: unclear, assigned too many delegates instead of a +1 to Sanders
  • Clark 1643: removed Klobuchar's 1 delegate to match expected delegates, even though viable; all had 1 delegate
  • Clark 1645: removed Warren's 1 delegate though viable
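The corrections above all follow from the standard caucus allocation pattern: each viable group gets its proportional share of the precinct's delegates, and leftover delegates go to the groups with the largest remainders. A minimal sketch of that pattern (the floor-then-largest-remainder tie-breaking here is an assumption for illustration, not Nevada's official procedure):

```python
# Largest-remainder delegate allocation: a sketch, not the official NV rule.
def allocate(delegates, counts, viability):
    # drop groups below the viability threshold
    viable = {c: n for c, n in counts.items() if n >= viability}
    total = sum(viable.values())
    # raw proportional share for each viable group
    raw = {c: delegates * n / total for c, n in viable.items()}
    awarded = {c: int(r) for c, r in raw.items()}  # floor of each share
    leftover = delegates - sum(awarded.values())
    # hand leftover delegates to the largest fractional remainders
    for c in sorted(viable, key=lambda c: raw[c] - awarded[c], reverse=True)[:leftover]:
        awarded[c] += 1
    return awarded
```

Running this against a precinct's counts makes it easy to spot an "extra delegate" or a leftover delegate assigned to the wrong candidate.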
mapmeld /
Created Jan 8, 2020
Count number of saved plans
# calculate number of plans, by state
import json

plans = open('districtr_full_export.json', 'r').read().strip().split("\n")
places = {}
for raw in plans:
    plan = json.loads(raw)
    if ("plan" in plan) and ("placeId" in plan["plan"]):
        place = plan["plan"]["placeId"]
        if place in places:
            places[place] += 1
        else:
            places[place] = 1
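The same tally can be written more compactly with `collections.Counter`; a sketch assuming the same one-JSON-object-per-line export format:

```python
import json
from collections import Counter

def count_plans(lines):
    # tally how many saved plans reference each placeId
    places = Counter()
    for raw in lines:
        plan = json.loads(raw)
        if "plan" in plan and "placeId" in plan["plan"]:
            places[plan["plan"]["placeId"]] += 1
    return places
```

`Counter` also gives `most_common()` for free when ranking states by plan count.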
mapmeld /
Last active Jan 5, 2020
first-draft qa
from allennlp.predictors import Predictor
from transformers.tokenization_gpt2 import GPT2Tokenizer
from transformers import pipeline

class HuggingFacePredictor(Predictor):
    def __init__(self) -> None:
        self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
        self.model = pipeline('question-answering')

    def predict(self, passage='', question=''):
        return self.model(question=question, context=passage)

import json
from allennlp.predictors.predictor import Predictor

predictor = Predictor.from_path("")
qas = open("simplified-nq-test.jsonl").read().strip().split("\n")
for qa in qas:
    rep = json.loads(qa)
    best = rep['long_answer_candidates'][0]
    print('AllenNLP: ')
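For context, each line of `simplified-nq-test.jsonl` is one JSON record whose `long_answer_candidates` are token offsets into `document_text`. A hedged sketch of recovering the first candidate's text (field names follow the simplified Natural Questions release; the sample record in the test is invented):

```python
import json

def first_candidate_text(jsonl_line):
    # recover the first long-answer candidate as a span of whitespace tokens
    rep = json.loads(jsonl_line)
    tokens = rep['document_text'].split(' ')
    best = rep['long_answer_candidates'][0]
    return ' '.join(tokens[best['start_token']:best['end_token']])
```

That recovered span is what you would feed to either predictor as the passage.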
mapmeld /
Created Jan 2, 2020
State-specific maps of Native American Communities
from sys import argv
import json

# pip install fiona shapely shapely-geojson
import fiona
from shapely.geometry import shape
from shapely_geojson import dumps

if len(argv) < 2:
    print('usage: "New Mexico" > output.geojson')
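The `shapely_geojson.dumps` helper can be swapped for the stock `shapely.geometry.mapping` plus `json.dumps`, which avoids the extra dependency. A sketch assuming shapely is installed (the point geometry is invented):

```python
import json
from shapely.geometry import shape, mapping

# round-trip a GeoJSON-style geometry dict through shapely
geom = shape({'type': 'Point', 'coordinates': [-106.0, 35.0]})
geojson = json.dumps(mapping(geom))
```

The same `mapping`/`json.dumps` pair works on any shapely geometry read from fiona records.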
mapmeld /
Last active Dec 30, 2019

The number of awesome ML projects is limitless, but this list collects the ideas I grouped together as both awesome and seemingly achievable:

Open-ended Datasets

import pandas as pd

for lang in ['ar', 'en', 'ru', 'ja', 'tr', 'fa']:
    mentionsum = {}
    for doc in range(1, 10):  # ends at 9
        df = pd.read_csv("saudi_arabia_112019_tweets_csv_hashed_" + str(doc) + ".csv")
        rows = df[df['tweet_language'] == lang][['user_mentions']].values.tolist()
        df = None  # clear memory
        for row in rows:
            mentions = row[0].replace('[', '').replace(']', '').replace('\'', '').split(', ')
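The string surgery on `user_mentions` works because the CSV stores the column as the repr of a Python list; `ast.literal_eval` parses that form directly and survives commas or brackets inside values. A sketch, assuming cells really look like `"['a', 'b']"`:

```python
import ast

def parse_mentions(cell):
    # the CSV stores user_mentions as the repr of a Python list
    if not cell or cell == '[]':
        return []
    return [str(m) for m in ast.literal_eval(cell)]
```

Unlike `eval`, `literal_eval` only accepts Python literals, so a malformed cell raises instead of executing anything.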
mapmeld /
Last active Dec 29, 2019
import pandas as pd

dflangsum = None
for doc in range(1, 10):  # ends at 9
    df = pd.read_csv("saudi_arabia_112019_tweets_csv_hashed_" + str(doc) + ".csv")
    langcount = df[df['is_retweet'] == False].groupby(['tweet_language']).count()['tweetid']
    if dflangsum is not None:
        # align on language and treat languages missing from one side as zero
        dflangsum = dflangsum.add(langcount, fill_value=0)
    else:
        dflangsum = langcount
    df = None  # free memory
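An alternative to accumulating a running Series is to collect each file's counts and sum them in one step; `pd.concat` aligns the language index and `.sum` skips the `NaN`s left by languages missing from some files. A sketch on invented in-memory frames:

```python
import pandas as pd

def total_lang_counts(frames):
    # per-file counts of original (non-retweet) tweets by language,
    # summed across files so missing languages count as zero
    counts = [
        df[df['is_retweet'] == False].groupby('tweet_language').count()['tweetid']
        for df in frames
    ]
    return pd.concat(counts, axis=1).sum(axis=1)
```

This keeps only one file's counts in memory at a time if `frames` is a generator of freshly read DataFrames.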
# BASH dependencies
apt-get install python-opencv ffmpeg
pip install keras numpy shap matplotlib pillow
rm ./drive/My\ Drive/mlin/training/*/*.jpg
rm ./drive/My\ Drive/mlin/validation/*/*.jpg
# native imports