Allen Nie windweller

## GCloud_PyTorch_cuda_startup.sh
# Download the latest Miniconda3 installer for 64-bit Linux.
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh

# CUDA installation
# Open cuda.sh in an editor.
# NOTE(review): presumably the commands below are meant to be saved into cuda.sh - confirm.
vim cuda.sh

# Fetch NVIDIA's apt pin file for Ubuntu 16.04 and install it under apt's preferences directory.
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-ubuntu1604.pin
sudo mv cuda-ubuntu1604.pin /etc/apt/preferences.d/cuda-repository-pin-600
# Download the CUDA 10.1.243 local-repository package for Ubuntu 16.04 and install it with dpkg.
wget https://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda-repo-ubuntu1604-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb
sudo dpkg -i cuda-repo-ubuntu1604-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb
# Register the public key shipped inside the local repository with apt.
sudo apt-key add /var/cuda-repo-10-1-local-10.1.243-418.87.00/7fa2af80.pub

## geo_checker_in_text.py
import urllib.request
import csv
from collections import defaultdict
import nltk

class GeoExtractor:
    """Hold three lookup tables, all of which start out empty.

    The tables are named for zipcode, state-name and county-name lookups.
    """

    def __init__(self):
        """Create the empty lookup tables."""
        self.zipcode_to_state = dict()
        self.statenames_to_state = dict()
        # A missing key yields a fresh empty set on first access.
        self.countynames_to_states = defaultdict(set)

## split_image_4_quarters.py
from PIL import Image
import os
import glob
import numpy as np


def crop(im, height, width):
    """Read the image size and compute how many rows of `height` fit in it.

    Args:
        im: object exposing a ``size`` attribute of ``(width, height)``.
        height: height of one crop tile.
        width: width of one crop tile (not used by the statements visible here).

    NOTE(review): only the opening statements of this function are visible in
    this excerpt; as written it computes ``rows`` and returns None.
    """
    # im = Image.open(infile)
    imgwidth, imgheight = im.size
    # `np.int` was only an alias of the builtin `int`; the alias was removed
    # in NumPy 1.24, so call the builtin directly.
    rows = int(imgheight / height)

## index_to_array.py
###########################################
# serialization of indexes to byte arrays
###########################################

def serialize_index(index):
    """Serialize `index` into a numpy uint8 array.

    The index is written through an in-memory faiss writer and the writer's
    byte buffer is converted to an array.
    """
    buffer_writer = faiss.VectorIOWriter()
    faiss.write_index(index, buffer_writer)
    serialized = faiss.vector_to_array(buffer_writer.data)
    return serialized

## latex_to_csv.py
"""
A utility for converting LaTeX to CSV.
"""
import csv
import re

def to_csv(latex_text, file_name):
    """We learn to parse the text.
    We assume the very first line tells the format of the table!


## AmortizedRSA.py
from collections import defaultdict

utterances = ["blue", "green", "square", "circle"]
objects = ['blue square', 'blue circle', 'green square']

def meaning(utt, obj):
  """Return 1 when `utt` is contained in `obj`, otherwise 0."""
  return 1 if utt in obj else 0

def normalize(space):
  """Compute the sum of the values of `space`.

  NOTE(review): only the first statement of this function is visible in this
  excerpt; what is done with the sum afterwards cannot be told from here.
  """
  # `space` must expose .values(); the sum is taken over all of them.
  denom = sum(space.values())

## preventable_admission_categories_icd9_v5_2015.py
# PQI V5.0 does not really have lower-level codes...
# PQI 05 and PQI 15 are actually different (contain different codes), but must use AGE as a differentiator
codes = {
  "PQI 01 Diabetes Short-term Complications Admission Rate": {
    "Diabetes short-term complications diagnosis codes": ["25010","25022","25011","25023","25012","25030","25013","25031","25020","25032","25021","25033"]
  },
  "PQI 02 Perforated Appendix Admission Rate": {
    "Perforations or abscesses of appendix diagnosis codes": ["5400", "5401"],
    "Appendicitis diagnosis codes": ["5400", "5401", "5409", "541"]
  },

## preventable_admission_categories_icd10_v2018.py
# PQI 05 and PQI 15 are age-based: PQI 15 covers ages 18 through 39 years; PQI 05 covers ages 40 years and older.

codes = {
  "PQI 01 Diabetes Short-term Complications Admission Rate": {
  "Diabetes short-term complications diagnosis codes: (ACDIASD)":
  ["E1010", "E1011", "E10641", "E1100", "E1101", "E11641", "E1110", "E1111"]
  },
  "PQI 02 Perforated Appendix Admission Rate": {
    "Perforations or abscesses of appendix diagnosis codes: (ACSAPPD)":
    ["K3580", "K3589", "K37"],

## pytorch-regression.py
# Traditional Python regression packages like sklearn and statsmodels can't handle datasets with more than 1M examples
# or when the feature space
# Currently this method uses mini-batch gradient optimization method (Adam)

# We also have a NullLogit model that has only an intercept (used to compute the pseudo R-squared for the Logit model)

import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
import torch.nn as nn
from scipy.spatial.distance import cosine

## report-bleu.py
import argparse

"""
python report_bleu.py --target_file /mnt/fs5/anie/OpenNMT-py/data/twitter/twitter-seq2seq-2019mar3-clean-tgt-test.txt \
--generated_file /mnt/fs5/anie/OpenNMT-py/save/twitter_transformer_clean_char/twitter_test_mar4_step50000_greedy_word_level.txt \
--base_dir /home/anie/OpenNMT-py
"""

# Build the command-line interface; --target_file is an optional string argument.
# NOTE(review): the description reads 'Clean Seq2Seq data' although this excerpt
# is headed report-bleu.py - confirm the text is intended.
parser = argparse.ArgumentParser(description='Clean Seq2Seq data')
parser.add_argument('--target_file', type=str, help="target evaluation file")
	# Download the latest Miniconda3 installer for 64-bit Linux.
	wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh

	# CUDA installation
	# Open cuda.sh in an editor.
	# NOTE(review): presumably the commands below are meant to be saved into cuda.sh - confirm.
	vim cuda.sh

	# Fetch NVIDIA's apt pin file for Ubuntu 16.04 and install it under apt's preferences directory.
	wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-ubuntu1604.pin
	sudo mv cuda-ubuntu1604.pin /etc/apt/preferences.d/cuda-repository-pin-600
	# Download the CUDA 10.1.243 local-repository package for Ubuntu 16.04 and install it with dpkg.
	wget https://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda-repo-ubuntu1604-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb
	sudo dpkg -i cuda-repo-ubuntu1604-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb
	# Register the public key shipped inside the local repository with apt.
	sudo apt-key add /var/cuda-repo-10-1-local-10.1.243-418.87.00/7fa2af80.pub
	import urllib.request
	import csv
	from collections import defaultdict
	import nltk

	class GeoExtractor(object):
	    """Hold three lookup tables, all of which start out empty.

	    The tables are named for zipcode, state-name and county-name lookups.
	    """

	    def __init__(self):
	        """Create the empty lookup tables."""
	        self.zipcode_to_state = {}
	        self.statenames_to_state = {}
	        # A missing key yields a fresh empty set on first access.
	        self.countynames_to_states = defaultdict(set)
	from PIL import Image
	import os
	import glob
	import numpy as np


	def crop(im, height, width):
	    """Read the image size and compute how many rows of `height` fit in it.

	    Args:
	        im: object exposing a ``size`` attribute of ``(width, height)``.
	        height: height of one crop tile.
	        width: width of one crop tile (not used by the statements visible here).

	    NOTE(review): only the opening statements of this function are visible
	    in this excerpt; as written it computes ``rows`` and returns None.
	    """
	    # im = Image.open(infile)
	    imgwidth, imgheight = im.size
	    # `np.int` was only an alias of the builtin `int`; the alias was removed
	    # in NumPy 1.24, so call the builtin directly.
	    rows = int(imgheight / height)
	###########################################
	# serialization of indexes to byte arrays
	###########################################

	def serialize_index(index):
	    """Serialize `index` into a numpy uint8 array.

	    The index is written through an in-memory faiss writer and the
	    writer's byte buffer is converted to an array.
	    """
	    writer = faiss.VectorIOWriter()
	    faiss.write_index(index, writer)
	    return faiss.vector_to_array(writer.data)
	"""
	A utility for converting LaTeX to CSV.
	"""
	import csv
	import re

	def to_csv(latex_text, file_name):
	"""We learn to parse the text.
	We assume the very first line tells the format of the table!
	from collections import defaultdict

	utterances = ["blue", "green", "square", "circle"]
	objects = ['blue square', 'blue circle', 'green square']

	def meaning(utt, obj):
	    """Return 1 when `utt` is contained in `obj`, otherwise 0."""
	    return int(utt in obj)

	def normalize(space):
	    """Compute the sum of the values of `space`.

	    NOTE(review): only the first statement of this function is visible in
	    this excerpt; what is done with the sum afterwards cannot be told
	    from here. As written the function returns None.
	    """
	    # `space` must expose .values(); the sum is taken over all of them.
	    denom = sum(space.values())
	# PQI V5.0 does not really have lower-level codes...
	# PQI 05 and PQI 15 are actually different (contain different codes), but must use AGE as a differentiator
	codes = {
	"PQI 01 Diabetes Short-term Complications Admission Rate": {
	"Diabetes short-term complications diagnosis codes": ["25010","25022","25011","25023","25012","25030","25013","25031","25020","25032","25021","25033"]
	},
	"PQI 02 Perforated Appendix Admission Rate": {
	"Perforations or abscesses of appendix diagnosis codes": ["5400", "5401"],
	"Appendicitis diagnosis codes": ["5400", "5401", "5409", "541"]
	},
	# PQI 05 and PQI 15 are age-based: PQI 15 covers ages 18 through 39 years; PQI 05 covers ages 40 years and older.

	codes = {
	"PQI 01 Diabetes Short-term Complications Admission Rate": {
	"Diabetes short-term complications diagnosis codes: (ACDIASD)":
	["E1010", "E1011", "E10641", "E1100", "E1101", "E11641", "E1110", "E1111"]
	},
	"PQI 02 Perforated Appendix Admission Rate": {
	"Perforations or abscesses of appendix diagnosis codes: (ACSAPPD)":
	["K3580", "K3589", "K37"],
	# Traditional Python regression packages like sklearn and statsmodels can't handle datasets with more than 1M examples
	# or when the feature space
	# Currently this method uses mini-batch gradient optimization method (Adam)

	# We also have a NullLogit model that has only an intercept (used to compute the pseudo R-squared for the Logit model)

	import torch
	from torch.utils.data import TensorDataset, DataLoader, RandomSampler
	import torch.nn as nn
	from scipy.spatial.distance import cosine
	import argparse

	"""
	python report_bleu.py --target_file /mnt/fs5/anie/OpenNMT-py/data/twitter/twitter-seq2seq-2019mar3-clean-tgt-test.txt \
	--generated_file /mnt/fs5/anie/OpenNMT-py/save/twitter_transformer_clean_char/twitter_test_mar4_step50000_greedy_word_level.txt \
	--base_dir /home/anie/OpenNMT-py
	"""

	# Build the command-line interface; --target_file is an optional string argument.
	# NOTE(review): the description reads 'Clean Seq2Seq data' although the usage
	# string above invokes report_bleu.py - confirm the text is intended.
	parser = argparse.ArgumentParser(description='Clean Seq2Seq data')
	parser.add_argument('--target_file', type=str, help="target evaluation file")