Ryan McAndrews ramcandrews

## iterate and pop dict.py
# Sample record used to demonstrate dict.pop().
origDict = dict([
    ('First name', 'Ryan'),
    ('Last name', 'M'),
    ('Subject', 'AI'),
    ('task', 'Cleaning Data'),
])

# pop() deletes the key and hands back the value that was stored under it.
removedItem = origDict.pop('Last name')

# Show the dict without the removed key, then the value that was removed.
print(origDict)
print('value = ', removedItem, sep='')

## pytorch_image_folder_with_file_paths.py
import torch
from torchvision import datasets

class ImageFolderWithPaths(datasets.ImageFolder):
    """Custom dataset that includes image file paths. Extends
    torchvision.datasets.ImageFolder
    """

    # override the __getitem__ method. this is the method that dataloader calls
    def __getitem__(self, index):

## Google colab load data from google drive
# Mount Google Drive into the Colab runtime at /content/drive/.
from google.colab import drive
drive.mount('/content/drive/')

# Folder on the mounted drive to read data from.
# NOTE(review): '<your assets>' looks like a placeholder for a real folder
# name -- confirm and replace before use.
data_dir = '/content/drive/My Drive/Colab Notebooks/<your assets>/'

## pytorch chunk for RNN.py
from torch.utils.data import TensorDataset, DataLoader

import torch

# Check for a GPU: torch.cuda.is_available() reports whether PyTorch can use
# CUDA; the answer is stored in train_on_gpu.
train_on_gpu = torch.cuda.is_available()
if not train_on_gpu:
    # Only a warning is printed here; nothing is raised or aborted.
    print('No GPU found. Please use a GPU to train your neural network.')

def batch_data(words, sequence_length, batch_size):

## download series of files.bash
# This is more than 100 years of global weather data (about 110 GB).
wget https://www.ncei.noaa.gov/data/global-hourly/archive/csv/{1901..2020}.tar.gz

## convert GDB file to SQlite
First, create a SpatiaLite database file. Note that the SQLite file will be more than twice as large as the GDB directory.
# Convert the file geodatabase to a SQLite database.
# -f names the OUTPUT driver only, so it must appear once; the input format
# (.gdb) is detected automatically by ogr2ogr.
# Optionally add `-dsco SPATIALITE=YES` to create SpatiaLite metadata tables
# (requires a GDAL build with SpatiaLite support).
ogr2ogr -f SQLite -overwrite db.sqlite tlgdb_2019_a_us_areawater.gdb

## regex-japanese.txt
Regex for matching all common and uncommon Japanese kanji (一 to 龯, i.e. U+4E00 – U+9FAF) ~ The Big Kahuna!
([一-龯])

Regex for matching Hiragana or Katakana
([ぁ-んァ-ン])

Regex for matching any character that is neither Hiragana nor Katakana
([^ぁ-んァ-ン])

Regex for matching Hiragana or Katakana or basic punctuation (、。’)

## email regex
(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])

## gist:07b86e366bc40f4328fe97b279e853fd
 [^@ \t\r\n]+@[^@ \t\r\n]+\.[^@ \t\r\n]+

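 [^@ \t\r\n] matches any single character other than @, space, tab, carriage return, or newline; the trailing + repeats it one or more times. The full pattern is therefore: one such run, an @, another run, a literal dot, and a final run.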

 https://ihateregex.io/expr/email/ (04/17/2022)


## regexJP.py
import re

# Runs of Japanese text: kanji, hiragana, katakana and a few full-width
# punctuation marks. Compiled once instead of being looked up for every line.
# `+` (instead of `*`) returns only non-empty runs, so the empty matches that
# `*` produces at every non-Japanese position never have to be filtered out.
japanese_run = re.compile(r"[一-龯ぁ-んァ-ン！：／・（）ー]+")

# Print every run of Japanese characters found in the file, one per line.
# Undecodable bytes are dropped (errors='ignore') rather than raising.
with open(rootdir + "something in japanese.html", encoding='utf-8', errors='ignore') as reader:
    for line in reader:
        words = japanese_run.findall(line)
        for word in words:
            print(word)
	# Sample record used to demonstrate dict.pop().
	origDict = dict([
		('First name', 'Ryan'),
		('Last name', 'M'),
		('Subject', 'AI'),
		('task', 'Cleaning Data'),
	])

	# pop() deletes the key and hands back the value that was stored under it.
	removedItem = origDict.pop('Last name')

	# Show the dict without the removed key, then the value that was removed.
	print(origDict)
	print('value = ', removedItem, sep='')
	import torch
	from torchvision import datasets

	class ImageFolderWithPaths(datasets.ImageFolder):
	"""Custom dataset that includes image file paths. Extends
	torchvision.datasets.ImageFolder
	"""

	# override the __getitem__ method. this is the method that dataloader calls
	def __getitem__(self, index):
	# Mount Google Drive into the Colab runtime at /content/drive/.
	from google.colab import drive
	drive.mount('/content/drive/')

	# Folder on the mounted drive to read data from.
	# NOTE(review): '<your assets>' looks like a placeholder for a real folder
	# name -- confirm and replace before use.
	data_dir = '/content/drive/My Drive/Colab Notebooks/<your assets>/'
	from torch.utils.data import TensorDataset, DataLoader

	import torch

	# Check for a GPU: torch.cuda.is_available() reports whether PyTorch can use
	# CUDA; the answer is stored in train_on_gpu.
	train_on_gpu = torch.cuda.is_available()
	if not train_on_gpu:
		# Only a warning is printed here; nothing is raised or aborted.
		print('No GPU found. Please use a GPU to train your neural network.')

	def batch_data(words, sequence_length, batch_size):
	# This is more than 100 years of global weather data (about 110 GB).
	wget https://www.ncei.noaa.gov/data/global-hourly/archive/csv/{1901..2020}.tar.gz
	First, create a SpatiaLite database file. Note that the SQLite file will be more than twice as large as the GDB directory.
	# Convert the file geodatabase to a SQLite database.
	# -f names the OUTPUT driver only, so it must appear once; the input format
	# (.gdb) is detected automatically by ogr2ogr.
	# Optionally add `-dsco SPATIALITE=YES` to create SpatiaLite metadata tables
	# (requires a GDAL build with SpatiaLite support).
	ogr2ogr -f SQLite -overwrite db.sqlite tlgdb_2019_a_us_areawater.gdb
	Regex for matching all common and uncommon Japanese kanji (一 to 龯, i.e. U+4E00 – U+9FAF) ~ The Big Kahuna!
	([一-龯])

	Regex for matching Hiragana or Katakana
	([ぁ-んァ-ン])

	Regex for matching any character that is neither Hiragana nor Katakana
	([^ぁ-んァ-ン])

	Regex for matching Hiragana or Katakana or basic punctuation (、。’)
	[^@ \t\r\n]+@[^@ \t\r\n]+\.[^@ \t\r\n]+

	[^@ \t\r\n] matches any single character other than @, space, tab, carriage return, or newline; the trailing + repeats it one or more times.

	https://ihateregex.io/expr/email/ (04/17/2022)
	import re

	# Runs of Japanese text: kanji, hiragana, katakana and a few full-width
	# punctuation marks. Compiled once instead of being looked up for every line.
	# `+` (instead of `*`) returns only non-empty runs, so the empty matches that
	# `*` produces at every non-Japanese position never have to be filtered out.
	japanese_run = re.compile(r"[一-龯ぁ-んァ-ン！：／・（）ー]+")

	# Print every run of Japanese characters found in the file, one per line.
	# Undecodable bytes are dropped (errors='ignore') rather than raising.
	with open(rootdir + "something in japanese.html", encoding='utf-8', errors='ignore') as reader:
		for line in reader:
			words = japanese_run.findall(line)
			for word in words:
				print(word)