Skip to content

Instantly share code, notes, and snippets.

'''
A simple Python script to figure out which 4 attack types will be
super effective against the maximum number of Pokemon types.
In order to run this script you will need:
- numpy
- pandas
- pokemon chart available here: https://github.com/zonination/pokemon-chart
Clone the pokemon chart repo and keep it in the current working directory
from torch.utils.data import Dataset
class CustomDataset(Dataset):
    '''
    A pytorch dataset class for holding data for a text classification task.

    The whole file is read into memory when the object is created; samples
    are then served by index as (text, label) pairs.
    '''

    def __init__(self, filename):
        '''
        Takes as input the name of a file containing sentences with a classification label (comma separated) in each line.
        Stores the text data in a member variable X and labels in y.

        Blank lines are skipped. The label is taken to be whatever follows
        the LAST comma, so the sentence itself may contain commas.
        NOTE(review): labels are parsed as integers, on the assumption that
        they are class ids (the usage example in this file prints y.shape,
        which needs numeric labels) -- confirm against the real data.

        Raises:
            ValueError: if a non-blank line has no comma or its label is
                not an integer.
        '''
        super().__init__()
        texts, labels = [], []
        with open(filename) as handle:
            for line_no, raw_line in enumerate(handle, start=1):
                line = raw_line.strip()
                if not line:
                    continue
                text, sep, label = line.rpartition(',')
                if not sep:
                    raise ValueError(
                        f'{filename}:{line_no}: expected "<text>,<label>"'
                    )
                texts.append(text)
                labels.append(int(label))
        self.X = texts
        self.y = labels

    def __len__(self):
        '''Returns the number of (text, label) samples held in memory.'''
        return len(self.y)

    def __getitem__(self, index):
        '''Returns the (text, label) pair stored at position index.'''
        return self.X[index], self.y[index]
from torch.utils.data import DataLoader
# Create a dataset object
dataset = CustomDataset('path_to/somefile')
# Wrap it in a dataloader that batches 64 samples using 5 worker processes
dataloader = DataLoader(dataset, batch_size=64, num_workers=5)
for X, y in dataloader:
    print(len(X))  # 64
    print(y.shape)  # (64,)
    ### Do something with X and y
    ###
# A file object is an iterator over its own lines, starting at the first one.
# The with-block closes the file even if the loop body raises.
with open('path_to/somefile') as file:
    for line in file:
        print(line)
from torch.utils.data import IterableDataset
class CustomIterableDataset(IterableDataset):
    '''
    An iterable-style pytorch dataset that streams the lines of a text file.

    Only the file name is stored on the object; the contents are read
    lazily each time the dataset is iterated, so they never have to fit
    in memory at once.
    '''

    def __init__(self, filename):
        '''Remembers filename; the file is not opened until iteration.'''
        super().__init__()
        # Store the filename in the object's memory
        self.filename = filename
        # And that's it, we no longer need to store the contents in memory

    def __iter__(self):
        '''
        Yields the raw lines of the file one by one (newline included).

        A fresh file handle is opened on every call, so the dataset can be
        iterated more than once, and the handle is closed when the
        generator finishes.
        '''
        with open(self.filename) as handle:
            yield from handle
# Creating the iterable dataset object
dataset = CustomIterableDataset('path_to/somefile')
# Creating the dataloader
dataloader = DataLoader(dataset, batch_size=64)
for data in dataloader:
    # data is a list containing 64 (= batch_size) consecutive lines of the file
    print(len(data))  # 64
    # We still need to separate the text and labels from each other and
    # preprocess the text
def square(x):
    '''Returns x raised to the power of two.'''
    return x ** 2
# range(5) is an iterable that produces 0, 1, 2, 3, 4 on demand
itr1 = range(5)
for i in itr1:
    print(i)
'''
Prints
0
1
class CustomIterableDatasetv1(IterableDataset):
    '''
    An iterable-style pytorch dataset that streams (text, label) pairs
    from a file holding one comma separated "text,label" sample per line.
    '''

    def __init__(self, filename):
        '''Remembers filename; the file is not opened until iteration.'''
        super().__init__()
        # Store the filename in the object's memory
        self.filename = filename
        # And that's it, we no longer need to store the contents in memory

    def preprocess(self, text):
        '''
        Hook applied to the text part of every sample.

        NOTE(review): the original body of this method is missing from the
        file, so this is an identity placeholder -- replace it with the
        real preprocessing.
        '''
        return text

    def __iter__(self):
        '''
        Yields (preprocessed_text, label) for every non-blank line.

        The label is whatever follows the LAST comma, so the text itself
        may contain commas.
        NOTE(review): labels are parsed as integers, on the assumption that
        they are class ids (the usage example in this file prints y.shape,
        which needs numeric labels) -- confirm against the real data.

        Raises:
            ValueError: if a non-blank line has no comma or its label is
                not an integer.
        '''
        with open(self.filename) as handle:
            for line_no, raw_line in enumerate(handle, start=1):
                line = raw_line.strip()
                if not line:
                    continue
                text, sep, label = line.rpartition(',')
                if not sep:
                    raise ValueError(
                        f'{self.filename}:{line_no}: expected "<text>,<label>"'
                    )
                yield self.preprocess(text), int(label)
# Stream (text, label) samples from the file and batch them 64 at a time
dataset = CustomIterableDatasetv1('path_to/somefile')
dataloader = DataLoader(dataset, batch_size=64)
for X, y in dataloader:
    print(len(X))  # 64
    print(y.shape)  # (64,)
    ### Do something with X and y
    ###