Skip to content

Instantly share code, notes, and snippets.

View HaritzPuerto's full-sized avatar

Haritz Puerto HaritzPuerto

View GitHub Profile
import zstandard as zstd
import json
from tqdm.notebook import tqdm
from datasets import load_dataset
pile_path = "the_pile/train/00.jsonl.zst"
arxiv = []
with zstd.open(pile_path, 'r') as f:
for i, line in enumerate(tqdm(f)):
@HaritzPuerto
HaritzPuerto / confusion_matrix_between_two_pytorch_tensors.py
Created June 6, 2020 06:43 — forked from the-bass/confusion_matrix_between_two_pytorch_tensors.py
Calculating the confusion matrix between two PyTorch tensors (a batch of predictions) - Last tested with PyTorch 0.4.1
import torch
def confusion(prediction, truth):
""" Returns the confusion matrix for the values in the `prediction` and `truth`
tensors, i.e. the amount of positions where the values of `prediction`
and `truth` are
- 1 and 1 (True Positive)
- 1 and 0 (False Positive)
- 0 and 0 (True Negative)
@HaritzPuerto
HaritzPuerto / pyLDAvisException.py
Last active September 10, 2016 15:10
Code to reproduce an exception raised in pyLDAvis.
# First, retrieve the documents: read every file in the test directory as
# UTF-8 text and record it in both the per-set list and the global list.
# NOTE(review): relies on `os` and `codecs` being imported elsewhere in this script.
setDocs1 = []
allDocuments = []
docs_dir = "/home/vagrant/shared/Test/1"
for file_name in os.listdir(docs_dir):
    # The context manager closes each handle as soon as it has been read;
    # previously every opened file was left unclosed.
    with codecs.open(os.path.join(docs_dir, file_name), "r", "utf-8") as doc_file:
        aux = doc_file.read()
    setDocs1.append(aux)
    allDocuments.append(aux)