Skip to content

Instantly share code, notes, and snippets.

@gautierdag
gautierdag / model_with_noam.py
Last active September 25, 2023 02:27
pytorch-lightning Transformer (noam) lr policy
import torch
import pytorch_lightning as pl
class MyTransformer(pl.LightningModule):
def __init__(
self,
learning_rate=0.001,
warmup=4000,
):
self.learning_rate = learning_rate
@j-adamczyk
j-adamczyk / kmeans_with_faiss.py
Last active May 31, 2022 01:03
K-Means clustering with the faiss library
import faiss
import numpy as np
class FaissKMeans:
def __init__(self, n_clusters=8, n_init=10, max_iter=300):
self.n_clusters = n_clusters
self.n_init = n_init
self.max_iter = max_iter
self.kmeans = None
@subhadarship
subhadarship / collate_fn_example.py
Created February 27, 2020 04:19
collate_fn for PyTorch DataLoader
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
class MyDataset(Dataset):
def __init__(self):
x = np.random.rand(1000, 3) # 1000 3-dim samples
self.x = [x[i].tolist() for i in range(1000)]
y = np.random.randint(low=0, high=2, size=(1000,))
@SuperShinyEyes
SuperShinyEyes / f1_score.py
Created October 15, 2019 10:16
F1 score in PyTorch
def f1_loss(y_true:torch.Tensor, y_pred:torch.Tensor, is_training=False) -> torch.Tensor:
'''Calculate F1 score. Can work with gpu tensors
The original implementation was written by Michal Haltuf on Kaggle.
Returns
-------
torch.Tensor
`ndim` == 1. 0 <= val <= 1
@tvst
tvst / SessionState.py
Last active April 14, 2024 20:24
DO NOT USE!!! Try st.session_state instead.
"""Hack to add per-session state to Streamlit.
Usage
-----
>>> import SessionState
>>>
>>> session_state = SessionState.get(user_name='', favorite_color='black')
>>> session_state.user_name
''
@nlothian
nlothian / fasttext_to_tensorboard.py
Created November 23, 2017 23:28
Convert a FastText model to a form suitable for viewing in Tensorboard
from tensorflow.contrib.tensorboard.plugins import projector
import tensorflow as tf
import numpy as np
import os
meta_file = "g2x_metadata.tsv"
output_path = "./projections"
# read embedding file into list and get the size
with open('./ft_model.vec', 'r') as embedding_file:
@lorey
lorey / markdown_to_text.py
Last active April 8, 2024 03:25
Markdown to Plaintext in Python
from bs4 import BeautifulSoup
from markdown import markdown
import re
def markdown_to_text(markdown_string):
""" Converts a markdown string to plaintext """
# md -> html -> text since BeautifulSoup can extract text cleanly
html = markdown(markdown_string)
@napoler
napoler / 4GB-4Core-VPS-my.cnf
Last active January 22, 2016 19:54 — forked from lukebranch/4GB-4Core-VPS-my.cnf
/etc/my.cnf config for 4GB RAM 4 Core VPS - Wordpress
# Generated by Percona Configuration Wizard (http://tools.percona.com/) version REL5-20120208
# Configuration name server-1 generated for Luke at 2014-10-22 09:47:40
[mysql]
# CLIENT #
port = 3306
socket = /var/lib/mysql/mysql.sock
[mysqld]
@mcobzarenco
mcobzarenco / wikidata.py
Created July 7, 2015 17:29
Parse wikidata dump in Python
#!/usr/bin/env python
from __future__ import print_function, division
import argparse
import json
import sys
from gzip import GzipFile
def concat_claims(claims):
for rel_id, rel_claims in claims.iteritems():
@jayswan
jayswan / gist:a8d9920ef74516a02fe1
Last active March 11, 2022 15:33
Elasticsearch Python bulk index API example
>>> import itertools
>>> import string
>>> from elasticsearch import Elasticsearch,helpers
es = Elasticsearch()
>>> # k is a generator expression that produces
... # a series of dictionaries containing test data.
... # The test data are just letter permutations
... # created with itertools.permutations.
... #
... # We then reference k as the iterator that's