Skip to content

Instantly share code, notes, and snippets.

View konverner's full-sized avatar

Konstantin Verner konverner

  • Bordeaux, France
View GitHub Profile
@konverner
konverner / preprocessing.py
Last active November 11, 2022 11:57
text preprocessing
from gensim.parsing.preprocessing import preprocess_string
from gensim.utils import any2unicode
def preprocess(text: str) -> list:
    """Normalize *text* and run gensim's ``preprocess_string`` on it.

    The input is converted with ``any2unicode`` and lowercased before being
    handed to ``preprocess_string`` (called with its default filters); that
    call's result is returned as-is.
    """
    lowered = any2unicode(text).lower()
    return preprocess_string(lowered)
@konverner
konverner / mode.py
Last active November 17, 2022 22:29
one-line mode (most frequent value) in Python
arr = [2, 2, 1, 1, 1, -1, -1, 0]
# Mode of ``arr``: the distinct value whose occurrence count is highest.
max(set(arr), key=arr.count)
@konverner
konverner / convolution.py
Created November 22, 2022 00:30
one-dimensional convolution in NumPy
import numpy as np
class Conv1d:
    """Configuration and kernel for a one-dimensional convolution.

    The constructor stores the layer settings and draws a kernel of shape
    ``(out_channels, in_channels, kernel_size)`` from ``np.random.uniform``
    with bounds 0 and 1.
    """

    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1):
        kernel_shape = (out_channels, in_channels, kernel_size)
        self.stride = stride
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.kernel = np.random.uniform(0, 1, size=kernel_shape)
@konverner
konverner / correlation.py
Created November 22, 2022 00:33
one-dimensional correlation in NumPy
import numpy as np
class Corr1d:
    """Configuration and kernel for a one-dimensional correlation.

    The constructor stores the layer settings and draws a kernel of shape
    ``(out_channels, in_channels, kernel_size)`` from ``np.random.uniform``
    with bounds 0 and 1.
    """

    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1):
        """Store the layer settings and initialise a random kernel.

        Args:
            in_channels: Number of input channels (second kernel axis).
            out_channels: Number of output channels (first kernel axis).
            kernel_size: Length of the kernel (last kernel axis).
            stride: Step setting stored on the instance; defaults to 1.
        """
        self.stride = stride
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.kernel = np.random.uniform(0, 1, size=(out_channels, in_channels, kernel_size))
@konverner
konverner / bert_embs.py
Created December 7, 2022 14:06
get embeddings from bert model
from transformers import BertTokenizer, BertModel
# Load the pretrained tokenizer and model for the 'bert-base-uncased' checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained("bert-base-uncased")
# Sample input; swap in any string.
text = "Replace me by any text you'd like."
# Tokenize the text; return_tensors='pt' requests PyTorch tensors (see the transformers tokenizer docs).
encoded_input = tokenizer(text, return_tensors='pt')
# Forward pass: the tokenizer's output is unpacked as keyword arguments to the model.
output = model(**encoded_input)
# Keep the model's last_hidden_state as the embeddings.
# NOTE(review): presumably shaped (batch, tokens, hidden_size) -- confirm against the model docs.
embs = output.last_hidden_state
@konverner
konverner / create_ner_dataset.py
Created December 11, 2022 00:57
convert conll2003 format for NER annotation into dataset
"""
A O
spokesman O
for O
Israel B-ORG
civil I-ORG
administration I-ORG
Samuel B-PER
Graham I-PER
@konverner
konverner / color_gradient.py
Created January 8, 2023 15:32
it creates a color gradient of two colors, i.e. a list of hex-color strings from c1 to c2
import numpy as np
import matplotlib
def colorGrdient(c1: str, c2: str, n: int):
"""
c1 : color FROM (e.g. '#FFCDD2')
c2 : color TO (e.g. '#BBDEFB')
"""
c1=np.array(matplotlib.colors.to_rgb(c1))
c2=np.array(matplotlib.colors.to_rgb(c2))
@konverner
konverner / floats_formatting.py
Created January 20, 2023 22:01
alignment and precision in python string formatting
# Format spec ">12.2f": right-align each value in a 12-character-wide field,
# with 2 digits after the decimal point.
print("{:>12.2f} {:>12.2f}".format(1.32342, 1.9121))
@konverner
konverner / imshow_tensor_batch.py
Created January 24, 2023 13:46
display images from batch of torch tensors
def imshow_tensor_batch(batch):
"""
batch: torch.Tensor [batch_size, c, h, w]
"""
batch_size = batch.shape[0]
fig, axis = plt.subplots(1, batch_size)
if batch_size == 1:
axis.imshow((batch[0] * 255).permute(2, 1, 0).permute(1, 0, 2).detach().cpu().numpy().astype(np.uint8))
else:
@konverner
konverner / img2tensor.py
Created January 26, 2023 23:53
convert image file into torch tensor
import numpy as np
import torch
from PIL import Image
def img2tensor(img_path: str):
"""
img_path : path to the image to convert
---
t : normalized tensor [1, c, h, w]