Skip to content

Instantly share code, notes, and snippets.

View konverner's full-sized avatar

Konstantin Verner konverner

  • Bordeaux, France
View GitHub Profile
@konverner
konverner / spans_to_conll.py
Last active February 5, 2024 12:47
convert spans NER annotation to conll BIO format
import json
from typing import Any, Dict, List
import spacy
from spacy.training.iob_utils import biluo_to_iob, doc_to_biluo_tags
from tqdm import tqdm
def spans_to_conll(
samples: List[Dict[str, Any]],
@konverner
konverner / fix_ner_spans.py
Created February 1, 2024 00:00
fix corrupted spans in NER annotation
def fix_span(text: str, span: dict):
# let us check that spans are correctly extracted
fixed_span = span.copy()
# span starts with a space or a punctuation
while text[fixed_span["start"]] in [" ", ".", ",", ";", ":", "!", "?"]:
fixed_span["start"] += 1
# span is cut at the beginning: e.g. "ashington DC"
@konverner
konverner / generate_ssh.sh
Created December 18, 2023 21:06
generate ssh key on linux
# Make sure the per-user SSH directory exists (no error if it already does).
mkdir -p ~/.ssh
# Append GitHub's RSA host key to known_hosts.
ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts
# Generate an RSA key pair; <email> is a placeholder - substitute the address to embed as the key comment.
ssh-keygen -t rsa -C <email>
# Print the public key (assumes the key was saved to ssh-keygen's default path ~/.ssh/id_rsa).
cat ~/.ssh/id_rsa.pub
@konverner
konverner / ols_template.py
Last active November 10, 2023 21:03
OLS method with statsmodels
import numpy as np
import statsmodels.api as sm
X = np.array([[1, 85, 5],
[1, 177, 6],
[1, 100, 9],
[1, 110, 8],
[1, 90, 7.5],
[1, 144, 5.5]])
@konverner
konverner / changePython.sh
Created July 8, 2023 21:20
Change python version in Colab
# Register /usr/bin/python3.8 as an alternative for /usr/bin/python3 with priority 1
# (assumes the python3.8 binary is already installed - TODO confirm).
!sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1
# Choose which registered alternative python3 should point to.
!sudo update-alternatives --config python3
# Install the python3-pip package via apt.
!sudo apt install python3-pip
# Print the version that python3 now resolves to.
!python3 --version
@konverner
konverner / stacked_barplot.py
Created June 17, 2023 20:42
stacked barplot from dataframe
import pandas as pd
# Toy dataset: one row per store with its district and product category.
# Fixes relative to the original snippet: the missing comma after the
# 'name' list (a syntax error) and the bare names X, Y, Z, which were
# undefined and are taken to be string category labels.
df = pd.DataFrame(
    {
        "name": ["Store A", "Store B", "Store C", "Store D"],
        "district": ["I", "II", "I", "III"],
        "category": ["X", "X", "Y", "Z"],
    }
)

# Count stores per (district, category): rows are districts, columns are
# categories; combinations with no store are NaN.
district_category_pivot_table = df.pivot_table(
    values="name",
    index="district",
    columns="category",
    aggfunc="count",
)
@konverner
konverner / wordcloud_pandas.py
Last active June 16, 2023 22:15
create word cloud from dataframe column
from wordcloud import WordCloud
def create_wordcloud_from_column(df, column_name, title, max_token_len=3):
preprocessed_tokens = []
for tokens in df[column_name].str.split().tolist():
try:
for token in tokens:
if len(token) > max_token_len:
preprocessed_tokens.append(token.lower().strip().strip('-').strip('.'))
@konverner
konverner / show_percent_nans.py
Last active June 15, 2023 21:06
show percent of nan values
import pandas as pd
df = ...
# Render floats as one-decimal percentages for the report below.
pd.set_option("display.float_format", "{:,.1f}%".format)
# Per-column share of missing values, largest first, scaled to percent.
print(df.isna().sum().div(df.shape[0]).sort_values(ascending=False).mul(100))
# Afterwards switch float rendering to plain two-decimal output.
pd.set_option("display.float_format", "{:,.2f}".format)
@konverner
konverner / mnist_pytorch_pipeline.py
Last active May 28, 2023 22:15
Template for image classification with pytorch on mnist
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from tqdm import tqdm
# Set device (GPU or CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@konverner
konverner / pathlib.py
Created May 7, 2023 00:23
Defining paths with pathlib
from pathlib import Path
# Current working directory, used as the base for the paths below.
DIR = Path.cwd()
# 'data' entry directly under the working directory.
PATH_TEST_DIR = DIR / 'data'
# 'file.csv' entry directly under the working directory.
PATH_TEST_FILE = DIR / 'file.csv'