Skip to content

Instantly share code, notes, and snippets.

View snakers4's full-sized avatar
🚀
It is by will alone I set my mind in motion.

Alexander Veysov snakers4

🚀
It is by will alone I set my mind in motion.
View GitHub Profile
@snakers4
snakers4 / pandas_multiprocessing_wrappers.py
Created December 14, 2018 12:07
Pandas multiprocessing wrappers
from tqdm import tqdm
import numpy as np
import pandas as pd
from multiprocessing import Pool
def _apply_df(args):
df, func, num, kwargs = args
return num, df.apply(func, **kwargs)
def apply_by_multiprocessing(df,func,**kwargs):
@snakers4
snakers4 / parse_cc_index.py
Last active September 14, 2023 20:00
Plain Common Crawl pre-processing
import gc
import gzip
import time
import json
import shutil
import os,sys
import tldextract
import collections
import pandas as pd
from tqdm import tqdm
@snakers4
snakers4 / parse_cc_index.py
Last active October 12, 2022 10:43
Plain scripts to parse Common Crawl
import gc
import gzip
import time
import json
import shutil
import os,sys
import tldextract
import collections
import pandas as pd
from tqdm import tqdm
@snakers4
snakers4 / process_wikipedia.py
Last active January 4, 2023 22:19
Post-process Wikipedia files produced by WikiExtractor
import os
import re
import sys
import glob
import nltk
import gensim
import numpy as np
import pandas as pd
from tqdm import tqdm
from uuid import uuid4
@snakers4
snakers4 / calculate_knn.py
Created August 26, 2018 12:00
Use faiss to calculate a KNN graph on data
import gc
import tqdm
import faiss
import bcolz
import os,sys
import numpy as np
from tqdm import tqdm
# open the stored bcolz array
# note that these vectors have to be 280 dimensional
@snakers4
snakers4 / Loss.py
Created July 21, 2018 11:02
Multi class classification focal loss
import torch
import torch.nn as nn
import torch.nn.functional as F
# Focal loss implementation inspired by
# https://github.com/c0nn3r/RetinaNet/blob/master/focal_loss.py
# https://github.com/doiken23/pytorch_toolbox/blob/master/focalloss2d.py
class MultiClassBCELoss(nn.Module):
def __init__(self,
use_weight_mask=False,
@snakers4
snakers4 / train.sh
Created July 7, 2018 11:34
VAE explanation bits
python3 train.py \
--epochs 30 --batch-size 512 --seed 42 \
--model_type fc_conv --dataset_type fmnist --latent_space_size 10 \
--do_augs False \
--lr 1e-3 --m1 40 --m2 50 \
--optimizer adam \
--do_running_mean False --img_loss_weight 1.0 --kl_loss_weight 1.0 \
--image_loss_type bce --ssim_window_size 5 \
--print-freq 10 \
--lognumber fmnist_fc_conv_l10_rebalance_no_norm \
@snakers4
snakers4 / Dockerfile
Created July 3, 2018 07:13
My PyTorch 0.4 Dockerfile
# add 7z tar and zip archivers
FROM nvidia/cuda:9.0-cudnn7-devel
# https://docs.docker.com/engine/examples/running_ssh_service/
RUN apt-get update && apt-get install -y openssh-server
RUN mkdir /var/run/sshd
RUN echo 'root:Ubuntu@41' | chpasswd
RUN sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
RUN sed -i 's/#PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/sshd_config
RUN mkdir ~/.ssh/
@snakers4
snakers4 / AttackerLoss.py
Created June 4, 2018 17:19
Loss for adversarial attacks
import torch
import torch.nn as nn
import torch.nn.functional as F
from utils.PyTorchSSIM import SSIM as SSIMLoss
class AttackerLoss(nn.Module):
def __init__(self,
gamma=0.9,
use_running_mean=False,
@snakers4
snakers4 / Dockerfile
Created June 4, 2018 10:25
Atmyra Dockerfile
# add 7z tar and zip archivers
FROM nvidia/cuda:9.0-cudnn7-devel
# https://docs.docker.com/engine/examples/running_ssh_service/
RUN apt-get update && apt-get install -y openssh-server
RUN mkdir /var/run/sshd
RUN echo 'root:Ubuntu@41' | chpasswd
RUN sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
RUN sed -i 's/#PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/sshd_config
RUN mkdir ~/.ssh/