Skip to content

Instantly share code, notes, and snippets.

View macleginn's full-sized avatar

Dmitry Nikolayev macleginn

View GitHub Profile
import pickle
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
# Load the precomputed attribution object from disk.
# NOTE: pickle.load can execute arbitrary code while deserialising, so only
# open pickle files that come from a trusted source.
with open('sts_attributions/shelf_approx_attr_l-9_N-100.pkl', 'rb') as inp:
    shelf_approx = pickle.load(inp)
macleginn /
Last active September 8, 2023 12:50
XSBERT worker process
import os
import sys
import pickle
import requests
import torch
from sentence_transformers import SentenceTransformer
from sentence_transformers.models import Pooling
from sentence_transformers import util
from xsbert import models
macleginn /
Created September 8, 2023 11:44
XSBERT queue server
import json
from http.server import BaseHTTPRequestHandler, HTTPServer
import pandas as pd
# Host name and port for the queue server. Presumably these are passed to
# HTTPServer further down in the script; that part is not visible in this
# excerpt, so confirm there.
hostName = "localhost"
serverPort = 20000
# A global variable to store the queue elements
queue = []
macleginn /
Created October 23, 2019 08:49
An example of an offline Plotly plot created using Python with Plotly.js included once in the <head> section of the page.
import plotly.express as px
import plotly.offline
template = """
<!DOCTYPE html>
<html lang="en">
<meta charset="utf-8"/>
macleginn /
Created December 20, 2022 08:28
Clusterisation of fine-grained CMP domains based on SBERT sentence similarities
from collections import defaultdict
from itertools import combinations
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util
def compute_kernel_bias(vecs, k=None):
Code taken from:
import os
import sys
import shutil
def copy_tree(src, dst):
Copy a directory tree from src to dst ignoring dangling
symlinks, retrieving files symlinks point to, and
breaking the cycles, i.e. never copying the same
macleginn /
Last active June 14, 2022 10:38
Export all tables from a MySQL database as .csv files using Python 3
import pymysql
def execute(c, command):
    """Run *command* on the cursor *c* and return every resulting row.

    Args:
        c: A DB-API cursor object exposing ``execute`` and ``fetchall``.
        command: The SQL statement to run.

    Returns:
        Whatever ``c.fetchall()`` yields after the statement has been
        executed (a sequence of row tuples for standard DB-API cursors).
    """
    # The statement has to be executed first; calling fetchall() on its own
    # would ignore *command* altogether.
    c.execute(command)
    return c.fetchall()
# Open a connection to the MySQL server on localhost:3306 as user 'root' with
# an empty password, selecting the 'mysql' database. The charset option is
# left commented out at the end of the call.
db = pymysql.connect(host='localhost', port=3306, user='root', passwd='', db='mysql') #, charset='utf8')
# Cursor used to issue statements over this connection.
c = db.cursor()
macleginn /
Last active June 3, 2022 13:54
Training and evaluation code for a simple model that predicts a token removed from a sentence
import json
from math import ceil
from random import shuffle
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModel
from transformers import AdamW, get_scheduler
macleginn /
Last active February 11, 2022 12:01
A step in the simulation of random feature spread on a network guided by NPM
import numpy as np
# We're given an n by n distance matrix *D* with transfer
# probabilities for a given pair of nodes (for any feature),
# a feature matrix *M*, and a dropout probability p_d.
# We convert the transfer probabilities to no-transfer probabilities
# and take their logs
L = np.log(1 - D)
# Gather every punctuation character that Unicode defines
import sys
from unicodedata import category
# Lazy walk over the whole code-point range; it is consumed once, just below.
chrs = map(chr, range(sys.maxunicode + 1))
# Every punctuation general category (Pc, Pd, Ps, Pe, Pi, Pf, Po) starts with "P".
punctuation = {c for c in chrs if category(c)[0] == "P"}
# A hyphen can occur inside words
def tokenize(s, lower_case=False):