Dmitry Nikolayev macleginn

## sumpos_test.py
import pickle
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm

# Load the pickled object stored on disk and bind it to `shelf_approx`.
# NOTE: unpickling can execute arbitrary code, so this file must come
# from a trusted source.
with open('sts_attributions/shelf_approx_attr_l-9_N-100.pkl', 'rb') as inp:
    shelf_approx = pickle.load(inp)

## xsbert_worker_process.py
import os
import sys
import pickle
import requests
import torch
from sentence_transformers import SentenceTransformer
from sentence_transformers.models import Pooling
from sentence_transformers import util
from xsbert import models

## xsbert_queue_server.py
import json
from http.server import BaseHTTPRequestHandler, HTTPServer
import pandas as pd

# Host name and port number.
# NOTE(review): presumably the address the HTTPServer imported above
# listens on; the code using them is outside this excerpt -- confirm.
hostName = "localhost"
serverPort = 20000
# A global variable to store the queue elements
queue = []


## clusterise_domain.py
from collections import defaultdict
from itertools import combinations
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util


def compute_kernel_bias(vecs, k=None):
    """
    Code taken from: https://github.com/bojone/BERT-whitening

## copy_dir.py
import os
import sys
import shutil


def copy_tree(src, dst):
    '''
    Copy a directory tree from src to dst ignoring dangling
    symlinks, retrieving files symlinks point to, and
    breaking the cycles, i.e. never copying the same

## predict_from_CLS.py
import json
from math import ceil
from random import shuffle

import torch
import torch.nn as nn

from transformers import AutoTokenizer, AutoModel
from transformers import AdamW, get_scheduler

## simulation_step.py
import numpy as np

# We're given an n by n distance matrix *D* with transfer
# probabilities for a given pair of nodes (for any feature),
# a feature matrix *M*, and a dropout probability p_d.
# NOTE(review): D, M and p_d are not defined in the visible part of
# this script; they have to exist before the line below runs.

# We convert the transfer probabilities to no-transfer probabilities
# and take their logs
# (an entry of D equal to 1 yields log(0) = -inf, and NumPy emits a
# RuntimeWarning for it).
L = np.log(1 - D)

## get_n_grams.py
# Gather every character whose Unicode general category is a
# punctuation category (the category code starts with "P").
import sys
from unicodedata import category
chrs = (chr(code_point) for code_point in range(sys.maxunicode + 1))
punctuation = {symbol for symbol in chrs if category(symbol).startswith("P")}
# A hyphen can occur inside words, so it is not treated as punctuation.
punctuation.remove('-')


def tokenize(s, lower_case=False):

## get_roberta_word_embeddings.py
def rm_whitespace(s):
    """Return *s* without its leading 'Ġ' marker, if it has one.

    Only a single leading 'Ġ' is removed; strings that do not start
    with 'Ġ' are returned unchanged.
    """
    return s[1:] if s.startswith('Ġ') else s


def get_tokens_with_ranges(input_string, tokenizer):
    '''
    RoBERTa prepends 'Ġ' to the beginning of what it

## plot_spectra.py
import pandas as pd
import matplotlib.pyplot as plt

# Read the sheet without a header row, so columns get integer labels.
d = pd.read_excel('spectrograms-relative-20.xlsx', header=None)
# Label every row "<first column>-<second column>" and use that as the index.
index_col = [f'{first}-{second}' for first, second in zip(d.iloc[:, 0], d.iloc[:, 1])]
d.index = index_col
# The two source columns are now encoded in the index; remove them in place.
d.drop(columns=[0, 1], inplace=True)
	import pickle
	import numpy as np
	import pandas as pd
	import torch
	import matplotlib.pyplot as plt
	import seaborn as sns
	from tqdm.auto import tqdm

	# Load the pickled object stored on disk and bind it to `shelf_approx`.
	# NOTE: unpickling can execute arbitrary code, so this file must come
	# from a trusted source.
	with open('sts_attributions/shelf_approx_attr_l-9_N-100.pkl', 'rb') as inp:
		shelf_approx = pickle.load(inp)
	import os
	import sys
	import pickle
	import requests
	import torch
	from sentence_transformers import SentenceTransformer
	from sentence_transformers.models import Pooling
	from sentence_transformers import util
	from xsbert import models
	import json
	from http.server import BaseHTTPRequestHandler, HTTPServer
	import pandas as pd

	# Host name and port number.
	# NOTE(review): presumably the address the HTTPServer imported above
	# listens on; the code using them is outside this excerpt -- confirm.
	hostName = "localhost"
	serverPort = 20000
	# A global variable to store the queue elements
	queue = []
	from collections import defaultdict
	from itertools import combinations
	import pandas as pd
	import numpy as np
	from sentence_transformers import SentenceTransformer, util


	def compute_kernel_bias(vecs, k=None):
	"""
	Code taken from: https://github.com/bojone/BERT-whitening
	import os
	import sys
	import shutil


	def copy_tree(src, dst):
	'''
	Copy a directory tree from src to dst ignoring dangling
	symlinks, retrieving files symlinks point to, and
	breaking the cycles, i.e. never copying the same
	import json
	from math import ceil
	from random import shuffle

	import torch
	import torch.nn as nn

	from transformers import AutoTokenizer, AutoModel
	from transformers import AdamW, get_scheduler
	import numpy as np

	# We're given an n by n distance matrix D with transfer
	# probabilities for a given pair of nodes (for any feature),
	# a feature matrix M, and a dropout probability p_d.
	# NOTE(review): D, M and p_d are not defined in the visible part of
	# this script; they have to exist before the line below runs.

	# We convert the transfer probabilities to no-transfer probabilities
	# and take their logs
	# (an entry of D equal to 1 yields log(0) = -inf, and NumPy emits a
	# RuntimeWarning for it).
	L = np.log(1 - D)
	# Gather every character whose Unicode general category is a
	# punctuation category (the category code starts with "P").
	import sys
	from unicodedata import category
	chrs = (chr(code_point) for code_point in range(sys.maxunicode + 1))
	punctuation = {symbol for symbol in chrs if category(symbol).startswith("P")}
	# A hyphen can occur inside words, so it is not treated as punctuation.
	punctuation.remove('-')


	def tokenize(s, lower_case=False):
	def rm_whitespace(s):
		"""Return *s* without its leading 'Ġ' marker, if it has one.

		Only a single leading 'Ġ' is removed; strings that do not start
		with 'Ġ' are returned unchanged.
		"""
		if s.startswith('Ġ'):
			return s[1:]
		else:
			return s


	def get_tokens_with_ranges(input_string, tokenizer):
	'''
	RoBERTa prepends 'Ġ' to the beginning of what it
	import pandas as pd
	import matplotlib.pyplot as plt

	# Read the sheet without a header row, so columns get integer labels.
	d = pd.read_excel('spectrograms-relative-20.xlsx', header=None)
	# Label every row "<first column>-<second column>" and use that as the index.
	index_col = [f'{first}-{second}' for first, second in zip(d.iloc[:, 0], d.iloc[:, 1])]
	d.index = index_col
	# The two source columns are now encoded in the index; remove them in place.
	d.drop(columns=[0, 1], inplace=True)