Skip to content

Instantly share code, notes, and snippets.

View leogao2's full-sized avatar

Leo Gao leogao2

View GitHub Profile
# modified from https://gist.github.com/dvschultz/3af50c40df002da3b751efab1daddf2c
def slerp(t, v0, v1):
'''
Spherical linear interpolation (batched)
Args:
t (float/np.ndarray): Float value between 0.0 and 1.0
v0 (np.ndarray): Starting vector
v1 (np.ndarray): Final vector
Returns:
v2 (np.ndarray): Interpolation vector between v0 and v1
@leogao2
leogao2 / mk_snapshot.sh
Last active March 13, 2022 03:04
A fully self-contained script for making a backup on an Ubuntu system using LVM. On a typical system, the only configuration you need to do is: 1. change BKP_LOC to wherever you want to save your backup, and 2. create a password file at ~/borg_password (you might need to enter this password the first time you run the script, but it is automated thereafter).
#!/bin/bash
# User configuration: change these values before use!
# Destination where you want the backup to go.
BKP_LOC=/mnt/backup/primary
# Name of the source LV (presumably in <volume group>/<logical volume> form -- confirm against how it is used).
SOURCE_LVM=vgubuntu/root
# Path of the file where the repo password is stored.
PASSWORD_FILE=~/borg_password
import lm_eval.evaluator
import lm_eval.tasks
import lm_eval.models
import lm_eval.base
from lm_eval.base import rf
from lm_eval.metrics import mean
import json
import numpy as np
import lm_dataformat as lmd
from glob import glob
import os
import json
import collections
from tqdm import tqdm
import re
from best_download import download_file
import fasttext
@leogao2
leogao2 / repack_arxiv.py
Created October 25, 2020 01:28
Repack arxiv into lm_dataformat with the necessary filtering
import lm_dataformat as lmd
import os
import hashlib
import re
from tqdm import tqdm
def sha256str(s):
    """Return the hexadecimal SHA-256 digest of ``s``.

    Args:
        s: Data to hash. Bytes-like objects are hashed as-is; a ``str`` is
            encoded as UTF-8 first, because ``hashlib`` only accepts
            bytes-like input and raises ``TypeError`` on text.

    Returns:
        The digest as a 64-character lowercase hexadecimal string.
    """
    if isinstance(s, str):
        s = s.encode("utf-8")
    return hashlib.sha256(s).hexdigest()
[5490029, 13334514, 453569, 519492, 3918740, 668961, 4901974, 2960733, 19483022, 788550, 850339, 331931606, 73151963, 317457765, 12125882, 2957116, 5981150, 5076443, 3897974, 3168866, 3562501, 2359686, 2190588, 2259680, 1939043, 33940836, 7585067, 307257, 957452, 988910, 12134151, 761569, 5496196, 2694141, 3056881, 2875867, 1400782, 2111464, 2002002, 1863375, 8560496, 1312361, 2001227, 1856704, 2452760, 1763308, 1373272, 2108467, 696814, 2175566, 5588676, 2117130, 1137245, 1402563, 1482619, 885163, 1046439, 861938, 1685236, 861929, 8703875, 136918, 4131176, 125068, 5569656, 2185363, 2211943, 3318815, 2645116, 1987776, 1823516, 2086784, 3979404, 1386463, 1976802, 1878945, 5280544, 1991245, 3401307, 1699202, 533234, 1719563, 46523496, 15091626, 1777223, 1493551, 1261512, 1993835, 3327451, 1838996, 261678, 578121, 410401, 112476, 187576, 114176, 137108, 165725, 161260, 93809, 111664, 166452, 101629, 109933, 49571, 174499, 154784, 117038, 199557, 176357, 286071, 210636, 291046, 153984, 144760, 194501, 196414, 192
import os
import fasttext
import re
lid = fasttext.load_model("lid.176.bin")
def listdir(x):
    """List the entries of directory ``x`` as paths that include ``x``.

    Args:
        x: Path of the directory to list.

    Returns:
        A list with one path per directory entry, each formed by joining
        ``x`` with the entry name. The order is whatever ``os.listdir``
        returns, which is arbitrary.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. ``x`` does not exist
            or is not a directory).
    """
    # os.path.join avoids a doubled separator when ``x`` already ends in one.
    return [os.path.join(x, fn) for fn in os.listdir(x)]
def id(x):
@leogao2
leogao2 / gist:58a380b2e91cf7bc9bdce0f1d52945ee
Last active July 9, 2020 02:53
all the stuff needed to make cc_net work on ubuntu
# Install the system packages: Boost, CMake, Eigen 3, liblzma headers, unzip and Python 3.7.
sudo apt-get install libboost-all-dev cmake libeigen3-dev liblzma-dev unzip python3.7
# Download the pip bootstrap script to get-pip.py.
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
# Run the bootstrap script with the Python 3.7 interpreter.
python3.7 get-pip.py
# Append ~/.local/bin to PATH in ~/.bashrc; the single quotes keep $PATH and ~ from expanding now, so the line is written literally.
echo 'export PATH=$PATH:~/.local/bin' >> ~/.bashrc
36307962535
e 20011086704
a 14567660520
i 12838481994
o 12414752060
t 12120011826
n 11981140608
r 11551226833
s 10546012964
2551291360
e 1439259634
t 1012599011
a 981259505
o 913144815
i 866503493
n 850043038
r 761576918
s 753454243
l 514807040