AlexMikhalev / qa_search.py
Last active Apr 17, 2021
QA BERT pre-cached
```
tokenizer = None
import numpy as np
import torch
import os

# Pick Redis Cluster startup nodes based on where the code runs
config_switch = os.getenv('DOCKER', 'local')
if config_switch == 'local':
    startup_nodes = [{"host": "127.0.0.1", "port": "30001"},
                     {"host": "127.0.0.1", "port": "30002"},
                     {"host": "127.0.0.1", "port": "30003"}]
else:
    # hypothetical: service names of the cluster nodes inside Docker
    startup_nodes = [{"host": "redis", "port": "30001"}]
```
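A minimal connection sketch, assuming the redis-py-cluster package and the `startup_nodes` list above:

```
from rediscluster import RedisCluster

# decode_responses=True returns str instead of bytes
rc = RedisCluster(startup_nodes=startup_nodes, decode_responses=True)
print(rc.ping())
```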
AlexMikhalev / memory.py
Created Apr 8, 2021
Check memory usage in Python
```
# standard library: peak memory usage (kilobytes on Linux, bytes on OS X)
import resource
print(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)

# pip install psutil: current resident set size, in MB
import os, psutil
print(psutil.Process(os.getpid()).memory_info().rss / 1024 ** 2)
```
AlexMikhalev / remap_docker_user_to_host_user.md

Problem

When I use Docker to work with a workspace shared with the host under Ubuntu, I find that files created by the Docker user are owned by root. This is not the case on macOS.

Maybe this is because the Docker daemon runs as root, and the default user-mapping mechanism maps container-root to host-root. So can I map container-root, or any container user, to the current host user?

Fortunately, recent Docker supports remapping container users to host users via Linux user namespaces. Refer to this.

Linux namespace
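As a sketch of how the remapping is configured (assuming the daemon's `userns-remap` setting; the user name and ID range here are illustrative):

```
# /etc/docker/daemon.json
{
  "userns-remap": "youruser"
}

# /etc/subuid and /etc/subgid need a matching subordinate ID range, e.g.
#   youruser:100000:65536

# restart the daemon to apply
sudo systemctl restart docker
```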

AlexMikhalev / gist:85ec3359bfe6274d0030e38aec353913
```
redis-cli -c -p 30001 -h 127.0.0.1
127.0.0.1:30001> keys *
1) "processed_docs_stage1_para"
2) "sentence:PMC293432.xml:{06S}"
3) "sentence:PMC270701.xml:{06S}"
4) "edges_matched_{06S}"
5) "sentence:PMC222961.xml:{06S}"
6) "processed_docs_stage3{06S}"
7) "processed_docs_stage2_para{06S}"
AlexMikhalev / tokeniser_gears_summary.py
```
from transformers import AutoTokenizer, AutoModel

tokenizer = None

def loadTokeniser():
    global tokenizer
    tokenizer = AutoTokenizer.from_pretrained("t5-base", torchscript=True)
    # Try RobertaTokenizerFast and BART
    # tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
    return tokenizer
```
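A quick usage sketch (the sample sentence is illustrative):

```
tokenizer = loadTokeniser()
tokens = tokenizer.encode("coronavirus binds to the ace2 receptor", add_special_tokens=False)
print(tokens)
```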
AlexMikhalev / export_trace.py
Created Mar 18, 2021 (forked from lantiga/export_trace.py)
🤗 Huggingface BERT on RedisAI
```
from transformers import BertForQuestionAnswering
import torch

bert_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
model = BertForQuestionAnswering.from_pretrained(bert_name, torchscript=True)
model.eval()

# dummy input_ids, attention_mask and token_type_ids for tracing
inputs = [torch.ones(1, 2, dtype=torch.int64),
          torch.ones(1, 2, dtype=torch.int64),
          torch.ones(1, 2, dtype=torch.int64)]
```
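The preview stops here; a minimal continuation, assuming the usual torch.jit tracing flow the filename implies (the output path is illustrative):

```
# trace the model with the dummy inputs and serialise it for RedisAI
traced_model = torch.jit.trace(model, inputs)
torch.jit.save(traced_model, "traced_bert_qa.pt")
```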
AlexMikhalev / RedisGears5.py
```
# Run process_item on every key matching 'sentence:*'
bg = GearsBuilder('KeysReader')
bg.foreach(process_item)
bg.count()
bg.register('sentence:*', mode="async_local", onRegistered=OnRegisteredAutomata)
```
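`process_item` and `OnRegisteredAutomata` are defined elsewhere in the pipeline; as a hypothetical stub, each KeysReader record arrives as a dict carrying the key name and its value:

```
def process_item(record):
    # hypothetical stub: KeysReader supplies a dict with 'key' and 'value'
    key = record['key']
    value = record['value']
    # ... tokenise and match the sentence stored at this key ...
```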
AlexMikhalev / RedisGears4.py
```
import httpimport

# Import spaCy's English stop-word list straight from GitHub
with httpimport.remote_repo(['stop_words'], "https://raw.githubusercontent.com/explosion/spaCy/master/spacy/lang/en/"):
    import stop_words
from stop_words import STOP_WORDS
```
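A quick filtering sketch with the imported list (the tokens are illustrative):

```
tokens = ["the", "virus", "binds", "to", "the", "receptor"]
filtered = [t for t in tokens if t not in STOP_WORDS]
print(filtered)  # ['virus', 'binds', 'receptor']
```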
AlexMikhalev / RedisGears3.py
```
# Lazily initialise SymSpell, then spell-correct the sentence text
global sym_spell
if not sym_spell:
    sym_spell = load_symspell()
suggestions = sym_spell.lookup_compound(sentence['value'], max_edit_distance=1,
                                        transfer_casing=True, ignore_non_words=True)
```
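`lookup_compound` returns a list of `SuggestItem` objects; the corrected text is the `term` of the top suggestion, for example:

```
corrected = suggestions[0].term if suggestions else sentence['value']
```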
AlexMikhalev / RedisGears2.py
"""
load symspell and relevant dictionaries
"""
sym_spell=None
def load_symspell():
import pkg_resources
from symspellpy import SymSpell, Verbosity
sym_spell = SymSpell(max_dictionary_edit_distance=1, prefix_length=7)
dictionary_path = pkg_resources.resource_filename(