Skip to content

Instantly share code, notes, and snippets.

View generall's full-sized avatar
📡

Andrey Vasnetsov generall

📡
View GitHub Profile
# Download a file from Google Drive by its id into docs.rar.
# Requires FILE_ID to be set in the environment before running.
#
# The inner wget fetches the download page, saving session cookies, and sed
# extracts the `confirm=` token from the response. The outer wget then repeats
# the request with those cookies and the extracted token.
#
# The inner URL is double-quoted so that ${FILE_ID} is expanded (single quotes
# would send the literal text "$FILE_ID"). Each continuation backslash is
# preceded by a space so adjacent arguments are not fused together.
# The cookie file is removed only when the download succeeds.
wget --load-cookies /tmp/cookies.txt \
  "https://docs.google.com/uc?export=download&confirm=$(wget \
  --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies \
  --no-check-certificate "https://docs.google.com/uc?export=download&id=${FILE_ID}" \
  -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=${FILE_ID}" \
  -O docs.rar && rm -rf /tmp/cookies.txt
Colombian
Iranian
British
Irish
Welsh
American
Canadian
Australian
Israeli
Greenlandic
Japan
Kingdom
Sweden
Norway
Poland
Denmark
Slovenia
Ireland
Argentina
Bolivia
@generall
generall / shrink_embeddings.ipynb
Created April 27, 2019 22:32
Shrinking Fasttext embeddings
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import numpy as np
import json
fd = open('./startups.json')
# payload is now an iterator over startup data
payload = map(json.loads, fd)
# Here we load all vectors into memory, numpy array works as iterable for itself.
# Other option would be to use Mmap, if we don't want to load all data into RAM
# File: neural_searcher.py
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer
class NeuralSearcher:
def __init__(self, collection_name):
self.collection_name = collection_name
from qdrant_client.http.models import Filter
...
city_of_interest = "Berlin"
# Define a filter for cities
city_filter = Filter(**{
"must": [{
"key": "city", # We store city information in a field of the same name
# File: service.py
from fastapi import FastAPI
# That is the file where NeuralSearcher is stored
from neural_searcher import NeuralSearcher
app = FastAPI()
# Create an instance of the neural searcher
@generall
generall / cat_vectors.tsv
Last active July 3, 2021 22:10
categories_ru_config.json
We can't make this file beautiful and searchable because it's too large.
-0.836976 -0.82773167 -0.2321338 0.01756154 -0.33699885 -0.87920535 1.121577 -0.48148146 0.12505984 -0.5362323 -0.04594946 0.21799839 -0.9739923 0.06835833 -0.0905306 -0.4682897 0.21512008 -0.19464707 -0.93475235 -0.5278779 0.07396773 -0.7966168 -0.9158848 0.3736219 0.91806734 0.16620624 0.4292583 0.24501403 0.7698505 0.80631876 -0.40066087 0.06443666 -0.94286495 0.9055134 -0.32404503 0.029998884 0.69158125 0.5815438 -0.019293755 -0.67045003 -0.02590625 0.6856051 0.43317997 -0.022387378 -0.2444922 -0.63252854 0.49054447 0.4057305 0.15405568 0.42062312 0.8911598 -0.107649505 -0.1934745 -0.21087441 -0.98097706 0.7977838 -0.3478621 0.25571245 -0.59421146 -0.18338741 0.33044222 0.11104743 0.0024537258 -0.79205513 0.7596757 -0.18430562 0.2074007 0.24703579 -0.051154852 0.3755078 -0.028878666 -0.7895112 -0.2619388 0.31705666 -0.26749983 1.5598425 0.20117877 -0.8625497 -0.08463721 0.07981754 0.3533825 -0.22829238 -0.8085298 0.83868784 -1.3574781 -0.23162451 0.75115657 -0.67257035 0.20241697 0.09624748 0.11910073 -0.
@generall
generall / 10k_vector_search.py
Created May 20, 2022 21:05
Search 10k by 10k vectors fast
import asyncio
import time
from multiprocessing import Pool
import httpx
import numpy as np
from grpclib.client import Channel
from qdrant_client import QdrantClient
from qdrant_client.grpc import PointsStub, WithPayloadSelector
from qdrant_client.http.models import Distance, OptimizersConfigDiff, \