Skip to content

Instantly share code, notes, and snippets.

View alexcg1's full-sized avatar

Alex Cureton-Griffiths alexcg1

View GitHub Profile
@alexcg1
alexcg1 / executor.py
Created May 24, 2023 11:02
StableLM Executor
from docarray import Document, DocumentArray
from jina import Executor, requests
from transformers import AutoModelForCausalLM, AutoTokenizer
class StableLM(Executor):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.tokenizer = AutoTokenizer.from_pretrained(
'StabilityAI/stablelm-base-alpha-3b'
DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages (raised from /mnt/data/work/repos/scene-alexcg/env/lib/python3.10/site-packages/pkg_resources/__init__.py:2804)
DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.logging')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages (raised from /mnt/data/work/repos/scene-alexcg/env/lib/python3.10/site-packages/pkg_resources/__init__.py:2804)
DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`.
from docarray import BaseDoc, DocArray
from docarray.documents import TextDoc
from docarray.typing import AnyUrl
from jina import Deployment, Executor, requests
class TextChunk(TextDoc):
    """A ``TextDoc`` that additionally carries a ``tags`` dictionary."""

    # Free-form metadata attached to the chunk; defaults to an empty dict.
    tags: dict = {}
@alexcg1
alexcg1 / pdf_search.py
Created August 23, 2022 12:49
PDF search - text/images/tables - start Flow
# Open the Flow for the duration of the block and index `docs` through a
# client bound to the port the Flow exposes.
with flow:
    client = Client(port=flow.port)
    # request_size=1 presumably sends one Document per request, and
    # target_executor presumably limits the request to Executors whose names
    # match the pattern -- confirm both against the Jina Client documentation.
    indexed_docs = client.post(
        "/index",
        docs,
        request_size=1,
        show_progress=True,
        target_executor="(index_*|all_*)",
    )
@alexcg1
alexcg1 / app.py
Created June 10, 2022 13:35
Stack Overflow search: show results
# Print the `texts` of the matches attached to the first item of `response`.
print(response[0].matches.texts)
@alexcg1
alexcg1 / app.py
Created June 10, 2022 13:30
Stack Overflow search: send Document to search Flow
# Pass `search_doc` to `client.search` and keep whatever it returns in
# `response` for later inspection.
response = client.search(search_doc)
@alexcg1
alexcg1 / app.py
Created June 10, 2022 13:27
Stack Overflow: search Document
from docarray import Document
# Build the query Document from a free-text search phrase.
search_doc = Document(text="statistic visualization")
@alexcg1
alexcg1 / app.py
Created June 10, 2022 13:25
Stack Overflow search: index DocumentArray
from jina import Client
# Create a client for the hosted gateway. `<unique_id>` is a placeholder and
# must be replaced with the real gateway address before running.
client = Client(host="grpcs://<unique_id>.wolf.jina.ai") # Your gateway from earlier
# Send `docs` to the client's index call; show_progress=True presumably
# displays a progress bar while indexing -- confirm against the Jina docs.
client.index(docs, show_progress=True)
@alexcg1
alexcg1 / app.py
Last active June 10, 2022 13:25
Stack Overflow search: load Documents
from docarray import DocumentArray
# Load a DocumentArray from Questions.csv. The field_resolver presumably maps
# the CSV's "Title" column onto each Document's `text` -- confirm against the
# DocArray `from_csv` documentation.
docs = DocumentArray.from_csv("Questions.csv", field_resolver={"Title": "text"})
@alexcg1
alexcg1 / flow.yml
Created June 10, 2022 12:56
Stack Overflow search: Flow
# Flow definition with a single text-encoding Executor.
# NOTE(review): nesting was reconstructed from a flattened listing -- confirm
# against the original flow.yml, in particular where `resources` belongs.
jtype: Flow
with:
  protocol: grpc
executors:
  - name: encoder
    uses: jinahub+docker://SpacyTextEncoder/v0.4
    uses_with:
      model_name: 'en_core_web_md'
    resources:
      memory: 8G  # encoding is memory-hungry, so request more memory