Skip to content

Instantly share code, notes, and snippets.

@orcaman
orcaman / wikipedia_learner.py
Last active May 9, 2023 16:23
LangChain Retrieval Question/Answering
import os
import sys
from langchain.text_splitter import CharacterTextSplitter
from langchain.utilities import WikipediaAPIWrapper
import dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain import OpenAI
from langchain.chains import RetrievalQA
import langchain
@orcaman
orcaman / bq_size.py
Created October 27, 2021 12:46
Compute BigQuery Project Size in Bytes
def query_size(client, dataset_id: str) -> float:
    """Return the summed ``size_bytes`` of every table in a dataset.

    Runs ``SELECT sum(size_bytes) as size FROM {dataset_id}.__TABLES__``
    through ``client.query`` and reads the ``size`` attribute of the first
    row of the result.

    Args:
        client: Object exposing ``query(sql)`` whose return value has a
            ``result()`` method yielding rows with a ``size`` attribute
            (presumably a ``google.cloud.bigquery.Client`` -- confirm
            against the caller).
        dataset_id: Dataset identifier. It is interpolated verbatim into
            the SQL text, so it must come from a trusted source.

    Returns:
        The summed size as a float. ``0.0`` when the sum is ``None`` or
        when the query yields no rows at all.
    """
    query_job = client.query(
        f'SELECT sum(size_bytes) as size FROM {dataset_id}.__TABLES__'
    )
    for row in query_job.result():
        print(f'adding to size: {row.size}')
        # Only the first row matters: the query is a single aggregate.
        # A None sum is reported as 0.0 so the return type stays float.
        return 0.0 if row.size is None else float(row.size)
    # No rows came back; report an empty size instead of returning None.
    return 0.0
# Submit one `dal.dynamo.load_batch` task per batch of every entity, then
# gather each task's result as it finishes. `futures` and `results` are
# created before this fragment (not shown here).
with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_PROC) as executor:
    for entity_to_load in entities_to_load:
        # chunks(..., 25) presumably yields batches of at most 25 items --
        # confirm against the chunks() helper.
        futures.extend(
            executor.submit(dal.dynamo.load_batch, entity_to_load, batch)
            for batch in chunks(entities_to_load[entity_to_load], 25)
        )
    # future.result() re-raises any exception raised inside the worker.
    results.extend(
        future.result()
        for future in concurrent.futures.as_completed(futures)
    )
def chunks(lst, n):
@orcaman
orcaman / get_composite_keys_no_cache.py
Created February 18, 2021 04:38
get_composite_keys_no_cache.py
def get_composite_keys(table_name: str, with_env=True) -> list:
    """Return the key-schema attribute names of a table that contain '#'.

    Calls ``dynamodb_client.describe_table`` on every invocation (nothing
    is cached) and filters the entries of ``Table.KeySchema``.

    Args:
        table_name: Passed as ``TableName`` to ``describe_table``.
        with_env: Accepted but not used by this function.

    Returns:
        A list of the ``AttributeName`` values that include a ``'#'``, in
        key-schema order; empty when none do.
    """
    table_description = dynamodb_client.describe_table(TableName=table_name)
    return [
        key['AttributeName']
        for key in table_description['Table']['KeySchema']
        if '#' in key['AttributeName']
    ]
@orcaman
orcaman / get_composite_keys.py
Created February 18, 2021 04:33
get_composite_keys
def get_composite_keys(table_name: str) -> list:
if table_name in table_composite_keys:
return table_composite_keys[table_name]
composite_keys = list()
desc = dynamodb_client.describe_table(TableName=table_name)
key_schema = desc['Table']['KeySchema']
for k in key_schema:
if '#' in k['AttributeName']:
composite_keys.append(k['AttributeName'])
@orcaman
orcaman / aws_docs_dynamodb_create.py
Created February 18, 2021 04:23
DynamoDB Create From AWS Docs
from decimal import Decimal
import json
import boto3
def load_movies(movies, dynamodb=None):
if not dynamodb:
dynamodb = boto3.resource('dynamodb', endpoint_url="http://localhost:8000")
table = dynamodb.Table('Movies')
@orcaman
orcaman / Dockerfile
Created January 5, 2021 05:12
Dockerfile
FROM public.ecr.aws/lambda/python:3.8

## install JRE - needed for PDF parser
## (the yum cache is removed in the same layer so it does not end up in the image)
RUN yum install -y \
        java-1.8.0-openjdk \
        java-1.8.0-openjdk-devel \
    && yum clean all \
    && rm -rf /var/cache/yum

## key=value form; the space-separated ENV form is legacy syntax
ENV JAVA_HOME=/etc/alternatives/jre

## copy app
WORKDIR ${LAMBDA_TASK_ROOT}
import tabula
from IPython.display import display
# Read the remote PDF (all pages) into a list of DataFrames.
WARN_REPORT_URL = 'https://garthtarr.com/data/WARN-Report-for-7-1-2016-to-10-25-2016.pdf'
df_list = tabula.read_pdf(WARN_REPORT_URL, pages='all')

# Render each DataFrame in turn.
for df in df_list:
    display(df)
@orcaman
orcaman / gather.go
Created March 8, 2020 08:14
CloudFunction to gather multiple GET requests
package p
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"strings"
"sync"
{
"reviews": [
{
"author_name": "****",
"author_url": "https://www.google.com/maps/contrib/****/reviews",
"language": "en",
"profile_photo_url": "https://lh6.ggpht.com/-******/AAAAAAAAAAI/AAAAAAAAAAA/YiZVkgB0bOI/s128-c0x00000000-cc-rp-mo/photo.jpg",
"rating": 1,
"relative_time_description": "3 months ago",
"text": "Haha. I can’t even believe you guys are sending this. I’m not even really sure where to start. But, I’ve lived in apartments all over the country. Probably 10 in total. This apartment complex was the worst by a mile. Everything about it was bad. The staff, the grounds, the units, everything. I wouldn’t come back and live her if you guys payed me to live here.",