Skip to content

Instantly share code, notes, and snippets.

@celestehorgan
Forked from Ugbot/loader.py
Last active May 13, 2024 11:26
Show Gist options
  • Save celestehorgan/45812e0396e99a525b50b8688801675d to your computer and use it in GitHub Desktop.
Save celestehorgan/45812e0396e99a525b50b8688801675d to your computer and use it in GitHub Desktop.
uploading from S3 into clickhouse
## You also need to create a function URL in AWS Lambda. I'm not sure what the permissions should be so I was very permissive.
import json
import boto3
import csv
import io
from botocore.vendored import requests
s3Client = boto3.client('s3')
demo_query = """SELECT *
FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/trips_*.gz', 'TabSeparatedWithNames')
LIMIT 10;"""
def ch_test(my_query):
response = requests.post(
"https://serverless-search-demo-devrel-celeste.k.aivencloud.com:19509",
params={"query": "SELECT 1"},
headers={
"X-ClickHouse-Database": "system",
"X-ClickHouse-User": "avnadmin",
"X-ClickHouse-Key": "******",
"X-ClickHouse-Format": "JSONCompact",
})
print(response.text)
def ch_query(my_query, my_db):
response = requests.post(
"https://serverless-search-demo-devrel-celeste.k.aivencloud.com:19509",
params={"query":my_query},
headers={
"X-ClickHouse-Database": my_db,
"X-ClickHouse-User": "avnadmin",
"X-ClickHouse-Key": "*******",
"X-ClickHouse-Format": "JSONCompact",
})
print(response.text)
def ch_query_builder(event):
bucket_name = event['Records'][0]['s3']['bucket']['name']
file_name = event['Records'][0]['s3']['object']['key']
# you need to create an access point for the bucket. I used plain internet rather than VPC
s3_path = f"s3://s3-region.amazonaws.com/{bucket_name}/{file_name}"
print(s3_path)
# Construct the query
query = f"""
INSERT INTO movie_plots
SELECT *
FROM s3('{s3_path}', 'JSONEachRow')
"""
return query
def lambda_handler(event, context):
bucket = event['Records'][0]['s3']['bucket']['name']
key = event['Records'][0]['s3']['object']['key']
#Get our object
response = s3Client.get_object(Bucket=bucket, Key=key)
# TODO implement
ch_query()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment