Skip to content

Instantly share code, notes, and snippets.

@Ugbot
Last active May 13, 2024 10:31
Show Gist options
  • Save Ugbot/9fef8d6936739b4bab6ef254ee5f5a25 to your computer and use it in GitHub Desktop.
Save Ugbot/9fef8d6936739b4bab6ef254ee5f5a25 to your computer and use it in GitHub Desktop.
Uploading from S3 into ClickHouse
import csv
import io
import json
import os
import urllib.error
import urllib.parse
import urllib.request

import boto3
from botocore.vendored import requests
# S3 client created once at import time and shared by the handler below.
s3Client = boto3.client('s3')
# Sample ClickHouse query: reads the first 10 rows of a public tab-separated
# dataset through the s3() table function. Not referenced by the code visible
# in this file.
demo_query = """SELECT *
FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/trips_*.gz', 'TabSeparatedWithNames')
LIMIT 10;"""
def _ch_post(query, database, password):
    """Send one query to the ClickHouse HTTP endpoint.

    Uses the standard library rather than botocore's vendored ``requests``,
    which is deprecated and cannot be relied on in current runtimes.

    Args:
        query: SQL text, sent as the ``query`` URL parameter.
        database: Value for the ``X-ClickHouse-Database`` header.
        password: Value for the ``X-ClickHouse-Key`` header.

    Returns:
        A ``(status, text)`` tuple: the HTTP status code and the decoded
        response body. HTTP error statuses are returned, not raised, so the
        caller decides how to react.

    Raises:
        urllib.error.URLError: If the server cannot be reached at all.
    """
    url = (
        "https://lambda-go-devrel-ben.a.aivencloud.com:15544/?"
        + urllib.parse.urlencode({"query": query})
    )
    # An explicit (empty) body keeps this a POST, as in the original code.
    request = urllib.request.Request(
        url,
        data=b"",
        method="POST",
        headers={
            "X-ClickHouse-Database": database,
            "X-ClickHouse-User": "avnadmin",
            "X-ClickHouse-Key": password,
            "X-ClickHouse-Format": "JSONCompact",
        },
    )
    try:
        with urllib.request.urlopen(request) as response:
            return response.status, response.read().decode("utf-8", errors="replace")
    except urllib.error.HTTPError as exc:
        # urllib raises on 4xx/5xx; the body still carries the server's
        # error message, so hand it back instead of losing it.
        try:
            return exc.code, exc.read().decode("utf-8", errors="replace")
        finally:
            exc.close()


def ch_test(my_query):
    """Run a smoke-test query against the ``system`` database and print the reply.

    Args:
        my_query: SQL to run. When empty or ``None`` the trivial ``SELECT 1``
            is sent, so the function still works as a bare connectivity check.
    """
    # NOTE(review): the key literal looks like a redacted placeholder; the
    # real credential should be supplied securely rather than hard-coded.
    _, text = _ch_post(my_query or "SELECT 1", "system", "******")
    print(text)


def ch_query(my_query, my_db):
    """Run a query against the given database, print and return the reply.

    Args:
        my_query: SQL text to execute.
        my_db: Database name sent in the ``X-ClickHouse-Database`` header.

    Returns:
        The response body as text.

    Raises:
        RuntimeError: If the server answers with an HTTP error status, so a
            failed statement is not mistaken for success by the caller.
        urllib.error.URLError: If the server cannot be reached.
    """
    # NOTE(review): the key literal looks like a redacted placeholder; the
    # real credential should be supplied securely rather than hard-coded.
    status, text = _ch_post(my_query, my_db, "*******")
    print(text)
    if status >= 400:
        raise RuntimeError(f"ClickHouse query failed with HTTP {status}")
    return text
def ch_query_builder(event):
    """Build the INSERT statement that loads the event's S3 object into ``movie_plots``.

    Only the first record of the event is used.

    Args:
        event: S3 notification event; the bucket name and object key are read
            from ``event['Records'][0]['s3']``.

    Returns:
        The SQL text of an ``INSERT INTO movie_plots SELECT * FROM s3(...)``
        statement reading the object as ``JSONEachRow``.

    Raises:
        KeyError, IndexError: If the event lacks the expected structure.
    """
    s3_info = event['Records'][0]['s3']
    bucket_name = s3_info['bucket']['name']
    file_name = s3_info['object']['key']
    # NOTE(review): "s3-region.amazonaws.com" looks like a placeholder for a
    # real regional endpoint - confirm the URL form the s3() function expects.
    # NOTE(review): the key is used exactly as it appears in the event; confirm
    # whether it needs URL-decoding before being placed in this path.
    s3_path = f"s3://s3-region.amazonaws.com/{bucket_name}/{file_name}"
    print(s3_path)

    # Object keys are chosen by whoever uploads the file, so escape the
    # characters that could terminate the SQL string literal early.
    escaped_path = s3_path.replace("\\", "\\\\").replace("'", "\\'")

    # Construct the query
    query = (
        "\n"
        "INSERT INTO movie_plots\n"
        "SELECT *\n"
        f"FROM s3('{escaped_path}', 'JSONEachRow')\n"
    )
    return query
def lambda_handler(event, context):
    """Lambda entry point: load a newly uploaded S3 object into ClickHouse.

    Only the first record of the event is processed.

    Args:
        event: S3 notification event delivered to the function.
        context: Lambda runtime context (unused).

    Raises:
        KeyError, IndexError: If the event lacks the expected structure.
        botocore.exceptions.ClientError: If the object cannot be fetched.
    """
    s3_info = event['Records'][0]['s3']
    bucket = s3_info['bucket']['name']
    # S3 URL-encodes object keys in event notifications (a space arrives as
    # '+'), so decode before handing the key to the S3 API.
    key = urllib.parse.unquote_plus(s3_info['object']['key'])

    # Get our object: fail fast if it is missing or unreadable. The body is
    # not needed here, so release the connection immediately.
    response = s3Client.get_object(Bucket=bucket, Key=key)
    response['Body'].close()

    # NOTE(review): the target database is not stated anywhere in this file.
    # It is read from CLICKHOUSE_DATABASE and falls back to "default" -
    # confirm which database holds movie_plots.
    database = os.environ.get("CLICKHOUSE_DATABASE", "default")
    ch_query(ch_query_builder(event), database)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment