-
-
Save celestehorgan/45812e0396e99a525b50b8688801675d to your computer and use it in GitHub Desktop.
uploading from S3 into clickhouse
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## You also need to create a function URL in AWS Lambda. I'm not sure what the permissions should be so I was very permissive. | |
import json | |
import boto3 | |
import csv | |
import io | |
from botocore.vendored import requests | |
s3Client = boto3.client('s3') | |
demo_query = """SELECT * | |
FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/trips_*.gz', 'TabSeparatedWithNames') | |
LIMIT 10;""" | |
def ch_test(my_query): | |
response = requests.post( | |
"https://serverless-search-demo-devrel-celeste.k.aivencloud.com:19509", | |
params={"query": "SELECT 1"}, | |
headers={ | |
"X-ClickHouse-Database": "system", | |
"X-ClickHouse-User": "avnadmin", | |
"X-ClickHouse-Key": "******", | |
"X-ClickHouse-Format": "JSONCompact", | |
}) | |
print(response.text) | |
def ch_query(my_query, my_db): | |
response = requests.post( | |
"https://serverless-search-demo-devrel-celeste.k.aivencloud.com:19509", | |
params={"query":my_query}, | |
headers={ | |
"X-ClickHouse-Database": my_db, | |
"X-ClickHouse-User": "avnadmin", | |
"X-ClickHouse-Key": "*******", | |
"X-ClickHouse-Format": "JSONCompact", | |
}) | |
print(response.text) | |
def ch_query_builder(event): | |
bucket_name = event['Records'][0]['s3']['bucket']['name'] | |
file_name = event['Records'][0]['s3']['object']['key'] | |
# you need to create an access point for the bucket. I used plain internet rather than VPC | |
s3_path = f"s3://s3-region.amazonaws.com/{bucket_name}/{file_name}" | |
print(s3_path) | |
# Construct the query | |
query = f""" | |
INSERT INTO movie_plots | |
SELECT * | |
FROM s3('{s3_path}', 'JSONEachRow') | |
""" | |
return query | |
def lambda_handler(event, context): | |
bucket = event['Records'][0]['s3']['bucket']['name'] | |
key = event['Records'][0]['s3']['object']['key'] | |
#Get our object | |
response = s3Client.get_object(Bucket=bucket, Key=key) | |
# TODO implement | |
ch_query() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment