Skip to content

Instantly share code, notes, and snippets.

@marksparrish
Created November 7, 2021 15:54
Show Gist options
  • Save marksparrish/bb33cdc4b6e46da6b4183f24ad023850 to your computer and use it in GitHub Desktop.
Save marksparrish/bb33cdc4b6e46da6b4183f24ad023850 to your computer and use it in GitHub Desktop.
Laravel Elasticsearch Python Index Importer
import time
import pandas as pd
from es_pandas import es_pandas
import numpy as np
def gather(sql, con='mysql+mysqldb://sail:password@127.0.0.1:3306/sales'):
    """Run *sql* against the database and return the rows as a DataFrame.

    Args:
        sql: Query text, passed unchanged to ``pandas.read_sql_query``.
        con: Database URL or connection object handed to pandas as ``con``.
            The default is the local MySQL URL this script originally
            hard-coded.

    Returns:
        The DataFrame produced by ``pandas.read_sql_query``.
    """
    # NOTE(review): the default URL embeds a user name and password; pass
    # ``con`` explicitly (e.g. built from environment variables) anywhere
    # other than a local development setup.
    print("Getting...")
    print(sql)
    return pd.read_sql_query(sql=sql, con=con)
def _parse_tags(raw):
    """Convert one raw ``tags`` cell into a 1-D NumPy array.

    The cell is presumably the comma-separated id list built by
    ``GROUP_CONCAT`` in the query in ``main`` (e.g. ``"3,7,12"``).
    """
    # Local import keeps this block self-contained without touching the
    # file's import section.
    import ast

    if raw is None or not str(raw).strip():
        return np.array([])
    # literal_eval only accepts Python literals, so text coming from the
    # database cannot execute code the way a bare eval() would.
    # atleast_1d makes a single tag ("5") an array of length one instead of
    # a 0-d scalar array, so every row has the same shape.
    return np.atleast_1d(ast.literal_eval(raw))


def provide(df):
    """Index the rows of *df* into the ``products`` Elasticsearch index.

    Args:
        df: DataFrame with an ``id`` column (used as the index, and passed
            to ``to_es`` with ``use_index=True``) and a ``tags`` column of
            comma-separated literal values.

    Returns:
        The DataFrame that was sent: re-indexed on ``id`` and with ``tags``
        converted to NumPy arrays.
    """
    print("Indexing...")
    ep = es_pandas('127.0.0.1:9200')
    df = df.set_index('id')
    df['tags'] = df['tags'].apply(_parse_tags)
    ep.to_es(df, 'products', use_index=True, thread_count=4, chunk_size=500, timeout="60s")
    return df
def main():
    """Load products with their tag ids from SQL and index them.

    Returns:
        The number of rows in the DataFrame returned by ``provide``.
    """
    query = "SELECT id, name, price, GROUP_CONCAT(tag_id SEPARATOR ',') as tags from products, product_tag where product_tag.product_id = products.id GROUP BY 1,2,3"
    indexed = provide(gather(query))
    return len(indexed)
if __name__ == '__main__':
    # Time the whole import run and report the wall-clock duration in minutes.
    start_time = time.time()
    rows_processed = main()
    elapsed_minutes = (time.time() - start_time) / 60
    print(f'Total Execution time in minutes: {elapsed_minutes}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment