Created
November 7, 2021 15:54
-
-
Save marksparrish/bb33cdc4b6e46da6b4183f24ad023850 to your computer and use it in GitHub Desktop.
Laravel Elasticsearch Python Index Importer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import pandas as pd | |
from es_pandas import es_pandas | |
import numpy as np | |
# Connection URL used when the caller does not supply one (local dev database).
_DEFAULT_DATA_URL = 'mysql+mysqldb://sail:password@127.0.0.1:3306/sales'


def gather(sql, data_url=_DEFAULT_DATA_URL):
    """Run a SQL query and return its result set as a DataFrame.

    Args:
        sql: The query text; it is echoed to stdout before execution.
        data_url: Anything ``pandas.read_sql_query`` accepts as ``con``
            (a SQLAlchemy URL string, an engine/connection, or a sqlite3
            connection). Defaults to the local ``sales`` MySQL database.

    Returns:
        pandas.DataFrame holding the rows returned by the query.
    """
    print("Getting...")
    print(sql)
    return pd.read_sql_query(sql=sql, con=data_url)
def _parse_tags(raw):
    """Turn a comma-separated id string such as "3,7,12" into a 1-D int array."""
    # Some DB drivers hand back GROUP_CONCAT results as bytes rather than str.
    if isinstance(raw, (bytes, bytearray)):
        raw = raw.decode()
    # Always build from a list so a single tag ("5") still yields a
    # one-element array rather than a 0-d scalar array.
    return np.array([int(tag) for tag in raw.split(',')])


def provide(df):
    """Index the rows of *df* into the ``products`` Elasticsearch index.

    The frame is re-indexed by its ``id`` column and the ``tags`` column is
    converted from a comma-separated string into an array of integer ids
    before the bulk upload.

    Args:
        df: DataFrame with at least ``id`` and ``tags`` columns.
            NOTE(review): tag ids are assumed to be integers (the previous
            ``eval``-based parsing relied on the same thing) -- confirm.

    Returns:
        The transformed DataFrame (indexed by ``id``, ``tags`` as arrays).

    Raises:
        ValueError: if a tag entry is not an integer literal.
    """
    print("Indexing...")
    ep = es_pandas('127.0.0.1:9200')
    df = df.set_index('id')
    # Parse explicitly instead of eval(): the strings come from the database
    # and must never be executed as Python code.
    df['tags'] = df['tags'].apply(_parse_tags)
    # use_index=True: presumably makes the DataFrame index ('id') the
    # document id -- confirm against the es_pandas documentation.
    ep.to_es(df, 'products', use_index=True, thread_count=4, chunk_size=500, timeout="60s")
    return df
def main():
    """Pull products with their tag ids from SQL, index them, and return the row count."""
    # One row per product; its tag ids are collapsed into a comma-separated string.
    query = "SELECT id, name, price, GROUP_CONCAT(tag_id SEPARATOR ',') as tags from products, product_tag where product_tag.product_id = products.id GROUP BY 1,2,3"
    fetched = gather(query)
    indexed = provide(fetched)
    return len(indexed)
if __name__ == '__main__':
    # Script entry point: run the import once and report how long it took.
    # perf_counter() is monotonic, so the measurement is immune to wall-clock
    # adjustments that can skew time.time() differences.
    start = time.perf_counter()
    main()
    elapsed_minutes = (time.perf_counter() - start) / 60
    print('Total Execution time in minutes: ' + str(elapsed_minutes))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment