@app.route('/import_wp')
def import_data_blog():
    """Import published WordPress posts into the ``blogposts`` index.

    Reads the posts from MySQL via :func:`convert_sql_to_dicts` (split
    into paragraphs) and writes them to the Elasticsearch document store
    running on localhost.

    Returns:
        A short confirmation string used as the HTTP response body.
    """
    # Connect to Elasticsearch (locally installed)
    document_store = ElasticsearchDocumentStore(
        host="localhost", username="", password="", index="blogposts"
    )

    # Convert the posts to dicts that can be indexed into the datastore,
    # one document per paragraph.
    dicts = convert_sql_to_dicts(True)

    # Now, let's write the docs to our DB.
    document_store.write_documents(dicts)
    return 'Import posts done'


def convert_sql_to_dicts(split_paragraphs: bool = False) -> List[dict]:
    """Convert published posts from the ``wp_posts`` table into document dicts.

    Each post's HTML content is converted to plain text with ``html2text``
    (links and tables are ignored). Every resulting document has the form
    ``{"content": <text>, "meta": {"name": <post_title>, "url": <post_name>}}``.

    Args:
        split_paragraphs: When true, emit one document per non-blank
            paragraph (paragraphs are separated by a blank line) instead
            of one document per post.

    Returns:
        The list of document dicts. If a MySQL error occurs, a message is
        printed and whatever was collected before the error is returned.
    """
    documents = []

    text_maker = html2text.HTML2Text()
    text_maker.ignore_links = True
    text_maker.ignore_tables = True

    query = (
        "SELECT post_title, post_name, post_content FROM wp_posts "
        "WHERE post_status='publish'"
    )

    # Initialised up front so the ``finally`` clause can tell whether the
    # connection was ever established.
    cnx = None
    try:
        cnx = mysql.connector.connect(user='root', database='haystack')
        cursor = cnx.cursor()
        cursor.execute(query)

        for (post_title, post_name, post_content) in cursor:
            logger.info("Fetching post %s", post_name)
            text = text_maker.handle(post_content)

            if split_paragraphs:
                # Skip empty / whitespace-only paragraphs.
                chunks = [para for para in text.split("\n\n") if para.strip()]
            else:
                chunks = [text]

            # A fresh ``meta`` dict per document so they never share state.
            documents.extend(
                {"content": chunk, "meta": {"name": post_title, "url": post_name}}
                for chunk in chunks
            )

        cursor.close()
    except mysql.connector.Error as err:
        if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            print("Something is wrong with your user name or password")
        elif err.errno == errorcode.ER_BAD_DB_ERROR:
            print("Database does not exist")
        else:
            print(err)
    finally:
        # Always release the connection, including when the query or the
        # HTML conversion fails part-way through.
        if cnx is not None:
            cnx.close()

    return documents