# ShaikeA/articles_insert.py

## articles_insert.py
# Insert all articles into the articles table.
#
# INSERT IGNORE means that a row which duplicates an existing one is skipped
# instead of raising an error (per the original note, duplicates are checked
# against doi_link).
#
# The values are passed to the driver as query parameters instead of being
# formatted into the SQL text, so an apostrophe inside a title or abstract can
# neither break the statement nor inject SQL.
# NOTE(review): the %s placeholders assume a MySQL-style DB-API driver
# (INSERT IGNORE is MySQL syntax) -- confirm against the connection in use.
insert_article_sql = """INSERT IGNORE INTO articles (doi_link, title, abstract, publication_date, citations)
                        VALUES (%s, %s, %s, %s, %s);"""

try:
    # all_articles maps a DOI link to a sequence whose first four items are
    # used here as title, abstract, publication date and citation count.
    for doi_link, item in all_articles.items():
        cursor.execute(
            insert_article_sql,
            (doi_link, item[0], item[1], item[2], int(item[3])),
        )

    # NOTE: the loop above could be replaced by a single
    # cursor.executemany(insert_article_sql, rows) call, where rows is a list
    # of (doi_link, title, abstract, publication_date, citations) tuples.
    # It is kept as a loop so that rows preceding a malformed article are
    # still sent to the database before the error is raised.

# Catch malformed article data and log it into the logs file.
# ValueError is included because int(item[3]) raises it for a citation value
# that is a non-numeric string.
except (KeyError, IndexError, TypeError, ValueError) as err:
    logger.error("There was an error during articles insertion. The error: {}".format(err))

    # Dump the whole batch of articles to a csv file for future inspection
    # (the failing article is not singled out).
    df = pd.DataFrame(list(all_articles.values()), index=list(all_articles.keys()))
    df.to_csv("Articles.csv", sep=',')

    # This is for debugging.
    # steps is a dict whose keys are the steps in the scraping process and
    # whose values are 1 or 0 (succeeded or failed).
    steps['Articles'] = 0