Skip to content

Instantly share code, notes, and snippets.

@Lucs1590
Created July 5, 2021 01:09
Show Gist options
  • Save Lucs1590/4e6c4659ee64afb38c3357d744ec100e to your computer and use it in GitHub Desktop.
Save Lucs1590/4e6c4659ee64afb38c3357d744ec100e to your computer and use it in GitHub Desktop.
This is a test in which I try to load data translated from a JSON document (about 700,000 records) into a Neo4j database.
import requests
from py2neo import Graph, Node
from time import time
import json
import re
def main():
    """Fetch the Open Food Facts ingredient tags, load them, and report timing.

    Connects to the graph database, downloads the ingredients document,
    hands its ``"tags"`` list to ``insert_data`` and finally prints the
    elapsed wall-clock time in seconds.
    """
    started_at = time()

    graph = connect_database()
    payload = get_data(
        'https://world.openfoodfacts.org/ingredients.json')
    ingredient_tags = payload["tags"]

    insert_data(graph, ingredient_tags, "Ingredient", "name")

    elapsed = time() - started_at
    print("Execution Time: ", elapsed)
def connect_database():
    """Open a py2neo ``Graph`` connection using environment-provided settings.

    The connection settings are read from the environment so that
    credentials do not have to live in the source file:

    * ``NEO4J_HOST`` -- server host; falls back to the address that was
      hard-coded in the original script.
    * ``NEO4J_PORT`` -- server port; only forwarded to ``Graph`` when set,
      otherwise the library's own default is used.
    * ``NEO4J_PASSWORD`` -- password; falls back to the literal that was in
      the original script.

    Returns:
        The ``Graph`` object built from those settings.

    Raises:
        ValueError: if ``NEO4J_PORT`` is set but is not an integer.
    """
    import os

    settings = {
        "host": os.environ.get("NEO4J_HOST", '54.173.133.27'),
        # NOTE(review): '****' looks like a redacted placeholder, not a real
        # password -- set NEO4J_PASSWORD for an actual deployment.
        "password": os.environ.get("NEO4J_PASSWORD", '****'),
    }

    # The original port literal was redacted (`*****`), which is not valid
    # Python, so the port is only passed along when explicitly configured.
    port = os.environ.get("NEO4J_PORT")
    if port:
        settings["port"] = int(port)

    return Graph(**settings)
def get_data(url, type_request="GET", headers=None, querystring=None):
    """Perform an HTTP request and return the response body parsed as JSON.

    Args:
        url: Address to request.
        type_request: HTTP method name passed to ``requests.request``.
        headers: Optional mapping of request headers; an empty mapping is
            used when omitted.
        querystring: Optional mapping of query parameters; an empty mapping
            is used when omitted.

    Returns:
        The object produced by ``json.loads`` on the response text.

    Raises:
        json.JSONDecodeError: if the response body is not valid JSON.
    """
    # Fresh dicts per call instead of shared mutable default arguments.
    headers = {} if headers is None else headers
    querystring = {} if querystring is None else querystring

    response = requests.request(
        type_request, url, headers=headers, params=querystring)
    return json.loads(response.text)
def insert_data(connection, dataset, label, attribute):
    """Translate each record's value and merge it into the graph as a node.

    For every item in ``dataset`` the value under ``attribute`` is passed
    through ``translate_data``. Records whose resulting text is empty or
    only whitespace are skipped; all others are merged as a node labelled
    ``label`` and echoed to stdout.

    Args:
        connection: Graph object exposing ``merge``.
        dataset: Iterable of mappings, each containing ``attribute``.
        label: Node label, also used as the node's primary label for merging.
        attribute: Key read from each record and used as the primary key
            for merging.
    """
    for data in dataset:
        translated_data, english = translate_data(data[attribute])

        # The original tested the *function* `translate_data` against " ",
        # which is always true; test the translated text instead and skip
        # blank results.
        if not translated_data.strip():
            continue

        # NOTE(review): the value is always stored under `name` while the
        # merge key is `attribute`; they agree only when attribute == "name",
        # which is the only call visible in this file.
        properties = {"name": translated_data}
        if english:
            # Flag set by translate_data when the untranslated text was kept.
            properties["english"] = english

        ingredient = Node(label, **properties)
        ingredient.__primarylabel__ = label
        ingredient.__primarykey__ = attribute
        connection.merge(ingredient)
        print(translated_data)
def translate_data(data):
    """Translate ``data`` from English to Portuguese via the SYSTRAN API.

    The text is first cleaned with ``filter_data`` and then sent to the
    translation endpoint with ``source="en"`` and ``target="pt"``.

    Args:
        data: Raw text to translate.

    Returns:
        A ``(text, english)`` tuple. On success ``text`` is the lower-cased
        translation and ``english`` is ``False``. When the response does not
        contain ``["outputs"][0]["output"]``, ``text`` is the filtered input
        with ``"%20"`` replaced by spaces and ``english`` is ``True``.
    """
    data = filter_data(data)
    url = "https://systran-systran-platform-for-language-processing-v1.p.rapidapi.com/translation/text/translate"
    querystring = {"source": "en", "target": "pt", "input": data}
    # NOTE(review): API key is hard-coded in source; consider loading it from
    # configuration and rotating this one.
    headers = {
        'x-rapidapi-host': "systran-systran-platform-for-language-processing-v1.p.rapidapi.com",
        'x-rapidapi-key': "b2448ece4bmsh2e999bf748c5de3p1b0cb4jsn39b94190ee41"
    }
    translated_data = get_data(url, "GET", headers, querystring)

    try:
        translation = translated_data["outputs"][0]["output"]
    except (KeyError, IndexError, TypeError):
        # Only a response of unexpected shape triggers the fallback; a bare
        # `except:` would also swallow KeyboardInterrupt and real bugs.
        return data.replace("%20", " "), True
    return str(translation).lower(), False
def filter_data(data):
    """Extract the word-like fragments of ``data`` as one lower-case string.

    A fragment is a listed letter (ASCII or accented), followed by one
    non-digit character, followed by any run of word characters. All
    fragments found are joined with single spaces and lower-cased; the
    result is an empty string when nothing matches.
    """
    fragment_pattern = re.compile(
        r"[a-zA-Záàâãéèêíïóôõöúçñ][^0-9]\w*", flags=re.IGNORECASE)
    fragments = (found.group(0) for found in fragment_pattern.finditer(data))
    joined = " ".join(fragments)
    return joined.lower()
# Run the import job only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment