Skip to content

Instantly share code, notes, and snippets.

@nikkisharma536
Created August 18, 2019 04:50
Show Gist options
  • Save nikkisharma536/5369a4c12f06dfb7d1b9bfbf6acfb78e to your computer and use it in GitHub Desktop.
Save nikkisharma536/5369a4c12f06dfb7d1b9bfbf6acfb78e to your computer and use it in GitHub Desktop.
def read_data(
    path="/Users/nikki/work/code/knowledge_graph/data_extraction/data/survey_results_public.csv",
):
    """Load the survey results CSV into a DataFrame.

    Args:
        path: Location of the CSV file to read. Defaults to the original
            author's local copy of ``survey_results_public.csv`` so that
            existing zero-argument calls keep working; pass an explicit
            path to run anywhere else.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for ``path``.
    """
    # low_memory=False is forwarded to pandas unchanged from the original
    # call; it affects how pandas parses the file and infers dtypes.
    data = pd.read_csv(path, low_memory=False)
    print("Column name of data : ", data.columns)
    return data
def process_user_data(data):
    """Insert survey respondents into the graph as ``Person`` nodes.

    Selects the respondent-profile columns from ``data``, drops every row
    that has a missing value in any of them, converts the remainder to a
    list of dictionaries and passes it to ``run_neo_query`` together with
    a ``MERGE`` query keyed on the respondent id.

    Args:
        data: DataFrame containing at least the columns ``Respondent``,
            ``Hobby``, ``OpenSource``, ``Student``, ``Employment``,
            ``CompanySize`` and ``YearsCoding``.
    """
    columns = [
        'Respondent',
        'Hobby',
        'OpenSource',
        'Student',
        'Employment',
        'CompanySize',
        'YearsCoding',
    ]
    user_data = data[columns].dropna()

    # The UNWIND query expects a list of dictionaries (one per row) for
    # bulk insertion. orient="records" builds exactly that, without
    # transposing the frame and keying rows by index label, which would
    # silently collapse rows that share a label.
    rows = user_data.to_dict(orient="records")

    # Report only the row count; printing every row floods stdout on a
    # full survey file.
    print("Prepared %s user rows for insertion" % len(rows))

    # `$rows` is the supported Cypher parameter syntax; the legacy `{rows}`
    # form was removed in Neo4j 4.0.
    # The properties are set only when the node is first created, so an
    # existing Person with the same uid is left untouched.
    query = """
        UNWIND $rows AS row
        MERGE (person:Person {uid:row.Respondent})
        ON CREATE SET
            person.codes_as_hobby = row.Hobby,
            person.contributes_to_open_source = row.OpenSource,
            person.is_student = row.Student,
            person.employment_status = row.Employment,
            person.company_size = row.CompanySize,
            person.total_years_of_coding_experience = row.YearsCoding
    """
    run_neo_query(rows, query)
def run_neo_query(data, query):
    """Execute ``query`` once per batch of ``data``.

    ``data`` is split with ``get_batches`` (default batch size) and each
    batch is passed to the query as the ``rows`` parameter. A progress
    line is printed before every batch is sent.

    Relies on a module-level ``graph`` object defined outside this
    function (presumably a Neo4j client such as a py2neo ``Graph`` --
    confirm against the rest of the file).

    Args:
        data: Sequence of row dictionaries to insert.
        query: Cypher query text that consumes a ``rows`` parameter.
    """
    for offset, chunk in get_batches(data):
        # `offset` is the position of the chunk's first row within `data`.
        progress = '[Batch: %s] Will add %s node to Graph' % (offset, len(chunk))
        print(progress)
        graph.run(query, rows=chunk)
def get_batches(lst, batch_size=100):
    """Split ``lst`` into consecutive slices of at most ``batch_size`` items.

    Args:
        lst: Sequence supporting ``len()`` and slicing.
        batch_size: Maximum number of items per batch; must be positive.

    Returns:
        A list of ``(start, batch)`` tuples in order, where ``start`` is
        the offset of the batch's first item within ``lst`` (0,
        ``batch_size``, ``2 * batch_size``, ...) and ``batch`` is the
        corresponding slice. An empty ``lst`` yields an empty list.

    Raises:
        ValueError: If ``batch_size`` is zero or negative.
    """
    # A negative step would make range() empty and silently drop every
    # item; zero would fail with an unrelated-looking range() error.
    if batch_size <= 0:
        raise ValueError(
            "batch_size must be a positive integer, got %r" % (batch_size,)
        )
    return [
        (start, lst[start:start + batch_size])
        for start in range(0, len(lst), batch_size)
    ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment