Skip to content

Instantly share code, notes, and snippets.

@cevaris
Created August 27, 2021 16:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cevaris/5b228bd4997c631d981ffd54646a0b00 to your computer and use it in GitHub Desktop.
Save cevaris/5b228bd4997c631d981ffd54646a0b00 to your computer and use it in GitHub Desktop.
DRAFT: B2B records dump w/ survey transformation
#!/usr/bin/env python3
import json
import os
import sys
class UserRecord:
    """Plain record holding one user's identity fields and survey answers.

    The class only declares its fields; no defaults are assigned, so an
    attribute does not exist on an instance until it has been set.
    """

    user_id: str
    first_name: str
    last_name: str
    email: str
    # Mapping of answer key -> answer value for this user.
    answers: dict
def main():
    """Print one assembled row per user from a survey definition and an answers dump.

    ``sys.argv[1]`` is read as a JSON survey definition and ``sys.argv[2]`` as
    a tab-separated dump whose first line is a header. Every data line must
    carry, in order: user id, first name, last name, email and a JSON object
    of answers. For each user the id is printed, followed by the row made of
    the four identity fields and the answers to question1..question19.

    Raises:
        ValueError: If a data line has fewer than five tab-separated columns,
            or its answers column is not valid JSON.
        KeyError: If question16 or question17 is missing from the elements
            of the first survey page.
    """
    question_path = sys.argv[1]
    questions = json.loads(readLines(question_path, skip_header=False))
    questions_title_dict = {
        element['name']: element['title']
        for element in questions['pages'][0]['elements']
    }
    # hack for including "hasOther" comment values
    questions_title_dict['question16-Comment'] = F"Comment: {questions_title_dict['question16']}"
    questions_title_dict['question17-Comment'] = F"Comment: {questions_title_dict['question17']}"
    # NOTE(review): the title mapping is built but not consumed yet; presumably
    # it is meant to become the header row of the export -- confirm.

    answers_path = sys.argv[2]
    answers_tsv = readLines(answers_path, skip_header=True)

    user_records: list[UserRecord] = []
    for line in answers_tsv.split("\n"):
        if not line.strip():  # ignore any empty lines
            continue
        # Trim each field instead of the whole line: stripping the line would
        # also drop a leading tab and shift every column when user_id is empty.
        columns = [column.strip() for column in line.split("\t")]
        if len(columns) < 5:
            raise ValueError(
                "expected 5 tab-separated columns "
                "(user_id, first_name, last_name, email, answers) "
                F"but found {len(columns)}: {line!r}")
        user_record = UserRecord()
        user_record.user_id = columns[0]
        user_record.first_name = columns[1]
        user_record.last_name = columns[2]
        user_record.email = columns[3]
        user_record.answers = json.loads(columns[4])
        user_records.append(user_record)

    # NOTE(review): rows are collected here but only printed; writing them out
    # as CSV looks like the intended next step -- confirm.
    csv_user_rows = []
    for user_record in user_records:
        print(F"{user_record.user_id}\n")
        csv_user_row = [
            user_record.user_id,
            user_record.first_name,
            user_record.last_name,
            user_record.email,
        ]
        # Covers question1..question19.
        # NOTE(review): confirm the survey has exactly 19 questions.
        for question_i in range(1, 20):
            question_name = F"question{question_i}"
            # A non-empty "-Comment" value takes precedence over the plain
            # answer; a missing or empty answer becomes ''.
            answer = (
                user_record.answers.get(F"{question_name}-Comment")
                or user_record.answers.get(question_name)
                or ''
            )
            csv_user_row.append(answer)
        csv_user_rows.append(csv_user_row)
        print(csv_user_row, "\n")
def readLines(path: str, skip_header: bool) -> str:
    """Return the text of the file at *path*, optionally without its first line.

    Args:
        path: Location of the text file to read.
        skip_header: When true, the first line is consumed and discarded
            before the rest of the file is returned.

    Returns:
        The remaining content as one string, with line endings as read.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(path) as reader:
        if skip_header:
            reader.readline()
        # read() returns the text verbatim. Joining readlines() with "\n"
        # would double every newline, since each line keeps its terminator.
        return reader.read()
if __name__ == "__main__":
    # Script entry point: the program name plus exactly two arguments
    # (survey definition, answers dump) must be present.
    argument_count = len(sys.argv)
    if argument_count != 3:
        script_name = os.path.basename(__file__)
        sys.exit(F"invalid usage:\n ./{script_name} <question.json> <answers.tsv>")
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment