Skip to content

Instantly share code, notes, and snippets.

@amirih
Created September 3, 2023 16:43
Show Gist options
  • Save amirih/8cc4c1d66040c5963c3f4eb3a489aff0 to your computer and use it in GitHub Desktop.
Save amirih/8cc4c1d66040c5963c3f4eb3a489aff0 to your computer and use it in GitHub Desktop.
Source code to be reviewed as part of the BMI 500 homework
# Originated in a private repository: code/geolife-train-test.py
# Author: Hossein Amiri (haenter)
# All rights reserved by the author
import pandas as pandas
import utils.utils as utils
import utils.files as files
import utils.geolife.geo_data as geo_data
# Build the GeoLife train/test files and inject "needle" agents into the
# test split.
#
# Steps, as visible in this script:
#   1. load 'origin-no-needle.tsv' and filter it using minimum_needle_records;
#   2. derive trainDataFrame (using train_percentage) and testDataFrame from
#      the filtered data;
#   3. pair each of the first number_of_needles agents with the agent that
#      follows it and pass those pairs to geo_data.add_needle together with
#      the test split;
#   4. save both splits and log the run parameters.

# --- run parameters ---------------------------------------------------------
number_of_needles = 20
train_percentage = 0.9
minimum_needle_records = 100

# --- load and filter --------------------------------------------------------
dataFrame = geo_data.get_dataFrame('origin-no-needle.tsv')
dataFrame = geo_data.get_filteredDataFrame(dataFrame, minimum_needle_records)

# NOTE(review): presumably a pandas Series keyed by agent id (it is used with
# .head() and .to_dict() below) -- confirm against geo_data.get_agentsRecord.
eligible_agents = geo_data.get_agentsRecord(dataFrame)
total_agents = len(eligible_agents)

# Pairing agent i with agent i+1 needs number_of_needles + 1 agents. With
# fewer, .head() would silently return a shorter selection, the two id lists
# below would end up with different lengths, and the output file names would
# advertise more needles than were actually built. Fail early instead.
if total_agents < number_of_needles + 1:
    raise ValueError(
        f'need at least {number_of_needles + 1} eligible agents to build '
        f'{number_of_needles} needles, found {total_agents}')

# --- train/test split -------------------------------------------------------
trainDataFrame = geo_data.get_trainDataFrame(dataFrame, train_percentage)
testDataFrame = geo_data.get_testDataFrame(dataFrame, trainDataFrame)

# --- needle selection -------------------------------------------------------
selected_agents = eligible_agents.head(number_of_needles + 1).to_dict()
agent_ids = list(selected_agents)
# The two lists are the same id sequence shifted by one position:
# replaced_agent_ids[i] is paired with needle_ids[i] == agent_ids[i + 1].
replaced_agent_ids = agent_ids[:number_of_needles]
needle_ids = agent_ids[1:number_of_needles + 1]
swap_agents = (replaced_agent_ids, needle_ids)
testDataFrame = geo_data.add_needle(testDataFrame, swap_agents)

# --- output -----------------------------------------------------------------
# Shared tail of both output file names; it is also echoed in the log below.
file_name_suffix = (
    f'{number_of_needles}-needles-{total_agents}-agents-'
    f'{train_percentage}-normal-portion.tsv')
train_file_name = f'train-{file_name_suffix}'
test_file_name = f'test-{file_name_suffix}'
files.save_dataFrame(trainDataFrame, train_file_name)
files.save_dataFrame(testDataFrame, test_file_name)

# --- run log ----------------------------------------------------------------
files.log(f'number of needles: {number_of_needles}')
files.log(f'total agents: {total_agents}')
files.log(f'normal data for each needle: {train_percentage*100}%')
# First the symbolic pattern (a plain string, nothing to interpolate), then
# the same pattern filled in with this run's values.
files.log(
    'file name pattern: *-numberOfNeedles-needles-totalAgents-agents-trainPercentage-normal-portion.tsv')
files.log(f'file name pattern: *-{file_name_suffix}')
files.log(f'Needles: {needle_ids}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment