# gravityfargo/main.py

## main.py
# Import required modules
import os
import random

# Output locations of the generated data files. All three live in the
# sub-directory of the shared storage root that is named by `ratio`.
ratio = "50-50"
trainFile = f"/mnt/Storage/VMShare/{ratio}/train.dat"
testFile = f"/mnt/Storage/VMShare/{ratio}/test.dat"
firstLastFile = f"/mnt/Storage/VMShare/{ratio}/firstlast.dat"

def main():
    """Interactively split a whitespace-delimited data file into train/test sets.

    Steps:
      1. List the working directory and ask which file to load.
      2. Optionally strip the leading and trailing 5% of rows
         (``firstLastRemove`` writes them to ``firstLastFile``).
      3. Shuffle the remaining rows and split them into ``trainFile`` and
         ``testFile`` (``splitData``).
      4. Re-read the generated files and compare the total row count with
         the original one.

    The function always ends the process by raising ``SystemExit`` after the
    report has been printed, so a single pass is performed.

    Note: the writers skip empty rows, so blank lines in the input are
    reported as lost by the final check.
    """
    # Display files in the current working directory
    print("Files in Working Directory:")
    print(listFiles())

    # Keep asking until the user names an entry that actually exists here.
    selectedFile = ""
    while selectedFile not in listFiles():
        selectedFile = input("Select the file containing the training data: ")

    # Load the file the user picked (not a hard-coded name).
    contents = openFile(selectedFile)
    lenOG = len(contents)

    # Column count is taken from the second row when there is one (as before),
    # otherwise from the first row; an empty file has no columns.
    if lenOG > 1:
        columnCount = len(contents[1])
    elif lenOG == 1:
        columnCount = len(contents[0])
    else:
        columnCount = 0
    print("The file contains " + str(lenOG) + " rows and " + str(columnCount) + " columns.")

    # Ask the user if they want to remove the leading and trailing 5% of data
    presplit = ""
    while presplit not in ['y', 'n']:
        presplit = input("Would you like to remove the leading and trailing 5% of data? (y/n)\n")
    trimmed = presplit == 'y'
    if trimmed:
        contents = firstLastRemove(contents)
    # On 'n' the original data is used unchanged.

    # Ask for the training share until it is a number between 0 and 100.
    percentage = None
    while percentage is None:
        answer = input("How much of the data should be used for training? ex. 30 = 30%\n")
        try:
            share = float(answer)
        except ValueError:
            continue
        if 0 <= share <= 100:
            percentage = answer
    splitData(percentage, contents)

    # Sanity check: count the rows that ended up in each output file.
    # firstlast.dat is only (re)written when trimming was requested, so it
    # contributes nothing otherwise (it may be missing or stale on disk).
    lenFirstLast = len(openFile(firstLastFile)) if trimmed else 0
    lenTrain = len(openFile(trainFile))
    lenTest = len(openFile(testFile))
    lenTotal = lenFirstLast + lenTrain + lenTest

    # Display the number of rows in each file and the total
    print(str(lenOG) + " rows in the file og file.")
    print(str(lenFirstLast) + " rows in firstlast.dat")
    print(str(lenTrain) + " rows in train.dat")
    print(str(lenTest) + " rows in test.dat")
    print(str(lenTotal) + " rows total after processing")

    # Report whether the totals match, then terminate either way.
    if lenTotal == lenOG:
        print("No lines lost.")
    else:
        print("Lines were lost")
    raise SystemExit

def listFiles():
    """Return a new list with the names of all entries in the current directory."""
    return list(os.listdir('.'))

def openFile(filename):
    """Read a whitespace-delimited text file into a list of rows.

    Args:
        filename: Path of the file to read.

    Returns:
        A list with one entry per line of the file. Each entry is the list
        of whitespace-separated tokens on that line; a blank line yields an
        empty list.

    Raises:
        SystemExit: After printing "Invalid File." when the file cannot be
            opened, read or decoded.
    """
    try:
        # The context manager closes the handle even if reading fails.
        with open(filename, 'r') as handle:
            return [line.split() for line in handle]
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: undecodable content
        # (UnicodeDecodeError) or an invalid path string.
        print("Invalid File.")
        raise SystemExit

def firstLastRemove(contents, fraction=0.05, outputPath=None):
    """Strip the leading and trailing rows from the data and save them to a file.

    Args:
        contents: List of rows (each row a list of values). Not modified.
        fraction: Share of the rows to remove from EACH end; defaults to
            0.05 (5%). Must lie between 0 and 0.5.
        outputPath: File that receives the removed rows, one space-separated
            row per line (empty rows are skipped). Defaults to the
            module-level ``firstLastFile``.

    Returns:
        A new list holding the rows that remain after trimming.

    Raises:
        ValueError: If ``fraction`` is outside the range 0..0.5.
    """
    if not 0 <= fraction <= 0.5:
        raise ValueError("fraction must be between 0 and 0.5")
    if outputPath is None:
        outputPath = firstLastFile

    totallines = len(contents)
    # Never take more than half from each end, so the two slices cannot
    # overlap when rounding goes up (e.g. fraction=0.5 with 3 rows).
    numberofdata = min(round(fraction * totallines), totallines // 2)
    print(str(numberofdata) + " rows will be removed from the top and bottom.")

    tailStart = totallines - numberofdata
    firstLastData = contents[:numberofdata] + contents[tailStart:]
    remaining = contents[numberofdata:tailStart]

    # The context manager guarantees the file is closed even if a write fails.
    with open(outputPath, "w") as firstLast:
        for row in firstLastData:
            if len(row) > 0:
                firstLast.write(' '.join(str(elem) for elem in row) + '\n')

    return remaining

def _writeRows(path, rows):
    """Write every non-empty row to *path* as one space-separated line."""
    with open(path, "w") as handle:
        for row in rows:
            if len(row) > 0:
                handle.write(' '.join(str(elem) for elem in row) + '\n')


def splitData(percentage, contents, trainPath=None, testPath=None):
    """Shuffle the rows and split them into a training and a testing file.

    Args:
        percentage: Share of the rows that goes into the training file, as a
            number or numeric string (``30`` means 30%). Values outside
            0..100 are clamped to that range.
        contents: List of rows (each row a list of values). A shuffled copy
            is used, so the caller's list keeps its order.
        trainPath: Destination of the training rows. Defaults to the
            module-level ``trainFile``.
        testPath: Destination of the testing rows. Defaults to the
            module-level ``testFile``.

    Raises:
        ValueError: If ``percentage`` cannot be converted to a float.
    """
    if trainPath is None:
        trainPath = trainFile
    if testPath is None:
        testPath = testFile

    # Clamp so that a negative value cannot turn into a negative slice index.
    share = min(max(float(percentage), 0.0), 100.0)

    # Shuffle a copy rather than the argument itself.
    shuffled = list(contents)
    random.shuffle(shuffled)

    numberofdata = round((len(shuffled) / 100) * share)
    _writeRows(trainPath, shuffled[:numberofdata])
    _writeRows(testPath, shuffled[numberofdata:])
    print("The remaining data has been shuffled and split into train.dat and test.dat\n\n")

# Run the interactive tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
	# NOTE: a second, verbatim copy of the module above used to follow here.
	# Its indentation had been flattened to a single tab per line, which made
	# the file fail to compile, and it only redefined the names already
	# defined above, so the duplicate has been removed.