Created
July 8, 2020 16:24
-
-
Save EnkrateiaLucca/26a606152c41c71db22e6286f3b2cf1b to your computer and use it in GitHub Desktop.
Setting up the dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def set_up_dataset(csv_file="dataset.csv", text_file='dataset.txt'):
    """
    Creates a Dataframe from a text file with urls.

    Every non-blank line of ``text_file`` becomes one row. The resulting frame
    is written to ``csv_file`` and ``text_file`` is then deleted.

    The columns are:
    links -> the url read from the text file
    attention_level -> input a number representing how much attention the user will give to that source
    session_time -> How long it took to finish studying that source
    date -> The date of this session
    session_score -> The subjective score the user gives to its own performance on that content.
    last_index -> initialised to 0 for every row

    Args:
        csv_file: Path of the csv file to write.
        text_file: Path of the text file holding one url per line.

    Returns:
        The newly created DataFrame.

    Raises:
        pd.errors.EmptyDataError: If ``text_file`` contains no urls.
        OSError: If ``text_file`` cannot be read or removed.
    """
    # Read the file line by line instead of parsing it as space-separated csv:
    # a stray leading/trailing/embedded space would otherwise produce extra
    # fields and make the single-column assignment fail.
    with open(text_file, encoding="utf-8-sig") as handle:
        links = [line.strip() for line in handle if line.strip()]
    if not links:
        # Same exception type pandas raises when asked to parse an empty file.
        raise pd.errors.EmptyDataError(
            "No links found in {}".format(text_file)
        )

    df = pd.DataFrame({"links": links})
    # Per-session fields start out empty and are filled in later.
    for col in ("attention_level", "session_time", "date", "session_score"):
        df[col] = None
    df["last_index"] = 0

    # NOTE: the row index is written as well, so reading the csv back without
    # index_col yields an additional unnamed first column.
    df.to_csv(csv_file)
    # The text file is only removed once the csv has been written.
    os.remove(text_file)
    return df
# This is the .txt file with the relevant links (should be adapted to each user).
text_file = "./dataset.txt"

# If the text file exists, build the new dataset as a .csv file from it
# (set_up_dataset deletes the text file once the csv has been written).
if os.path.isfile(text_file):
    df = set_up_dataset()

# Directory that stores the data for each session. Built with os.path.join so
# the separator is right on every platform; a hard-coded ".\\sessions" would
# create a directory literally named ".\sessions" on POSIX systems.
sessions_dir = os.path.join(".", "sessions")
os.makedirs(sessions_dir, exist_ok=True)

# Always reload from disk so df has the same layout whether the dataset was
# just created or already existed.
try:
    df = pd.read_csv("dataset.csv")
except FileNotFoundError:
    # Without the csv there is nothing to continue with: show the hint and
    # stop here instead of failing later with an unrelated NameError on df.
    print("Make sure the csv is in the right folder")
    raise

# Value of the "last_index" column in the first row of the dataset.
last_index = df["last_index"][0]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment