Skip to content

Instantly share code, notes, and snippets.

@EnkrateiaLucca
Created July 8, 2020 16:24
Show Gist options
  • Save EnkrateiaLucca/26a606152c41c71db22e6286f3b2cf1b to your computer and use it in GitHub Desktop.
Save EnkrateiaLucca/26a606152c41c71db22e6286f3b2cf1b to your computer and use it in GitHub Desktop.
Setting up the dataset
def set_up_dataset(csv_file="dataset.csv", text_file='dataset.txt'):
    """
    Create the study dataset from a text file of urls and save it as a csv.

    The text file is read with one link per line (a single, space-free
    field). The resulting DataFrame is written to ``csv_file`` and the
    source ``text_file`` is then deleted.

    The columns are:
    links -> the urls read from the text file
    attention_level -> input a number representing how much attention the user will give to that source
    session_time -> How long it took to finish studying that source
    date -> The date of this session
    session_score -> The subjective score the user gives to its own performance on that content.
    last_index -> initialised to 0 on every row

    Args:
        csv_file: Path of the csv file to write.
        text_file: Path of the text file holding the links; removed after
            the csv has been written.

    Returns:
        The newly created DataFrame (the session columns are filled with None).

    Raises:
        ValueError: If the text file does not parse into exactly one column.
    """
    df = pd.read_csv(text_file, sep=" ", header=None)

    # Fail with an actionable message instead of pandas' generic
    # "Length mismatch" error when a line contains a space.
    if df.shape[1] != 1:
        raise ValueError(
            f"{text_file} must contain exactly one link per line with no "
            f"spaces; found {df.shape[1]} space-separated fields"
        )
    df.columns = ["links"]

    # Placeholder columns to be filled in as study sessions are recorded.
    cols = ["attention_level", "session_time", "date", "session_score"]
    for col in cols:
        df[col] = None
    df["last_index"] = 0

    # NOTE: the index is written too (pandas default), so reading this csv
    # back with pd.read_csv yields an extra unnamed leading column.
    df.to_csv(csv_file)

    # The text file is only removed once the csv has been written.
    os.remove(text_file)
    return df
# Script entry: build the dataset on first run, make sure the sessions
# directory exists, then load the csv and read the stored position.
text_file = "./dataset.txt"  # This is the .txt file with the relevant links (should be adapted to each user).
if os.path.isfile(text_file):  # Checking if the txt file exists
    # If there is a text file, set up the new dataset as a .csv file
    # (this also deletes the text file).
    df = set_up_dataset(text_file=text_file)

# Directory to store the data for each session. Built with os.path.join so
# it is a real subdirectory on every platform, not only on Windows.
sessions_dir = os.path.join(".", "sessions")
os.makedirs(sessions_dir, exist_ok=True)

try:
    # Always reload from disk so df has the same shape whether or not the
    # dataset was created during this run.
    df = pd.read_csv("dataset.csv")
except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
    print("Make sure the csv is in the right folder")
    # Re-raise: without a loaded csv the lookup below cannot succeed, and
    # the original error is more useful than a later NameError.
    raise

# Value of the "last_index" column in the first row of the csv.
last_index = df["last_index"][0]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment