This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Training loop: resume from last_index, open each study link in the browser,
# and record per-link metadata while the user studies it.
# NOTE(review): df, last_index, webbrowser, time and get_date are defined
# elsewhere in the file — confirm they are in scope at this point.
for i, url in enumerate(df["links"][last_index:], start=last_index):
    # start=last_index keeps i aligned with the DataFrame row being shown;
    # the original used the slice-relative index (0, 1, ...), which wrote the
    # date/attention values onto the first rows of the frame instead.
    webbrowser.open(url)  # open the link in the default browser
    start_session = time.time()  # session start timestamp
    date = get_date()
    # .loc avoids pandas chained assignment, which may silently write to a copy
    df.loc[i, "date"] = date
    att_level = float(input("Input the attention level for this input (0-10):"))
    df.loc[i, "attention_level"] = att_level
    next_inp = input("Press enter to go to next link or q to quit training for now")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def set_up_dataset(csv_file="dataset.csv",text_file='dataset.txt'):
    """
    Creates a Dataframe from a text file with urls.

    The columns are:
        attention_level -> input a number representing how much attention the user will give to that source
        session_time -> How long it took to finish studying that source
        date -> The date of this session
        session_score -> The subjective score the user gives to its own performance on that content.

    NOTE(review): only the start of this function is visible in this chunk;
    the rest of the body is truncated by the page scrape.
    """
    # Each line of text_file is read as one space-separated record (no header row).
    # NOTE(review): csv_file is unused in the visible portion — presumably the
    # assembled DataFrame is written to it further down; confirm in the full file.
    df = pd.read_csv(text_file, sep=" ", header=None)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Training loop: resume from last_index, open each study link and record metadata.
for i, url in enumerate(df["links"][last_index:], start=last_index):  # Looping over the links
    # start=last_index keeps i aligned with the DataFrame row being shown;
    # the slice-relative index used before clobbered rows 0..n of the frame.
    webbrowser.open(url)  # Opens each link on the default browser
    start_session = time.time()
    date = get_date()
    df.loc[i, "date"] = date  # Stores the date (.loc avoids chained-assignment writing to a copy)
    att_level = float(input("Input the attention level for this input (0-10):"))  # Requests a score of attention
    df.loc[i, "attention_level"] = att_level
    next_inp = input("Press enter to go to next link or q to quit training for now")  # To go to the next link
    if next_inp == "q":
        df["last_index"] = i  # remember where to resume the next session
        # NOTE(review): a `break` presumably follows here in the full file — confirm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# source for the stopwatch: https://stackoverflow.com/questions/31995804/stopwatch-on-tkinter-creating-a-class | |
import tkinter as tk | |
from tkinter import messagebox | |
import numpy as np | |
import os | |
import natsort | |
import shutil | |
import random | |
import sys | |
import time |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk import word_tokenize

# Load the article so the cells below can pick sentences out of it.
# readlines() gives one string per line, trailing newline included.
with open("article.txt", "r", encoding="utf8") as intro:
    text = intro.readlines()

# Take the fourth line of the article and show nltk's word-level tokenization.
sentence = text[3]
print(word_tokenize(sentence))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk.stem import PorterStemmer, LancasterStemmer

# Instantiate both nltk stemmers so their output can be compared side by side.
porter = PorterStemmer()
lancaster = LancasterStemmer()

# Selecting a sentence (text is the article loaded earlier in the file)
sentence = text[3]

# Header for the comparison table.
print("Sentence:")
print("'" + sentence + "'")
# Fixed typo in the printed column label: "Ported Stemmer" -> "Porter Stemmer".
print("{0:20} {1:20}".format("Porter Stemmer", "Lancaster Stemmer"))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk
from nltk.stem import WordNetLemmatizer

# One shared WordNet-based lemmatizer instance for the demo below.
wordnet_lemmatizer = WordNetLemmatizer()

# Sample words to lemmatize, grouped by part of speech.
adjectives = ["better", "brighter", "smarter"]
verbs = ["thinking", "wondering", "reflecting"]

# Table header for the verb section.
print("Verbs")
print("{0:20}{1:20}".format("Word","Lemma"))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk
from nltk.stem import WordNetLemmatizer

# Build the lemmatizer once; it is reused for every word below.
wordnet_lemmatizer = WordNetLemmatizer()

# Demo inputs: inflected verbs and comparative adjectives.
adjectives = ["better", "brighter", "smarter"]
verbs = ["thinking", "wondering", "reflecting"]
# Print them in parallel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import spacy

# Small English pipeline; used only for the dependency visualisation below.
sp = spacy.load("en_core_web_sm")

# Tokenize one sentence of the article and POS-tag its tokens.
sentence = text[-7]
sent_tokens = word_tokenize(sentence)
# Bug fix: tag the tokens of the selected sentence, not the raw list of
# article lines — nltk.pos_tag expects a list of tokens, and `text` is a
# list of whole lines, so each line was treated as a single "word".
pos_tagged = nltk.pos_tag(sent_tokens)

# Render the dependency parse of a short example sentence inline (Jupyter).
sen = sp(u"What matters most is using it")
spacy.displacy.render(sen, style='dep', jupyter=True, options={'distance': 100})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import spacy

# spaCy English model (loaded for the chunking steps that follow).
sp = spacy.load("en_core_web_sm")

sentence = "Only accept truths that are absolutely evident"
# Split the sentence into word tokens, then POS-tag each token.
sentence_token = word_tokenize(sentence)
sentence_tagged = nltk.pos_tag(sentence_token)
print(sentence_tagged)
# Establishing a pattern to give to the chunker
OlderNewer