This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from getpass import getpass

import pandas as pd
import pyodbc

# Ask for the database credentials interactively.
user = input('Provide user: \n')
# getpass() reads the password without echoing it to the terminal,
# unlike input(), which would leave it visible on screen.
pwd = getpass('Provide password: \n')

# Open an ODBC connection to the sakila database on the local MySQL server.
# NOTE(review): the credentials are interpolated verbatim, so a user name or
# password containing ';' would corrupt the connection string -- confirm
# whether such values need to be supported.
mydb = pyodbc.connect(
    "DRIVER={MySQL ODBC 8.0 ANSI Driver}; SERVER=localhost; PORT=3306;"
    "DATABASE=sakila; UID=%s; PASSWORD=%s;" % (user, pwd)
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Loading readxl library | |
library(readxl) | |
clean_crime_data <- function(path) { | |
# Load the Data | |
crime_data <- read_xls(path) | |
# Assigning colnames | |
colnames(crime_data) <- crime_data[3,] | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import sklearn vectorizers and pandas | |
import pandas as pd | |
from sklearn.feature_extraction.text import ( | |
CountVectorizer, | |
TfidfVectorizer | |
) | |
# Defining our sentence examples | |
sentence_list = [ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk.tokenize import word_tokenize | |
from nltk.stem import PorterStemmer, SnowballStemmer, LancasterStemmer | |
# Instantiate one of each stemmer imported from nltk.stem above.
porter, lanc = PorterStemmer(), LancasterStemmer()
snowball = SnowballStemmer(language='english')

# Example sentence used as input text for the stemmers.
sentence_example = 'This is definitely a controversy as the attorney labeled the case "extremely controversial"'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import wikipedia | |
import pandas as pd | |
import numpy as np | |
import string | |
from nltk.tokenize import word_tokenize | |
from sklearn.metrics.pairwise import cosine_similarity | |
def retrieve_page(page_name: str) -> list: | |
''' | |
Retrieves page data from wikipedia |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Don't forget to download the train.csv file | |
# to make this gist work. | |
# Download it at: https://www.kaggle.com/c/titanic/data?select=train.csv | |
# You also need to install ROCR and rpart libraries | |
# Load the Titanic training set (train.csv in the working directory)
titanic <- read.csv(file = './train.csv')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Training a Random Forest in R - used in blog post: | |
# https://towardsdatascience.com/data-science-tutorials-training-a-random-forest-in-r-a883cc1bacd1 | |
library(dplyr) | |
library(randomForest) | |
library(ranger) | |
library(Metrics) | |
# Read the London bike data (london_merged.csv in the working directory)
london_bike <- read.csv(file = './london_merged.csv')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Training an XGBoost in R - used in blog post: | |
# https://towardsdatascience.com/data-science-tutorials-training-an-xgboost-using-r-cf3c00b1425 | |
library(dplyr) | |
library(xgboost) | |
library(Metrics) | |
library(ggplot2) | |
# Read the London bike data (london_merged.csv in the working directory)
london_bike <- read.csv(file = './london_merged.csv')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Getting Latitude and Longitude from Nominatim | |
from geopy.geocoders import Nominatim | |
from geopy.extra.rate_limiter import RateLimiter | |
geocoder = Nominatim(user_agent="FindAddress") | |
geocode = RateLimiter( | |
geocoder.geocode, | |
min_delay_seconds = 1, | |
return_value_on_exception = None |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Training a decision tree in R - used in blog post: | |
# https://medium.com/codex/data-science-tutorials-training-a-decision-tree-using-r-d6266936d86 | |
library(dplyr) | |
library(rpart) | |
library(rpart.plot) | |
library(caret) | |
library(Metrics) | |
library(ggplot2) |
OlderNewer