Skip to content

Instantly share code, notes, and snippets.

View francoisstamant's full-sized avatar

François St-Amant francoisstamant

View GitHub Profile
# Bar chart with one bar per diagnosis class; bar height is the row count
# of df in that class (stat = "count").
ggplot(df, aes(x = diagnosis)) +
  geom_bar(stat = "count", fill = "steelblue", width = 0.6) +
  scale_x_discrete(labels = c("Benign", "Malign")) +
  labs(title = "Proportion of diagnosis") +
  theme_gray(base_size = 19) +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 12, face = "bold")
  )
# RANDOM FOREST --------------------
# Fit diagnosis against every other column of the training split,
# growing 250 trees with mtry = 8.
rf <- randomForest(
  diagnosis ~ .,
  data = mydata.train,
  ntree = 250,
  mtry = 8
)
# Predict the class of each validation row.
predrf <- predict(rf, newdata = mydata.valid)
# Accuracy: share of validation rows whose prediction equals the observed diagnosis.
forest_accuracy <- mean(predrf == mydata.valid$diagnosis)
# SUPPORT VECTOR MACHINE -----------
# Polynomial kernel of degree 3 with cost 5, trained on the training split.
mysvm <- svm(
  diagnosis ~ .,
  data = mydata.train,
  kernel = "polynomial",
  cost = 5,
  degree = 3
)
# Predict the class of each validation row.
pred_svm_optimal <- predict(mysvm, mydata.valid)
# Accuracy: share of validation rows whose prediction equals the observed diagnosis.
accuracy_svm <- mean(pred_svm_optimal == mydata.valid$diagnosis)
# Pre-allocate a 200 x 3 matrix (NA-filled, since no data is supplied).
# Presumably one row per repetition of the loop below; the code that fills
# it is not visible in this excerpt.
result_matrix = matrix(nrow = 200, ncol = 3)
# Repeat the train/validation split 200 times.
# NOTE: the loop body continues past the end of this excerpt.
for (i in 1:200){
# Seed the RNG with the iteration index so each repetition's split is reproducible.
set.seed(i)
n=nrow(df)
# Each split takes floor(n / 2) rows; when n is odd, one row is left unused.
size.train=floor(n*0.50)
size.valid=floor(n*0.50)
# Draw the training row ids without replacement from all rows.
id.train=sample(1:n,size.train,replace=FALSE)
# Draw the validation ids only from rows not picked for training,
# so the two sets are disjoint.
id.valid=sample(setdiff(1:n,id.train),size.valid,replace=FALSE)
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
from time import sleep
from random import randint
###########################
# GET STARTING DATAFRAME
###########################
# Clean size: keep the first run of digits from each raw value and convert it
# to a number (values with no digits become NaN).
# The patterns are raw strings so that "\d" reaches the regex engine as-is
# instead of being treated as an (invalid) string escape.
df['size'] = pd.to_numeric(df['size'].str.extract(r'(\d+)', expand=False))
# NOTE(review): the constant 0.5 offset presumably compensates for what the
# integer-only pattern drops — confirm the intent.
df['size'] += 0.5
# Clean location: same digit extraction for the distance to the centre.
df['distance_center'] = pd.to_numeric(
    df['distance_center'].str.extract(r'(\d+)', expand=False)
)
# Rows where no number could be extracted default to a distance of 1.
df['distance_center'] = df['distance_center'].fillna(1)
# Clean price
new = []
# import libraries
from bs4 import BeautifulSoup
import numpy as np
from time import sleep
from random import randint
from selenium import webdriver
###########################
# Loop over all the pages
###########################
##############################
#First loop: getting the URLs
##############################
# Result-page numbers to visit: np.arange(1, 3, 1) yields 1 and 2.
pages = np.arange(1, 3, 1)
url_collected = []
for page in pages:
    # `page` is rebound from the page number to the search URL for that page.
    page = "https://www.hostelworld.com/s?q=Barcelona,%20Catalonia,%20Spain&country=Spain&city=Barcelona&type=city&id=83&from=2020-07-03&to=2020-07-05&guests=1&page=" + str(page)
    # A new Chrome session is started for each results page.
    driver = webdriver.Chrome()
data = []
# Visit the first ten entries of final_list (defined outside this excerpt).
for i in range(0, 10):
    url = final_list[i]
    # A new Chrome session is opened for every URL.
    driver2 = webdriver.Chrome()
    driver2.get(url)
    # Random pause of 10 to 20 seconds after requesting the page and before
    # reading its source.
    sleep(randint(10, 20))
    soup = BeautifulSoup(driver2.page_source, 'html.parser')
    # Collect the elements matched by either of the two class filters.
    my_table2 = soup.find_all(class_=['title-2', 'rating-score body-3'])
# Build the frame column by column from the flat `data` list: column k takes
# every 11th entry starting at offset k (presumably 11 scraped values per
# hostel, of which the first eight are mapped here).
df = pd.DataFrame().assign(**{
    field: data[offset::11]
    for offset, field in enumerate((
        'hostel',
        'security',
        'location',
        'staff',
        'fun',
        'cleanliness',
        'facilities',
        'value',
    ))
})
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
###################
#APPLICATION DASH
###################
# Instantiate the Dash application with default settings (no arguments passed).
app = dash.Dash()