This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bar chart of the number of rows in `df` for each value of `diagnosis`.
# The two x-axis levels are relabelled for display, the grey theme is applied
# with a base font size of 19, and the axis text/title sizes are then
# overridden explicitly (12 pt, bold titles).
ggplot(data = df, mapping = aes(x = diagnosis)) +
  geom_bar(stat = "count", fill = "steelblue", width = 0.6) +
  scale_x_discrete(labels = c("Benign", "Malign")) +
  labs(title = "Proportion of diagnosis") +
  theme_gray(base_size = 19) +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 12, face = "bold")
  )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# FORESTS --------------------------
# Fit a random forest (250 trees, mtry = 8) on the training set, using every
# other column of mydata.train as a predictor of `diagnosis`.
rf <- randomForest(diagnosis ~ ., data = mydata.train, ntree = 250, mtry = 8)
predrf <- predict(rf, newdata = mydata.valid)
# Accuracy = share of validation rows whose prediction equals the recorded diagnosis.
forest_accuracy <- mean(predrf == mydata.valid$diagnosis)

# SVM ------------------------------
# Fit a degree-3 polynomial-kernel SVM with cost 5 on the same training set.
mysvm <- svm(
  diagnosis ~ .,
  data = mydata.train,
  kernel = "polynomial",
  cost = 5,
  degree = 3
)
pred_svm_optimal <- predict(mysvm, mydata.valid)
# Same accuracy measure as for the forest, computed on the validation set.
accuracy_svm <- mean(pred_svm_optimal == mydata.valid$diagnosis)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
result_matrix = matrix(nrow = 200, ncol = 3) | |
for (i in 1:200){ | |
set.seed(i) | |
n=nrow(df) | |
size.train=floor(n*0.50) | |
size.valid=floor(n*0.50) | |
id.train=sample(1:n,size.train,replace=FALSE) | |
id.valid=sample(setdiff(1:n,id.train),size.valid,replace=FALSE) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import requests | |
import pandas as pd | |
import numpy as np | |
from time import sleep | |
from random import randint | |
########################### | |
# GET STARTING DATAFRAME | |
########################### |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Clean size: keep the first run of digits found in each raw string and
# convert it to a number. A raw string (r'...') is used for the pattern so
# that "\d" is not treated as an (invalid) Python string escape.
df['size'] = pd.to_numeric(df['size'].str.extract(r'(\d+)', expand=False))
# NOTE(review): the reason for the constant 0.5 offset is not visible in this
# snippet -- confirm with the author before changing it.
df['size'] += 0.5

# Clean location: same digit extraction for the distance to the centre.
df['distance_center'] = pd.to_numeric(
    df['distance_center'].str.extract(r'(\d+)', expand=False)
)
# Missing values (e.g. raw text that contained no digits) are replaced with 1.
df['distance_center'] = df['distance_center'].fillna(1)
#Clean price | |
new=[] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import libraries | |
from bs4 import BeautifulSoup | |
import numpy as np | |
from time import sleep | |
from random import randint | |
from selenium import webdriver | |
########################### | |
# Loop of all the pages | |
########################### |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################## | |
#First loop: getting the URLs | |
############################## | |
pages = np.arange(1, 3, 1) | |
url_collected=[] | |
for page in pages: | |
page="https://www.hostelworld.com/s?q=Barcelona,%20Catalonia,%20Spain&country=Spain&city=Barcelona&type=city&id=83&from=2020-07-03&to=2020-07-05&guests=1&page=" + str(page) | |
driver = webdriver.Chrome() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data=[] | |
for i in range(0,10): | |
url = final_list[i] | |
driver2 = webdriver.Chrome() | |
driver2.get(url) | |
sleep(randint(10,20)) | |
soup = BeautifulSoup(driver2.page_source, 'html.parser') | |
my_table2 = soup.find_all(class_=['title-2', 'rating-score body-3']) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Assemble the ratings table from the flat `data` sequence, which is read with
# a stride of 11: the value at offset k of every group of 11 goes into the
# k-th column listed below.
df = pd.DataFrame()
rating_columns = [
    'hostel',
    'security',
    'location',
    'staff',
    'fun',
    'cleanliness',
    'facilities',
    'value',
]
for offset, column in enumerate(rating_columns):
    df[column] = data[offset::11]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import dash | |
import dash_core_components as dcc | |
import dash_html_components as html | |
from dash.dependencies import Input, Output | |
###################
#APPLICATION DASH
###################
# Pass the module name explicitly: Dash uses it to locate the assets folder,
# so the app also resolves its assets when this module is imported (e.g. by a
# WSGI server) instead of being run directly as a script.
app = dash.Dash(__name__)
OlderNewer