This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a surname/education table from reviews_info, dropping rows that have
# no surname. Both verbs are namespaced so that filter() can never resolve to
# stats::filter() when dplyr is not attached.
education <- reviews_info %>%
  dplyr::select(surname, education) %>%
  dplyr::filter(!is.na(surname))

# Strip the dots from abbreviations: "им.", initials such as "А.В.", "г.",
# "проф." and "ак.".
# The cleaned text goes into a separate column, education1, because the
# original education column is used as the join key later on.
education$education1 <- gsub(
  "\\b(им|[А-Я])\\.", "\\1 ", education$education
)

# Remove the abbreviation "г." only when the "г" is not the last letter of a
# longer word. Without the lookbehind the pattern also matched a word ending
# in "г" followed by a full stop (e.g. "Петербург." became "Петербур").
education$education1 <- gsub(
  "(?<![А-Яа-яЁё])г\\.", "", education$education1,
  perl = TRUE
)

# "проф." and "ак.": keep the abbreviation and replace the dot with a space.
education$education1 <- gsub(
  "\\b(проф|ак)\\.", "\\1 ", education$education1
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Tidy everything up.
# plyr is attached BEFORE dplyr on purpose: both packages export summarise(),
# mutate(), arrange(), rename() and count(), and whichever is attached last
# wins. Attaching plyr last silently replaces the dplyr versions.
# NOTE(review): if code further down relies on the plyr variants of these
# verbs, call them explicitly as plyr::fun().
library(stringr)
library(plyr)
library(dplyr)
library(tidyr)
library(genderizeR)
# Data frame holding all names; the single column is named
# `reviews_info$surname` by as.data.frame().
all_names <- as.data.frame(reviews_info$surname)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# First, collect every link that data will be downloaded from.
# Start with a data frame that holds only the review links; as.data.frame()
# names its single column `reviews_info$r_link`.
comment_links <- as.data.frame(reviews_info$r_link)
# Some links turned out to start with kamchatka.info or other regional hosts,
# so only links under the St Petersburg host are kept.
# startsWith() does a literal prefix comparison; the previous regular
# expression was unanchored and its dots matched any character, so the host
# could match anywhere inside a link. as.character() guards against the
# column having been read in as a factor.
comment_links$true <- startsWith(
  as.character(comment_links$`reviews_info$r_link`),
  "http://spb.repetitors.info/"
)
# `true` is a logical column; rows where it is FALSE or NA are dropped.
comment_links <- comment_links %>% dplyr::filter(true)
# The tutor's detail page has a line "показано [число] из" ("shown [number]
# of"); where that line was missing, the entry was removed later on.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(plotly) | |
library(rvest) | |
library(stringr) | |
# Build absolute URLs for every anchor matched by the CSS selector
# ".pnmst a" inside `node`.
#
# node: whatever html_nodes() accepts (a parsed page or a node set).
# Returns a character vector in which "http://spb.repetitors.info" is
# prepended to each href (hrefs are presumably site-relative paths), or
# character(0) when the selector matches nothing. The result is returned
# visibly.
xtract_links <- function(node) {
  anchors <- html_nodes(node, ".pnmst a")
  hrefs <- html_attr(anchors, "href")
  # paste()/paste0() recycle a zero-length argument to "", which would turn
  # "no links found" into one bogus link to the site root.
  if (length(hrefs) == 0L) {
    return(character(0))
  }
  paste0("http://spb.repetitors.info", hrefs)
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(plotly) | |
library(rvest) | |
library(stringr) | |
# Build absolute URLs for every anchor matched by the CSS selector
# ".pnmst a" inside `node`.
#
# node: whatever html_nodes() accepts (a parsed page or a node set).
# Returns a character vector in which "http://spb.repetitors.info" is
# prepended to each href (hrefs are presumably site-relative paths), or
# character(0) when the selector matches nothing. The result is returned
# visibly.
xtract_links <- function(node) {
  anchors <- html_nodes(node, ".pnmst a")
  hrefs <- html_attr(anchors, "href")
  # paste()/paste0() recycle a zero-length argument to "", which would turn
  # "no links found" into one bogus link to the site root.
  if (length(hrefs) == 0L) {
    return(character(0))
  }
  paste0("http://spb.repetitors.info", hrefs)
}