forked from carlamarcolin/lia2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWebScrapVivaReal-Modificado.R
75 lines (61 loc) · 2.16 KB
/
WebScrapVivaReal-Modificado.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# References:
# http://notesofdabbler.github.io/201408_hotelReview/scrapeTripAdvisor.html
# https://github.com/hadley/rvest/blob/master/demo/tripadvisor.R
# http://notesofdabbler.github.io/201408_hotelReview/scrapeTripAdvisor.html
library(rvest)
library(dplyr)
library(data.table)
library(readxl)
# Search-results URL used as the first page. The part after "#onde" is reused
# when the links for the following pages are built.
urllinkmain <- "https://www.vivareal.com.br/aluguel/minas-gerais/uberlandia/bairros/santa-monica/#onde=Brasil,Minas%20Gerais,Uberl%C3%A2ndia,Bairros,Santa%20M%C3%B4nica,,,,BR%3EMinas%20Gerais%3ENULL%3EUberlandia%3EBarrios%3ESanta%20Monica,,,"

# Download the first results page and read the total-results counter from it.
openCon <- read_html(urllinkmain)
total_busca <- openCon %>%
  html_node("[class='results-summary__count js-total-records']") %>%
  html_text()

# Keep only the digits so thousands separators (e.g. "1.234") and any stray
# whitespace do not break the numeric conversion.
total_busca <- as.numeric(gsub("[^0-9]", "", total_busca))
if (is.na(total_busca)) {
  # Fail here with a clear message instead of later with an opaque NA error.
  stop(
    "Could not read the total number of listings from the results page.",
    call. = FALSE
  )
}

# Number of result pages. ceiling() (not trunc()) so that a final, partially
# filled page is still counted.
# NOTE(review): 36 is presumably the number of listings per page - confirm
# against the site.
pag <- ceiling(total_busca / 36)
# Build the list of result-page URLs: element 1 is the main URL, elements
# 2..pag are the main URL with "?pagina=<n>" inserted before the "#onde"
# fragment.
# The split is loop-invariant, so it is done once; fixed = TRUE treats
# "#onde" as a literal string rather than a regular expression.
url_parts <- strsplit(urllinkmain, "#onde", fixed = TRUE)[[1]]

if (!is.na(pag) && pag >= 2) {
  # Only generate extra pages when they exist: 2:pag would count DOWN
  # (2, 1, 0) for pag < 2 and create links to pages that are not there.
  extra_links <- paste0(
    url_parts[1], "?pagina=", 2:pag, "#onde", url_parts[2]
  )
} else {
  extra_links <- character(0)
}

DFLink <- c(urllinkmain, extra_links)
# Download every results page in DFLink and collect the text of each
# property card into DFtextos, a one-column (name.f) character data frame.
#
# Texts are gathered per page and combined once at the end; growing a data
# frame with rbind() inside a loop copies it on every iteration. A page with
# no cards contributes nothing (1:length(x) on an empty vector would have
# iterated over 1 and 0 and appended a spurious NA row).
card_texts <- lapply(DFLink, function(page_url) {
  page <- read_html(page_url)
  cards <- html_nodes(page, "[class='property-card__content']")
  html_text(cards)
})

# Row 1 is kept as an NA placeholder: the parsing step that follows starts
# reading at row 2.
DFtextos <- data.frame(
  name.f = c(NA_character_, unlist(card_texts, use.names = FALSE)),
  stringsAsFactors = FALSE
)
# Parse each listing text into area (metros2), number of rooms (quartos) and
# rent (aluguel), then write the result to DFunico.csv.
#
# Row 1 of DFtextos is an NA placeholder, so parsing starts at row 2 and the
# first row of the output stays NA, as before.
listing_texts <- as.character(DFtextos[[1]])
n_rows <- length(listing_texts)

# Preallocate the result vectors; positions that cannot be parsed remain NA.
metros2 <- rep(NA_character_, n_rows)
quartos <- rep(NA_character_, n_rows)
aluguel <- rep(NA_character_, n_rows)

# Split all texts on single spaces in one call.
terms_by_listing <- strsplit(listing_texts, " ", fixed = TRUE)

# seq_len(n)[-1] is 2..n and is empty when there are no listings, whereas
# 2:nrow() would count down (2, 1) in that case.
for (i in seq_len(n_rows)[-1]) {
  terms <- terms_by_listing[[i]]

  # Fixed token positions within the card text.
  # NOTE(review): positions 11 and 7 depend on the site's card layout -
  # confirm they still match.
  metros2[i] <- terms[11]
  qt <- terms[7]
  # Default to one room when the token is missing or empty.
  quartos[i] <- if (is.na(qt) || qt == "") "1" else qt

  # The rent is the token immediately before "/Mês". Only read it when the
  # marker exists and has a token before it; otherwise leave NA. (Testing
  # length(terms) >= index fails with a zero-length condition when the
  # marker is absent.)
  indice_mes <- which(terms %in% "/Mês")
  if (length(indice_mes) > 0 && indice_mes[1] > 1) {
    aluguel[i] <- terms[indice_mes[1] - 1]
  }
}

DFunico <- data.frame(metros2, quartos, aluguel)
write.csv(DFunico, "DFunico.csv")