Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
paul committed Mar 15, 2017
1 parent 7bb2a0a commit 863e501
Showing 1 changed file with 37 additions and 0 deletions.
37 changes: 37 additions & 0 deletions text_portrait.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
library(jiebaR)
library(wordcloud2)

# Read a text file and strip the non-Chinese content (spaces, ASCII word
# characters and punctuation).
#
# Args:
#   path: Path of the text file; it is handed to readLines() as-is.
#
# Returns:
#   A length-one character vector: all lines joined together, with spaces
#   and ASCII word characters removed and every punctuation character
#   replaced by a single space.
readChineseWords <- function(path) {
  raw_lines <- readLines(path)
  s <- paste0(raw_lines, collapse = " ")
  # Each step must build on the previous result; restarting from the raw
  # text would silently discard the space removal.
  s <- gsub(" ", "", s, fixed = TRUE)
  # With perl = TRUE, \w matches only ASCII letters, digits and underscore
  # by default, so Chinese characters are left in place.
  s <- gsub("\\w", "", s, perl = TRUE)
  gsub("[[:punct:]]", " ", s)
}

# Path of the input text file
uri <- "gov_report.txt"
Rt <- readChineseWords(uri)
# Drop the spaces left in the cleaned text before segmentation
Rt <- gsub(" ", "", Rt)

# Segmentation worker; stop.txt is the stop-word list
words <- worker(stop_word = "stop.txt")

# Register custom words. new_user_word() takes the words and their tags as
# separate vectors; interleaving them in one vector would register the tag
# "n" itself as a word.
new_user_word(words, c("大数据", "新中国"), c("n", "n"))
seg <- segment(Rt, words)

# Extract the top 30 keywords
keys <- worker("keywords", topn = 30)
v <- vector_keywords(seg, keys)

# Convert to a data.frame: stack() puts the vector's values in `values`
# and its names in `ind`
df <- stack(v)
# NOTE(review): presumably the names of `v` are the keyword weights, which is
# why `ind` is converted from factor to numeric here; confirm against jiebaR.
df$ind <- as.numeric(as.character(df$ind))

# Draw the word cloud
wordcloud2(df)

0 comments on commit 863e501

Please sign in to comment.