# Synonyms and Antonyms in Emily Dickinson's Poems (Gutenberg edition) -- code and inspiration by Christina Wang (STAT495)
#list.of.packages <- c("XML", "httr", "mosaic", "tidyr", "rvest", "tm", "lubridate", "stringr", "ggplot2", "wordcloud")
#new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
#if(length(new.packages)){install.packages(new.packages)}
require(XML)
require(httr)
require(mosaic)
require(tidyr)
require(rvest)
require(wordcloud)
require(tm)
require(lubridate)
require(stringr)
require(ggplot2)
# Build the corpus from every file in the "gutenberg" directory and normalise
# it for counting.
#
# The order of the steps matters:
#   1. lower-case first, so stop words match regardless of capitalisation;
#   2. remove stop words BEFORE punctuation, because stopwords("english")
#      contains contractions such as "don't" that can no longer match once
#      their apostrophe has been stripped;
#   3. strip punctuation;
#   4. collapse whitespace last, to clean up the gaps left by the removals.
location <- "gutenberg"
mycorpus <- VCorpus(DirSource(location))
mycorpus <- tm_map(mycorpus, content_transformer(tolower))
mycorpus <- tm_map(mycorpus, removeWords, stopwords("english"))
mycorpus <- tm_map(mycorpus, removePunctuation)
mycorpus <- tm_map(mycorpus, stripWhitespace)
# Shiny server logic.
#
# Outputs:
#   output$wordcloud - word cloud drawn from the preprocessed corpus `mycorpus`.
#   output$histsyn   - bar chart of how often the entered word and the terms
#                      scraped from its thesaurus.com page occur in `mycorpus`.
#
# Inputs read: ncloud, random.order, random.color, word, nthes, findsyn.
shinyServer(function(input, output) {

  # Wordcloud ----
  output$wordcloud <- renderPlot({
    ncloud <- input$ncloud
    random.order <- input$random.order
    random.color <- input$random.color
    wordcloud(
      mycorpus,
      max.words = ncloud,
      colors = brewer.pal(8, "Dark2"),
      scale = c(6, 0.5),
      random.order = random.order,
      random.color = random.color
    )
  })

  # Frequency ----
  # Only re-evaluated when the "findsyn" button is pressed.
  hold <- eventReactive(input$findsyn, {
    # The corpus is lower-cased during preprocessing, so normalise the query
    # the same way; otherwise a capitalised entry could never match.
    word <- tolower(trimws(input$word))
    if (!nzchar(word)) {
      stop("Please enter a word.", call. = FALSE)
    }
    nthes <- input$nthes

    thesaurus <- "http://www.thesaurus.com/browse"
    # Percent-encode the user-supplied text before placing it in the URL path.
    myurl <- paste(thesaurus, URLencode(word, reserved = TRUE), sep = "/")

    page <- tryCatch(
      read_html(myurl),
      error = function(e) {
        stop(
          "Could not retrieve thesaurus entries for '", word, "': ",
          conditionMessage(e),
          call. = FALSE
        )
      }
    )

    selector_name1 <- ".common-word"
    selector_name2 <- ".text"
    syn <- page %>%
      html_nodes(selector_name1) %>%
      html_nodes(selector_name2) %>%
      html_text()

    # The entered word always comes first; unique() also drops it from the
    # scraped terms so the dictionary never contains a duplicated entry.
    combined <- unique(c(word, syn))

    # Total occurrences of each dictionary term across all documents.
    # as.matrix() yields the full dense matrix without printing anything,
    # unlike inspect(), which is meant for console display.
    dtm <- DocumentTermMatrix(mycorpus, list(dictionary = combined))
    totals <- colSums(as.matrix(dtm))
    counts <- data.frame(
      element = as.character(names(totals)),
      count = as.numeric(totals),
      stringsAsFactors = FALSE
    )

    # Entered word first, then the scraped terms by decreasing frequency.
    sum0 <- counts %>%
      filter(element == word) %>%
      mutate(group = "orig")
    sum1 <- counts %>%
      filter(element != word) %>%
      mutate(group = "new") %>%
      arrange(desc(count))
    # head() caps at nthes rows without padding NA rows when fewer exist.
    sum_combined <- head(rbind(sum0, sum1), nthes)

    if (nrow(sum_combined) == 0) {
      stop("No terms to display for '", word, "'.", call. = FALSE)
    }

    ggplot(sum_combined, aes(x = element, y = count, fill = group)) +
      geom_bar(stat = "identity") +
      theme_minimal() +
      # "Dark2" is the ColorBrewer palette name (also used by the word cloud).
      scale_fill_brewer(palette = "Dark2") +
      theme(
        axis.text.x = element_text(angle = 45, hjust = 1, size = 15),
        legend.position = "none",
        axis.title.x = element_blank(),
        axis.title.y = element_blank()
      ) +
      # Keep the bars in the row order built above rather than alphabetical.
      scale_x_discrete(limits = sum_combined$element) +
      geom_text(aes(label = count), color = "dark green", size = 3.5)
  })

  output$histsyn <- renderPlot({
    hold()
  })
}
)
#list.of.packages <- c("XML", "httr", "mosaic", "tidyr", "rvest", "tm", "lubridate", "stringr", "ggplot2", "wordcloud")
#new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
#if(length(new.packages)){install.packages(new.packages)}
require(XML)
require(httr)
require(mosaic)
require(tidyr)
require(rvest)
require(wordcloud)
require(tm)
require(lubridate)
require(stringr)
require(ggplot2)
# Load every file in the "gutenberg" directory into a corpus and clean it.
#
# Step order is deliberate:
#   - lower-casing comes first so stop-word matching is case-insensitive;
#   - stop words are removed while punctuation is still present, since
#     stopwords("english") includes contractions (e.g. "don't") that would
#     not match text whose apostrophes had already been stripped;
#   - punctuation is removed next;
#   - whitespace is collapsed at the end, tidying the gaps the earlier
#     removals leave behind.
location <- "gutenberg"
mycorpus <- VCorpus(DirSource(location))
mycorpus <- tm_map(mycorpus, content_transformer(tolower))
mycorpus <- tm_map(mycorpus, removeWords, stopwords("english"))
mycorpus <- tm_map(mycorpus, removePunctuation)
mycorpus <- tm_map(mycorpus, stripWhitespace)
# Read the HTML document at `link` and return, as a character vector, the text
# of every ".text" node found beneath a ".common-word" node.
# (Presumably these are the synonym entries of a thesaurus page -- the
# selectors are the only evidence of that here.)
extractword <- function(link) {
  page <- read_html(link)
  word_nodes <- html_nodes(page, ".common-word")
  text_nodes <- html_nodes(word_nodes, ".text")
  html_text(text_nodes)
}
# Shiny server logic.
#
# Outputs:
#   output$wordcloud - word cloud drawn from the preprocessed corpus `mycorpus`.
#   output$histsyn   - bar chart of how often the entered word and the terms
#                      scraped from its thesaurus.com page occur in `mycorpus`.
#
# Inputs read: ncloud, random.order, random.color, word, nthes, findsyn.
shinyServer(function(input, output) {

  # Wordcloud ----
  output$wordcloud <- renderPlot({
    ncloud <- input$ncloud
    random.order <- input$random.order
    random.color <- input$random.color
    wordcloud(
      mycorpus,
      max.words = ncloud,
      colors = brewer.pal(8, "Dark2"),
      scale = c(6, 0.5),
      random.order = random.order,
      random.color = random.color
    )
  })

  # Frequency ----
  # Only re-evaluated when the "findsyn" button is pressed.
  hold <- eventReactive(input$findsyn, {
    # The corpus is lower-cased during preprocessing, so normalise the query
    # the same way; otherwise a capitalised entry could never match.
    word <- tolower(trimws(input$word))
    if (!nzchar(word)) {
      stop("Please enter a word.", call. = FALSE)
    }
    nthes <- input$nthes

    thesaurus <- "http://www.thesaurus.com/browse"
    # Percent-encode the user-supplied text before placing it in the URL path.
    myurl <- paste(thesaurus, URLencode(word, reserved = TRUE), sep = "/")

    #xpath <- '*[contains(concat( " ", @class, " " ), concat( " ", "text", " " ))]'
    xpath <- '//span[@class="text"]'

    # NOTE(review): html_session() appears to be deprecated in recent rvest
    # releases in favour of session() -- confirm against the installed version.
    page <- tryCatch(
      html_session(myurl),
      error = function(e) {
        stop(
          "Could not retrieve thesaurus entries for '", word, "': ",
          conditionMessage(e),
          call. = FALSE
        )
      }
    )

    syn <- page %>%
      html_nodes(xpath = xpath) %>%
      html_text()

    # The XPath can return the same term more than once, and may return the
    # entered word itself; a dictionary must not contain duplicates, so
    # de-duplicate while keeping the entered word in first position.
    combined <- unique(c(word, syn))

    # Total occurrences of each dictionary term across all documents.
    # as.matrix() yields the full dense matrix without printing anything,
    # unlike inspect(), which is meant for console display.
    dtm <- DocumentTermMatrix(mycorpus, list(dictionary = combined))
    totals <- colSums(as.matrix(dtm))
    counts <- data.frame(
      element = as.character(names(totals)),
      count = as.numeric(totals),
      stringsAsFactors = FALSE
    )

    # Entered word first, then the scraped terms by decreasing frequency.
    sum0 <- counts %>%
      filter(element == word) %>%
      mutate(group = "orig")
    sum1 <- counts %>%
      filter(element != word) %>%
      mutate(group = "new") %>%
      arrange(desc(count))
    # head() caps at nthes rows without padding NA rows when fewer exist.
    sum_combined <- head(rbind(sum0, sum1), nthes)

    if (nrow(sum_combined) == 0) {
      stop("No terms to display for '", word, "'.", call. = FALSE)
    }

    ggplot(sum_combined, aes(x = element, y = count, fill = group)) +
      geom_bar(stat = "identity") +
      theme_minimal() +
      # "Dark2" is the ColorBrewer palette name (also used by the word cloud).
      scale_fill_brewer(palette = "Dark2") +
      theme(
        axis.text.x = element_text(angle = 45, hjust = 1, size = 15),
        legend.position = "none",
        axis.title.x = element_blank(),
        axis.title.y = element_blank()
      ) +
      # Keep the bars in the row order built above rather than alphabetical.
      scale_x_discrete(limits = sum_combined$element) +
      geom_text(aes(label = count), color = "dark green", size = 3.5)
  })

  output$histsyn <- renderPlot({
    hold()
  })
}
)
library(shiny)
# User interface: a title, a byline, and two tabs.
#   Tab 1 ("Emily's Wordcloud")  - controls ncloud / random.order /
#                                  random.color and the "wordcloud" plot.
#   Tab 2 ("Frequency Analyzer") - controls word / nthes / findsyn and the
#                                  "histsyn" plot.
# The input and output ids are the ones the server function reads and writes.
shinyUI(fluidPage(

  # Application title
  titlePanel("Synonyms and Antonyms in Emily Dickinson's Poems (Gutenberg edition)"),
  p("code and inspiration by Christina Wang (STAT495)"),

  tabsetPanel(

    # First tab
    tabPanel(
      "Emily's Wordcloud",
      sidebarLayout(
        sidebarPanel(
          sliderInput(
            "ncloud", "Number of words in wordcloud",
            min = 10, max = 50, value = 35
          ),
          # Spell out FALSE: the shorthand `F` is an ordinary variable that
          # can be reassigned.
          checkboxInput("random.order", "Random order?", value = FALSE),
          checkboxInput("random.color", "Random color?", value = FALSE)
        ),
        mainPanel(plotOutput("wordcloud"))
      )
    ), # End of first tab

    # Second tab
    tabPanel(
      "Frequency Analyzer",
      sidebarLayout(
        sidebarPanel(
          textInput("word", "Enter a word here", "like"),
          sliderInput(
            "nthes", "How many words to display?",
            min = 10, max = 25, value = 15
          ),
          # The plot is only recomputed when this button is pressed.
          actionButton("findsyn", "I'm ready!")
        ),
        # Show a plot of the generated distribution
        mainPanel(plotOutput("histsyn"))
      )
    )
  )
))