Synonyms and Antonyms in Emily Dickinson's Poems (Gutenberg edition)

code and inspiration by Christina Wang (STAT495)

show with app
#list.of.packages <- c("XML", "httr", "mosaic", "tidyr", "rvest", "tm", "lubridate", "stringr", "ggplot2", "wordcloud")
#new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
#if(length(new.packages)){install.packages(new.packages)}

require(XML)
require(httr)
require(mosaic)
require(tidyr)
require(rvest)
require(wordcloud)

require(tm)
require(lubridate)
require(stringr)
require(ggplot2)

# Build the corpus from every file found under `location` and normalise it
# for word counting.
#
# The order of the transformations matters:
#   1. lower-case first, so the (lower-case) stop-word list can match;
#   2. remove stop words while apostrophes are still present, otherwise
#      contractions such as "don't" become "dont" and are never removed;
#   3. strip the remaining punctuation;
#   4. collapse whitespace last, so the gaps left by steps 2-3 disappear.
location <- "gutenberg"
mycorpus <- VCorpus(DirSource(location))
mycorpus <- tm_map(mycorpus, content_transformer(tolower))
mycorpus <- tm_map(mycorpus, removeWords, stopwords("english"))
mycorpus <- tm_map(mycorpus, removePunctuation)
mycorpus <- tm_map(mycorpus, stripWhitespace)

# Shiny server logic for the Emily Dickinson word explorer.
#
# Reads the pre-processed corpus from the global `mycorpus` and provides:
#   output$wordcloud - word cloud controlled by input$ncloud,
#                      input$random.order and input$random.color.
#   output$histsyn   - bar chart of corpus counts for input$word and the
#                      synonyms scraped from thesaurus.com, limited to
#                      input$nthes bars. It is recomputed only when the
#                      input$findsyn button is pressed.
shinyServer(function(input, output) {
  # Wordcloud
  output$wordcloud <- renderPlot({
    wordcloud(mycorpus,
              max.words = input$ncloud,
              colors = brewer.pal(8, "Dark2"),
              scale = c(6, 0.5),
              random.order = input$random.order,
              random.color = input$random.color)
  })

  # Frequency
  hold <- eventReactive(input$findsyn, {
    # The corpus is lower-cased when it is built, so the query has to be
    # normalised the same way or it can never match a corpus term.
    word <- tolower(trimws(input$word))
    shiny::validate(shiny::need(nzchar(word), "Please enter a word."))
    nthes <- input$nthes

    thesaurus <- "http://www.thesaurus.com/browse"
    # Encode the word so spaces or other reserved characters cannot break
    # the request URL.
    myurl <- paste(thesaurus, utils::URLencode(word, reserved = TRUE),
                   sep = "/")

    # A failed download is turned into a message in the plot area instead of
    # a raw error; the underlying cause is still written to the R console.
    page <- tryCatch(
      read_html(myurl),
      error = function(e) {
        message("thesaurus.com lookup failed: ", conditionMessage(e))
        NULL
      }
    )
    shiny::validate(shiny::need(
      !is.null(page),
      "Could not retrieve synonyms from thesaurus.com."
    ))

    syn <- page %>%
      html_nodes(".common-word") %>%
      html_nodes(".text") %>%
      html_text()
    # Dictionary terms must be unique; the query itself always goes first.
    combined <- unique(c(word, syn))

    # as.matrix() yields the complete document-term matrix. inspect() is a
    # display helper: it prints to the console and may return only a sample.
    dtm <- DocumentTermMatrix(mycorpus, list(dictionary = combined))
    totals <- colSums(as.matrix(dtm))
    freq <- data.frame(element = as.character(names(totals)),
                       count = as.numeric(totals),
                       stringsAsFactors = FALSE)
    freq$group <- ifelse(freq$element == word, "orig", "new")

    # Query word first, then the synonyms by decreasing count.
    ranked <- freq[order(freq$group != "orig", -freq$count), , drop = FALSE]
    # head() never pads with NA rows when fewer than `nthes` terms exist.
    sum_combined <- head(ranked, nthes)
    shiny::validate(shiny::need(
      nrow(sum_combined) > 0,
      "None of these words could be counted in the corpus."
    ))

    ggplot(sum_combined, aes(x = element, y = count, fill = group)) +
      geom_bar(stat = "identity") +
      theme_minimal() +
      # "Dark2" is the ColorBrewer palette name ("Dark" does not exist).
      scale_fill_brewer(palette = "Dark2") +
      theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 15),
            legend.position = "none",
            axis.title.x = element_blank(),
            axis.title.y = element_blank()) +
      # Keep the bars in ranked order rather than alphabetical order.
      scale_x_discrete(limits = sum_combined$element) +
      geom_text(aes(label = count), color = "dark green", size = 3.5)
  })

  output$histsyn <- renderPlot({
    hold()
  })
})
#list.of.packages <- c("XML", "httr", "mosaic", "tidyr", "rvest", "tm", "lubridate", "stringr", "ggplot2", "wordcloud")
#new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
#if(length(new.packages)){install.packages(new.packages)}

require(XML)
require(httr)
require(mosaic)
require(tidyr)
require(rvest)
require(wordcloud)

require(tm)
require(lubridate)
require(stringr)
require(ggplot2)

# Build the corpus from every file found under `location` and normalise it
# for word counting.
#
# The order of the transformations matters:
#   1. lower-case first, so the (lower-case) stop-word list can match;
#   2. remove stop words while apostrophes are still present, otherwise
#      contractions such as "don't" become "dont" and are never removed;
#   3. strip the remaining punctuation;
#   4. collapse whitespace last, so the gaps left by steps 2-3 disappear.
location <- "gutenberg"
mycorpus <- VCorpus(DirSource(location))
mycorpus <- tm_map(mycorpus, content_transformer(tolower))
mycorpus <- tm_map(mycorpus, removeWords, stopwords("english"))
mycorpus <- tm_map(mycorpus, removePunctuation)
mycorpus <- tm_map(mycorpus, stripWhitespace)

# Scrape the synonym entries from a thesaurus page.
#
# link: location of the page; it is passed straight to read_html().
# Returns a character vector holding the text of every ".text" node found
# inside a ".common-word" node, in document order.
extractword <- function(link) {
  link %>%
    read_html() %>%
    html_nodes(".common-word") %>%
    html_nodes(".text") %>%
    html_text()
}

# Shiny server logic for the Emily Dickinson word explorer.
#
# Reads the pre-processed corpus from the global `mycorpus` and provides:
#   output$wordcloud - word cloud controlled by input$ncloud,
#                      input$random.order and input$random.color.
#   output$histsyn   - bar chart of corpus counts for input$word and the
#                      synonyms scraped from thesaurus.com, limited to
#                      input$nthes bars. It is recomputed only when the
#                      input$findsyn button is pressed.
shinyServer(function(input, output) {
  # Wordcloud
  output$wordcloud <- renderPlot({
    wordcloud(mycorpus,
              max.words = input$ncloud,
              colors = brewer.pal(8, "Dark2"),
              scale = c(6, 0.5),
              random.order = input$random.order,
              random.color = input$random.color)
  })

  # Frequency
  hold <- eventReactive(input$findsyn, {
    # The corpus is lower-cased when it is built, so the query has to be
    # normalised the same way or it can never match a corpus term.
    word <- tolower(trimws(input$word))
    shiny::validate(shiny::need(nzchar(word), "Please enter a word."))
    nthes <- input$nthes

    thesaurus <- "http://www.thesaurus.com/browse"
    # Encode the word so spaces or other reserved characters cannot break
    # the request URL.
    myurl <- paste(thesaurus, utils::URLencode(word, reserved = TRUE),
                   sep = "/")

    #xpath <- '*[contains(concat( " ", @class, " " ), concat( " ", "text", " " ))]'
    xpath <- '//span[@class="text"]'

    # A failed request is turned into a message in the plot area instead of
    # a raw error; the underlying cause is still written to the R console.
    page <- tryCatch(
      html_session(myurl),
      error = function(e) {
        message("thesaurus.com lookup failed: ", conditionMessage(e))
        NULL
      }
    )
    shiny::validate(shiny::need(
      !is.null(page),
      "Could not retrieve synonyms from thesaurus.com."
    ))

    syn <- page %>%
      html_nodes(xpath = xpath) %>%
      html_text()
    # The XPath can return the same entry several times; dictionary terms
    # must be unique. The query itself always goes first.
    combined <- unique(c(word, syn))

    # as.matrix() yields the complete document-term matrix. inspect() is a
    # display helper: it prints to the console and may return only a sample.
    dtm <- DocumentTermMatrix(mycorpus, list(dictionary = combined))
    totals <- colSums(as.matrix(dtm))
    freq <- data.frame(element = as.character(names(totals)),
                       count = as.numeric(totals),
                       stringsAsFactors = FALSE)
    freq$group <- ifelse(freq$element == word, "orig", "new")

    # Query word first, then the synonyms by decreasing count.
    ranked <- freq[order(freq$group != "orig", -freq$count), , drop = FALSE]
    # head() never pads with NA rows when fewer than `nthes` terms exist.
    sum_combined <- head(ranked, nthes)
    shiny::validate(shiny::need(
      nrow(sum_combined) > 0,
      "None of these words could be counted in the corpus."
    ))

    ggplot(sum_combined, aes(x = element, y = count, fill = group)) +
      geom_bar(stat = "identity") +
      theme_minimal() +
      # "Dark2" is the ColorBrewer palette name ("Dark" does not exist).
      scale_fill_brewer(palette = "Dark2") +
      theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 15),
            legend.position = "none",
            axis.title.x = element_blank(),
            axis.title.y = element_blank()) +
      # Keep the bars in ranked order rather than alphabetical order.
      scale_x_discrete(limits = sum_combined$element) +
      geom_text(aes(label = count), color = "dark green", size = 3.5)
  })

  output$histsyn <- renderPlot({
    hold()
  })
})
library(shiny)

# User interface: a title, a credit line and two tabs.
#   Tab 1 ("Emily's Wordcloud")  - controls for the word cloud
#                                  (inputs ncloud, random.order, random.color)
#                                  next to the plot output "wordcloud".
#   Tab 2 ("Frequency Analyzer") - word entry, bar-count slider and a trigger
#                                  button (inputs word, nthes, findsyn)
#                                  next to the plot output "histsyn".
shinyUI(fluidPage(
  titlePanel(
    title = "Synonyms and Antonyms in Emily Dickinson's Poems (Gutenberg edition)"
  ),
  p("code and inspiration by Christina Wang (STAT495)"),
  tabsetPanel(
    # Tab 1: word cloud
    tabPanel(
      title = "Emily's Wordcloud",
      sidebarLayout(
        sidebarPanel(
          sliderInput(inputId = "ncloud",
                      label = "Number of words in wordcloud",
                      min = 10, max = 50, value = 35),
          checkboxInput(inputId = "random.order",
                        label = "Random order?",
                        value = FALSE),
          checkboxInput(inputId = "random.color",
                        label = "Random color?",
                        value = FALSE)
        ),
        mainPanel(plotOutput(outputId = "wordcloud"))
      )
    ),

    # Tab 2: synonym frequency chart
    tabPanel(
      title = "Frequency Analyzer",
      sidebarLayout(
        sidebarPanel(
          textInput(inputId = "word",
                    label = "Enter a word here",
                    value = "like"),
          sliderInput(inputId = "nthes",
                      label = "How many words to display?",
                      min = 10, max = 25, value = 15),
          actionButton(inputId = "findsyn", label = "I'm ready!")
        ),
        # Bar chart of counts for the entered word and its synonyms
        mainPanel(plotOutput(outputId = "histsyn"))
      )
    )
  )
))