rm(list=ls(all=TRUE))
getwd()
setwd("C:/Users/mw/Dropbox (Personale)/TOPIC MODEL")
getwd()

library(e1071)
library(caTools)
library(randomForest)
library(caret)
library(quanteda)
library(readtext)

### TRAINING-SET
# This dataset is a sample of tweets mentioning the official account of Donald Trump, "@realDonaldTrump",
# posted between 7 and 13 June 2016, written in English and coming from the US.
# Data were collected through the Twitter API, specifying the language and the origin of the tweets.
# The dataset includes a sample of around 482 tweets that were manually coded by a group of students.
# The coding stage involved detecting the sentiment towards Trump (negative, positive, neutral).
x <- read.csv("Trump-orig3.csv", stringsAsFactors = FALSE)
str(x)
table(x$Sentiment)
x$Sentiment <- as.factor(x$Sentiment)
str(x)
table(x$Sentiment)
prop.table(table(x$Sentiment))
myCorpusTrain <- corpus(x)
summary(myCorpusTrain)

### TEST-SET
# This is a sample of 1000 tweets written in English about Trump, published from 1.17.2018 to 1.19.2018.
x10 <- read.csv("Trump_tweets2.csv", stringsAsFactors = FALSE)
str(x10)
x10$X <- NULL
myCorpusTest <- corpus(x10)
summary(myCorpusTest)
MyTotalCorpus <- corpus(myCorpusTrain + myCorpusTest)
head(summary(MyTotalCorpus))

###########################################
# Try a random forest model
###########################################
myDfm <- dfm(MyTotalCorpus,
             remove = c(stopwords("english"), "pic.twitter.com", "rt", "tco", "co",
                        "u", "t", "s", "ed", "https"),
             remove_punct = TRUE, remove_numbers = TRUE, tolower = TRUE,
             remove_symbols = TRUE, remove_twitter = TRUE,
             remove_separators = TRUE, remove_url = TRUE)
topfeatures(myDfm, 20)  # 20 top words

# Keep terms that appear in 5% or more of the tweets.
trim <- dfm_trim(myDfm, min_docfreq = 0.05)
data <- as.data.frame(as.matrix(trim))
str(data)
colnames(data)
# This is important: randomForest cannot handle column names that begin with a space,
# comma, number or certain punctuation. make.names() below fixes such names (e.g. by
# putting a letter in front of a number, if any numbers are left in the dfm). Highly suggested.
colnames(data) <- make.names(colnames(data))
colnames(data)
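# NOTE (sketch, not part of the original workflow): in more recent quanteda releases
# (>= 2.0) the cleaning options above are passed to tokens() rather than dfm(), the
# remove_twitter option is no longer available, and a proportional docfreq threshold
# needs docfreq_type = "prop". Assuming quanteda >= 2.0, a rough equivalent of the
# recipe above would be (new object names with a _v2 suffix are illustrative only):
toks_v2 <- tokens(MyTotalCorpus,
                  remove_punct = TRUE, remove_numbers = TRUE, remove_symbols = TRUE,
                  remove_url = TRUE, remove_separators = TRUE)
toks_v2 <- tokens_remove(toks_v2, c(stopwords("english"), "pic.twitter.com", "rt",
                                    "tco", "co", "u", "t", "s", "ed", "https"))
myDfm_v2 <- dfm(toks_v2, tolower = TRUE)
trim_v2  <- dfm_trim(myDfm_v2, min_docfreq = 0.05, docfreq_type = "prop")  # 5% of tweets
data_v2  <- convert(trim_v2, to = "data.frame")  # first column holds the document id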
str(MyTotalCorpus)
MyTotalCorpus$documents$Sentiment   # docvars(MyTotalCorpus, "Sentiment") is the supported accessor
table(MyTotalCorpus$documents$Sentiment)
data$sentiment <- MyTotalCorpus$documents$Sentiment
table(data$sentiment)
# train <- subset(data, sentiment!="NA's")
train <- subset(data, !is.na(sentiment))
nrow(train)
test <- subset(data, is.na(sentiment))
nrow(test)
colnames(train)
table(train$sentiment)
str(train$sentiment)

set.seed(123)
system.time(RF <- randomForest(sentiment ~ ., data = train, type = "classification"))
predictRF <- predict(RF, newdata = test)
table(predictRF)
rf_results <- prop.table(table(predictRF))
rf_results

###########################################
## Try a Naive Bayes model
###########################################
summary(MyTotalCorpus)
str(MyTotalCorpus)
str(MyTotalCorpus$documents$Sentiment)
table(MyTotalCorpus$documents$Sentiment)
table(is.na(MyTotalCorpus$documents$Sentiment))

# get the training set (the documents with a non-missing Sentiment) and compute the dfm out of it
training_corpus <- corpus_subset(MyTotalCorpus, !is.na(Sentiment))
# or you can directly start from the corpus of the training-set
training_corpus <- myCorpusTrain
summary(training_corpus)
training_dfm <- dfm(training_corpus,
                    remove = c(stopwords("english"), "amp", "rt", "tco", "co",
                               "u", "t", "s", "ed", "https"),
                    remove_punct = TRUE, remove_numbers = TRUE, tolower = TRUE,
                    remove_symbols = TRUE, remove_twitter = TRUE,
                    remove_separators = TRUE, remove_url = TRUE)
topfeatures(training_dfm, 20)  # 20 top words
# Keep terms that appear in 5% or more of the tweets (tweets are very short texts...)
training_dfm <- dfm_trim(training_dfm, min_docfreq = 0.05)

# get the test set (the documents with a missing Sentiment) and compute the dfm out of it
test_corpus <- corpus_subset(MyTotalCorpus, is.na(Sentiment))
# or you can directly start from the corpus of the test-set
test_corpus <- myCorpusTest
summary(test_corpus)
test_dfm <- dfm(test_corpus,
                remove = c(stopwords("english"), "amp", "rt", "tco", "co",
                           "u", "t", "s", "ed", "https"),
                remove_punct = TRUE, remove_numbers = TRUE, tolower = TRUE,
                remove_symbols = TRUE, remove_twitter = TRUE,
                remove_separators = TRUE, remove_url = TRUE)
topfeatures(test_dfm, 20)  # 20 top words
# Keep terms that appear in 5% or more of the tweets (tweets are very short texts...)
test_dfm <- dfm_trim(test_dfm, min_docfreq = 0.05)

# train the Naive Bayes classifier using textmodel_nb() and a multinomial distribution
nb <- textmodel_nb(training_dfm, docvars(training_dfm, "Sentiment"), distribution = "multinomial")
summary(nb)
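# NOTE (sketch, not part of the original script): essentially the same cleaning recipe
# has now been written out three times and is repeated again for every cross-validation
# fold below. A small helper keeps it in one place; make_tweet_dfm() is a hypothetical
# name and assumes the same quanteda version used above.
make_tweet_dfm <- function(corp) {
  d <- dfm(corp,
           remove = c(stopwords("english"), "amp", "rt", "tco", "co",
                      "u", "t", "s", "ed", "https"),
           remove_punct = TRUE, remove_numbers = TRUE, tolower = TRUE,
           remove_symbols = TRUE, remove_twitter = TRUE,
           remove_separators = TRUE, remove_url = TRUE)
  dfm_trim(d, min_docfreq = 0.05)   # keep terms appearing in at least 5% of the tweets
}
# e.g. training_dfm <- make_tweet_dfm(training_corpus)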
# Let's make the features identical by passing training_dfm to dfm_select() as a pattern.
test_dfm <- dfm_select(test_dfm, training_dfm)
predicted_class <- predict(nb, test_dfm)
table(predicted_class)
str(predicted_class)
nb_results <- prop.table(table(predicted_class))
nb_results

#######################################
#### plot RF and NB results
#######################################
library(ggplot2)   # already attached via caret, but loaded explicitly here
myFrame1 <- as.data.frame(prop.table(table(predictRF)))
str(myFrame1)
colnames(myFrame1)[1] <- "Prediction"
myFrame1$class <- c("RF", "RF", "RF")
str(myFrame1)
# myFrame1[,1] = NULL
myFrame2 <- as.data.frame(prop.table(table(predicted_class)))
str(myFrame2)
colnames(myFrame2)[1] <- "Prediction"
myFrame2$class <- c("NB", "NB", "NB")
str(myFrame2)
# myFrame2[,1] = NULL
myFrame_tot <- rbind(myFrame1, myFrame2)
str(myFrame_tot)
library(reshape2)
df.long <- melt(myFrame_tot, id.vars = c("class", "Freq"))
str(df.long)
ggplot(df.long, aes(class, Freq, fill = value)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme(axis.text.x = element_text(color = "#993333", size = 10, angle = 90)) +
  coord_flip() +
  ylab(label = "Frequency") + xlab("Algorithm") +
  ggtitle("RF vs. Naive Bayes predictions for the test-set")

###########################################
# Cross-validation with NB with K=4 (each held-out fold is about a quarter of the training-set)
###########################################
summary(myCorpusTrain)
N <- ndoc(myCorpusTrain)
# Number of desired splits
folds <- 4
# Generate indices of holdout observations
holdout <- split(sample(1:N), 1:folds)
str(holdout)
# Check that each observation appears exactly once in the holdout object:
holdout %>% unlist() %>% length() == N
holdout[[1]]
# create a docvar with a numeric document ID
docvars(myCorpusTrain, "id_numeric") <- 1:ndoc(myCorpusTrain)
summary(myCorpusTrain)
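# NOTE (sketch, not part of the original script): the blocks below build the training
# and held-out dfm for each fold one by one. As a compact alternative, and assuming the
# hypothetical make_tweet_dfm() helper sketched earlier, all training/held-out pairs can
# be built in one loop:
fold_dfms <- lapply(holdout, function(idx) {
  list(train = make_tweet_dfm(corpus_subset(myCorpusTrain, !id_numeric %in% idx)),
       test  = make_tweet_dfm(corpus_subset(myCorpusTrain,  id_numeric %in% idx)))
})
# fold_dfms[[1]]$train then corresponds to tr_other1_dfm below, and fold_dfms[[1]]$test to tr_1_dfm.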
# get training set_other1 (all the documents NOT in holdout[[1]]) and compute the dfm out of it
tr_other1 <- corpus_subset(myCorpusTrain, !id_numeric %in% holdout[[1]])
tr_other1_dfm <- dfm(tr_other1,
                     remove = c(stopwords("english"), "amp", "rt", "tco", "co",
                                "u", "t", "s", "ed", "https"),
                     remove_punct = TRUE, remove_numbers = TRUE, tolower = TRUE,
                     remove_symbols = TRUE, remove_twitter = TRUE,
                     remove_separators = TRUE, remove_url = TRUE)
tr_other1_dfm <- dfm_trim(tr_other1_dfm, min_docfreq = 0.05)

# get training set_other2 (all the documents NOT in holdout[[2]]) and compute the dfm out of it
tr_other2 <- corpus_subset(myCorpusTrain, !id_numeric %in% holdout[[2]])
tr_other2_dfm <- dfm(tr_other2,
                     remove = c(stopwords("english"), "amp", "rt", "tco", "co",
                                "u", "t", "s", "ed", "https"),
                     remove_punct = TRUE, remove_numbers = TRUE, tolower = TRUE,
                     remove_symbols = TRUE, remove_twitter = TRUE,
                     remove_separators = TRUE, remove_url = TRUE)
tr_other2_dfm <- dfm_trim(tr_other2_dfm, min_docfreq = 0.05)

# get training set_other3 (all the documents NOT in holdout[[3]]) and compute the dfm out of it
tr_other3 <- corpus_subset(myCorpusTrain, !id_numeric %in% holdout[[3]])
tr_other3_dfm <- dfm(tr_other3,
                     remove = c(stopwords("english"), "amp", "rt", "tco", "co",
                                "u", "t", "s", "ed", "https"),
                     remove_punct = TRUE, remove_numbers = TRUE, tolower = TRUE,
                     remove_symbols = TRUE, remove_twitter = TRUE,
                     remove_separators = TRUE, remove_url = TRUE)
tr_other3_dfm <- dfm_trim(tr_other3_dfm, min_docfreq = 0.05)

# get training set_other4 (all the documents NOT in holdout[[4]]) and compute the dfm out of it
tr_other4 <- corpus_subset(myCorpusTrain, !id_numeric %in% holdout[[4]])
tr_other4_dfm <- dfm(tr_other4,
                     remove = c(stopwords("english"), "amp", "rt", "tco", "co",
                                "u", "t", "s", "ed", "https"),
                     remove_punct = TRUE, remove_numbers = TRUE, tolower = TRUE,
                     remove_symbols = TRUE, remove_twitter = TRUE,
                     remove_separators = TRUE, remove_url = TRUE)
tr_other4_dfm <- dfm_trim(tr_other4_dfm, min_docfreq = 0.05)

# get fold 1 (all the documents in holdout[[1]], used as the held-out test-set) and compute the dfm out of it
tr_1 <- corpus_subset(myCorpusTrain, id_numeric %in% holdout[[1]])
tr_1_dfm <- dfm(tr_1,
                remove = c(stopwords("english"), "amp", "rt", "tco", "co",
                           "u", "t", "s", "ed", "https"),
                remove_punct = TRUE, remove_numbers = TRUE, tolower = TRUE,
                remove_symbols = TRUE, remove_twitter = TRUE,
                remove_separators = TRUE, remove_url = TRUE)
tr_1_dfm <- dfm_trim(tr_1_dfm, min_docfreq = 0.05)

# get fold 2 (all the documents in holdout[[2]], used as the held-out test-set) and compute the dfm out of it
tr_2 <- corpus_subset(myCorpusTrain, id_numeric %in% holdout[[2]])
tr_2_dfm <- dfm(tr_2,
                remove = c(stopwords("english"), "amp", "rt", "tco", "co",
                           "u", "t", "s", "ed", "https"),
                remove_punct = TRUE, remove_numbers = TRUE, tolower = TRUE,
                remove_symbols = TRUE, remove_twitter = TRUE,
                remove_separators = TRUE, remove_url = TRUE)
tr_2_dfm <- dfm_trim(tr_2_dfm, min_docfreq = 0.05)

# get fold 3 (all the documents in holdout[[3]], used as the held-out test-set) and compute the dfm out of it
tr_3 <- corpus_subset(myCorpusTrain, id_numeric %in% holdout[[3]])
tr_3_dfm <- dfm(tr_3,
                remove = c(stopwords("english"), "amp", "rt", "tco", "co",
                           "u", "t", "s", "ed", "https"),
                remove_punct = TRUE, remove_numbers = TRUE, tolower = TRUE,
                remove_symbols = TRUE, remove_twitter = TRUE,
                remove_separators = TRUE, remove_url = TRUE)
tr_3_dfm <- dfm_trim(tr_3_dfm, min_docfreq = 0.05)

# get fold 4 (all the documents in holdout[[4]], used as the held-out test-set) and compute the dfm out of it
tr_4 <- corpus_subset(myCorpusTrain, id_numeric %in% holdout[[4]])
tr_4_dfm <- dfm(tr_4,
                remove = c(stopwords("english"), "amp", "rt", "tco", "co",
                           "u", "t", "s", "ed", "https"),
                remove_punct = TRUE, remove_numbers = TRUE, tolower = TRUE,
                remove_symbols = TRUE, remove_twitter = TRUE,
                remove_separators = TRUE, remove_url = TRUE)
tr_4_dfm <- dfm_trim(tr_4_dfm, min_docfreq = 0.05)

#######################################
# K=1 (train 1) NB
#######################################
nb <- textmodel_nb(tr_other1_dfm, docvars(tr_other1_dfm, "Sentiment"), distribution = "multinomial")
summary(nb)
# Let's make the features identical by passing the training dfm (tr_other1_dfm) to dfm_select() as a pattern.
test_dfm <- dfm_select(tr_1_dfm, tr_other1_dfm)
# Let's inspect how well the classification worked
actual_class1 <- docvars(test_dfm, "Sentiment")
predicted_class1 <- predict(nb, test_dfm)
table(predicted_class1)
prop.table(table(predicted_class1))
class_table1 <- table(actual_class1, predicted_class1)
class_table1
confusionMatrix(class_table1, mode = "everything")
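# NOTE (sketch, not part of the original script): the K=2, K=3 and K=4 blocks below
# repeat the step above for the remaining folds. Assuming the fold_dfms list from the
# earlier sketch, the same train/evaluate step can be run for every fold in one loop:
nb_cms <- lapply(fold_dfms, function(f) {
  nb_k   <- textmodel_nb(f$train, docvars(f$train, "Sentiment"), distribution = "multinomial")
  test_k <- dfm_select(f$test, f$train)            # align features with the training dfm
  cm_k   <- table(docvars(test_k, "Sentiment"), predict(nb_k, test_k))
  confusionMatrix(cm_k, mode = "everything")
})
# nb_cms[[1]] corresponds to the confusion matrix of the K=1 block above.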
#######################################
# K=2 (train 2) NB
#######################################
nb <- textmodel_nb(tr_other2_dfm, docvars(tr_other2_dfm, "Sentiment"), distribution = "multinomial")
summary(nb)
# Let's make the features identical by passing the training dfm (tr_other2_dfm) to dfm_select() as a pattern.
test_dfm <- dfm_select(tr_2_dfm, tr_other2_dfm)
# Let's inspect how well the classification worked
actual_class2 <- docvars(test_dfm, "Sentiment")
predicted_class2 <- predict(nb, test_dfm)
table(predicted_class2)
prop.table(table(predicted_class2))
class_table2 <- table(actual_class2, predicted_class2)
class_table2
confusionMatrix(class_table2, mode = "everything")

#######################################
# K=3 (train 3) NB
#######################################
nb <- textmodel_nb(tr_other3_dfm, docvars(tr_other3_dfm, "Sentiment"), distribution = "multinomial")
summary(nb)
# Let's make the features identical by passing the training dfm (tr_other3_dfm) to dfm_select() as a pattern.
test_dfm <- dfm_select(tr_3_dfm, tr_other3_dfm)
# Let's inspect how well the classification worked
actual_class3 <- docvars(test_dfm, "Sentiment")
predicted_class3 <- predict(nb, test_dfm)
table(predicted_class3)
prop.table(table(predicted_class3))
class_table3 <- table(actual_class3, predicted_class3)
class_table3
confusionMatrix(class_table3, mode = "everything")

#######################################
# K=4 (train 4) NB
#######################################
nb <- textmodel_nb(tr_other4_dfm, docvars(tr_other4_dfm, "Sentiment"), distribution = "multinomial")
summary(nb)
# Let's make the features identical by passing the training dfm (tr_other4_dfm) to dfm_select() as a pattern.
test_dfm <- dfm_select(tr_4_dfm, tr_other4_dfm)
# Let's inspect how well the classification worked
actual_class4 <- docvars(test_dfm, "Sentiment")
predicted_class4 <- predict(nb, test_dfm)
table(predicted_class4)
prop.table(table(predicted_class4))
class_table4 <- table(actual_class4, predicted_class4)
class_table4
confusionMatrix(class_table4, mode = "everything")

#######################################
# NB cross-validation accuracy, precision & recall for each class in Sentiment
#######################################
cm1 <- confusionMatrix(class_table1, mode = "everything")
cm2 <- confusionMatrix(class_table2, mode = "everything")
cm3 <- confusionMatrix(class_table3, mode = "everything")
cm4 <- confusionMatrix(class_table4, mode = "everything")
str(cm1)
str(cm1$overall)
(cm1$overall[1] + cm2$overall[1] + cm3$overall[1] + cm4$overall[1]) / 4
accuracy_mean_nb <- rbind(cm1$overall[1], cm2$overall[1], cm3$overall[1], cm4$overall[1])
accuracy_mean_nb
accuracy_nb_avg <- mean(accuracy_mean_nb)
accuracy_nb_sd <- sd(accuracy_mean_nb)

# Recall (= Sensitivity; True Positive / Actual Positive, or TP/(TP+FN))
# Precision (= Pos Pred Value; True Positive / Predicted Positive, or TP/(TP+FP))
confusionMatrix(class_table1, mode = "everything")
str(cm1$byClass)
cm1$byClass[1:3]  # recall (sensitivity) for the three classes
cm1$byClass[7:9]  # precision (Pos Pred Value) for the three classes

precision_nb <- as.data.frame(cbind(cm1$byClass[7:9], cm2$byClass[7:9], cm3$byClass[7:9], cm4$byClass[7:9]))
precision_nb$sentiment <- c("Negative", "Neutral", "Positive")
str(precision_nb)
precision_nb[1:4]
precision_nb_avg <- as.data.frame(rowMeans(precision_nb[1:4], na.rm = TRUE))
precision_nb_avg$sentiment <- c("Negative", "Neutral", "Positive")
str(precision_nb_avg)
colnames(precision_nb_avg)[1] <- "Precision NB"
str(precision_nb_avg)

recall_nb <- as.data.frame(cbind(cm1$byClass[1:3], cm2$byClass[1:3], cm3$byClass[1:3], cm4$byClass[1:3]))
recall_nb$sentiment <- c("Negative", "Neutral", "Positive")
str(recall_nb)
recall_nb[1:4]
recall_nb_avg <- as.data.frame(rowMeans(recall_nb[1:4], na.rm = TRUE))
recall_nb_avg$sentiment <- c("Negative", "Neutral", "Positive")
str(recall_nb_avg)
colnames(recall_nb_avg)[1] <- "Recall NB"
str(recall_nb_avg)

accuracy_nb_avg
accuracy_nb_sd
precision_nb_avg
recall_nb_avg
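# NOTE (sketch, not part of the original script): caret's byClass matrix also contains
# an "F1" column (assuming a caret version that reports F1), so a per-class F1 averaged
# over the four folds can be computed the same way as precision and recall above:
f1_nb <- cbind(cm1$byClass[, "F1"], cm2$byClass[, "F1"],
               cm3$byClass[, "F1"], cm4$byClass[, "F1"])
f1_nb_avg <- rowMeans(f1_nb, na.rm = TRUE)
f1_nb_avg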
###########################################
# cross-validation with RF with K=4 (each test fold of 118 tweets)
###########################################
library(randomForest)
N <- nrow(train)
# Number of desired splits
folds <- 4
# Generate indices of holdout observations
holdout <- split(sample(1:N), 1:folds)
str(holdout)
# Check that each observation appears exactly once in the holdout object:
holdout %>% unlist() %>% length() == N
holdout[[1]]
# training set: all the rows except those in the sampled fold 1
data <- train[-holdout$`1`, ]
nrow(data)
472 - 118   # sanity check: 472 labelled tweets minus the 118 held out
# test set: only the rows in the sampled fold 1
newdata <- train[holdout$`1`, ]
nrow(newdata)

#######################################
# K=1 (train 1) RF
#######################################
set.seed(123)
system.time(RF <- randomForest(sentiment ~ ., data = train[-holdout$`1`, ], type = "classification"))
predictRF1 <- predict(RF, newdata = train[holdout$`1`, ])
table(predictRF1)
rf_results1 <- prop.table(table(predictRF1))
rf_results1
newdata <- train[holdout$`1`, ]
table("Predictions" = predictRF1, "Actual" = newdata$sentiment)
# Let's use the confusionMatrix command
conf.rf1 <- confusionMatrix(predictRF1, newdata$sentiment)
conf.rf1

#######################################
# K=2 (train 2) RF
#######################################
set.seed(123)
system.time(RF <- randomForest(sentiment ~ ., data = train[-holdout$`2`, ], type = "classification"))
predictRF2 <- predict(RF, newdata = train[holdout$`2`, ])
table(predictRF2)
rf_results2 <- prop.table(table(predictRF2))
rf_results2
newdata <- train[holdout$`2`, ]
table("Predictions" = predictRF2, "Actual" = newdata$sentiment)
# Let's use the confusionMatrix command
conf.rf2 <- confusionMatrix(predictRF2, newdata$sentiment)
conf.rf2

#######################################
# K=3 (train 3) RF
#######################################
set.seed(123)
system.time(RF <- randomForest(sentiment ~ ., data = train[-holdout$`3`, ], type = "classification"))
predictRF3 <- predict(RF, newdata = train[holdout$`3`, ])
table(predictRF3)
rf_results3 <- prop.table(table(predictRF3))
rf_results3
newdata <- train[holdout$`3`, ]
table("Predictions" = predictRF3, "Actual" = newdata$sentiment)
# Let's use the confusionMatrix command
conf.rf3 <- confusionMatrix(predictRF3, newdata$sentiment)
conf.rf3

#######################################
# K=4 (train 4) RF
#######################################
set.seed(123)
system.time(RF <- randomForest(sentiment ~ ., data = train[-holdout$`4`, ], type = "classification"))
predictRF4 <- predict(RF, newdata = train[holdout$`4`, ])
table(predictRF4)
rf_results4 <- prop.table(table(predictRF4))
rf_results4
newdata <- train[holdout$`4`, ]
table("Predictions" = predictRF4, "Actual" = newdata$sentiment)
# Let's use the confusionMatrix command
conf.rf4 <- confusionMatrix(predictRF4, newdata$sentiment)
conf.rf4
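# NOTE (sketch, not part of the original script): the four RF fold blocks above can be
# collapsed into a single loop over the holdout indices (same seed and formula as above):
rf_cms <- lapply(holdout, function(idx) {
  set.seed(123)
  rf_k <- randomForest(sentiment ~ ., data = train[-idx, ], type = "classification")
  confusionMatrix(predict(rf_k, newdata = train[idx, ]), train[idx, ]$sentiment)
})
# rf_cms[[1]] corresponds to conf.rf1 above.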
#######################################
# RF cross-validation accuracy, precision & recall for each class in Sentiment
#######################################
(conf.rf1$overall[1] + conf.rf2$overall[1] + conf.rf3$overall[1] + conf.rf4$overall[1]) / 4
accuracy_mean_rf <- rbind(conf.rf1$overall[1], conf.rf2$overall[1], conf.rf3$overall[1], conf.rf4$overall[1])
accuracy_mean_rf
accuracy_rf_avg <- mean(accuracy_mean_rf)
accuracy_rf_sd <- sd(accuracy_mean_rf)

recall_rf <- as.data.frame(cbind(conf.rf1$byClass[1:3], conf.rf2$byClass[1:3], conf.rf3$byClass[1:3], conf.rf4$byClass[1:3]))
recall_rf$sentiment <- c("Negative", "Neutral", "Positive")
str(recall_rf)
recall_rf[1:4]
recall_rf_avg <- as.data.frame(rowMeans(recall_rf[1:4], na.rm = TRUE))
recall_rf_avg$sentiment <- c("Negative", "Neutral", "Positive")
colnames(recall_rf_avg)[1] <- "Recall RF"
str(recall_rf_avg)

precision_rf <- as.data.frame(cbind(conf.rf1$byClass[7:9], conf.rf2$byClass[7:9], conf.rf3$byClass[7:9], conf.rf4$byClass[7:9]))
precision_rf$sentiment <- c("Negative", "Neutral", "Positive")
str(precision_rf)
precision_rf[1:4]
precision_rf_avg <- as.data.frame(rowMeans(precision_rf[1:4], na.rm = TRUE))
precision_rf_avg$sentiment <- c("Negative", "Neutral", "Positive")
colnames(precision_rf_avg)[1] <- "Precision RF"
str(precision_rf_avg)

accuracy_rf_avg
accuracy_rf_sd
recall_rf_avg
precision_rf_avg

######################
######################
## comparing NB and RF cross-validation and plotting the results
######################
######################
gb1 <- as.data.frame(accuracy_mean_rf)
colnames(gb1)[1] <- "Accuracy RF"
gb2 <- as.data.frame(accuracy_mean_nb)
colnames(gb2)[1] <- "Accuracy NB"
gb_tot <- cbind(gb1, gb2)
gb_tot
str(gb_tot)
df.long_gb_tot <- melt(gb_tot)
str(df.long_gb_tot)
ggplot(df.long_gb_tot, aes(x = variable, y = value)) +
  geom_boxplot() +
  coord_flip() +
  xlab("Algorithm") + ylab(label = "Value") +
  ggtitle("RF vs. Naive Bayes K-fold cross-validation (K=4): Accuracy")
p2 <- ggplot(df.long_gb_tot, aes(x = variable, y = value)) +
  geom_boxplot() +
  coord_flip() +
  xlab("Algorithm") + ylab(label = "Value") +
  ggtitle("RF vs. Naive Bayes K-fold cross-validation (K=4): Accuracy")

cv_rf <- merge(recall_rf_avg, precision_rf_avg, by = c("sentiment"))
str(cv_rf)
cv_nb <- merge(recall_nb_avg, precision_nb_avg, by = c("sentiment"))
cv <- merge(cv_rf, cv_nb, by = c("sentiment"))
str(cv)
cv <- cv[c(1, 5, 3, 4, 2)]
str(cv)
library(gridExtra)
grid.table(cv)
tt1 <- ttheme_default()
t <- tableGrob(cv, theme = tt1)
grid.arrange(tableGrob(cv, theme = tt1))
str(cv)
cv <- cv[c(1, 4, 5, 2, 3)]
str(cv)
df.long2 <- melt(cv, id.vars = c("sentiment"))
str(df.long2)
p <- ggplot(df.long2, aes(variable, value, fill = sentiment)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme(axis.text.x = element_text(color = "#993333", size = 10, angle = 90)) +
  coord_flip() +
  ylab(label = "Value") + xlab("Algorithm") +
  ggtitle("RF vs. Naive Bayes K-fold cross-validation (K=4): Precision and Recall")

# Plot the chart and the table together in one figure
grid.arrange(p, t, nrow = 2, as.table = TRUE, heights = c(3, 1))

# Embed the table inside the chart
p3 <- ggplot(df.long2, aes(variable, value, fill = sentiment)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme(axis.text.x = element_text(color = "#993333", size = 10, angle = 90)) +
  coord_flip() +
  ylab(label = "Value") + xlab("Algorithm") +
  ggtitle("RF vs. Naive Bayes K-fold cross-validation (K=4)") +
  annotation_custom(tableGrob(cv), xmin = 0.5, xmax = 1, ymin = 0.5, ymax = 1)

# Plot everything together
grid.arrange(p2, p, nrow = 2)
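# NOTE (sketch, not part of the original script): to keep the final outputs, the
# comparison table and the combined figure can be written to disk (file names are
# illustrative; ggsave can write a grob such as the output of arrangeGrob in recent
# ggplot2 versions):
write.csv(cv, "rf_vs_nb_precision_recall.csv", row.names = FALSE)
final_plot <- arrangeGrob(p2, p, nrow = 2)
ggsave("rf_vs_nb_crossvalidation.png", final_plot, width = 8, height = 10, dpi = 300)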