rm(list=ls(all=TRUE))
getwd()
setwd("C:/Users/mw/Dropbox (VOICES)/TOPIC MODEL")
getwd()
library(e1071)
library(caTools)
library(randomForest)
library(caret)
library(quanteda)
library(readtext)
library(iSAX)
library(tm) 
library(stringr)
library(glmnet)

##################################################
# iSA
##################################################
### TRAINING-TEST
# Let's start with our usual training-set about Trump
x <- read.csv("trainTrump.csv", stringsAsFactors=FALSE)
str(x)
# removing all punctuations also here (it is a mess with tweets!). Better doing that directly at this stage
x$text <- str_replace_all(x$text, "[^[:alnum:]]", " ")
str(x)
x$Sentiment <- as.factor(x$Sentiment) # let's transform the variable "Sentiment" in a factor variable
str(x)
table(x$Sentiment)
prop.table(table(x$Sentiment))

### TEST-SET
### That's our usual test-set: sample of 1000 tweets written in English about Trump and published since 1.17.2018 till 1.19.2018
x10 <- read.csv("testTrump.csv", stringsAsFactors=FALSE)
x10$text <- str_replace_all(x10$text, "[^[:alnum:]]", " ")
str(x10)

### Let's create a unique dataset including both TEST and TRAINING SET 
x10$Sentiment <- NA # for doing that let's add a new column called "Sentiment" in the test-set given that such column is presented in the training-set
str(x10)
documents <- rbind(x10, x) # let's combine the test and the training-set
str(documents )
table(documents $Sentiment)
prop.table(table(documents $Sentiment))

### iSA needs the TM package (not Quanteda!) to build the DFM
corpus <- VCorpus(VectorSource(documents $text))
length(corpus) # we have 1472 texts (1,000 as test-set and 472 as training-set)
str(corpus[[1]])
ocome <- prep.data(corpus,verbose=TRUE, th=0.995) # let's prepares data for iSA algorithm. 
# This is a pre-processing step which performs stemming and other cleaning steps.
# th=0.995 means that we drop those features that appear in less than 5% of the texts
str(ocome)

### let's separate the resulting object "ocome" according to the presence or absence of info about the Sentiment 
### (i.e., training vs. test-set)

train <- !is.na(documents $Sentiment) # I create an index=TRUE for the training-set documents (i.e., those texts with Sentiment 
# different than NA)
train
summary(train)
D <- documents$Sentiment[train] # I recover the vector of the values for the Sentiment in the training-set
str(D)
# Same results indeed!
prop.table(table(D))
prop.table(table(x$Sentiment))

Strain <- ocome$S[which(train)] # I select out of "ocome" the vector of stems belonging to the training-set
Stest <- ocome$S[-which(train)] # I select out of "ocome" the vector of stems belonging to the test-set

length(Strain ) # 500!
length(Stest) # 500!

set.seed(123)
system.time(outSent <- iSA(Strain ,Stest , D)) # let's run iSA: D is the vector of codings belonging to the training set
# estimation for the test-test
round(outSent$btab, 5) # I have also bootstrapped s.e.!
# training-set %
prop.table(table(D ))

##################################################
# How to assess the quality of iSA estimates?
##################################################

# For example, you can create two random sets out of your original training-set, and estimate the MAE on them!

##################
# iSA - K1 we use train1_ISA to predict the documents of train2_ISA
##################

# create two training-sets from the original one
k <- 2
n <- length(Strain)
n
split <- rep(1:k,c(0.5*n,0.5*n))
table(split )
split
set.seed(123)
split <- sample(split) # I rearrange randomly the order of numbers in split
split

# note that if you want to generate 4 training-sets from the original one using rep, you should write
# k <- 4
# n <- length(Strain)
# split <- rep(1:k,c(0.25*n,0.25*n,0.25*n,0.25*n))
# if k <- 5, write after rep 0.2*2, etc.

train1_ISA <- subset(Strain, split==1)
train2_ISA <- subset(Strain, split==2)
length(train1_ISA)
length(train2_ISA)
D_train1_ISA <-subset(D, split==1)
prop.table(table(D_train1_ISA))
D_train2_ISA <-subset(D, split==2)
prop.table(table(D_train2_ISA))

set.seed(123)
outSent <- iSA(train1_ISA ,train2_ISA , D_train1_ISA) 
str(outSent)
round(outSent$btab, 5)

##################
# iSA - K2 we use train2_ISA to predict the documents of train1_ISA
##################

set.seed(123)
outSent2 <- iSA(train2_ISA ,train1_ISA , D_train2_ISA) 
str(outSent2)
round(outSent2$btab, 5)

##################
# Estimating the MAE when training-set1 vs. training-set2
##################

mat_all <-cbind(outSent$btab[,"Estimate"],prop.table(table(D_train2_ISA)))
colnames(mat_all) <- c("iSA1", "True1")
round(mat_all,3)
mae_ISA <- mean(abs(mat_all[,1]-mat_all[,2]))
mae_ISA

##################
# Estimating the MAE when training-set2 vs. training-set1
##################

mat_all2 <-cbind(outSent2$btab[,"Estimate"],prop.table(table(D_train1_ISA)))
colnames(mat_all2) <- c("iSA2", "True2")
round(mat_all2,3)

mae_ISA2 <- mean(abs(mat_all2[,1]-mat_all2[,2]))
mae_ISA2

####### Avg. MAE
(mae_ISA+mae_ISA2)/2

##################
# Is a better or a worst result that what we can get via other Machine Learning Algorithms? Let's compare!
##################

##################
# RandomForest - K1 we use train1_RF to predict the documents of train2_RF
##################

# REMEMBER: iSAX via the command prep.data creates a DfM with just 0/1 as frequencies.
# if you want to run an analysis on the original frequencies DfM, you should use Quanteda on the "Trump_train.csv" database
# and creating a DfM out of it before running the RF here

dtm <- ocome$dtm # create an object out of the DfM included in the object "ocomo" we created via the prep.data command of iSAX
dtm1 <- data.frame(dtm)
# this would add a letter in front of a number (if you have any numbers left in the tdm). Highly suggested
colnames(dtm1) <- make.names(colnames(dtm1))
dtm1
dtm1$Sentiment<- documents $Sentiment # let's add to the DfM also the Sentiment variable out from the training-set
str(dtm1$Sentiment)
dtm1$Sentiment<- as.factor(documents $Sentiment) # let's transform the Sentiment variable in a factor!
str(dtm1$Sentiment)

table(documents $Sentiment)
summary(dtm1$Sentiment)

train <- dtm1[ which(!is.na(dtm1$Sentiment)), ] # let's extract the part of the DfM that contains values for the "Sentiment" variable
# i.e., our original training-set! 
nrow(train)

# let's split the training-set in two
train1_RF <- subset(train , split==1) 
train2_RF <- subset(train , split==2)

# Same splitting of the training-set as we did with iSA!
prop.table(table(train1_RF$Sentiment))
prop.table(table(D_train1_ISA))
prop.table(table(train2_RF$Sentiment))
prop.table(table(D_train2_ISA))

set.seed(123)
system.time(tweetRF1 <- randomForest(Sentiment~ ., data=train1_RF, do.trace=TRUE))
predictRF1 <- predict(tweetRF1, newdata=train2_RF, type="class")
prop.table(table( predictRF1))

##################
# RandomForest - K2 we use train2_RF to predict the documents of train1_RF
##################

set.seed(123)
system.time(tweetRF2 <- randomForest(Sentiment~ ., data=train2_RF, do.trace=TRUE))
predictRF2 <- predict(tweetRF2, newdata=train1_RF, type="class")
prop.table(table( predictRF2))

##################
# SVM - K1 we use train1_SV to predict the documents of train2_SV
##################

train1_SV <- train1_RF 
train2_SV <- train2_RF

set.seed(123)
system.time(tweetSV1 <- svm(Sentiment~ . ,data=train1_SV, kernel='linear', cost = 10))
predictSV1 <- predict(tweetSV1, newdata=train2_SV, type="class")
prop.table(table( predictSV1))

##################
# SVM - K2 we use train2_SV to predict the documents of train1_SV
##################

train1_SV <- train1_RF 
train2_SV <- train2_RF

set.seed(123)
system.time(tweetSV2 <- svm(Sentiment~ . ,data=train2_SV, kernel='linear', cost = 10))
predictSV2 <- predict(tweetSV2, newdata=train1_SV, type="class")
prop.table(table( predictSV2))

##################
# Naive Bayes - K1 we use train1_NB to predict the documents of train2_NB
##################

str(x)
myCorpusTwitter <- corpus(x)

summary( myCorpusTwitter )
N <- ndoc(myCorpusTwitter )
# Number of desired splits
k <- 2
set.seed(123)
holdout <- split(sample(1:N), 1:k)
docvars(myCorpusTwitter , "id_numeric") <- 1:ndoc(myCorpusTwitter )

# create train1_NB (documents in id_train)

train1_NB <- dfm( corpus_subset(myCorpusTwitter, id_numeric %in% holdout[[1]]) , remove = c(stopwords("english"), ("amp"), ("rt") ,("tco"), ("co"), ("u"), ("t"), ("s"), ("ed"), ("https")),
                remove_punct = TRUE, remove_numbers=TRUE, tolower = TRUE, remove_symbols=TRUE, remove_twitter = TRUE, 
remove_separators=TRUE, remove_url = TRUE)
# Keep terms that appear at least in the 5% or more of the tweets 
train1_NB <- dfm_trim(train1_NB  , min_docfreq= 0.05)
#######################################################################
# let's recode all the values in the DfM as just 0/1 as a matter of compability with the DfM created via iSAX
train1_NB@x <- recode(train1_NB@x,"2:hi=1")
#######################################################################

# create train2NB (documents NOT in id_train)

train2_NB  <-  dfm(corpus_subset(myCorpusTwitter, id_numeric %in%holdout[[2]]) , remove = c(stopwords("english"), ("amp"), ("rt") ,("tco"), ("co"), ("u"), ("t"), ("s"), ("ed"), ("https")),
                remove_punct = TRUE, remove_numbers=TRUE, tolower = TRUE, remove_symbols=TRUE, remove_twitter = TRUE, 
remove_separators=TRUE, remove_url = TRUE)
# Keep terms that appear at least in the 5% or more of the tweets
train2_NB <- dfm_trim(train2_NB  , min_docfreq= 0.05)
#######################################################################
# let's recode all the values in the DfM as just 0/1 as a matter of compability with the DfM created via iSAX
train2_NB@x  <- recode(train2_NB @x,"2:hi=1")
#######################################################################

# train the naive Bayes classifier using textmodel_nb() and a Multinomial distribution 
nb <- textmodel_nb(train1_NB, docvars(train1_NB, "Sentiment"), distribution = c("multinomial"))
summary(nb)

# Let's make the features identical by passing train1_NB to dfm_select() as a pattern.
training_dfm2 <- dfm_select(train2_NB, train1_NB)

# Let’s inspect how well the classification worked
actual_class <- docvars(training_dfm2, "Sentiment")

predicted_classNB1 <- predict(nb, training_dfm2)
table(predicted_classNB1)
prop.table(table(predicted_classNB1 ))

# Here different splitting of the training-set compared to what we did with iSA/RF!
prop.table(table(train1_RF$Sentiment))
prop.table(table(D_train1_ISA))
prop.table(table(train1_NB@docvars$Sentiment ))

prop.table(table(train2_RF$Sentiment))
prop.table(table(D_train2_ISA))
prop.table(table(train2_NB@docvars$Sentiment ))

##################
# Naive Bayes - K2 we use train2_NB to predict the documents of train1_NB
##################

# train the naive Bayes classifier using textmodel_nb() and a Multinomial distribution (the default)
nb2 <- textmodel_nb(train2_NB , docvars(train2_NB , "Sentiment"), distribution = c("multinomial"))
summary(nb2)

# Let's make the features identical by passing training_dfm2 to dfm_select() as a pattern.
training_dfm1 <- dfm_select(train1_NB, train2_NB)

# Let’s inspect how well the classification worked
actual_class2 <- docvars(training_dfm1, "Sentiment")
predicted_classNB2 <- predict(nb2, training_dfm1)
table(predicted_classNB2)
prop.table(table(predicted_classNB2 ))

##################
# Estimating the MAE when training-set1 vs. training-set2 for all (=4) the algorithms employed 
##################

mat_all <-cbind(outSent$btab[,"Estimate"], prop.table(table( predictRF1)), prop.table(table( predictSV1)), 
prop.table(table(predicted_classNB1)), prop.table(table(D_train2_ISA)), prop.table(table(train2_NB@docvars$Sentiment )))

colnames(mat_all) <- c("iSA1", "RF1", "SV1", "BAYES1", "True1", "TrueNB1")
round(mat_all,3)

mae_ISA1 <- mean(abs(mat_all[,1]-mat_all[,5]))
mae_RF1 <- mean(abs(mat_all[,2]-mat_all[,5]))
mae_SV1 <- mean(abs(mat_all[,3]-mat_all[,5]))
mae_BAYES1 <- mean(abs(mat_all[,4]-mat_all[,6]))

##################
# Estimating the MAE when training-set2 vs. training-set1 for all (=4) the algorithms employed 
##################

mat_all2 <-cbind(outSent2$btab[,"Estimate"], prop.table(table( predictRF2)), prop.table(table( predictSV2)), 
prop.table(table(predicted_classNB2)), prop.table(table(D_train1_ISA)), prop.table(table(train1_NB@docvars$Sentiment )))

colnames(mat_all2) <- c("iSA2", "RF2", "SV2", "BAYES2", "True2", "TrueNB2")
round(mat_all2,3)

mae_ISA2 <- mean(abs(mat_all2[,1]-mat_all2[,5]))
mae_RF2 <- mean(abs(mat_all2[,2]-mat_all2[,5]))
mae_SV2 <- mean(abs(mat_all2[,3]-mat_all2[,5]))
mae_BAYES2 <- mean(abs(mat_all2[,4]-mat_all2[,6]))

####### Avg. MAE
(mae_ISA1+mae_ISA2)/2
(mae_RF1+mae_RF2)/2
(mae_SV1+mae_SV2)/2
(mae_BAYES1+mae_BAYES2)/2

# Does it mean that iSA is the best ML algorithm here when we focus on proportio? 
# Let's see some more serious CV!

##################
# Let's loop everything with any number of K! For example: K=10
##################

##################
# iSA K=10
##################

k <- 10
n <- length(Strain)
n
split <- rep(1:k,c(0.1*n,0.1*n,0.1*n,0.1*n,0.1*n,0.1*n,0.1*n,0.1*n,0.1*n,0.1*n))
table(split )
set.seed(123)
split <- sample(split) 

for(i in 1:k){
train <- subset(Strain, split!=(i))
validation <- subset(Strain, split==(i))
D_train_ISA <-subset(D, split!=(i))
D_train_true <-subset(D, split==(i))
set.seed(123)
outSent <- iSA(train ,validation  , D_train_ISA)
df<-outSent
df.name<-paste0("iSA",i)
assign(df.name,df)
pf<-prop.table(table(D_train_true ))
pf.name<-paste0("True",i)
assign(pf.name,pf)
}

iSAPredict <- data.frame(col1=vector(), col2=vector(), col3=vector()) # makes a blank data frame with three columns to fill with the predictions

for(i in  mget(ls(pattern = "iSA")) ) {
  col1 <-(i)$btab[1]
  col2 <-(i)$btab[2]
  col3 <-(i)$btab[3]
 iSAPredict <- rbind(iSAPredict, cbind(col1, col2, col3))
}
colnames(iSAPredict)[1] <- "Negative"
colnames(iSAPredict)[2] <- "Neutral"
colnames(iSAPredict)[3] <- "Positive"
str(iSAPredict)
iSAPredict<-as.data.frame(t(iSAPredict)) # transpose the matrix
colnames(iSAPredict) <- rep(1:k)
iSAPredict

TrueValue <- data.frame(col1=vector(), col2=vector(), col3=vector())   # makes a blank data frame with three columns to fill with the true values
for(i in  mget(ls(pattern = "True")) ) {
  col1 <-(i)[1]
  col2 <- (i)[2]
  col3 <- (i)[3]
 TrueValue <- rbind(TrueValue , cbind(col1, col2, col3))
}
str(TrueValue )
TrueValue <- as.data.frame(t(TrueValue )) # transpose the matrix
colnames(TrueValue ) <- rep(1:k) 
row.names(TrueValue ) <- row.names(iSAPredict)
TrueValue 

iSA_TABLE <- abs(iSAPredict-TrueValue )
MAE_iSA <- mean(colMeans(iSA_TABLE))
MAE_iSA

##################
# RF K=10
##################

dtm <- ocome$dtm # create an object out of the DfM included in the object "ocomo" we created via the prep.data command of iSAX
dtm1 <- data.frame(dtm)
colnames(dtm1) <- make.names(colnames(dtm1))
dtm1$Sentiment<- documents $Sentiment # let's add to the DfM also the Sentiment variable out from the training-set
dtm1$Sentiment<- as.factor(documents $Sentiment) # let's transform the Sentiment variable in a factor!
train <- dtm1[ which(!is.na(dtm1$Sentiment)), ] # let's extract the part of the DfM that contains values for the "Sentiment" variable
nrow(train)
ttrain <- train

for(i in 1:k){
train <- subset(ttrain , split!=(i))
validation <- subset(ttrain , split==(i))
set.seed(123)
newrf <- randomForest(Sentiment~ ., data=train, do.trace=TRUE, ntree=100) # set ntree=100 to make it faster during the Lab class
predict <- predict(newrf, newdata=validation, type="class")
df<- prop.table(table(predict))
df.name<-paste0("RFpred",i)
assign(df.name,df)
pf<-prop.table(table(validation$Sentiment))
pf.name<-paste0("TrueRF",i)
assign(pf.name,pf)
}

RFPredict <- data.frame(col1=vector(), col2=vector(), col3=vector()) # makes a blank data frame with three columns to fill with the predictions
for(i in  mget(ls(pattern = "RFpred")) ) {
  col1 <-(i)[1]
  col2 <- (i)[2]
  col3 <- (i)[3]
 RFPredict <- rbind(RFPredict , cbind(col1, col2, col3))
}
RFPredict <-as.data.frame(t(RFPredict )) # transpose the matrix
colnames(RFPredict ) <- rep(1:k)
row.names(RFPredict ) <- row.names(iSAPredict)
RFPredict

TrueValueRF <- data.frame(col1=vector(), col2=vector(), col3=vector())   # makes a blank data frame with three columns to fill with the true values
for(i in  mget(ls(pattern = "TrueRF")) ) {
  col1 <-(i)[1]
  col2 <- (i)[2]
  col3 <- (i)[3]
 TrueValueRF <- rbind(TrueValueRF , cbind(col1, col2, col3))
}
str(TrueValueRF )
TrueValueRF <- as.data.frame(t(TrueValueRF )) # transpose the matrix
colnames(TrueValueRF ) <- rep(1:k) 
row.names(TrueValueRF ) <- row.names(iSAPredict)
TrueValueRF

RF_TABLE <- abs(RFPredict -TrueValueRF)
MAE_RF<- mean(colMeans(RF_TABLE))
MAE_RF

##################
# SVM K=10
##################

for(i in 1:k){
train <- subset(ttrain , split!=(i))
validation <- subset(ttrain , split==(i))
set.seed(123)
newrf <- svm(Sentiment~ ., data=train,  kernel='linear', cost = 10) 
predict <- predict(newrf, newdata=validation, type="class")
df<- prop.table(table(predict))
df.name<-paste0("SVpred",i)
assign(df.name,df)
pf<-prop.table(table(validation$Sentiment))
pf.name<-paste0("TrueSV",i)
assign(pf.name,pf)
}

SVPredict <- data.frame(col1=vector(), col2=vector(), col3=vector()) # makes a blank data frame with three columns to fill with the predictions
for(i in  mget(ls(pattern = "SVpred")) ) {
  col1 <-(i)[1]
  col2 <- (i)[2]
  col3 <- (i)[3]
 SVPredict <- rbind(SVPredict , cbind(col1, col2, col3))
}
SVPredict <-as.data.frame(t(SVPredict )) # transpose the matrix
colnames(SVPredict ) <- rep(1:k)
row.names(SVPredict ) <- row.names(iSAPredict)
SVPredict 

TrueValueSV <- data.frame(col1=vector(), col2=vector(), col3=vector())   # makes a blank data frame with three columns to fill with the true values
for(i in  mget(ls(pattern = "TrueSV")) ) {
  col1 <-(i)[1]
  col2 <- (i)[2]
  col3 <- (i)[3]
 TrueValueSV <- rbind(TrueValueSV , cbind(col1, col2, col3))
}
str(TrueValueSV )
TrueValueSV  <- as.data.frame(t(TrueValueSV )) # transpose the matrix
colnames(TrueValueSV ) <- rep(1:k) 
row.names(TrueValueSV ) <- row.names(iSAPredict)
TrueValueSV 

SV_TABLE <- abs(SVPredict -TrueValueSV)
MAE_SV<- mean(colMeans(SV_TABLE))
MAE_SV

##################
# NB K=10
##################

str(x)
myCorpusTwitterTrain <- corpus(x)

summary( myCorpusTwitterTrain )
N <- ndoc(myCorpusTwitterTrain )
# Number of desired splits
k <- 10
set.seed(123)
holdout <- split(sample(1:N), 1:k)
docvars(myCorpusTwitterTrain , "id_numeric") <- 1:ndoc(myCorpusTwitterTrain )

for(i in 1:k){
train <- dfm(corpus_subset(myCorpusTwitterTrain , id_numeric %in% holdout[[i]]), remove = c(stopwords("english"), ("amp"), ("rt") ,("tco"), ("co"), ("u"), ("t"), ("s"), ("ed"), ("https"), ("â"),
("com"), ("ly")),remove_punct = TRUE, remove_numbers=TRUE, tolower = TRUE, remove_symbols=TRUE, remove_twitter = TRUE, 
remove_separators=TRUE, remove_url = TRUE) # Set the training set
train <- dfm_trim(train,min_docfreq= 2, verbose=TRUE) # Trim the dfm
validation <- dfm(corpus_subset(myCorpusTwitterTrain ,  !id_numeric %in% holdout[[i]]), remove = c(stopwords("english"), ("amp"), ("rt") ,("tco"), ("co"), ("u"), ("t"), ("s"), ("ed"), ("https"), ("â"),
("com"), ("ly")),remove_punct = TRUE, remove_numbers=TRUE, tolower = TRUE, remove_symbols=TRUE, remove_twitter = TRUE, 
remove_separators=TRUE, remove_url = TRUE) # Set the validation set
validation  <- dfm_trim(validation , min_docfreq= 2, verbose=TRUE) # Trim the dfm
set.seed(123)
newrf <- textmodel_nb(train, docvars(train, "Sentiment"), distribution = c("multinomial")) # Get your new model (just fit on the train data) and ADD the name of the output (in this case "Sentiment")
validation <- dfm_select(validation, train) # Let's make the features identical by passing training_dfm to dfm_select() as a pattern
newpred <- predict(newrf,validation) # Get the predicitons for the validation set (from the model just fit on the train data)
actual_class <- docvars(validation, "Sentiment") # Add name output (in this case "Sentiment")
predicted_class <- predict(newrf, newdata = validation)
df<- prop.table(table(predicted_class  ))
df.name<-paste0("NBpred",i)
assign(df.name,df)
pf<-prop.table(table(validation@docvars$Sentiment ))
pf.name<-paste0("TrueNB",i)
assign(pf.name,pf)
}

NBPredict <- data.frame(col1=vector(), col2=vector(), col3=vector()) # makes a blank data frame with three columns to fill with the predictions
for(i in  mget(ls(pattern = "NBpred")) ) {
  col1 <-(i)[1]
  col2 <- (i)[2]
  col3 <- (i)[3]
 NBPredict <- rbind(NBPredict , cbind(col1, col2, col3))
}
NBPredict <-as.data.frame(t(NBPredict )) # transpose the matrix
colnames(NBPredict ) <- rep(1:k)
row.names(NBPredict ) <- row.names(iSAPredict)
NBPredict 

TrueValueNB <- data.frame(col1=vector(), col2=vector(), col3=vector())   # makes a blank data frame with three columns to fill with the true values
for(i in  mget(ls(pattern = "TrueNB")) ) {
  col1 <-(i)[1]
  col2 <- (i)[2]
  col3 <- (i)[3]
 TrueValueNB <- rbind(TrueValueNB , cbind(col1, col2, col3))
}
str(TrueValueNB )
TrueValueNB <- as.data.frame(t(TrueValueNB )) # transpose the matrix
colnames(TrueValueNB ) <- rep(1:k) 
row.names(TrueValueNB ) <- row.names(iSAPredict)

NB_TABLE <- abs(NBPredict -TrueValueNB)
MAE_NB<- mean(colMeans(NB_TABLE))
MAE_NB

############ GRAPH

library(reshape2)

gb1 <- as.data.frame(MAE_iSA)
colnames(gb1)[1] <- "MAE iSA"
gb2 <- as.data.frame(MAE_RF)
colnames(gb2)[1] <- "MAE RF"
gb3 <- as.data.frame(MAE_SV)
colnames(gb3)[1] <- "MAE SV"
gb4 <- as.data.frame(MAE_NB )
colnames(gb4)[1] <- "MAE NB"
ac_tot <- cbind(gb1, gb2, gb3, gb4)
ac_tot
str(ac_tot)
df.long_ac_tot<-melt(ac_tot)
str(df.long_ac_tot)

ggplot(df.long_ac_tot, aes(x=variable, y=value)) + 
   geom_boxplot() + xlab("Algorithm") + ylab(label="Value") + 
ggtitle("Different ML K-fold cross-validation (K=10): MAE") + coord_flip()