#################################
# Applying dictionaries to UK Party Manifestoes 2015!
#################################

# Start from an empty workspace (hidden objects included) so that objects left
# over from an earlier session cannot leak into this analysis.
rm(list = ls(all.names = TRUE))

# The project folder below is an absolute path on one specific machine. The
# visible part of this script uses no relative paths (the dictionary download
# goes to tempfile()/tempdir()), so switch to the folder only when it exists
# and otherwise keep running from the current directory instead of aborting.
project_dir <- "C:/Users/mw/Dropbox (VOICES)/TOPIC MODEL"
getwd()
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  message("Project directory not found; staying in ", getwd())
}
getwd()
library(quanteda)
library(readtext)
library(gridExtra)
library(ggplot2)
library(syuzhet)
library(plotly)

library(manifestoR)
# Register the Manifesto Project API key. A key supplied through the
# MANIFESTO_API_KEY environment variable takes precedence; the key that was
# already embedded in this script is kept as the fallback so existing runs
# behave exactly as before.
# NOTE(review): a credential committed to source should be rotated and the
# literal below removed once every user sets the environment variable.
manifesto_api_key <- Sys.getenv("MANIFESTO_API_KEY")
if (!nzchar(manifesto_api_key)) {
  manifesto_api_key <- "a86bb74f932c68f53d5328578ef9c51d"
}
mp_setapikey(key.file = NULL, key = manifesto_api_key)

# Let's focus on the UK 2015 manifestoes 
# Load the Manifesto Project main dataset and keep only the rows whose
# country is "United Kingdom" and whose date code is 201505.
cmp <- mp_maindataset()
is_uk_2015 <- cmp$countryname == "United Kingdom" & cmp$date == 201505
uk <- cmp[which(is_uk_2015), ]

# Show the identifying columns, then the party names and the party codes.
id_cols <- c("partyname", "party", "edate", "date", "partyabbrev")
print(uk[id_cols])
uk$partyname
uk$party

# Query document availability for the UK entries dated 201505 and display it.
uk2 <- mp_availability(countryname == "United Kingdom" & date == 201505)
uk2

# Fetch the corpus for those entries and summarise it.
uk_2 <- mp_corpus(uk2)
summary(uk_2)

# Convert to a quanteda corpus; summarise it in full and then only its head.
quanteda_UK_2 <- corpus(uk_2)
summary(quanteda_UK_2)
summary(head(quanteda_UK_2))

# Which of the two party manifestoes was more negative?
# making the DFM, grouping by party and considering only words negative and positive
# Build a document-feature matrix grouped by the "party" docvar. Only the
# first two keys of data_dictionary_LSD2015 are counted (used further down as
# the `negative` and `positive` columns); stopwords, punctuation and numbers
# are removed and text is lower-cased.
dfm_uk2015 <- dfm(
  quanteda_UK_2,
  remove = stopwords("english"),
  tolower = TRUE,
  remove_punct = TRUE,
  remove_numbers = TRUE,
  group = c("party"),
  dictionary = data_dictionary_LSD2015[1:2]
)

dfm_uk2015
str(dfm_uk2015)
dfm_uk2015@Dimnames$docs

# Relabel the rows with party names. `uk` lists every party in the main
# dataset for this election, while the dfm only has the parties present in the
# corpus, in group order, so assigning uk$partyname by position can mislabel
# rows or supply a vector of the wrong length. Look each row's party code up
# in `uk` instead.
# NOTE(review): assumes the grouped document names are the party codes.
party_codes <- dfm_uk2015@Dimnames$docs
party_rows <- match(party_codes, as.character(uk$party))
party_labels <- as.character(uk$partyname[party_rows])
# Keep the code itself as the label when `uk` has no matching row.
party_labels[is.na(party_rows)] <- party_codes[is.na(party_rows)]
dfm_uk2015@Dimnames$docs <- party_labels
dfm_uk2015@Dimnames$docs
dfm_uk2015

# Flatten the sentiment dfm into a data frame.
Dictionary <- convert(dfm_uk2015, to = "data.frame")
str(Dictionary)

library(plyr)

# Token count of every corpus document, ignoring punctuation and numbers.
tokens <- as.data.frame(
  ntoken(quanteda_UK_2, remove_punct = TRUE, remove_numbers = TRUE)
)
str(tokens)
colnames(tokens)[1] <- "tokens"
str(tokens)

# Read each document's party through the docvars() accessor rather than the
# corpus object's internal `$documents` data frame, which only exists in the
# quanteda 1.x corpus layout.
str(quanteda_UK_2)
doc_party <- docvars(quanteda_UK_2, "party")
table(doc_party)
tokens_party <- cbind(tokens, party = doc_party)
str(tokens_party)
table(tokens_party$party)

# Sum the document token counts within each party.
total <- ddply(tokens_party, ~ party, summarize, num_tokens = sum(tokens))
str(total)

# NOTE(review): positional assignment; relies on `total` (sorted by party by
# ddply) and `Dictionary` (one row per dfm group) being in the same party
# order. Confirm when the set of parties changes.
Dictionary$tokens <- total$num_tokens
str(Dictionary)

# Express the negative and positive counts as a share of the `tokens` column.
Dictionary[["prop_neg"]] <- Dictionary[["negative"]] / Dictionary[["tokens"]]
Dictionary[["prop_pos"]] <- Dictionary[["positive"]] / Dictionary[["tokens"]]
str(Dictionary)

# Raw counts: take the first three columns (the document label and the two
# dictionary counts) and reshape them to long format for plotting.
str(Dictionary)
Dictionary2 <- Dictionary[, 1:3]
library(reshape2)
str(Dictionary2)
df.long <- melt(Dictionary2, id.vars = "document")
str(df.long)

# Dodged bars per document, flipped so the documents run down the side.
p <- ggplot(df.long, aes(x = document, y = value, fill = variable)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(axis.text.x = element_text(color = "#993333", size = 10, angle = 90)) +
  coord_flip() +
  xlab("Party") +
  ylab(label = "Frequency positive/negative words")
p

# Shares: pick the document label and the two proportion columns by name, so
# the selection does not depend on how many columns precede them.
str(Dictionary)
Dictionary3 <- Dictionary[, c("document", "prop_neg", "prop_pos")]
str(Dictionary3)
df.long2 <- melt(Dictionary3, id.vars = "document")
str(df.long2)

# The axis is labelled in percent, but prop_neg/prop_pos are 0-1 proportions;
# scale by 100 inside the plot so the numbers shown match the label. The data
# frames themselves keep the unscaled proportions.
p2 <- ggplot(df.long2, aes(document, 100 * value, fill = variable)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme(axis.text.x = element_text(color = "#993333", size = 10, angle = 90)) +
  coord_flip() +
  ylab(label = "% positive/negative words") +
  xlab("Party")
p2

# Counts and shares side by side.
grid.arrange(p, p2, ncol = 2)

#################################################
# What about the role of the State in the economy, and positions along other dimensions?
#################################################

# Download the LaverGarry.zip archive to a temporary file, unpack it into the
# session temp directory, and load LaverGarry.cat from there as a dictionary.
dictfile <- tempfile()
download.file(
  "https://provalisresearch.com/Download/LaverGarry.zip",
  dictfile,
  mode = "wb"
)
td <- tempdir()
unzip(dictfile, exdir = td)
lgdict <- dictionary(file = file.path(td, "LaverGarry.cat"))
str(lgdict)

# Document-feature matrix grouped by the "party" docvar, counting matches for
# the keys of `lgdict`; stopwords, punctuation and numbers are removed and
# text is lower-cased.
dfm_uk2015_eco <- dfm(
  quanteda_UK_2,
  group = c("party"),
  remove = stopwords("english"),
  tolower = TRUE,
  dictionary = lgdict,
  remove_punct = TRUE,
  remove_numbers = TRUE
)
dfm_uk2015_eco

# Relabel the rows with party names by looking each row's party code up in
# `uk`. Assigning uk$partyname by position can mislabel rows or supply a
# vector of the wrong length, because `uk` lists every party in the main
# dataset while the dfm only has the parties present in the corpus.
# NOTE(review): assumes the grouped document names are the party codes.
eco_party_codes <- dfm_uk2015_eco@Dimnames$docs
eco_party_rows <- match(eco_party_codes, as.character(uk$party))
eco_party_labels <- as.character(uk$partyname[eco_party_rows])
# Keep the code itself as the label when `uk` has no matching row.
eco_party_labels[is.na(eco_party_rows)] <-
  eco_party_codes[is.na(eco_party_rows)]
dfm_uk2015_eco@Dimnames$docs <- eco_party_labels
dfm_uk2015_eco

# Flatten the Laver-Garry dfm into a data frame (this overwrites the
# sentiment `Dictionary` built earlier).
Dictionary <- convert(dfm_uk2015_eco, to = "data.frame")
str(Dictionary)

# Give columns 6 and 8 short names; the column names are printed before and
# after the change.
# NOTE(review): presumably these are the dictionary's "more state" and "less
# state" economy categories; confirm against the first colnames() output.
colnames(Dictionary)
names(Dictionary)[c(6, 8)] <- c("More_State", "Less_State")
colnames(Dictionary)

# Attach the per-party token totals computed earlier (positional assignment).
Dictionary$tokens <- total$num_tokens
str(Dictionary)

# Shares of tokens for each category, and their difference
# (Less_State share minus More_State share).
Dictionary[["prop_More_State"]] <-
  Dictionary[["More_State"]] / Dictionary[["tokens"]]
Dictionary[["prop_Less_State"]] <-
  Dictionary[["Less_State"]] / Dictionary[["tokens"]]
Dictionary[["eco_position"]] <-
  Dictionary[["prop_Less_State"]] - Dictionary[["prop_More_State"]]
str(Dictionary)

# Raw counts: the document label (column 1) plus columns 6 and 8, reshaped to
# long format for plotting.
state_count_cols <- c(1, 6, 8)
Dictionary2 <- Dictionary[, state_count_cols]
str(Dictionary2)
library(reshape2)
str(Dictionary2)
df.long <- melt(Dictionary2, id.vars = "document")
str(df.long)

# Dodged bars per document, flipped so the documents run down the side.
p <- ggplot(df.long, aes(x = document, y = value, fill = variable)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(axis.text.x = element_text(color = "#993333", size = 10, angle = 90)) +
  coord_flip() +
  xlab("Party") +
  ylab(label = "Frequency More/Less State words")
p

# Shares: pick the document label and the two proportion columns by name. The
# previous positional selection (columns 23:24) only hits these columns when
# the dictionary yields exactly the expected number of feature columns.
str(Dictionary)
Dictionary3 <- Dictionary[, c("document", "prop_More_State", "prop_Less_State")]
str(Dictionary3)
df.long2 <- melt(Dictionary3, id.vars = "document")
str(df.long2)

# The axis is labelled in percent, but the prop_* columns are 0-1
# proportions; scale by 100 inside the plot so the numbers shown match the
# label. The data frames themselves keep the unscaled proportions.
p2 <- ggplot(df.long2, aes(document, 100 * value, fill = variable)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme(axis.text.x = element_text(color = "#993333", size = 10, angle = 90)) +
  coord_flip() +
  ylab(label = "% More/Less State words") +
  xlab("Party")
p2

# One point per document at its eco_position value, with documents ordered by
# that value and the axes flipped.
p3 <- ggplot(
  Dictionary,
  aes(x = reorder(document, eco_position), y = eco_position, group = 1)
) +
  geom_point() +
  theme(axis.text.x = element_text(color = "#993333", size = 10, angle = 90)) +
  coord_flip() +
  xlab("Party") +
  ylab(label = "Economic position (left-right)")

# Draw several alternative page layouts of the three plots, one after another.
grid.arrange(p, p2, ncol = 2)
grid.arrange(p, p2, p3, ncol = 2)

library(gridExtra)
grid.arrange(p, p2, p3, ncol = 2, nrow = 2)
# p3 on the left; p and p2 stacked on the right.
grid.arrange(p3, arrangeGrob(p, p2), ncol = 2)