#################################
# Applying dictionaries to UK Party Manifestoes 2015!
#################################
#
# Fetches the UK May 2015 manifestos through manifestoR, counts negative and
# positive Lexicoder (LSD2015) entries per party, and expresses those counts
# as a share of each party's total number of tokens.
#
# NOTE(review): the earlier version started with rm(list = ls(all = TRUE))
# and a setwd() to a user-specific Dropbox folder. Both were removed: the
# setwd() call fails on every other machine, and nothing in this script reads
# or writes a path relative to the working directory.

library(quanteda)
library(readtext)
library(gridExtra)
library(ggplot2)
library(syuzhet)
library(plotly)
library(manifestoR)
library(plyr) # kept last, matching the original attach order

# NOTE(review): an API key is committed to this file. Supply a private key
# through the MANIFESTO_API_KEY environment variable; the literal is kept
# only as a fallback so the script keeps running as before.
api_key <- Sys.getenv("MANIFESTO_API_KEY")
if (!nzchar(api_key)) {
  api_key <- "a86bb74f932c68f53d5328578ef9c51d"
}
mp_setapikey(key.file = NULL, key = api_key)

# Let's focus on the UK 2015 manifestoes
cmp <- mp_maindataset()
uk <- cmp[which(cmp$countryname == "United Kingdom" & cmp$date == 201505), ]
print(uk[c("partyname", "party", "edate", "date", "partyabbrev")])
uk$partyname
uk$party

uk2 <- mp_availability(countryname == "United Kingdom" & date == 201505)
uk2
uk_2 <- mp_corpus(uk2)
summary(uk_2)

quanteda_UK_2 <- corpus(uk_2)
summary(quanteda_UK_2)
summary(head(quanteda_UK_2))

# Which of the party manifestoes was more negative?
# Build the DFM grouped by party, keeping only the negative and positive
# dictionary keys. `groups` is spelled out in full instead of relying on
# partial matching of `group`.
dfm_uk2015 <- dfm(
  quanteda_UK_2,
  remove = stopwords("english"),
  tolower = TRUE,
  remove_punct = TRUE,
  remove_numbers = TRUE,
  groups = "party",
  dictionary = data_dictionary_LSD2015[1:2]
)
dfm_uk2015
str(dfm_uk2015)
dfm_uk2015@Dimnames$docs

# Relabel the grouped documents with party names. The names are looked up by
# party code rather than assigned by position, so a difference in row count
# or order between `uk` and the downloaded corpus cannot mislabel a party.
# A code without a match in `uk` keeps its code as label.
party_codes <- dfm_uk2015@Dimnames$docs
label_idx <- match(party_codes, as.character(uk$party))
dfm_uk2015@Dimnames$docs <- ifelse(
  is.na(label_idx),
  party_codes,
  as.character(uk$partyname[label_idx])
)
dfm_uk2015@Dimnames$docs
dfm_uk2015

Dictionary <- convert(dfm_uk2015, to = "data.frame")
str(Dictionary)

# Token counts per document, then summed per party
tokens <- as.data.frame(
  ntoken(quanteda_UK_2, remove_punct = TRUE, remove_numbers = TRUE)
)
str(tokens)
colnames(tokens)[1] <- "tokens"
str(tokens)
str(quanteda_UK_2)

# docvars() is the accessor for document variables; it replaces reaching
# into the corpus object's internal `documents` element.
table(docvars(quanteda_UK_2, "party"))
tokens_party <- cbind(tokens, docvars(quanteda_UK_2, "party"))
str(tokens_party)
colnames(tokens_party)[2] <- "party"
str(tokens_party)
table(tokens_party$party)

total <- ddply(tokens_party, ~ party, summarize, num_tokens = sum(tokens))
str(total)

# Attach each party's token total by party code (not by row position) and
# stop loudly if a grouped document has no total.
token_idx <- match(party_codes, as.character(total$party))
stopifnot(!anyNA(token_idx))
Dictionary$tokens <- total$num_tokens[token_idx]
str(Dictionary)

# Share of negative / positive dictionary hits among all tokens of a party
Dictionary$prop_neg <- Dictionary$negative / Dictionary$tokens
Dictionary$prop_pos <- Dictionary$positive / Dictionary$tokens
str(Dictionary)

library(reshape2)

# Raw counts of negative/positive words per party. Columns are selected by
# name instead of by position so the selection cannot silently drift.
Dictionary2 <- Dictionary[, c("document", "negative", "positive")]
str(Dictionary2)
df.long <- melt(Dictionary2, id.vars = c("document"))
str(df.long)
p <- ggplot(df.long, aes(document, value, fill = variable)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme(axis.text.x = element_text(color = "#993333", size = 10, angle = 90)) +
  coord_flip() +
  ylab(label = "Frequency positive/negative words") +
  xlab("Party")
p

# Same comparison, relative to each party's total number of tokens
str(Dictionary)
Dictionary3 <- Dictionary[, c("document", "prop_neg", "prop_pos")]
str(Dictionary3)
df.long2 <- melt(Dictionary3, id.vars = c("document"))
str(df.long2)
p2 <- ggplot(df.long2, aes(document, value, fill = variable)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme(axis.text.x = element_text(color = "#993333", size = 10, angle = 90)) +
  coord_flip() +
  ylab(label = "% positive/negative words") +
  xlab("Party")
p2

grid.arrange(p, p2, ncol = 2)

#################################################
# What about the role of State in economy? and along other dimensions?
#################################################

# Download and unpack the Laver-Garry dictionary into the session temp dir
dictfile <- tempfile()
download.file(
  "https://provalisresearch.com/Download/LaverGarry.zip",
  dictfile,
  mode = "wb"
)
td <- tempdir()
unzip(dictfile, exdir = td)
lgdict <- dictionary(file = file.path(td, "LaverGarry.cat"))
str(lgdict)

# `groups` is spelled out in full instead of relying on partial matching
dfm_uk2015_eco <- dfm(
  quanteda_UK_2,
  groups = "party",
  remove = stopwords("english"),
  tolower = TRUE,
  dictionary = lgdict,
  remove_punct = TRUE,
  remove_numbers = TRUE
)
dfm_uk2015_eco

# Relabel grouped documents with party names, looked up by party code rather
# than assigned by position; an unmatched code keeps its code as label.
eco_party_codes <- dfm_uk2015_eco@Dimnames$docs
eco_label_idx <- match(eco_party_codes, as.character(uk$party))
dfm_uk2015_eco@Dimnames$docs <- ifelse(
  is.na(eco_label_idx),
  eco_party_codes,
  as.character(uk$partyname[eco_label_idx])
)
dfm_uk2015_eco

Dictionary <- convert(dfm_uk2015_eco, to = "data.frame")
str(Dictionary)
colnames(Dictionary)
# NOTE(review): columns 6 and 8 are presumably the Laver-Garry "more state"
# and "less state" economy keys -- confirm against the colnames() output.
names(Dictionary)[6] <- "More_State"
names(Dictionary)[8] <- "Less_State"
colnames(Dictionary)

# Attach per-party token totals by party code and fail loudly on a miss
eco_token_idx <- match(eco_party_codes, as.character(total$party))
stopifnot(!anyNA(eco_token_idx))
Dictionary$tokens <- total$num_tokens[eco_token_idx]
str(Dictionary)

Dictionary$prop_More_State <- Dictionary$More_State / Dictionary$tokens
Dictionary$prop_Less_State <- Dictionary$Less_State / Dictionary$tokens
# Larger values mean relatively more "less state" than "more state" words
Dictionary$eco_position <-
  Dictionary$prop_Less_State - Dictionary$prop_More_State
str(Dictionary)

# Raw counts of More/Less State words per party (selected by name)
Dictionary2 <- Dictionary[, c("document", "More_State", "Less_State")]
str(Dictionary2)
df.long <- melt(Dictionary2, id.vars = c("document"))
str(df.long)
p <- ggplot(df.long, aes(document, value, fill = variable)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme(axis.text.x = element_text(color = "#993333", size = 10, angle = 90)) +
  coord_flip() +
  ylab(label = "Frequency More/Less State words") +
  xlab("Party")
p

# Same comparison as a share of tokens. The original picked columns 23:24 by
# position, which only works for one exact number of dictionary keys.
str(Dictionary)
Dictionary3 <- Dictionary[, c("document", "prop_More_State", "prop_Less_State")]
str(Dictionary3)
df.long2 <- melt(Dictionary3, id.vars = c("document"))
str(df.long2)
p2 <- ggplot(df.long2, aes(document, value, fill = variable)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme(axis.text.x = element_text(color = "#993333", size = 10, angle = 90)) +
  coord_flip() +
  ylab(label = "% More/Less State words") +
  xlab("Party")
p2

# Parties ordered by their economic position score
p3 <- ggplot(
  Dictionary,
  aes(x = reorder(document, eco_position), y = eco_position, group = 1)
) +
  geom_point() +
  theme(axis.text.x = element_text(color = "#993333", size = 10, angle = 90)) +
  coord_flip() +
  ylab(label = "Economic position (left-right)") +
  xlab("Party")

# Alternative layouts of the three plots
grid.arrange(p, p2, ncol = 2)
grid.arrange(p, p2, p3, ncol = 2)
grid.arrange(p, p2, p3, ncol = 2, nrow = 2)
grid.arrange(p3, arrangeGrob(p, p2), ncol = 2)