rm(list=ls(all=TRUE))
getwd()
setwd("C:/Users/mw/Dropbox (VOICES)/TOPIC MODEL")
getwd()
library(manifestoR)
library(Hmisc)
library(psych)
library(PerformanceAnalytics)
library(corrplot)
library(corrgram)
library(latticeExtra)
sessionInfo()

mp_setapikey(key.file = NULL, key = "a86bb74f932c68f53d5328578ef9c51d")
# all the versions of the CMP dataset
mp_coreversions()

# open the entire CMP dataset (by default it opens the most recent one; otherwise
# you could have written something as: mpds <- mp_maindataset(version = "MPDS2017b")
mpds <- mp_maindataset()
print(head(names(mpds)))
print(mpds[c("partyname", "countryname", "edate")])
fix(mpds)

# select the CMP dataset you are interested about: some examples
# let's focus on parties with a rile>60 (i.e., extreme right parties)
right <- mpds[ which(mpds$rile>60),]
print(right [c("partyname", "countryname", "edate",  "rile")])

# let's focus on a specific party belonging to our "right" object
greece <- right[ which(right$partyname=="Communist Party of Greece"),]
print(greece [c("partyname", "countryname", "edate")])

# let's focus on Italy
italy <- mpds[ which(mpds$countryname=="Italy"),]
print(head(italy [c("partyname", "countryname", "edate",  "rile")]))

# let's focus on Italy since 1960
italy60 <- mpds[ which(mpds$countryname=="Italy" & mpds$date>196001),]
print(head(italy60 [c("partyname", "countryname", "edate")]))

# estimate the left-right positions of parties according to different methods

# let's focus on the Italian CMP dataset
# rile
attach(italy)
italy$right <- per104+per201+per203+per305+per401+per402+per407+per414+per505+per601+per603+per605+per606
italy$left <- per103+per105+per106+per107+per202+per403+per404+per406+per412+per413+per504+per506+per701
italy$RILE <- italy$right-italy$left
colnames(italy)
# same results you have in the CMP dataset under rile!
cor(italy$RILE, italy$rile)

# ratio scale
italy$RATIO <- (italy$right-italy$left)/(italy$right+italy$left)
summary(italy$RATIO)
# more ideological range covered using RATIO (as expected)
summary(italy$RILE)
# correlation between RILE and RATIO for the Italian case
cor(italy$RILE, italy$RATIO)

# logit scale
# calling it directly from the ManifestoR
italy$LOGIT <- mp_scale(italy, scalingfun = logit_rile)

attach(italy)
set_lr <- cbind(RILE, RATIO, LOGIT)
rcorr(set_lr, type="pearson")
pairs.panels(set_lr) 
chart.Correlation(set_lr)

#  you can also draw a scatter with a fit lines and party names
plot(italy$RILE, italy$RATIO , main="Scatterplot Example", 
  	xlab="RILE", ylab="RATIO  ", pch=19)
text(italy$RILE, italy$RATIO , labels = italy$partyname, pos = 4,  col = "royalblue" , cex = 0.8)
abline(lm(italy$RATIO~italy$RILE ), col="red") # regression line (y~x) 

# Calling the Vanilla method directly from ManifestoR 
# (using all manifestoes included in the CMP dataset, as suggested by Gabel and Huber)
mpds$vanilla <- vanilla(mpds)
# to have as positive scores the party along the right of the scale, use the "invert=TRUE" option
mpds$vanilla_invert <- vanilla(mpds, invert=TRUE)
colnames(mpds)
corr.test(mpds[175:176])

# Estimating the vanilla method directly via the Factor Analysis
attach(mpds)
colnames(mpds)
X  <- subset(mpds, select = per101:per706)
str(X)
fit <- fa(X,1,scores="regression")
mpds$VANILLA <-  fit$scores
colnames(mpds)
corr.test(mpds[175:177])
print(mpds[175:177])

italy2 <- mpds[ which(mpds$countryname=="Italy"),]
italy$VANILLA <- italy2$vanilla_invert
colnames(italy)

print(italy [c("partyname", "edate", "RILE", "RATIO", "LOGIT", "VANILLA")])
attach(italy)
set_lr <- cbind(RILE, RATIO, LOGIT, VANILLA)
rcorr(set_lr, type="pearson")
chart.Correlation(set_lr)

# Add fit lines and party names
plot(italy$RILE, italy$VANILLA, main="Scatterplot Example", 
  	xlab="RILE", ylab="VANILLA", pch=19)
text(italy$RILE, italy$VANILLA, labels = italy$partyname, pos = 4,  col = "royalblue" , cex = 0.8)
abline(lm(italy$VANILLA~italy$RILE), col="red") # regression line (y~x) 

##########################################
################ some more graphs: the US case
##########################################

mpds$VANILLA <- vanilla(mpds, invert=TRUE)
colnames(mpds)

usa <- mpds[ which(mpds$countryname=="United States"),]
print(usa [c("partyname", "countryname", "edate",  "rile")])

# rile
attach(usa)
usa$right <- per104+per201+per203+per305+per401+per402+per407+per414+per505+per601+per603+per605+per606
usa$left <- per103+per105+per106+per107+per202+per403+per404+per406+per412+per413+per504+per506+per701
usa$RILE <- usa$right-usa$left

# ratio scale
usa$RATIO <- (usa$right-usa$left)/(usa$right+usa$left)

# logit scale
usa$LOGIT <- mp_scale(usa, scalingfun = logit_rile)

attach(usa)
set_lr <- cbind(RILE, RATIO, LOGIT, VANILLA)
chart.Correlation(set_lr)

####################################################################
# adding the mean weighted by votes (pervote) in the graph
####################################################################

panel.wm <- function(x, y, col.wm="black", ...) {
  xs <- if(is.factor(x)) {
    factor(c(levels(x) , rev(levels(x))), levels=levels(x))
  } else {
    xx <- sort(unique(x))
    c(xx)
  }
  wm <- unlist(lapply(split(usa,usa$date), function(z) weighted.mean(z$rile,z$pervote)))
  panel.lines(xs, wm, col=col.wm, lty=1, lwd=4)
}

xyplot(rile~date, groups=partyname, data=usa, ty=c("l", "p"),
       panel = function(x, y, ...) {
         panel.xyplot(x, y, ...)
         panel.wm(x,y)
panel.abline(h = 0, col = "black")            }
       ,xlab="Elections (DATE)", 
       ylab="Left-Right (RILE)", main="Evolution of USA Left-Right Scale (black line: weighted mean)",  
auto.key=list(space="right", columns=1, title="Parties", cex.title=2))

####################################################################
# focusing on a subset of parties
####################################################################

c <- subset(usa, usa$partyname=="Democratic Party" | usa$partyname=="Republican Party")
table(c$partyname)
str(c)

xyplot(rile~date, groups=partyname, data=c, ty=c("l", "p"),
     ,xlab="Elections (DATE)", 
ylab="Left-Right (RILE)", grid = TRUE, main="Evolution of USA Left-Right Scale",  auto.key=list(space="right",lines=TRUE,points=FALSE,
columns=1, title="Parties", cex.title=2) , lwd=3 )

xyplot(rile~date | partyname, groups=partyname, data=c, ty=c("l", "p"),
     ,xlab="Elections (DATE)", 
ylab="Left-Right (RILE)", grid = TRUE, main="Evolution of USA Left-Right Scale",  auto.key=list(space="right",lines=TRUE,points=FALSE,
columns=1, title="Parties", cex.title=2) , lwd=3)

xyplot(rile~date | partyname, groups=partyname, data=c, ty=c("smooth", "p"),
     ,xlab="Elections (DATE)", 
ylab="Left-Right (RILE)", grid = TRUE, main="Evolution of USA Left-Right Scale (smooth line)",  auto.key=list(space="right",lines=TRUE,points=FALSE,
columns=1, title="Parties", cex.title=2) , lwd=3)

#################
# combine graphs
#################

# in separated panels
rile_p <- xyplot(rile~date, groups=partyname, data=c, ty=c("l", "p"))
vanilla_p <- xyplot(VANILLA~date, groups=partyname, data=c, ty=c("l", "p"))
c(RILE = rile_p, VANILLA = vanilla_p)

rile_p <- xyplot(rile~date, groups=partyname, data=c, ty=c("l", "p"),
     ,xlab="Elections (DATE)", 
ylab="Left-Right", grid = TRUE, main="Evolution of USA Left-Right Scale: Cons vs Lab",  auto.key=list(space="right",lines=TRUE,points=FALSE,
columns=1, title="Parties", cex.title=2) , lwd=3 )
vanilla_p <-xyplot(VANILLA~date, groups=partyname, data=c, ty=c("l", "p"),
     ,xlab="Elections (DATE)", 
ylab="Left-Right", grid = TRUE, main="Evolution of UK Left-Right Scale",  auto.key=list(space="right",lines=TRUE,points=FALSE,
columns=1, title="Parties", cex.title=2) , lwd=3 )
c(RILE = rile_p, VANILLA = vanilla_p)

logit_p <-xyplot(LOGIT~date, groups=partyname, data=c, ty=c("l", "p"),
     ,xlab="Elections (DATE)", 
ylab="Left-Right", grid = TRUE, main="Evolution of UK Left-Right Scale",  auto.key=list(space="right",lines=TRUE,points=FALSE,
columns=1, title="Parties", cex.title=2) , lwd=3 )

ratio_p <-xyplot(RATIO~date, groups=partyname, data=c, ty=c("l", "p"),
     ,xlab="Elections (DATE)", 
ylab="Left-Right", grid = TRUE, main="Evolution of UK Left-Right Scale",  auto.key=list(space="right",lines=TRUE,points=FALSE,
columns=1, title="Parties", cex.title=2) , lwd=3 )

c(RILE = rile_p, VANILLA = vanilla_p, LOGIT = logit_p, RATIO = ratio_p)

#################
# Writing custom scaling functions
#################

italy <- mpds[ which(mpds$countryname=="Italy"),]
print(head(italy [c("partyname", "countryname", "edate",  "rile")]))

# write your scaling function
ratio_scale <- function(data) {
((data$per104+data$per201+data$per203+data$per305+data$per401+data$per402+data$per407+data$per414+data$per505
+data$per601+data$per603+data$per605+data$per606)-(data$per103+data$per105+data$per106+data$per107+data$per202+data$per403
+data$per404+data$per406+data$per412+data$per413+data$per504+data$per506+data$per701))/(data$per104+data$per201+data$per203+data$per305+data$per401+data$per402+data$per407+data$per414+data$per505
+data$per601+data$per603+data$per605+data$per606+data$per103+data$per105+data$per106+data$per107+data$per202+data$per403
+data$per404+data$per406+data$per412+data$per413+data$per504+data$per506+data$per701)
}

# apply your scaling function to data now
italy$ratio_scale <- mp_scale(italy , scalingfun = ratio_scale)
colnames(italy)
print(head(italy [c("partyname", "edate", "rile", "ratio_scale")]))

# write another function, such estimating the position about European Union (per108: European integration positive; 
# per110: European integration negative)

# write your scaling function
european_scale <- function(data) {
((data$per108)-(data$per110))
}

# apply your scaling function to data now
italy$european_scale <- mp_scale(italy , scalingfun = european_scale)
colnames(italy)
print(head(italy [c("partyname", "edate", "rile", "ratio_scale", "european_scale")]))

# correlation between rile and european_scale
cor(italy$rile, italy$european_scale)

plot(italy$rile, italy$european_scale, main="Scatterplot Example", 
  	xlab="rile", ylab="european_scale", pch=19)
text(italy$rile, italy$european_scale, labels = italy$partyname, pos = 4,  col = "royalblue" , cex = 0.8)
abline(lm(italy$european_scale~italy$rile), col="red") # regression line (y~x)