rm(list=ls(all=TRUE))
getwd()
setwd("YOUR WORKING DIRECTORY")
getwd()
library(manifestoR)
library(Hmisc)
library(psych)
library(PerformanceAnalytics)
library(corrplot)
library(corrgram)
library(latticeExtra)

mp_setapikey(key.file = NULL, key = "YOUR KEY NUMBER")

# open the entire CMP dataset
mpds <- mp_maindataset()
print(head(names(mpds)))
print(mpds[c("partyname", "countryname", "edate")])
fix(mpds)

# select the CMP dataset you are interested about: some examples
# let's focus on parties with a rile>60 (i.e., extreme right parties)
right <- mpds[ which(mpds$rile>60),]
print(right [c("partyname", "countryname", "edate",  "rile")])

# let's focus on a specific party belonging to our "right" object
greece <- right[ which(right$partyname=="Communist Party of Greece"),]
print(greece [c("partyname", "countryname", "edate")])

# let's focus on Italy
italy <- mpds[ which(mpds$countryname=="Italy"),]
print(head(italy [c("partyname", "countryname", "edate",  "rile")]))

# let's focus on Italy since 1960
italy60 <- mpds[ which(mpds$countryname=="Italy" & mpds$date>196001),]
print(head(italy60 [c("partyname", "countryname", "edate")]))

# estimate the left-right positions of parties according to different methods

# let's focus on the Italian CMP dataset
# rile
attach(italy)
italy$right <- per104+per201+per203+per305+per401+per402+per407+per414+per505+per601+per603+per605+per606
italy$left <- per103+per105+per106+per107+per202+per403+per404+per406+per412+per413+per504+per506+per701
italy$RILE <- italy$right-italy$left
colnames(italy)
# same results you have in the CMP dataset under rile!
rcorr(italy$RILE, italy$rile)

# ratio scale
italy$RATIO <- (italy$right-italy$left)/(italy$right+italy$left)
summary(italy$RATIO)
summary(italy$RILE)
# more ideological range covered using RATIO (as expected)
rcorr(italy$RILE, italy$RATIO)

# logit scale
# call it directly from the CMP
italy$logit <- mp_scale(italy, scalingfun = logit_rile)
# estimate it directly
italy$uncodified_sentences <- total*(italy$peruncod/100)
italy$codified_sentences <- italy$total-italy$uncodified_sentences
italy$offset_new <- 100*(0.5/italy$codified_sentences)
italy$LOGIT <- log(italy$right+italy$offset_new)-log(italy$left+italy$offset_new)
# same result!
rcorr(italy$LOGIT , italy$logit)

attach(italy)
set_lr <- cbind(RILE, RATIO, LOGIT)
rcorr(set_lr, type="pearson")
corr.test(set_lr)
pairs.panels(set_lr) 
chart.Correlation(set_lr)
x <- cor(set_lr)
corrplot(x, type="upper", order="hclust")

#  you can also draw a scatter with a fit lines and party names
plot(italy$RILE, italy$RATIO , main="Scatterplot Example", 
  	xlab="RILE", ylab="RATIO  ", pch=19)
text(italy$RILE, italy$RATIO , labels = italy$partyname, pos = 4,  col = "royalblue" , cex = 0.8)
abline(lm(italy$RATIO~italy$RILE ), col="red") # regression line (y~x) 

# Vanilla method (using all manifestoes included in the CMP dataset, as suggested by Gabel and Huber)
mpds$vanilla <- vanilla(mpds)
# to have as positive scores the party along the right of the scale, use the "invert=TRUE" option
mpds$vanilla_invert <- vanilla(mpds, invert=TRUE)
colnames(mpds)
corr.test(mpds[175:176])

# Estimating the vanilla method directly
attach(mpds)
colnames(mpds)
X  <- subset(mpds, select = per101:per706)
str(X)
fit <- fa(X,1,scores="regression")
mpds$VANILLA <-  fit$scores
colnames(mpds)
corr.test(mpds[175:177])
print(mpds[175:177])

italy2 <- mpds[ which(mpds$countryname=="Italy"),]
italy$VANILLA <- italy2$vanilla_invert
colnames(italy)

print(italy [c("partyname", "edate", "RILE", "RATIO", "LOGIT", "VANILLA")])
attach(italy)
set_lr <- cbind(RILE, RATIO, LOGIT, VANILLA)
rcorr(set_lr, type="pearson")
chart.Correlation(set_lr)
x <- cor(set_lr)
corrplot(x, type="upper", order="hclust")

# Add fit lines and party names
plot(italy$RILE, italy$VANILLA, main="Scatterplot Example", 
  	xlab="RILE", ylab="VANILLA", pch=19)
text(italy$RILE, italy$VANILLA, labels = italy$partyname, pos = 4,  col = "royalblue" , cex = 0.8)
abline(lm(italy$VANILLA~italy$RILE), col="red") # regression line (y~x) 

##########################################
################ some more graphs: US case

mpds$VANILLA <- vanilla(mpds, invert=TRUE)
colnames(mpds)

usa <- mpds[ which(mpds$countryname=="United States"),]
print(usa [c("partyname", "countryname", "edate",  "rile")])

# rile
attach(usa)
usa$right <- per104+per201+per203+per305+per401+per402+per407+per414+per505+per601+per603+per605+per606
usa$left <- per103+per105+per106+per107+per202+per403+per404+per406+per412+per413+per504+per506+per701
usa$RILE <- usa$right-usa$left

# ratio scale
usa$RATIO <- (usa$right-usa$left)/(usa$right+usa$left)

# logit scale
# call it directly from the CMP
usa$LOGIT <- mp_scale(usa, scalingfun = logit_rile)

attach(usa)
set_lr <- cbind(RILE, RATIO, LOGIT, VANILLA)
chart.Correlation(set_lr)

####################################################################
# adding the overall mean al grafico
####################################################################
attach(usa)

xyplot(rile~date, groups=partyname, data=usa, ty=c("l", "p"),
 panel = function(x, y, ...) {
           panel.xyplot(x, y, ...)
           panel.linejoin(x, y, horizontal = FALSE,..., col="black", lty=1, lwd=4)
panel.abline(h = 0, col = "black")
       }
     ,xlab="Elections (DATE)", 
ylab="Left-Right (RILE)", main="Evolution of USA Left-Right Scale (black line: mean)",  auto.key=list(space="right", columns=1, 
                       title="Parties", cex.title=2))

####################################################################
# including weighted mean weighted for pervote
####################################################################

panel.wm <- function(x, y, col.wm="black", ...) {
  xs <- if(is.factor(x)) {
    factor(c(levels(x) , rev(levels(x))), levels=levels(x))
  } else {
    xx <- sort(unique(x))
    c(xx)
  }
  wm <- unlist(lapply(split(usa,usa$date), function(z) weighted.mean(z$rile,z$pervote)))
  panel.lines(xs, wm, col=col.wm, lty=1, lwd=4)
}

xyplot(rile~date, groups=partyname, data=usa, ty=c("l", "p"),
       panel = function(x, y, ...) {
         panel.xyplot(x, y, ...)
         panel.wm(x,y)
panel.abline(h = 0, col = "black")            }
       ,xlab="Elections (DATE)", 
       ylab="Left-Right (RILE)", main="Evolution of USA Left-Right Scale (black line: weighted mean)",  
auto.key=list(space="right", columns=1, title="Parties", cex.title=2))

####################################################################
# focusing on a subset of parties
####################################################################

c <- subset(usa, usa$partyname=="Democratic Party" | usa$partyname=="Republican Party")
table(c$partyname)
str(c)

xyplot(rile~date, groups=partyname, data=c, ty=c("l", "p"),
     ,xlab="Elections (DATE)", 
ylab="Left-Right (RILE)", main="Evolution of USA Left-Right Scale",  auto.key=list(space="right", columns=1, 
                       title="Parties", cex.title=2))

xyplot(rile~date, groups=partyname, data=c, ty=c("l", "p"),
     ,xlab="Elections (DATE)", 
ylab="Left-Right (RILE)", grid = TRUE, main="Evolution of USA Left-Right Scale",  auto.key=list(space="right",lines=TRUE,points=FALSE,
columns=1, title="Parties", cex.title=2) , lwd=3 )

# png(file = "C:/Users/mw/Desktop/USA.png", bg = "transparent")
panel.wm <- function(x, y, col.wm="black", ...) {
  xs <- if(is.factor(x)) {
    factor(c(levels(x) , rev(levels(x))), levels=levels(x))
  } else {
    xx <- sort(unique(x))
    c(xx)
  }
  wm <- unlist(lapply(split(c,c$date), function(z) weighted.mean(z$rile,z$pervote)))
  panel.lines(xs, wm, col=col.wm, lty=1, lwd=4)
}

xyplot(rile~date, groups=partyname, data=c, ty=c("l", "p"),
       panel = function(x, y, ...) {
         panel.xyplot(x, y, ...)
         panel.wm(x,y)
            }
       ,xlab="Elections (DATE)", 
       ylab="Left-Right (RILE)", grid = TRUE,  main="Evolution of USA Left-Right Scale (black line: weighted mean)",  
auto.key=list(space="top", lines=TRUE,points=FALSE, columns=2, title="Parties", cex.title=2) , lwd=3 )

xyplot(rile~date | partyname, groups=partyname, data=c, ty=c("l", "p"),
     ,xlab="Elections (DATE)", 
ylab="Left-Right (RILE)", grid = TRUE, main="Evolution of USA Left-Right Scale",  auto.key=list(space="right",lines=TRUE,points=FALSE,
columns=1, title="Parties", cex.title=2) , lwd=3)

xyplot(rile~date | partyname, groups=partyname, data=c, ty=c("smooth", "p"),
     ,xlab="Elections (DATE)", 
ylab="Left-Right (RILE)", grid = TRUE, main="Evolution of USA Left-Right Scale (smooth line)",  auto.key=list(space="right",lines=TRUE,points=FALSE,
columns=1, title="Parties", cex.title=2) , lwd=3)

#################
# combine graphs
#################

rile_p <- xyplot(rile~date, groups=partyname, data=c, ty=c("l", "p"))
vanilla_p <- xyplot(VANILLA~date, groups=partyname, data=c, ty=c("l", "p"))
c(RILE = rile_p, VANILLA = vanilla_p)

rile_p <- xyplot(rile~date, groups=partyname, data=c, ty=c("l", "p"),
     ,xlab="Elections (DATE)", 
ylab="Left-Right", grid = TRUE, main="Evolution of USA Left-Right Scale: Cons vs Lab",  auto.key=list(space="right",lines=TRUE,points=FALSE,
columns=1, title="Parties", cex.title=2) , lwd=3 )
vanilla_p <-xyplot(VANILLA~date, groups=partyname, data=c, ty=c("l", "p"),
     ,xlab="Elections (DATE)", 
ylab="Left-Right", grid = TRUE, main="Evolution of UK Left-Right Scale",  auto.key=list(space="right",lines=TRUE,points=FALSE,
columns=1, title="Parties", cex.title=2) , lwd=3 )
c(RILE = rile_p, VANILLA = vanilla_p)

logit_p <-xyplot(LOGIT~date, groups=partyname, data=c, ty=c("l", "p"),
     ,xlab="Elections (DATE)", 
ylab="Left-Right", grid = TRUE, main="Evolution of UK Left-Right Scale",  auto.key=list(space="right",lines=TRUE,points=FALSE,
columns=1, title="Parties", cex.title=2) , lwd=3 )

ratio_p <-xyplot(RATIO~date, groups=partyname, data=c, ty=c("l", "p"),
     ,xlab="Elections (DATE)", 
ylab="Left-Right", grid = TRUE, main="Evolution of UK Left-Right Scale",  auto.key=list(space="right",lines=TRUE,points=FALSE,
columns=1, title="Parties", cex.title=2) , lwd=3 )

c(RILE = rile_p, VANILLA = vanilla_p, LOGIT = logit_p, RATIO = ratio_p)

#################
######## doing bootstrapping
#################

# select Albania
data <- subset(mpds, countryname == "Albania")
print(head(data [c("partyname", "countryname", "edate",  "rile")]))

# bootstrap the RILE
p <- mp_bootstrap(data, fun = "rile", statistics = list(var, 0.025, 0.975))
str(p)

colnames(p)[1] <- "RILE"
colnames(p)[3] <- "lower"
colnames(p)[4] <- "upper"
colnames(p)

data$RILE <- p$RILE
data$lower <- p$lower
data$upper <- p$upper
colnames(data)

rcorr(data$rile, data$RILE)

# ordering the parties in a ascending order (otherwise: step <-order(-data $rile)
step <-order(data $RILE)
data <-data[step , ]

title <- "Parties positions with bootstrapped 95% Confidence Intervals"
dotchart(data$rile, labels=data$partyabbrev, col="blue", xlim=c(floor(min(data$lower)), ceiling(max(data$upper))), main=title )
for (i in 1:nrow(data)){
    lines(x=c(data$lower [i],data$upper[i]), y=c(i,i))
}

data$combine <- paste(data$partyabbrev, data$edate, sep = " ")
data$combine 

title <- "Parties positions with bootstrapped 95% Confidence Intervals"
dotchart(data$rile, labels=data$combine , col="blue", xlim=c(floor(min(data$lower)), ceiling(max(data$upper))), main=title )
for (i in 1:nrow(data)){
    lines(x=c(data$lower [i],data$upper[i]), y=c(i,i))
}

#################
# Writing custom scaling functions
#################

italy <- mpds[ which(mpds$countryname=="Italy"),]
print(head(italy [c("partyname", "countryname", "edate",  "rile")]))

# write your scaling function
ratio_scale <- function(data) {
((data$per104+data$per201+data$per203+data$per305+data$per401+data$per402+data$per407+data$per414+data$per505
+data$per601+data$per603+data$per605+data$per606)-(data$per103+data$per105+data$per106+data$per107+data$per202+data$per403
+data$per404+data$per406+data$per412+data$per413+data$per504+data$per506+data$per701))/(data$per104+data$per201+data$per203+data$per305+data$per401+data$per402+data$per407+data$per414+data$per505
+data$per601+data$per603+data$per605+data$per606+data$per103+data$per105+data$per106+data$per107+data$per202+data$per403
+data$per404+data$per406+data$per412+data$per413+data$per504+data$per506+data$per701)
}

# apply your scaling function to data now
italy$ratio_scale <- mp_scale(italy , scalingfun = ratio_scale)
colnames(italy)
print(head(italy [c("partyname", "edate", "rile", "ratio_scale")]))

# using bootstrap procedure with custom scaling functions
custom_scale <- function(data) {
data$per402 - data$per401
}

italy$custom_scale <- mp_scale(italy , scalingfun = custom_scale)
colnames(italy)
print(head(italy [c("partyname", "edate", "rile", "ratio_scale", "custom_scale")]))
p2 <- mp_bootstrap(italy, fun = "custom_scale", statistics = list(var, 0.025, 0.975))
str(p2)