#######################################
# WordScores lab: UK party manifestos
#   Part 1 - economic policy positions
#   Part 2 - social policy positions
#######################################

sessionInfo()

# Use an English locale for character handling ("english" is the Windows
# locale name; other platforms may warn and keep their current locale).
Sys.setlocale("LC_CTYPE", "english")

library(austin)

# Folder that holds "UK data.csv". The result files are written here as well.
# Change this path to your own lab folder. Paths are built with file.path()
# so the working directory of the R session is left untouched.
lab_dir <- "C:/Users/mw/Dropbox/JAPAN JSPS2013 WIAS2012 WIAS2013/Tokyo 2016/New Lectures/Lecture 7 2016/Lab"

# Helpers ----

# Read a JFreq word-frequency table (rows: documents, columns: words).
# The document names arrive in column `X`; they become the row names and the
# column is then removed so that only word counts remain.
read_word_counts <- function(path) {
  counts <- read.csv(
    file = path, header = TRUE, strip.white = TRUE, fileEncoding = "UTF-8"
  )
  rownames(counts) <- counts$X
  counts$X <- NULL
  counts
}

# Sort a table of estimates by its `Rescaled` column, ascending by default.
order_by_rescaled <- function(est, decreasing = FALSE) {
  est[order(est$Rescaled, decreasing = decreasing), , drop = FALSE]
}

# Append one row per reference text to a table of estimates. Every column of
# a reference row is set to the reference score (so Lower == Upper ==
# Rescaled and the interval drawn for a reference text has zero width).
# Rows are named from `ref_labels`, whatever the number of rows already in
# `est`.
add_reference_rows <- function(est, ref_scores, ref_labels) {
  stopifnot(length(ref_scores) == length(ref_labels))
  ref_rows <- as.data.frame(
    matrix(ref_scores, nrow = length(ref_scores), ncol = ncol(est))
  )
  names(ref_rows) <- names(est)
  row.names(ref_rows) <- ref_labels
  rbind(est, ref_rows)
}

# Dot chart of the `Rescaled` estimates with a horizontal line from `Lower`
# to `Upper` for each row. The x-axis runs from the multiple of 10 at or
# below the smallest lower bound to the multiple of 10 at or above the
# largest upper bound, so no interval is clipped.
plot_positions <- function(
    est, main = "Parties positions with 95% Confidence Intervals") {
  stopifnot(nrow(est) > 0)
  x_range <- c(
    floor(min(est$Lower) / 10) * 10,
    ceiling(max(est$Upper) / 10) * 10
  )
  dotchart(
    est$Rescaled,
    labels = row.names(est), col = "blue", xlim = x_range, main = main
  )
  # dotchart() places row i at height i, so the intervals use the row index.
  row_pos <- seq_len(nrow(est))
  segments(x0 = est$Lower, y0 = row_pos, x1 = est$Upper, y1 = row_pos)
  invisible(est)
}

# Data ----

# Word counts produced with JFreq 5.4 (in columns: words, in rows: documents)
uk_counts <- read_word_counts(file.path(lab_dir, "UK data.csv"))
str(uk_counts)

# Prepare the dataset for WordScores
data2 <- wfm(uk_counts, word.margin = 2)
str(data2)

# list of included words
words(data2)

# list of documents
docs(data2)

# Positions (within docs(data2)) of the reference and of the virgin texts,
# and the labels used for the reference texts in the final plots.
ref_idx <- c(3, 1, 5)
vir_idx <- c(2, 4, 6)
ref_labels <- c("UKLAB92a.txt", "UKLIBDEM92a.txt", "UKCONS92a.txt")

#######################################
# Part 1: economic policy positions
#######################################

# reference texts: 1992 party manifestos
econ_ref_scores <- c(5.35, 8.21, 17.21)
ref <- getdocs(data2, ref_idx)

# NOTE(review): the scores are matched to the reference texts by position.
# Check in this table that every document is paired with the intended score.
data.frame(document = docs(ref), score = econ_ref_scores)

ws <- classic.wordscores(ref, scores = econ_ref_scores)
summary(ws)

# to inspect the word scores:
ws$pi
plot(ws$pi)

# if you want you can also save the matrix of word weights
write.csv(ws$pi, file.path(lab_dir, "words.csv"))

# identify the virgin texts
vir <- getdocs(data2, vir_idx)

# estimate the scores for the virgin texts
econ_est <- predict(ws, newdata = vir)
summary(econ_est)
str(econ_est)

# Save the results
write.csv(econ_est, file.path(lab_dir, "results_ws.csv"))

plot_positions(econ_est)

# order the parties in ascending order
# (for descending order: order_by_rescaled(econ_est, decreasing = TRUE))
econ_est <- order_by_rescaled(econ_est)
econ_est
str(econ_est)
plot_positions(econ_est)

# Add the reference scores
econ_all <- add_reference_rows(econ_est, econ_ref_scores, ref_labels)
econ_all

econ_all <- order_by_rescaled(econ_all)
econ_all
str(econ_all)
plot_positions(econ_all)

##################################
# As a possible alternative (as you like!):
# derive the virgin texts as "every document that is not a reference text"
alt_vir_idx <- setdiff(seq_along(docs(data2)), ref_idx)

# SET REFERENCES
alt_ref <- getdocs(data2, ref_idx)
alt_ws <- classic.wordscores(alt_ref, scores = econ_ref_scores)
summary(alt_ws)

# PREDICT
alt_vir <- getdocs(data2, alt_vir_idx)
alt_est <- predict(alt_ws, newdata = alt_vir)
##################################

#######################################
# Part 2: social policy positions
#######################################
# The same word-frequency matrix (data2) is reused; only the reference
# scores change.

soc_ref_scores <- c(6.87, 6.53, 15.34)
soc_ref <- getdocs(data2, ref_idx)

# NOTE(review): as above, check the document/score pairing.
data.frame(document = docs(soc_ref), score = soc_ref_scores)

soc_ws <- classic.wordscores(soc_ref, scores = soc_ref_scores)
summary(soc_ws)

# to inspect the word scores:
soc_ws$pi

soc_vir <- getdocs(data2, vir_idx)
soc_est <- predict(soc_ws, newdata = soc_vir)
summary(soc_est)
str(soc_est)

plot_positions(soc_est)

# order the parties in ascending order
# (for descending order: order_by_rescaled(soc_est, decreasing = TRUE))
soc_est <- order_by_rescaled(soc_est)
soc_est
str(soc_est)
plot_positions(soc_est)

# Add the reference scores
soc_all <- add_reference_rows(soc_est, soc_ref_scores, ref_labels)
soc_all

soc_all <- order_by_rescaled(soc_all)
soc_all
str(soc_all)
plot_positions(soc_all)