# Let's begin with an example
library(quanteda)

# Three toy texts in our corpus: the first two are the reference texts and
# the last one is the virgin text.
testText <- c(
  "The quick brown fox named over brown brown",
  "with the newspaper a boy named in his brown",
  "Text pre-processing: make some fox cleaning over brown"
)
testText
testCorpus <- corpus(testText)

# Tokenise the corpus, dropping punctuation, numbers, symbols and separators,
# and splitting hyphenated words (e.g. "pre-processing") into their parts.
tok3 <- tokens(
  testCorpus,
  remove_punct = TRUE,
  remove_numbers = TRUE,
  remove_symbols = TRUE,
  split_hyphens = TRUE,
  remove_separators = TRUE
)
tok3

# Build the document-feature matrix from the tokens.
myDfm <- dfm(tok3)
topfeatures(myDfm, 10)  # the 10 most frequent features
# Show every feature column. Printing a dfm truncates the feature columns by
# default, so raise max_nfeat to the actual number of features instead of
# hard-coding a column range.
print(myDfm, max_nfeat = nfeat(myDfm))

# Reference scores: one known position for each of the two reference texts,
# and NA for the virgin text, whose position we want to estimate.
refscores <- c(3, 8, NA)
refscores

# Fit the Wordscores model on the dfm with those reference scores.
# textmodel_wordscores() is provided by the quanteda.textmodels package, which
# library(quanteda) does not attach, so call it through its namespace. Loading
# that namespace also makes its summary/coef/predict methods available.
ws <- quanteda.textmodels::textmodel_wordscores(myDfm, y = refscores)
summary(ws)

# How are these word scores obtained? Redo the analysis step by step, without
# the textmodel_wordscores() function.

# Reference documents are the ones with a non-missing reference score.
refDocs <- which(!is.na(refscores))
refDfm <- myDfm[refDocs, ]
# Keep only the features that occur in at least one reference text. A word seen
# only in the virgin text has zero frequency in every reference text, which
# would produce 0/0 = NaN in the division below.
refDfm <- refDfm[, colSums(refDfm) > 0]

# Fwr: relative frequency of each word within each reference text
# (transposed, so words are in rows and reference texts in columns).
tFwr <- t(dfm_weight(refDfm, scheme = "prop"))
tFwr
# Pwr: each word's relative frequencies rescaled to sum to 1 across the
# reference texts.
Pwr <- tFwr / rowSums(tFwr)
Pwr
# Sw: word score = sum over reference texts of Pwr times the reference score.
Sw <- Pwr %*% refscores[refDocs]
# Same scores for the words as in the fitted model!
Sw[, 1]
summary(ws)

# Predict the position of the virgin text (the third document).
predict(ws, newdata = myDfm[3, ])

# Where does that prediction come from? Redo it by hand, without predict().
# Note: only 3 words are shared between the virgin text and the reference
# texts: brown, fox and over. Each of these 3 words appears once in the virgin
# text, so the relative frequency of each of them is .3333.
# Given the word scores brown = 4.143, fox = 3 and over = 3, the predicted
# score for the virgin text is:
4.143 * .3333 + 3 * .3333 + 3 * .3333

# A different way to get the same outcome, starting from the fitted word scores.
sw <- coef(ws)
sw
# dfm_match() is quanteda's exported function that takes a dfm and a set of
# features and makes the dfm's features match that set (remember: sw has fewer
# features than the original dfm). It is used here instead of the unexported
# quanteda.textmodels:::force_conformance(), which is not part of the public
# API and may change without notice.
data <- dfm_match(myDfm, features = names(sw))
# Predicted score of the virgin text: its relative word frequencies (over the
# scored words only) multiplied by the word scores and summed.
rowSums(dfm_weight(data[3, ], scheme = "prop") %*% sw)