library(readtext)
library(quanteda)
library(rtweet)

#########################
## A search query with emoji
#########################

# for the list of emoji: https://github.com/hadley/emo
library(emo)
# Look up the "smile" emoji character; it is used as the search query below.
x <- emo::ji("smile")
x
# Request up to 200 English-language tweets matching the emoji, retweets excluded.
smile <- search_tweets(x, n = 200, lang = "en", include_rts = FALSE)
# head() instead of [1:5]: no NA padding if fewer than five tweets come back.
print(head(smile$text, 5))

# how to convert the emoji to text?
library(textclean)
# replace_emoji() is applied first; replace_non_ascii() then handles whatever
# non-ASCII characters are still left in its output.
replace_non_ascii(replace_emoji(head(smile$text, 5)))

# note the difference!

# Shared preprocessing for the two corpora below, so that the resulting
# document-feature matrices differ only because of their input text.
#
# corp: a quanteda corpus.
# Returns: a tokens object tokenised with punctuation, numbers, symbols,
#   separators and URLs removed and hyphenated words split, then with English
#   stopwords dropped and the remaining tokens stemmed (English stemmer).
preprocess_tokens <- function(corp) {
  toks <- tokens(
    corp,
    remove_punct = TRUE,
    remove_numbers = TRUE,
    remove_symbols = TRUE,
    split_hyphens = TRUE,
    remove_separators = TRUE,
    remove_url = TRUE
  )
  toks <- tokens_remove(toks, stopwords("english"))
  tokens_wordstem(toks, language = "english")
}

# here without conversion of emoji to text
myCorpusTwitter <- corpus(smile)
# head() instead of [1:2]: no NA padding if the corpus has fewer than two docs.
head(as.character(myCorpusTwitter), 2)
tok <- preprocess_tokens(myCorpusTwitter)
myDfm <- dfm(tok)
topfeatures(myDfm, 20) # top 20 features
# nfeat() is the public accessor for the number of features; it avoids
# reaching into the dfm's internal @Dimnames slot.
nfeat(myDfm)

# here with conversion of emoji to text
smile2 <- smile
smile2$text <- replace_non_ascii(replace_emoji(smile2$text))

myCorpusTwitter2 <- corpus(smile2)
head(as.character(myCorpusTwitter2), 2)
tok <- preprocess_tokens(myCorpusTwitter2)
myDfm2 <- dfm(tok)

# of course now we have more features in the new dfm (after conversion):
# raw emoji are presumably dropped by remove_symbols = TRUE, whereas their
# text replacements are ordinary words that survive tokenisation.
nfeat(myDfm2)
nfeat(myDfm)

topfeatures(myDfm, 20) # top 20 features
topfeatures(myDfm2, 20) # top 20 features