# ============================================================================
# Collecting geo-located tweets with the Twitter Streaming API (rtweet)
# ============================================================================

# NOTE(review): rm(list = ls()) / setwd() in scripts are generally
# discouraged (they wipe and relocate the caller's session); kept here
# because this is a self-contained teaching script.
rm(list = ls(all = TRUE))
setwd("C:/Users/luigi/Dropbox/TOPIC MODEL")
getwd()

library(rtweet)
library(readtext)
library(quanteda)
library(ggplot2)
library(ggmap)
library(httpuv)
library(dplyr)
library(maps)
library(leaflet)
library(stringr)
library(htmlwidgets)
library(knitr)

# ---------------------------------------------------------------------------
# STREAMING data
# ---------------------------------------------------------------------------
# stream_tweets() connects to the live "stream" of tweets as they are being
# published, capturing those that contain one or more of the keywords given
# in the track argument. There are four possible methods.
#
# NOTE: the CRAN version of the rtweet package can hit a data-parsing issue
# with the first three methods. A workaround is the development version:
#   devtools::install_github("mkearney/rtweet")
# Since today we only use the Streaming API to get tweet geo-coordinates
# (which the CRAN version handles fine), we stick with the CRAN release.
#
# (1) The default, q = "", returns a small random sample of all publicly
#     available Twitter statuses (~1% of all tweets posted at that moment):
# dt <- stream_tweets("", timeout = 10)
# print(dt$text[1:10])
#
# (2) Filter by keyword: pass a comma-separated string of phrases/keywords.
#     This returns ALL matching tweets, unless they exceed 1% of the full
#     stream at that moment:
# dt2 <- stream_tweets("trump", timeout = 10)
# print(dt2$text[1:10])
#
# (3) Track users via a comma-separated list of user IDs or screen names
#     (up to 5000 user_ids):
# dt <- stream_tweets("CNN", timeout = 10)
# print(dt$text[1:10])
#
# (4) Stream by geo location using four lat/lng bounding-box points
#     (continued in the next section).
# ----------------------------------------------------------------------------
# (4) Streaming by geo location
# ----------------------------------------------------------------------------
# Twitter allows searches by geocode, matched against the bbox_coords info.
# The geocode must be supplied as a vector of length 4, e.g.
# c(-125, 26, -65, 49). To recover it, either use your Google Maps API key
# or select "csv" at http://boundingbox.klokantech.com/
#
# api <- "YOUR GOOGLE MAP API"
# In my case the key is stored in a local text file:
myText <- readtext("Google API.txt", encoding = "UTF-8")
api <- myText$text

lookup_coords("london", apikey = api)
lookup_coords("brazil", apikey = api)

# Streaming tweets from the US needs NO Google Maps key
# (that shortcut exists just for the US!)
lookup_coords("USA")

# Use the coordinates you get via lookup_coords
usa <- stream_tweets(
  c(-124.84897, 24.39631, -66.88544, 49.38436),
  timeout = 10
)
print(usa$text[1:10])

# Or, equivalently:
usa <- stream_tweets(lookup_coords("USA", apikey = api), timeout = 10)
print(usa$text[1:10])

# Create lat/lng variables using all available tweet and profile geo data
rtll <- lat_lng(usa)

# Plot state boundaries, then overlay the tweet coordinates
par(mar = c(0, 0, 0, 0))
map("state", lwd = .25)
with(rtll, points(lng, lat, pch = 20, cex = 5, col = rgb(0, .3, .7, .75)))

# An alternative plot using ggplot
map.data <- map_data("state")
str(map.data)
points <- data.frame(x = rtll$lng, y = rtll$lat)
ggplot(map.data) +
  geom_map(aes(map_id = region), map = map.data,
           fill = "white", color = "grey20", size = 0.25) +
  expand_limits(x = map.data$long, y = map.data$lat) +
  theme(axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        plot.margin = unit(0 * c(-1.5, -1.5, -1.5, -1.5), "lines")) +
  geom_point(data = points, aes(x = x, y = y),
             size = 3, alpha = 1/3, color = "darkblue")

# But we can do even more...much more!
# ----------------------------------------------------------------------------
# Exploring the geo information in a stored Italian stream
# ----------------------------------------------------------------------------
# The dataframe below holds the results of this geographical query on Italy:
# italy <- stream_tweets(lookup_coords("Italy", apikey = api), timeout = 360)
italy <- readRDS("italy.rds")

# How many tweets? 177
nrow(italy)

# Let's search for this Twitter user called Curini:
italy$screen_name == "Curini"
curini <- italy[which(italy$screen_name == "Curini"), ]
print(curini$text[1:2])

# What am I using to tweet?
print(curini$source)

# Some info about my location. The free-text "location" field is whatever
# the user wrote when creating the account: it can be anything, or empty.
print(curini$location)
print(curini$place_name)
print(curini$place_full_name)
print(curini$place_type)
print(curini$country)
print(curini$country_code)
print(curini$bbox_coords)  # a polygon!
print(curini$geo_coords)   # not available!

# Let's check the entire dataset
print(italy$location)
print(italy$place_name)  # info available for all 177 tweets

# Sort places by count and plot the top 4 locations.
# (named place_counts so we do not shadow dplyr::count)
place_counts <- count(italy, place_name, sort = TRUE)
str(place_counts)
# Drop missing place names explicitly; removing row 1 (as in "[-c(1), ]")
# only works when the NA group happens to be the most frequent one.
place_counts <- filter(place_counts, !is.na(place_name))
str(place_counts)
place_counts <- mutate(place_counts, place_name = reorder(place_name, n))
# slice_max() supersedes top_n(); with_ties = TRUE (the default) matches
# top_n()'s tie behavior.
place_counts <- slice_max(place_counts, order_by = n, n = 4)
ggplot(place_counts, aes(x = place_name, y = n)) +
  geom_col() +
  coord_flip() +
  # labs() refers to the AESTHETICS: x is the place, y is the count.
  # coord_flip() only changes where they are displayed, so the labels
  # must not be swapped.
  labs(x = "Place", y = "Count",
       title = "Where Twitter users are from - unique place_name ")

print(italy$place_full_name)
print(italy$place_type)
print(italy$country)
print(italy$country_code)
table(italy$country_code)

# Some geo_coords (latitude/longitude) are available, e.g. tweets 139 and 152
print(italy$geo_coords)
print(italy$geo_coords[139])
print(italy$geo_coords[152])
# Let's see their source: Instagram as expected!
print(italy$source[139])
print(italy$source[152])

print(italy$bbox_coords)  # info about all 177 tweets

# ----------------------------------------------------------------------------
# search_tweets() with a geocode
# ----------------------------------------------------------------------------
# Search for 100 tweets sent from ITALY discussing about greenpass
rt <- search_tweets(
  "greenpass", n = 200,
  geocode = lookup_coords("Italy", apikey = api)
)
nrow(rt)

# Here rtweet does NOT just look for data with bbox_coords. It first checks
# whether the tweet is geocoded and, if not, whether a place can be
# extrapolated from the user's profile information. Searching the Barcelona
# area, for instance, returns many tweets that are not geocoded at all,
# simply because the users have "Barcelona" in their profile.
print(rt$location)
print(rt$place_name)
print(rt$bbox_coords[184])
print(rt$country)

# Create lat/lng variables using all available tweet and profile geo data
rtll <- lat_lng(italy)

# Plot them within the Italian map
par(mar = c(0, 0, 0, 0))
map("world", "Italy", lwd = .25)
with(rtll, points(lng, lat, pch = 20, cex = 5, col = rgb(0, .3, .7, .75)))

# An alternative plot using ggplot
map.data <- map_data("world", "Italy")
str(map.data)
points <- data.frame(x = rtll$lng, y = rtll$lat)
ggplot(map.data) +
  geom_map(aes(map_id = region), map = map.data,
           fill = "white", color = "grey20", size = 0.25) +
  expand_limits(x = map.data$long, y = map.data$lat) +
  theme(axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        plot.margin = unit(0 * c(-1.5, -1.5, -1.5, -1.5), "lines")) +
  geom_point(data = points, aes(x = x, y = y),
             size = 3, alpha = 1/3, color = "darkblue")

# A further alternative plot via the leaflet package. Before that, let's
# change the color of the points according to some exogenous info included
# in the database.
# For example, let's create 3 groups out of the sentiment of the texts
# (we could have done the same with emotions, or any other variable of
# interest)
colnames(rtll)
library(syuzhet)
rtll$sentiment <- get_sentiment(rtll$text, method = "nrc", language = "italian")
summary(rtll$sentiment)

# Collapse the raw score into -1 / 0 / 1...
points$sentimentOK <- ifelse(rtll$sentiment < 0, -1,
                             ifelse(rtll$sentiment > 0, 1, 0))
table(points$sentimentOK)
# ...and label the groups. Declaring levels AND labels explicitly keeps the
# mapping correct even when one of the three groups is empty in the data
# (the as.factor() + levels()<- approach errors or mislabels in that case).
points$sentimentOK <- factor(points$sentimentOK,
                             levels = c(-1, 0, 1),
                             labels = c("Negative", "Neutral", "Positive"))
levels(points$sentimentOK)
table(points$sentimentOK)

# That would be our usual ggplot graph
ggplot(map.data) +
  geom_map(aes(map_id = region), map = map.data,
           fill = "white", color = "grey20", size = 0.25) +
  expand_limits(x = map.data$long, y = map.data$lat) +
  theme(axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        plot.margin = unit(0 * c(-1.5, -1.5, -1.5, -1.5), "lines")) +
  geom_point(data = points, aes(x = x, y = y, colour = sentimentOK),
             size = 3) +
  scale_colour_manual(values = c("red", "blue", "green"))

# Now the same thing with leaflet.
# Map each sentiment score to a marker color: red = negative,
# green = positive, blue = neutral. (Vectorized ifelse() replaces the
# original element-wise sapply() loop; same output.)
getColor <- function(rtll) {
  ifelse(rtll$sentiment < 0, "red",
         ifelse(rtll$sentiment > 0, "green", "blue"))
}

icons <- awesomeIcons(
  icon = 'ios-close',
  iconColor = 'black',
  library = 'ion',
  markerColor = getColor(rtll)
)
m <- leaflet(rtll)
m <- addTiles(m)
m <- addAwesomeMarkers(m, lng = rtll$lng, lat = rtll$lat,
                       icon = icons, popup = rtll$text)
m
# To save this graph on your pc:
# saveWidget(m, "map.html", selfcontained = TRUE, background = "white")

# Let's change the shape of the icons
icons <- awesomeIcons(
  icon = 'glass',
  iconColor = 'white',
  markerColor = getColor(rtll)
)
m <- leaflet(rtll)
m <- addTiles(m)
m <- addAwesomeMarkers(m, lng = rtll$lng, lat = rtll$lat,
                       icon = icons, popup = rtll$text)
m

# A lot of the markers are clumped together rather closely. We can cluster
# them by specifying clusterOptions (zoom out and zoom in the graph!).
# The number inside each circle is the total number of observations in that
# area; areas with more observations get yellow circles, areas with fewer
# get green circles.
m <- addAwesomeMarkers(m, lng = rtll$lng, lat = rtll$lat,
                       icon = icons, popup = rtll$text,
                       clusterOptions = markerClusterOptions())
m

# You can also plot the tweets w/o any sentiment
m2 <- leaflet(rtll)
m2 <- addTiles(m2)
m2 <- addMarkers(m2, lng = rtll$lng, lat = rtll$lat, popup = rtll$text)
m2

# If you have problems (I have them since 3 days...google issues?) use this
# workaround: always plot blue, irrespective of the sentiment value.
# (rep() replaces the original sapply() that returned "blue" in every
# branch; same output.)
getColor <- function(rtll) {
  rep("blue", length(rtll$sentiment))
}

icons <- awesomeIcons(
  icon = 'ios-close',
  iconColor = 'white',
  library = 'ion',
  markerColor = getColor(rtll)
)
m2 <- leaflet(rtll)
m2 <- addTiles(m2)
m2 <- addAwesomeMarkers(m2, lng = rtll$lng, lat = rtll$lat,
                        icon = icons, popup = rtll$text)
m2