# ---------------------------------------------------------------------------
# Workspace setup
# (rm(list = ls()) and setwd() are kept from the original teaching script;
# both are discouraged in reusable code)
# ---------------------------------------------------------------------------
rm(list = ls(all = TRUE))
setwd("C:/Users/luigi/Dropbox/TOPIC MODEL")
getwd()

library(readtext)
library(quanteda)
library(ggplot2)
library(ggmap)
library(httpuv)
library(dplyr)
library(maps)
library(leaflet)
library(stringr)

#############################################################################
# STREAMING data with latitude/longitude
#############################################################################
# Twitter allows streaming by geo location via a bounding box: a vector of
# four latitude/longitude values, e.g. c(-125, 26, -65, 49), which is
# matched against the bbox_coords metadata of each tweet.
# To recover a bounding box, either use your Google Maps API or check
# http://boundingbox.klokantech.com/ and select "csv".

# api <- "YOUR GOOGLE MAP API"
# Here the key is read from a local text file instead of being hard-coded
api_file <- readtext("Google API.txt", encoding = "UTF-8")
api <- api_file$text

# Re-install the previous version of rtweet (much more useful in terms of
# the metadata it makes available)
devtools::install_version("rtweet", version = "0.7.0",
                          repos = "http://cran.us.r-project.org")
library(rtweet)
packageVersion("rtweet")

# Stream 10 seconds of tweets from the continental-USA bounding box
usa <- stream_tweets(
  c(-124.84897, 24.39631, -66.88544, 49.38436),
  timeout = 10
)

# how many tweets have we downloaded?
length(usa$text)

# Create lat/lng columns from every available tweet- and profile-level
# geo-location field in the data
rtll <- lat_lng(usa)

# --- Base-graphics map -----------------------------------------------------
par(mar = c(0, 0, 0, 0))
map("state", lwd = .25)
with(rtll, points(lng, lat, pch = 20, cex = 5, col = rgb(0, .3, .7, .75)))

# --- ggplot2 version -------------------------------------------------------
usa_map <- map_data("state")
str(usa_map)
pts <- data.frame(x = rtll$lng, y = rtll$lat)
ggplot(usa_map) +
  geom_map(aes(map_id = region), map = usa_map,
           fill = "white", color = "grey20", size = 0.25) +
  expand_limits(x = usa_map$long, y = usa_map$lat) +
  theme(axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        plot.margin = unit(0 * c(-1.5, -1.5, -1.5, -1.5), "lines")) +
  geom_point(data = pts, aes(x = x, y = y),
             size = 3, alpha = 1/3, color = "darkblue")

# --- leaflet version -------------------------------------------------------
m2 <- leaflet(rtll)
m2 <- addTiles(m2)  # add default OpenStreetMap map tiles
m2 <- addMarkers(m2, lng = rtll$lng, lat = rtll$lat, popup = rtll$text)
m2

# Many markers are clumped together rather closely. We can cluster them by
# specifying clusterOptions (zoom in and out on the rendered map!).
# The number inside each circle is the total number of observations in that
# area: yellow circles mark denser areas, green circles sparser ones.
m <- addMarkers(m2, lng = rtll$lng, lat = rtll$lat, popup = rtll$text,
                clusterOptions = markerClusterOptions())
m

# Next: colour the tweets on the map according to exogenous info in the
# dataset, such as the number of followers (a metadata NOT available in the
# 1.0.2 version of rtweet)
# followers_count: a metadata field returned by rtweet 0.7.0
summary(rtll$followers_count)

# Bucket users into 3 groups by follower count and map each group to a
# marker colour.
# FIX(review): the original condition was garbled —
# "if(followers_count=quantile(...))" is a parse error in R (`=` inside an
# `if` condition) and the first of the three advertised groups was missing.
# Reconstructed as: bottom half -> green, 50th-75th percentile -> orange,
# top quartile -> red.
getColor <- function(rtll) {
  # Hoist the quantile computation out of the per-tweet loop
  q50 <- quantile(rtll$followers_count, 0.5)
  q75 <- quantile(rtll$followers_count, 0.75)
  sapply(rtll$followers_count, function(followers_count) {
    if (followers_count <= q50) {
      "green"
    } else if (followers_count <= q75) {
      "orange"
    } else {
      "red"
    }
  })
}

icons <- awesomeIcons(
  icon = 'ios-close',
  iconColor = 'black',
  library = 'ion',
  markerColor = getColor(rtll)
)
m <- leaflet(rtll)
m <- addTiles(m)
m <- addAwesomeMarkers(m, lng = rtll$lng, lat = rtll$lat, icon = icons,
                       popup = rtll$text,
                       label = ~as.character(followers_count))
m

# Same map with a different icon set
icons <- awesomeIcons(
  icon = 'glass',
  iconColor = 'white',
  markerColor = getColor(rtll)
)
m <- leaflet(rtll)
m <- addTiles(m)
m <- addAwesomeMarkers(m, lng = rtll$lng, lat = rtll$lat, icon = icons,
                       popup = rtll$text,
                       label = ~as.character(followers_count))
m
# for more info: https://rstudio.github.io/leaflet/markers.html

# Now, I'd like to stream tweets from Italy
lookup_coords("Italy", apikey = api)
## Use the coordinates you get via lookup_coords
# italy <- stream_tweets(
#   c(6.62672, 35.48970, 18.79760, 47.09200),
#   timeout = 30
# )
# Or faster:
# italy <- stream_tweets(lookup_coords("Italy", apikey = api), timeout = 360)
# print(italy$text[1:10])

# Open a saved data frame of tweets from a similar geographical query to
# understand a bit more about our results
italy <- readRDS("italy.rds")
str(italy)

# how many tweets? 177
nrow(italy)
length(italy$text)
print(italy$text[13])
print(italy$text[14])

# let's search for this Twitter user called Curini:
table(italy$screen_name == "Curini")
curini <- italy[which(italy$screen_name == "Curini"), ]
print(curini$text[1:2])

# What am I using to tweet?
print(curini$source)

# some info about my location: this is defined by the user, who can write
# basically anything she wants (real or unreal...)
print(curini$location)
print(curini$place_name)
print(curini$place_full_name)
print(curini$place_type)
print(curini$country)
print(curini$country_code)
print(curini$geo_coords)
print(curini$bbox_coords)  # a polygon!

# Let's check the entire dataset
print(italy$location)
print(italy$place_name)  # I have info for all the 177 tweets

# Let's sort by count and just plot the top 4 locations via top_n().
# Note that in this case you are grouping your data by user.
# FIX(review): the local variable is named loc_counts (rather than "count")
# so it does not mask dplyr::count().
loc_counts <- count(italy, location, sort = TRUE)
str(loc_counts)

# Drop the NA row
# NOTE(review): this assumes the NA group sorts first — verify after sorting
loc_counts <- loc_counts[-c(1), ]
str(loc_counts)

loc_counts <- mutate(loc_counts, location = reorder(location, n))
loc_counts <- top_n(loc_counts, 4)
ggplot(loc_counts, aes(x = location, y = n)) +
  geom_col() +
  coord_flip() +
  # FIX: the axis labels were swapped in the original — x carries the
  # location, y carries the count
  labs(x = "Location", y = "Count",
       title = "Where Twitter users are from - unique locations ")

print(italy$place_full_name)
print(italy$place_type)
print(italy$country)
print(italy$country_code)
table(italy$country_code)

# There is some info for geo_coords (latitude and longitude),
# for example for tweets 139 and 152
print(italy$geo_coords)
print(italy$geo_coords[139])
print(italy$geo_coords[152])

# let's see their source: Instagram as expected!
print(italy$source[139])
print(italy$source[152])

print(italy$bbox_coords)  # info about all 177 tweets

### Note that you can also run a search_tweets with geocode.
## Search for 100 tweets sent from Italy
# NOTE(review): the original comment said "greenpass" but the query is
# actually "salvini"
rt <- search_tweets("salvini", n = 100,
                    geocode = lookup_coords("Italy", apikey = api))
nrow(rt)
print(rt$text[13])

# Caveat: in this case rtweet does not just look for data with bbox_coords.
# It first checks whether the tweet is geocoded and, if not, whether a place
# can be extrapolated from the user's profile information. So a search for
# tweets from the Barcelona area will deliver a lot of tweets that aren't
# geocoded, simply because the users have "Barcelona" in their profile.
print(rt$bbox_coords)
print(rt$location)
print(rt$place_name)
print(rt$country)

# --- Italian maps ----------------------------------------------------------
# Create lat/lng columns from all available tweet/profile geo-location data
rtll_it <- lat_lng(italy)

# Base-graphics map of Italy
par(mar = c(0, 0, 0, 0))
map("world", "Italy", lwd = .25)
with(rtll_it, points(lng, lat, pch = 20, cex = 5, col = rgb(0, .3, .7, .75)))

# ggplot2 version
italy_map <- map_data("world", "Italy")
str(italy_map)
pts <- data.frame(x = rtll_it$lng, y = rtll_it$lat)
ggplot(italy_map) +
  geom_map(aes(map_id = region), map = italy_map,
           fill = "white", color = "grey20", size = 0.25) +
  expand_limits(x = italy_map$long, y = italy_map$lat) +
  theme(axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        plot.margin = unit(0 * c(-1.5, -1.5, -1.5, -1.5), "lines")) +
  geom_point(data = pts, aes(x = x, y = y),
             size = 3, alpha = 1/3, color = "darkblue")

# leaflet version
m2 <- leaflet(rtll_it)
m2 <- addTiles(m2)  # add default OpenStreetMap map tiles
m2 <- addMarkers(m2, lng = rtll_it$lng, lat = rtll_it$lat,
                 popup = rtll_it$text)
m2