rm(list=ls(all=TRUE))
getwd()
setwd("C:/Users/mw/Dropbox (VOICES)/TOPIC MODEL")
getwd()

library(rtweet)
library(readtext)
library(quanteda)
library(ggplot2)
library(ggmap)
library(httpuv)
library(dplyr)
library(maps)
library(leaflet)
library(stringr)

#########################################################################################
# geographical analysis: you need your Google Maps API key (but not if you are analyzing data from the US only)!
#########################################################################################

api <- "YOUR GOOGLE MAP API"
lookup_coords("usa")
lookup_coords("london", apikey=api)
lookup_coords("brazil", apikey=api)

# Twitter allows searches by geocode. To serve such a search, Twitter first checks whether the tweet itself
# is geocoded and, if not, whether a place can be extrapolated from the user's profile information. So you could,
# for example, search for tweets from the Barcelona area and Twitter will deliver a lot of tweets that are not
# geocoded simply because the users have "Barcelona" in their profile.

## search for 1000 tweets sent from the US discussing dinner or food
rt <- search_tweets("dinner OR food", n = 1000, geocode = lookup_coords("usa"))

## create lat/lng variables using all available tweet and profile geo-location data
rtll <- lat_lng(rt)

## plot state boundaries
par(mar = c(0, 0, 0, 0))
map("state", lwd = .25)

## plot lat and lng points onto the state map
with(rtll, points(lng, lat, pch = 20, cex = 5, col = rgb(0, .3, .7, .75)))

# alternative plot via leaflet
m2 <- leaflet(rtll)
m2 <- addTiles(m2) # Add default OpenStreetMap map tiles
m2 <- addMarkers(m2, lng=rtll$lng, lat=rtll$lat, popup=rtll$text)
m2

#########################################################################################
# STREAMING data
#########################################################################################

# with stream_tweets (the so-called Streaming API) you connect to the "stream" of tweets as they are being published
# and capture the tweets that contain one or more of the keywords given in the track argument.
# There are four possible methods:
# (1) The default, q = "", returns a small random sample of all publicly available Twitter statuses
#     (a 1% random sample of all the tweets posted in that specific moment)
# (2) To filter by keyword, provide a comma-separated character string with the desired phrase(s) and keyword(s);
#     in this case it returns ALL the matching tweets, UNLESS their number is higher than 1% of *all* tweets
#     posted in a given moment
# (3) Track users by providing a comma-separated list of user IDs or screen names
# (4) Use four latitude/longitude bounding box points to stream by geo location. This must be provided via a vector
#     of length 4, e.g., c(-125, 26, -65, 49). How do you get such a box? Check here:
#     http://boundingbox.klokantech.com/ and select "csv"

dt <- stream_tweets("trump", timeout = 10)
print(dt$text[1:10])
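# The "trump" call above uses method (2), keyword filtering. Below is a minimal, hedged sketch of
# methods (1) and (3) described in the comments above; the screen names are just placeholders,
# replace them with the accounts you actually want to track [do not run]
# rnd <- stream_tweets(q = "", timeout = 30)              # (1) small random sample of all public tweets
# usr <- stream_tweets("BBCWorld,nytimes", timeout = 30)  # (3) track a comma-separated list of users
# print(usr$text[1:10])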
# I'd like to stream tweets from the US (you do NOT need a Google Maps API key for this info - but only for the US!)
lookup_coords("USA")
usa <- stream_tweets(c(-125, 26, -65, 49), timeout = 30)

# I'd like to stream tweets from Italy
lookup_coords("Italy", apikey=api)

## use lookup_coords() for a shortcut version of the above code [do not run]
# italy <- stream_tweets(lookup_coords("Italy", apikey=api), timeout = 30)
# print(italy$text[1:10])

## or use the coordinates you get via lookup_coords()
italy2 <- stream_tweets(c(6.62672, 35.48970, 18.79760, 47.09200), timeout = 30)
print(italy2$text[1:10])

## create lat/lng variables using all available tweet and profile geo-location data
rtll_it <- lat_lng(italy2)

## plot country boundaries
par(mar = c(0, 0, 0, 0))
map("world", "Italy", lwd = .25)

## plot lat and lng points onto the country map
with(rtll_it, points(lng, lat, pch = 20, cex = 5, col = rgb(0, .3, .7, .75)))

# plot the results with ggplot2
map.data <- map_data("world", "Italy")
str(map.data)
points <- data.frame(x = rtll_it$lng, y = rtll_it$lat)

ggplot(map.data) +
  geom_map(aes(map_id = region), map = map.data, fill = "white", color = "grey20", size = 0.25) +
  expand_limits(x = map.data$long, y = map.data$lat) +
  theme(axis.line = element_blank(), axis.text = element_blank(),
        axis.ticks = element_blank(), axis.title = element_blank(),
        plot.margin = unit(0 * c(-1.5, -1.5, -1.5, -1.5), "lines")) +
  geom_point(data = points, aes(x = x, y = y), size = 3, alpha = 1/3, color = "darkblue")

# let's create 3 groups out of the number of followers of each user in the database and plot them:
# value=1 for users with a number of followers lower than the median value
# value=2 for users with a number of followers higher than (or equal to) the median value and lower than the 3rd quartile
# value=3 for users with a number of followers higher than (or equal to) the 3rd quartile
summary(rtll_it$followers_count)
points$followers[rtll_it$followers_count <  quantile(rtll_it$followers_count, 0.5)] <- 1
points$followers[rtll_it$followers_count >= quantile(rtll_it$followers_count, 0.5) &
                 rtll_it$followers_count <  quantile(rtll_it$followers_count, 0.75)] <- 2
points$followers[rtll_it$followers_count >= quantile(rtll_it$followers_count, 0.75)] <- 3
points
# (a more compact cut() version of these three assignments is sketched after the cluster map below)

ggplot(map.data) +
  geom_map(aes(map_id = region), map = map.data, fill = "white", color = "grey20", size = 0.25) +
  expand_limits(x = map.data$long, y = map.data$lat) +
  theme(axis.line = element_blank(), axis.text = element_blank(),
        axis.ticks = element_blank(), axis.title = element_blank(),
        plot.margin = unit(0 * c(-1.5, -1.5, -1.5, -1.5), "lines")) +
  geom_point(data = points, aes(x = x, y = y, colour = factor(followers)), size = 3)

# alternative plot via leaflet
m2 <- leaflet(rtll_it)
m2 <- addTiles(m2) # Add default OpenStreetMap map tiles
m2 <- addMarkers(m2, lng=rtll_it$lng, lat=rtll_it$lat, popup=rtll_it$text)
m2

# a lot of the markers are clumped together rather closely.
# We can cluster them by specifying clusterOptions as follows (zoom out and zoom in on the map!)
# The number inside each circle represents the total number of observations in that area.
# Areas with more observations are marked by yellow circles and areas with fewer by green circles
m <- addMarkers(m2, lng=rtll_it$lng, lat=rtll_it$lat, popup=rtll_it$text,
                clusterOptions = markerClusterOptions())
m
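# A more compact (and roughly equivalent) way to build the three follower groups above is to cut
# the counts at the median and the 3rd quartile. This is only a sketch of an alternative, not part
# of the original workflow [do not run]
# brks <- quantile(rtll_it$followers_count, c(0, 0.5, 0.75, 1), na.rm = TRUE)
# points$followers <- cut(rtll_it$followers_count, breaks = unique(brks),
#                         labels = FALSE, right = FALSE, include.lowest = TRUE)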
# let's change the colour of the tweets on the map according to some exogenous information included
# in the dataset, such as the number of followers, for example
summary(rtll_it$followers_count)

# let's create 3 groups out of the number of followers
getColor <- function(rtll_it) {
  sapply(rtll_it$followers_count, function(followers_count) {
    if(followers_count < quantile(rtll_it$followers_count, 0.5)) {
      "green"
    } else if(followers_count >= quantile(rtll_it$followers_count, 0.5) &&
              followers_count <= quantile(rtll_it$followers_count, 0.75)) {
      "orange"
    } else {
      "red"
    }
  })
}

icons <- awesomeIcons(
  icon = 'ios-close',
  iconColor = 'black',
  library = 'ion',
  markerColor = getColor(rtll_it)
)

m <- leaflet(rtll_it)
m <- addTiles(m)
m <- addAwesomeMarkers(m, lng=rtll_it$lng, lat=rtll_it$lat, icon=icons,
                       popup=rtll_it$text, label=~as.character(followers_count))
m

icons <- awesomeIcons(
  icon = 'glass',
  iconColor = 'white',
  markerColor = getColor(rtll_it)
)

m <- leaflet(rtll_it)
m <- addTiles(m)
m <- addAwesomeMarkers(m, lng=rtll_it$lng, lat=rtll_it$lat, icon=icons,
                       popup=rtll_it$text, label=~as.character(followers_count))
m

# for more info: https://rstudio.github.io/leaflet/markers.html

# getting tweets from London
rtl <- stream_tweets(lookup_coords("london", apikey=api), timeout = 20)
length(rtl$text)
str(rtl)
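# A minimal sketch that maps the London tweets the same way as the Italian ones above.
# It assumes rtl actually contains some geo-located tweets (it may not, depending on the stream) [do not run]
# rtll_ldn <- lat_lng(rtl)
# m_ldn <- leaflet(rtll_ldn)
# m_ldn <- addTiles(m_ldn)
# m_ldn <- addMarkers(m_ldn, lng = rtll_ldn$lng, lat = rtll_ldn$lat, popup = rtll_ldn$text,
#                     clusterOptions = markerClusterOptions())
# m_ldn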