TIL: Week 9 In-Class Exercise

Today I learnt in class: How to use plotly to plot interactive charts and crosstalk to link charts/data tables. And we also go deeper into visualization of geospatial data using sf, raster, and tmap functions.

Kelly Koh https://www.linkedin.com/in/kellykkw/ (School of Computing and Information Systems, Singapore Management University)https://scis.smu.edu.sg/
07-03-2021

Install packages and loading data

Install the necessary packages if they are not in library

packages = c('DT','ggiraph','plotly','tidyverse','patchwork',
            'sf','tmap','raster','clock')

for (p in packages) {
  if(!require(p, character.only = T)){
    install.packages(p)
  }
  library(p, character.only = T)
}

Import data from csv and preview data

exam_data <- read_csv("data/Exam_data.csv")

head(exam_data,10)
# A tibble: 10 x 7
   ID         CLASS GENDER RACE    ENGLISH MATHS SCIENCE
   <chr>      <chr> <chr>  <chr>     <dbl> <dbl>   <dbl>
 1 Student321 3I    Male   Malay        21     9      15
 2 Student305 3I    Female Malay        24    22      16
 3 Student289 3H    Male   Chinese      26    16      16
 4 Student227 3F    Male   Chinese      27    77      31
 5 Student318 3I    Male   Malay        27    11      25
 6 Student306 3I    Female Malay        31    16      16
 7 Student313 3I    Male   Chinese      31    21      25
 8 Student316 3I    Male   Malay        31    18      27
 9 Student312 3I    Male   Malay        33    19      15
10 Student297 3H    Male   Indian       34    49      37
summary(exam_data)
      ID               CLASS              GENDER         
 Length:322         Length:322         Length:322        
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
                                                         
                                                         
                                                         
     RACE              ENGLISH          MATHS          SCIENCE     
 Length:322         Min.   :21.00   Min.   : 9.00   Min.   :15.00  
 Class :character   1st Qu.:59.00   1st Qu.:58.00   1st Qu.:49.25  
 Mode  :character   Median :70.00   Median :74.00   Median :65.00  
                    Mean   :67.18   Mean   :69.33   Mean   :61.16  
                    3rd Qu.:78.00   3rd Qu.:85.00   3rd Qu.:74.75  
                    Max.   :96.00   Max.   :99.00   Max.   :96.00  

Interactive Plots

Interactive scatterplot using plot_ly()

palette <- c("red", "blue", "darkgreen", "orange")

plot_ly(data =exam_data, 
        x = ~MATHS, 
        y = ~ENGLISH,
        color = ~RACE,
        text = ~paste("Student ID:", ID, 
        # break line using <br>
        "<br>Class:", CLASS),
        colors = palette)

Plotly as a wrapper of ggplot

p <- ggplot(data=exam_data, aes(x = MATHS, y = ENGLISH)) + 
     geom_point(size = 1) +
     coord_cartesian(xlim = c(0,100),
                     ylim = c(0,100))
ggplotly(p)

Linked charts using highlight_key from crosstalk::SharedData

d <- highlight_key(exam_data)

p1 <- ggplot(data=d, aes(x = MATHS, y = ENGLISH)) + 
      geom_point(size = 1) +
      coord_cartesian(xlim = c(0,100),
                      ylim=c(0,100))

p2 <- ggplot(data=d, aes(x = MATHS, y = SCIENCE)) + 
      geom_point(size = 1) +
      coord_cartesian(xlim = c(0,100),
                      ylim = c(0,100))

subplot(ggplotly(p1),
        ggplotly(p2))
d <- highlight_key(exam_data)
p <- ggplot(d,
            aes(ENGLISH,
                MATHS)) +
      geom_point(size=1) + 
      coord_cartesian(xlim=c(0,100),
                    ylim=c(0,100))

gg <- highlight(ggplotly(p),
                "plotly_selected")

crosstalk::bscols(gg,
                  DT::datatable(d),
                  widths = 3)

Visualizing Geospatial Data

Import MC2-tourist.tif created using QGIS

ap <- raster("data/Geospatial/MC2-tourist.tif")
ap
class      : RasterLayer 
band       : 1  (of  3  bands)
dimensions : 1595, 2706, 4316070  (nrow, ncol, ncell)
resolution : 3.16216e-05, 3.16216e-05  (x, y)
extent     : 24.82419, 24.90976, 36.04499, 36.09543  (xmin, xmax, ymin, ymax)
crs        : +proj=longlat +datum=WGS84 +no_defs 
source     : MC2-tourist.tif 
names      : MC2.tourist 
values     : 0, 255  (min, max)

Plotting raster layer using tmap

tmap_mode("plot")
# change to view for interactive version, plot for static version
tm_shape(ap) + 
  tm_raster(ap,
            legend.show = FALSE)

Plotting raster layer using tmap to merge rgb bands

tm_shape(ap) +
tm_rgb(ap, r=1,g=2,b=3,
       alpha = NA,
       saturation = 1,
       interpolate = TRUE,
       max.value = 255)

Use sf to import Vector GIS data files in ESRI shapefile format

abila_st <- st_read(dsn = "data/Geospatial",
                    layer = "Abila")
Reading layer `Abila' from data source `C:\kpokp\blog_ISSS608\_posts\2021-07-03-week-9-in-class-exercise\data\Geospatial' using driver `ESRI Shapefile'
Simple feature collection with 3290 features and 9 fields
Geometry type: LINESTRING
Dimension:     XY
Bounding box:  xmin: 24.82401 ymin: 36.04502 xmax: 24.90997 ymax: 36.09492
Geodetic CRS:  WGS 84

Importing Aspatial data using read_csv

gps <- read_csv("data/aspatial/gps.csv")
glimpse(gps)
Rows: 685,169
Columns: 4
$ Timestamp <chr> "01/06/2014 06:28:01", "01/06/2014 06:28:01", "01/~
$ id        <dbl> 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35~
$ lat       <dbl> 36.07623, 36.07622, 36.07621, 36.07622, 36.07621, ~
$ long      <dbl> 24.87469, 24.87460, 24.87444, 24.87425, 24.87417, ~

Converting date-time field using clock and id using forcats to right format

gps$Timestamp <- date_time_parse(gps$Timestamp,
                                 zone ="",
                                 format = "%m/%d/%Y %H:%M:%S")
# Factor is a unique kind of character for numerical values
gps$id <- as_factor(gps$id)
gps
# A tibble: 685,169 x 4
   Timestamp           id      lat  long
   <dttm>              <fct> <dbl> <dbl>
 1 2014-01-06 06:28:01 35     36.1  24.9
 2 2014-01-06 06:28:01 35     36.1  24.9
 3 2014-01-06 06:28:03 35     36.1  24.9
 4 2014-01-06 06:28:05 35     36.1  24.9
 5 2014-01-06 06:28:06 35     36.1  24.9
 6 2014-01-06 06:28:07 35     36.1  24.9
 7 2014-01-06 06:28:09 35     36.1  24.9
 8 2014-01-06 06:28:10 35     36.1  24.9
 9 2014-01-06 06:28:11 35     36.1  24.9
10 2014-01-06 06:28:12 35     36.1  24.9
# ... with 685,159 more rows

Converting aspatial data into simple feature data frame using st_as_sf

gps_sf <- st_as_sf(gps,
                   coords = c("long","lat"),
                            # EPSG 4326 which stands for wgs84 geo. coord sys
                            crs = 4326)
gps_sf
Simple feature collection with 685169 features and 2 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 24.82509 ymin: 36.04802 xmax: 24.90849 ymax: 36.08996
Geodetic CRS:  WGS 84
# A tibble: 685,169 x 3
   Timestamp           id               geometry
 * <dttm>              <fct>         <POINT [°]>
 1 2014-01-06 06:28:01 35    (24.87469 36.07623)
 2 2014-01-06 06:28:01 35     (24.8746 36.07622)
 3 2014-01-06 06:28:03 35    (24.87444 36.07621)
 4 2014-01-06 06:28:05 35    (24.87425 36.07622)
 5 2014-01-06 06:28:06 35    (24.87417 36.07621)
 6 2014-01-06 06:28:07 35    (24.87406 36.07619)
 7 2014-01-06 06:28:09 35    (24.87391 36.07619)
 8 2014-01-06 06:28:10 35    (24.87381 36.07618)
 9 2014-01-06 06:28:11 35    (24.87374 36.07617)
10 2014-01-06 06:28:12 35    (24.87362 36.07618)
# ... with 685,159 more rows

Creating movement path from GPS points

gps_path <- gps_sf %>%
            group_by(id) %>%
            # Due to peculiar nature of group by, we need to do an action such as summarize
            summarize(m = mean(Timestamp),
                      do_union = FALSE) %>%
            st_cast("LINESTRING")
gps_path
Simple feature collection with 40 features and 2 fields
Geometry type: LINESTRING
Dimension:     XY
Bounding box:  xmin: 24.82509 ymin: 36.04802 xmax: 24.90849 ymax: 36.08996
Geodetic CRS:  WGS 84
# A tibble: 40 x 3
   id    m                                                    geometry
   <fct> <dttm>                                       <LINESTRING [°]>
 1 1     2014-01-12 04:35:32 (24.88258 36.06646, 24.88259 36.06634, 2~
 2 2     2014-01-12 02:49:43 (24.86038 36.08546, 24.86038 36.08551, 2~
 3 3     2014-01-12 10:13:25 (24.85763 36.08668, 24.85743 36.08662, 2~
 4 4     2014-01-12 13:44:49 (24.87214 36.07821, 24.87206 36.07819, 2~
 5 5     2014-01-12 13:35:03 (24.8779 36.0673, 24.87758 36.06736, 24.~
 6 6     2014-01-12 02:05:55 (24.89477 36.05949, 24.89475 36.05939, 2~
 7 7     2014-01-12 14:32:11 (24.86424 36.08449, 24.86421 36.08438, 2~
 8 8     2014-01-12 13:20:30 (24.88597 36.0673, 24.88595 36.06725, 24~
 9 9     2014-01-12 16:17:25 (24.85095 36.08172, 24.85095 36.08175, 2~
10 10    2014-01-12 16:31:53 (24.86589 36.07682, 24.86587 36.07676, 2~
# ... with 30 more rows
# Have to slice the paths by days, or time period to derive meaningful paths

Plotting the gps paths

gps_path_selected <- gps_path %>% 
  filter (id==1)
tmap_mode("view")
tm_shape(ap) +
  tm_rgb(ap, r=1,g=2,b=3,
         alpha = NA,
         saturation = 1,
         interpolate = TRUE,
         max.value = 255) +
  tm_shape(gps_path_selected) +
  tm_lines()