Today I learnt in class: How to use plotly to plot interactive charts and crosstalk to link charts/data tables. And we also go deeper into visualization of geospatial data using sf, raster, and tmap functions.
packages = c('DT','ggiraph','plotly','tidyverse','patchwork',
'sf','tmap','raster','clock')
for (p in packages) {
if(!require(p, character.only = T)){
install.packages(p)
}
library(p, character.only = T)
}
exam_data <- read_csv("data/Exam_data.csv")
head(exam_data,10)
# A tibble: 10 x 7
ID CLASS GENDER RACE ENGLISH MATHS SCIENCE
<chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
1 Student321 3I Male Malay 21 9 15
2 Student305 3I Female Malay 24 22 16
3 Student289 3H Male Chinese 26 16 16
4 Student227 3F Male Chinese 27 77 31
5 Student318 3I Male Malay 27 11 25
6 Student306 3I Female Malay 31 16 16
7 Student313 3I Male Chinese 31 21 25
8 Student316 3I Male Malay 31 18 27
9 Student312 3I Male Malay 33 19 15
10 Student297 3H Male Indian 34 49 37
summary(exam_data)
ID CLASS GENDER
Length:322 Length:322 Length:322
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
RACE ENGLISH MATHS SCIENCE
Length:322 Min. :21.00 Min. : 9.00 Min. :15.00
Class :character 1st Qu.:59.00 1st Qu.:58.00 1st Qu.:49.25
Mode :character Median :70.00 Median :74.00 Median :65.00
Mean :67.18 Mean :69.33 Mean :61.16
3rd Qu.:78.00 3rd Qu.:85.00 3rd Qu.:74.75
Max. :96.00 Max. :99.00 Max. :96.00
d <- highlight_key(exam_data)
p1 <- ggplot(data=d, aes(x = MATHS, y = ENGLISH)) +
geom_point(size = 1) +
coord_cartesian(xlim = c(0,100),
ylim=c(0,100))
p2 <- ggplot(data=d, aes(x = MATHS, y = SCIENCE)) +
geom_point(size = 1) +
coord_cartesian(xlim = c(0,100),
ylim = c(0,100))
subplot(ggplotly(p1),
ggplotly(p2))
d <- highlight_key(exam_data)
p <- ggplot(d,
aes(ENGLISH,
MATHS)) +
geom_point(size=1) +
coord_cartesian(xlim=c(0,100),
ylim=c(0,100))
gg <- highlight(ggplotly(p),
"plotly_selected")
crosstalk::bscols(gg,
DT::datatable(d),
widths = 3)
ap <- raster("data/Geospatial/MC2-tourist.tif")
ap
class : RasterLayer
band : 1 (of 3 bands)
dimensions : 1595, 2706, 4316070 (nrow, ncol, ncell)
resolution : 3.16216e-05, 3.16216e-05 (x, y)
extent : 24.82419, 24.90976, 36.04499, 36.09543 (xmin, xmax, ymin, ymax)
crs : +proj=longlat +datum=WGS84 +no_defs
source : MC2-tourist.tif
names : MC2.tourist
values : 0, 255 (min, max)
tmap_mode("plot")
# change to view for interactive version, plot for static version
tm_shape(ap) +
tm_raster(ap,
legend.show = FALSE)

tm_shape(ap) +
tm_rgb(ap, r=1,g=2,b=3,
alpha = NA,
saturation = 1,
interpolate = TRUE,
max.value = 255)

abila_st <- st_read(dsn = "data/Geospatial",
layer = "Abila")
Reading layer `Abila' from data source `C:\kpokp\blog_ISSS608\_posts\2021-07-03-week-9-in-class-exercise\data\Geospatial' using driver `ESRI Shapefile'
Simple feature collection with 3290 features and 9 fields
Geometry type: LINESTRING
Dimension: XY
Bounding box: xmin: 24.82401 ymin: 36.04502 xmax: 24.90997 ymax: 36.09492
Geodetic CRS: WGS 84
gps <- read_csv("data/aspatial/gps.csv")
glimpse(gps)
Rows: 685,169
Columns: 4
$ Timestamp <chr> "01/06/2014 06:28:01", "01/06/2014 06:28:01", "01/~
$ id <dbl> 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35~
$ lat <dbl> 36.07623, 36.07622, 36.07621, 36.07622, 36.07621, ~
$ long <dbl> 24.87469, 24.87460, 24.87444, 24.87425, 24.87417, ~
gps$Timestamp <- date_time_parse(gps$Timestamp,
zone ="",
format = "%m/%d/%Y %H:%M:%S")
# Factor is a unique kind of character for numerical values
gps$id <- as_factor(gps$id)
gps
# A tibble: 685,169 x 4
Timestamp id lat long
<dttm> <fct> <dbl> <dbl>
1 2014-01-06 06:28:01 35 36.1 24.9
2 2014-01-06 06:28:01 35 36.1 24.9
3 2014-01-06 06:28:03 35 36.1 24.9
4 2014-01-06 06:28:05 35 36.1 24.9
5 2014-01-06 06:28:06 35 36.1 24.9
6 2014-01-06 06:28:07 35 36.1 24.9
7 2014-01-06 06:28:09 35 36.1 24.9
8 2014-01-06 06:28:10 35 36.1 24.9
9 2014-01-06 06:28:11 35 36.1 24.9
10 2014-01-06 06:28:12 35 36.1 24.9
# ... with 685,159 more rows
gps_sf <- st_as_sf(gps,
coords = c("long","lat"),
# EPSG 4326 which stands for wgs84 geo. coord sys
crs = 4326)
gps_sf
Simple feature collection with 685169 features and 2 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 24.82509 ymin: 36.04802 xmax: 24.90849 ymax: 36.08996
Geodetic CRS: WGS 84
# A tibble: 685,169 x 3
Timestamp id geometry
* <dttm> <fct> <POINT [°]>
1 2014-01-06 06:28:01 35 (24.87469 36.07623)
2 2014-01-06 06:28:01 35 (24.8746 36.07622)
3 2014-01-06 06:28:03 35 (24.87444 36.07621)
4 2014-01-06 06:28:05 35 (24.87425 36.07622)
5 2014-01-06 06:28:06 35 (24.87417 36.07621)
6 2014-01-06 06:28:07 35 (24.87406 36.07619)
7 2014-01-06 06:28:09 35 (24.87391 36.07619)
8 2014-01-06 06:28:10 35 (24.87381 36.07618)
9 2014-01-06 06:28:11 35 (24.87374 36.07617)
10 2014-01-06 06:28:12 35 (24.87362 36.07618)
# ... with 685,159 more rows
gps_path <- gps_sf %>%
group_by(id) %>%
# Due to peculiar nature of group by, we need to do an action such as summarize
summarize(m = mean(Timestamp),
do_union = FALSE) %>%
st_cast("LINESTRING")
gps_path
Simple feature collection with 40 features and 2 fields
Geometry type: LINESTRING
Dimension: XY
Bounding box: xmin: 24.82509 ymin: 36.04802 xmax: 24.90849 ymax: 36.08996
Geodetic CRS: WGS 84
# A tibble: 40 x 3
id m geometry
<fct> <dttm> <LINESTRING [°]>
1 1 2014-01-12 04:35:32 (24.88258 36.06646, 24.88259 36.06634, 2~
2 2 2014-01-12 02:49:43 (24.86038 36.08546, 24.86038 36.08551, 2~
3 3 2014-01-12 10:13:25 (24.85763 36.08668, 24.85743 36.08662, 2~
4 4 2014-01-12 13:44:49 (24.87214 36.07821, 24.87206 36.07819, 2~
5 5 2014-01-12 13:35:03 (24.8779 36.0673, 24.87758 36.06736, 24.~
6 6 2014-01-12 02:05:55 (24.89477 36.05949, 24.89475 36.05939, 2~
7 7 2014-01-12 14:32:11 (24.86424 36.08449, 24.86421 36.08438, 2~
8 8 2014-01-12 13:20:30 (24.88597 36.0673, 24.88595 36.06725, 24~
9 9 2014-01-12 16:17:25 (24.85095 36.08172, 24.85095 36.08175, 2~
10 10 2014-01-12 16:31:53 (24.86589 36.07682, 24.86587 36.07676, 2~
# ... with 30 more rows
# Have to slice the paths by days, or time period to derive meaningful paths
gps_path_selected <- gps_path %>%
filter (id==1)
tmap_mode("view")
tm_shape(ap) +
tm_rgb(ap, r=1,g=2,b=3,
alpha = NA,
saturation = 1,
interpolate = TRUE,
max.value = 255) +
tm_shape(gps_path_selected) +
tm_lines()