Kelly's Blog: TIL: Week 8 In-Class Exercise

Install packages and loading data

Install the necessary packages if they are not already installed, then load them

# Packages this exercise depends on.
packages <- c("DT", "ggiraph", "plotly", "tidyverse")

# Install each package that is not yet available, then attach it.
for (p in packages) {
  # requireNamespace() only checks availability; unlike require() it does not
  # attach the package or warn when the package is missing.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  # character.only = TRUE because `p` holds the package name as a string.
  library(p, character.only = TRUE)
}

Import data from csv and preview data

# Read the exam results from the CSV file into exam_data.
exam_data <- read_csv(file = "data/Exam_data.csv")

# Preview the first ten rows.
head(exam_data, n = 10)

# A tibble: 10 x 7
   ID         CLASS GENDER RACE    ENGLISH MATHS SCIENCE
   <chr>      <chr> <chr>  <chr>     <dbl> <dbl>   <dbl>
 1 Student321 3I    Male   Malay        21     9      15
 2 Student305 3I    Female Malay        24    22      16
 3 Student289 3H    Male   Chinese      26    16      16
 4 Student227 3F    Male   Chinese      27    77      31
 5 Student318 3I    Male   Malay        27    11      25
 6 Student306 3I    Female Malay        31    16      16
 7 Student313 3I    Male   Chinese      31    21      25
 8 Student316 3I    Male   Malay        31    18      27
 9 Student312 3I    Male   Malay        33    19      15
10 Student297 3H    Male   Indian       34    49      37

# Print per-column summary statistics for exam_data.
summary(exam_data)

      ID               CLASS              GENDER         
 Length:322         Length:322         Length:322        
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
                                                         
                                                         
                                                         
     RACE              ENGLISH          MATHS          SCIENCE     
 Length:322         Min.   :21.00   Min.   : 9.00   Min.   :15.00  
 Class :character   1st Qu.:59.00   1st Qu.:58.00   1st Qu.:49.25  
 Mode  :character   Median :70.00   Median :74.00   Median :65.00  
                    Mean   :67.18   Mean   :69.33   Mean   :61.16  
                    3rd Qu.:78.00   3rd Qu.:85.00   3rd Qu.:74.75  
                    Max.   :96.00   Max.   :99.00   Max.   :96.00

The dataset is made up of 7 variables with 322 observations:

ID is the unique key for each student
CLASS, GENDER, RACE are categorical variables expressed as character
ENGLISH, MATHS, SCIENCE are continuous variables stored as double-precision numeric values (<dbl>)

Basic charts using ggplot

Plot histogram of the Math scores

# Histogram of MATHS scores: 20 bins, x axis fixed to 0-100 with a tick
# every 5 marks.
math_hist <- ggplot(exam_data, aes(x = MATHS)) +
  geom_histogram(
    fill = "light blue",
    color = "black",
    boundary = 50,
    bins = 20
  ) +
  ggtitle("Distribution of Math Scores") +
  scale_x_continuous(
    name = "MATHS",
    limits = c(0, 100),
    breaks = seq(from = 0, to = 100, by = 5)
  )

# Render the ggplot object through plotly.
ggplotly(p = math_hist)

Plot histogram of the math scores by gender

# Histogram of MATHS scores with bars filled by GENDER and stacked.
math_hist2 <- ggplot(exam_data, aes(x = MATHS, fill = GENDER)) +
  geom_histogram(
    position = "stack",
    color = "grey30",
    boundary = 50,
    bins = 20
  ) +
  ggtitle("Distribution of Math Scores by Gender")

# Render the ggplot object through plotly.
ggplotly(p = math_hist2)

Plot bar chart of race

# Bar chart counting the rows of exam_data in each RACE category.
race_bar <- ggplot(exam_data, aes(x = RACE)) +
  ggtitle("Distribution of Race") +
  geom_bar()

# Render the ggplot object through plotly.
ggplotly(p = race_bar)

Plot dotplot of math scores by race

# Dotplot of MATHS scores, dots filled by RACE and stacked across races.
math_dot <- ggplot(
  data = exam_data,
  aes(x = MATHS, fill = RACE)
) +
  geom_dotplot(
    # Fixed-width bins shared by all groups. With the default "dotdensity"
    # method each fill group is binned separately, so stacks built with
    # stackgroups = TRUE may not line up. This also matches the interactive
    # version of this chart further down.
    method = "histodot",
    binwidth = 2.5,
    dotsize = 0.5,
    stackgroups = TRUE
  ) +
  # Drop the y axis title and its breaks.
  scale_y_continuous(
    name = NULL,
    breaks = NULL
  ) +
  ggtitle("Distribution of Math Scores by Race")

math_dot

Plot boxplot of math scores by gender

# Boxplot of MATHS per GENDER with the individual scores overlaid as
# small jittered points.
math_box <- ggplot(exam_data, aes(x = GENDER, y = MATHS)) +
  geom_boxplot() +
  geom_point(
    size = 0.5,
    position = "jitter"
  ) +
  ggtitle("Distribution of Math Scores by Gender")

# Render the ggplot object through plotly.
ggplotly(p = math_box)

Interactivity ggplot using ggiraph

Tooltip
Onclick
Data_id

# Interactive dotplot of MATHS scores filled by RACE; the tooltip
# aesthetic is mapped to the student ID.
math_dot_interactive <- ggplot(exam_data, aes(x = MATHS, fill = RACE)) +
  geom_dotplot_interactive(
    aes(tooltip = ID),
    stackgroups = TRUE,
    dotsize = 0.5,
    binwidth = 2.5,
    method = "histodot"
  ) +
  # Drop the y axis title and its breaks.
  scale_y_continuous(name = NULL, breaks = NULL) +
  ggtitle("Distribution of Math Scores by Race")

# Render as a girafe widget; height is 0.618 times the 6-unit width.
girafe(
  ggobj = math_dot_interactive,
  height_svg = 6*0.618,
  width_svg = 6
)

Plot interactive dotplot of math scores using ggiraph data_id

# Interactive dotplot of MATHS scores; CLASS is mapped to both the
# data_id and the tooltip aesthetics.
math_dot_interactive <- ggplot(exam_data, aes(x = MATHS)) +
  geom_dotplot_interactive(
    aes(data_id = CLASS, tooltip = CLASS),
    stackgroups = TRUE,
    binwidth = 1,
    method = "histodot"
  ) +
  # Drop the y axis title and its breaks.
  scale_y_continuous(name = NULL, breaks = NULL) +
  ggtitle("Distribution of Math Scores by Class")

# Render as a girafe widget; height is 0.618 times the 6-unit width.
girafe(
  ggobj = math_dot_interactive,
  height_svg = 6*0.618,
  width_svg = 6
)

TIL: Week 8 In-Class Exercise

Install packages and loading data

Install the necessary packages if they are not in library

Import data from csv and preview data

Basic charts using ggplot

Plot histogram of the Math scores

Plot histogram of the math scores by gender

Plot bar chart of race

Plot dotplot of math scores by race

Plot boxplot of math scores by gender

Interactivity ggplot using ggiraph

Plot interactive dotplot of math scores by race using ggiraph tooltip

Plot interactive dotplot of math scores using ggiraph data_id