0.1 Texas death row executed offenders website

This continues with the Texas Department of Criminal Justice data, which keeps records of every inmate executed.

0.2 Packages

library(knitr)
library(rmdformats)
library(hrbrthemes)
library(tidyverse)
library(rvest)
library(XML)
library(magrittr)
library(xml2)
library(here)
library(magick)

0.3 Import the data

We will load previous .csv file of all executions.

fs::dir_tree("../data/wk10-dont-mess-with-texas/")
## ../data/wk10-dont-mess-with-texas/
## ├── 2021-11-21-ExecutedOffenders.csv
## ├── 2021-11-30-ExecutedOffenders.csv
## └── processed
##     ├── 2021-11-21
##     │   ├── 2021-11-21-ExExOffndrshtml.csv
##     │   ├── 2021-11-21-ExExOffndrsjpg.csv
##     │   └── ExOffndrsComplete.csv
##     └── 2021-11-30
##         ├── 2021-11-30-ExExOffndrshtml.csv
##         ├── 2021-11-30-ExExOffndrsjpg.csv
##         └── ExOffndrsComplete.csv

The code below will import the most recent data.

# fs::dir_ls("data/processed/2021-10-25")
ExecOffenders <- readr::read_csv("https://bit.ly/2Z7pKTI")
ExOffndrsComplete <- readr::read_csv("https://bit.ly/3oLZdEm")

Wrangle these date variables,

ExecOffenders <- ExecOffenders %>%
  dplyr::mutate(
    date = lubridate::mdy(date),
    year = lubridate::year(date),
    yday = lubridate::yday(date),
    month = lubridate::month(date, label = TRUE)) 
ExecOffenders %>% skimr::skim()
Data summary
Name Piped data
Number of rows 573
Number of columns 18
_______________________
Column type frequency:
character 11
Date 1
factor 1
numeric 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
last_name 0 1 3 15 0 463 0
first_name 0 1 3 11 0 272 0
offender_info 0 1 18 18 0 1 0
last_statement 0 1 14 14 0 1 0
race 0 1 5 8 0 4 0
county 0 1 3 12 0 94 0
last_url 0 1 61 78 0 472 0
info_url 0 1 56 74 0 563 0
name_last_url 0 1 61 78 0 472 0
dr_info_url 0 1 56 74 0 563 0
jpg_html 0 1 3 4 0 2 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
date 0 1 1982-12-07 2021-09-28 2002-11-20 570

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
month 0 1 TRUE 12 Jan: 61, May: 59, Jun: 58, Sep: 52

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
execution 0 1 287.00 165.56 1 144 287 430 573 ▇▇▇▇▇
tdcj_number 0 1 537489.95 498260.12 511 821 999039 999274 999571 ▇▁▁▁▇
age 0 1 39.84 8.88 24 33 39 45 70 ▆▇▅▂▁
year 0 1 2002.95 8.20 1982 1997 2002 2009 2021 ▁▅▇▆▂
yday 0 1 173.90 102.64 4 85 168 263 352 ▇▇▆▇▆

These data are already pretty clean, but we will be using the .jpgs I’ve downloaded in the 02-iterate-with-download.Rmd file.

0.4 The magik package

I will be using the magik package for processing and manipulating these images. I advise checking out the entire vignette for more examples.

0.5 Create a test image

I picked an offender that was typical, meaning they represented the ‘average’ person from this sample, and stored it as test_image below:

The executed offender is Stevn Coen Renfro from Harrison Texas. He was executed on Februrary 9th, 1998.

I create test_magick_img from magick::image_read(), and then go on making the transformations as necessary.

test_magick_img <- magick::image_read(test_image)
test_magick_img

This images comes up in the viewer pane.

TIP: come up with a naming convention for each step so you can use RStudio’s viewer pane to see the manipulations.

0.6 Basic transformations

These functions are for basic image movement/manipulations you would do with any basic photo editing app.

0.6.1 Crop with magick::image_crop()

Now I want to remove the text and focus on the mugshot. This might need to be adjusted slightly for each new test_magick_img.

# crop this image
test_magick_crop_750_x_1000_10 <- magick::image_crop(
  image = test_magick_img,
  geometry = "750x1000+10"
)
test_magick_crop_750_x_1000_10

This should have trimmed the extra space off the bottom of the image.

0.6.2 Rotate with magick::image_rotate()

I want to rotate this image by 90 degrees.

# rotate this image
test_magick_rotate90 <- magick::image_rotate(test_magick_crop_750_x_1000_10,
  degrees = 90
)
test_magick_rotate90

Now I want to remove the rest of the text and focus on the mugshot. This might need to be adjusted slightly for each new test_image.

# crop this image
test_magick_crop_850_x_950_450 <- magick::image_crop(
  image = test_magick_rotate90,
  geometry = "850x950+450"
)
test_magick_crop_850_x_950_450

Now I will rotate this image back to center (image_rotate again) and flip it using magick::image_flip()

# rotate this image
test_magick_rotate270 <- magick::image_rotate(test_magick_crop_850_x_950_450,
  degrees = 270)
# rotate this image
test_magick_flip <- magick::image_flip(test_magick_rotate270)
test_magick_flip

I’ll crop the rest of the text out of the image, and trim the whitespace for the plot.

# crop this image
test_magick_crop_750_x_200_10 <- magick::image_crop(
  image = test_magick_flip,
  geometry = "750x200+10"
)
test_magick_crop_750_x_200_10

Flip this image again.

# flip this image again
test_magick_flip2 <- magick::image_flip(test_magick_crop_750_x_200_10)
test_magick_flip2

Rotate another 270:

# rotate to remove the dot
test_magick_rotate270v2 <- magick::image_rotate(test_magick_flip2,
  degrees = 270
)
test_magick_rotate270v2

Now crop out the last little bit of the document.

# crop the dot out
test_magick_crop_640_x_352_10 <- magick::image_crop(
  image = test_magick_rotate270v2,
  geometry = "650x352+10"
)
test_magick_crop_640_x_352_10

Rotate back to center

# rotate back to center
test_magick_rotate90v02 <- magick::image_rotate(test_magick_crop_640_x_352_10,
  degrees = 90
)
test_magick_rotate90v02

0.6.3 Image effects with magick

Now we will use magick::image_trim() to clean the image up a bit.

# Here we will trim the image up a bit with the `fuzz` argument
test_magick_clean <- magick::image_trim(
  image = test_magick_rotate90v02,
  fuzz = 1
)
test_magick_clean

Now that I have all the trimming on and cropping done, I will add some effects for the ggplot2 image.

I want the image to be a bit more subdued, so I will use magick::image_modulate() and magick::image_flatten() to create these effects.

test_image_modulate <- magick::image_modulate(test_magick_clean,
  brightness = 100,
  saturation = 25,
  hue = 20
)
# test_image_modulate
test_magick_final <- magick::image_flatten(test_image_modulate,
  operator = "Threshold"
)
test_magick_final

Export image:

magick::image_write(image = test_magick_final, 
                    path = "../img/test_magick_final.png")

0.7 Data for plot

I want to graph the number of executions over time (year) by race. I can do this by getting a grouped data from using dplyr’s functions.

0.8 Plot executions over time

I create base_ggplot2 as the basic plot I want as a layer for the image to appear on top of.

# Scatter plot
# colors_brewer
PlotExecOffender <- ExecOffenders %>%
  # remove 'Other'
  filter(race != "Other") %>% 
  dplyr::group_by(race, year) %>% 
      dplyr::summarise(
            ex_x_race_year = sum(n())) %>% 
    dplyr::arrange(desc(ex_x_race_year)) %>% 
  ungroup() 
head(PlotExecOffender)

0.9 Start with labels!

Build the labels first:

labs_executed_offndrss <- ggplot2::labs(
    title = "Texas Justice",
    subtitle = "Executions (1980-2020) in Texas",
    caption = "source: http://www.tdcj.state.tx.us/death_row/index.html",
    x = NULL,
    y = "Executions"
    )

0.10 Build a base plot

Now we can start with a base plot:

base_ggplot2 <- PlotExecOffender %>% 
  ggplot2::ggplot(data = ., aes(
    y = ex_x_race_year,
    x = year,
    color = race
  )) + 
  labs_executed_offndrss
base_ggplot2

0.10.1 Adding data to canvas

We can now add the data to the canvas.

Color palettes: http://applied-r.com/rcolorbrewer-palettes/

library(RColorBrewer)
gg_executions_year_01 <- base_ggplot2 + 
  # add the lines
  ggplot2::geom_line(aes(color = race), size = 2) + 
  # add the points
  ggplot2::geom_point(aes(color = race), 
                      size = 1.2,
                      alpha = 1/3) +
  # set the axes
  ggplot2::scale_x_continuous(breaks = seq(1982, 2020, 4)) +
  ggplot2::scale_y_continuous(breaks = seq(0, 22, 2)) +
  # add the themes
  ggplot2::theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.title = element_blank(),
    panel.grid.major = element_blank()
    # panel.grid.minor = element_blank()
  ) + 
  scale_color_brewer(palette = "RdBu")
gg_executions_year_01

This looks OK, but the white line is hard to see, so we will try another color palette in the example below:

0.10.2 Example: overplot using ggpubr package

The first example I’ll plot will use image as the ‘canvas’. This requires exporting the image as a .jpeg, then reloading it and using the ggpubr package.

library(jpeg)
# 1) export the `test_magick_final` file,
magick::image_write(test_magick_final,
  path =
    paste0("../img/",
      base::noquote(lubridate::today()),
      "-test_magick_final",
      format = ".jpg"
    )
)
# 2) then read it back in as an `jpeg::readJPEG()`.
fs::dir_ls("../img", regexp = lubridate::today())
## ../img/2021-11-30-test_magick_final.jpg
test_magick_final_file <- list.files("../img", 
                                     pattern = as.character(lubridate::today()), 
                                     full.names = TRUE)
# test_magick_final_file
imgJPEG <- jpeg::readJPEG(test_magick_final_file)

0.10.3 Build new labels

We want to adjust our labels to include the link about the offender in the background.

labs_executed_offndrss_02 <- ggplot2::labs(
    title = "Texas Justice (executions from 1980-2020)",
    subtitle = "source: http://www.tdcj.state.tx.us/death_row/index.html",
    caption = "Stevn Coen Renfro: https://murderpedia.org/male.R/r1/renfro-steven.htm",
    x = NULL,
    y = "Executions"
    ) 

0.10.4 Add a background image

Now I can add the imgJPEG after the base layer (but before I map the geom_line() and geom_theme()) using ggpubr::background_image().

library(hrbrthemes)
library(ggpubr)
gg_executions_year_02 <- base_ggplot2 +
  # this is the image for the background
  ggpubr::background_image(imgJPEG) +

  # add the lines
  ggplot2::geom_line(aes(color = race), alpha = 3/4, 
                     size = 0.8) + 
  # add the points
  ggplot2::geom_point(aes(color = race), 
                      size = 1.3,
                      alpha = 1/3) +
  # set the axes
  ggplot2::scale_x_continuous(breaks = seq(1982, 2020, 4)) +
  ggplot2::scale_y_continuous(breaks = seq(0, 22, 2)) +
  # add the themes
  hrbrthemes::theme_ft_rc() + 
  # add the labels
  labs_executed_offndrss_02 
gg_executions_year_02

0.10.5 Add manual color scale

We will add a manual color scale to this plot with scale_colour_discrete().

gg_executions_year_03 <- gg_executions_year_02 + 
  # add a manual color scale
  scale_colour_discrete(type = c("red", "blue", "black"))
gg_executions_year_03

0.10.6 Example: Add labels using ggrepel

We also want to label the specific point on the graph where Steven Renfro was executed. We can do this by filtering the ExecOffenders to only his row in that dataset, then creating a label variable with paste0().

ExecOffLabel <- ExecOffenders %>% 
  filter(last_name == "Renfro") %>%
  mutate(label = paste0(last_name, ", ", first_name, ": executed on ", date)) 
ExecOffLabel

We now want to limit the columns in ExecOffLabel to only those values on the graph, which are last_name, first_name, race, year, label

ExecOffLabel <- ExecOffLabel %>% 
  select(last_name, first_name, race, year, label)
ExecOffLabel

But wait, we grouped the ExecOffenders by race and year to create PlotExecOffender (review this code below)

PlotExecOffender <- ExecOffenders %>%
  # remove 'Other'
  filter(race != "Other") %>% 
  dplyr::group_by(race, year) %>% 
      dplyr::summarise(
            ex_x_race_year = sum(n())) %>% 
    dplyr::arrange(desc(ex_x_race_year)) %>% 
  ungroup() 
head(PlotExecOffender)

So in order to get that ex_x_race_year variable, we will left_join() the columns in ExecOffLabel to the columns in PlotExecOffender. Fortunately, dplyr is smart enough to know what we want to join on.

ExecOffLabelData <- left_join(x = ExecOffLabel, y = PlotExecOffender)
ExecOffLabelData

We see the message: Joining, by = c("race", "year") and we now have all the variables we need to add the labels to the graph. Now we load the ggrepel package and use the ggrepel::geom_label_repel() layer, with data set to ExecOffLabelData.

We also add another geom_point() layer with ExecOffLabelData and increase the size of the dot so it stands out.

library(ggrepel)
gg_executions_year_04 <- base_ggplot2 +
  # this is the image for the background
  ggpubr::background_image(imgJPEG) +

  # add the lines
  ggplot2::geom_line(aes(color = race), alpha = 1/2, 
                     size = 1.5) + 
  # add the points
  ggplot2::geom_point(aes(color = race), 
                      size = 1.2,
                      alpha = 1/4) +
     ggplot2::geom_point(data = ExecOffLabelData,
                     aes(color = race), 
                      size = 10,
                      alpha = 1/4,
                     show.legend = FALSE) +
     # add value label
     geom_label_repel(data = ExecOffLabelData,
              aes(x = year, y = ex_x_race_year,
                  label = label),
                  # set color and size...
                  color = "black",
                  size = 2) +
  # set the axes
  ggplot2::scale_x_continuous(breaks = seq(1982, 2020, 4)) +
  ggplot2::scale_y_continuous(breaks = seq(0, 22, 2)) +
  # add the themes
  hrbrthemes::theme_ft_rc(axis_text_size = 10, axis_title_size = 12) + 
  # add color scale
  scale_colour_discrete(type = c("red", "blue", "black")) +
  # add plot labels
  labs_executed_offndrss_02
gg_executions_year_04