Risk Tables

library(segtools)
library(dplyr)
library(vroom)
library(janitor)
library(waldo)
library(diffobj)

The risk tables are created using three functions in the seg-shiny-1-3-3 repo. Through the multiple iterations of this application, different values have been used to establish these categories.

This vignette covers the development of each table function and look-up table, and performs unit tests on the values from the segtools functions to match the output in the application.

Load test data

The segtools package uses testthat, a common testing framework for performing unit tests. I’ll load a test data file used in the GitHub repo (VanderbiltComplete.csv) below and run it in the shiny app to generate the tables for comparison.

github_data_root <-
  "https://raw.githubusercontent.com/mjfrigaard/seg-shiny-data/master/Data/"
full_sample_repo <- base::paste0(github_data_root,
  "VanderbiltComplete.csv")
test_vand_comp_data <-
  vroom::vroom(file = full_sample_repo, delim = ",")
glimpse(test_vand_comp_data)
#> Rows: 9,891
#> Columns: 2
#> $ BGM <dbl> 121, 212, 161, 191, 189, 104, 293, 130, 261, 147, 83, 132, 146, 24…
#> $ REF <dbl> 127, 223, 166, 205, 210, 100, 296, 142, 231, 148, 81, 131, 155, 25…

Application (version 1.3.3) functions

Below are the three functions from the helpers.R file in the application repo.

SEG variables

The SEG variables were initially created with the segTable() function. I’ve included it below:

segTable <- function(dat) {

  # 3.1 - import data frame -----
  SampMeasData <- suppressWarnings(readr::read_csv(file = dat))

  SampMeasData %>%
    dplyr::mutate(BGM = as.double(BGM),
                  REF = as.double(REF)) %>%

  # create bgm_pair_cat ---- 
    dplyr::mutate(
      bgm_pair_cat =
        dplyr::case_when(
          BGM < REF ~ "BGM < REF",
          BGM == REF ~ "BGM = REF",
          BGM > REF ~ "BGM > REF"
        )
    ) |> 
    # 3.3 create ref_pair_2cat ---- 
    dplyr::mutate(
      ref_pair_2cat =
        dplyr::case_when(
          REF > 600 ~ "REF > 600: Excluded from SEG Analysis",
          REF < 21 & REF <= 600 ~ "REF <21: Included in SEG Analysis"
        )
    ) %>%
    # # 3.4 create included ---- 
    dplyr::mutate(
      included =
        dplyr::case_when(
          REF <= 600 ~ "Total included in SEG Analysis",
          REF > 600 ~ "Total excluded in SEG Analysis"
        )
    ) %>%
    # 3.5 join to RiskPairData ---- 
    dplyr::inner_join(.,
      y = RiskPairData,
      by = c("BGM", "REF")
    ) %>%
    dplyr::mutate( # 3.6 Create risk_cat variable ----
      risk_cat =
        base::findInterval(
          x = abs_risk, # the abs_risk absolute value
          vec = LookUpRiskCat$ABSLB, # the lower bound absolute risk
          left.open = TRUE
        ) - 1
    ) %>%
    dplyr::inner_join( # 3.7 Join to LookUpRiskCat data ---- 
      x = ., y = LookUpRiskCat, # inner join to look-up
      by = "risk_cat"
    ) %>%
    dplyr::mutate(
      risk_cat_txt = # text risk categories
      dplyr::case_when(
        abs_risk < 0.5 ~ "None",
        abs_risk >= 0.5 & abs_risk <= 1 ~ "Slight, Lower",
        abs_risk > 1 & abs_risk <= 1.5 ~ "Slight, Higher",
        abs_risk > 1.5 & abs_risk <= 2.0 ~ "Moderate, Lower",
        abs_risk > 2 & abs_risk <= 2.5 ~ "Moderate, Higher",
        abs_risk > 2.5 & abs_risk <= 3.0 ~ "Severe, Lower",
        abs_risk > 3.0 & abs_risk <= 3.5 ~ "Severe, Higher",
        abs_risk > 3.5 ~ "Extreme"
      )
    ) %>%
    dplyr::mutate(
      rel_diff = (BGM - REF) / REF, # relative diff
      abs_rel_diff = abs(rel_diff), # abs relative diff
      sq_rel_diff = rel_diff^2,
      iso_diff =
      if_else(REF >= 100, # condition 1
        100 * abs(BGM - REF) / REF, # T 1
        if_else(REF < 100, # condition 2
          abs(BGM - REF), # T 2
          NA_real_
        ), # F 2
        NA_real_
      ), # F1
      iso_range = # # 4.3.16 create iso range variable ----
      dplyr::case_when(
          # # A tibble: 5 x 2
          #      ID iso_range
          #   <int> <chr>
          # 1     1 <= 5% or 5 mg/dL
          # 2     2 > 5 - 10% or mg/dL
          # 3     3 > 10 - 15% or mg/dL
          # 4     4 > 15 - 20% mg/dL
          # 5     5 > 20% or 20 mg/dL
        iso_diff <= 5 ~ "<= 5% or 5 mg/dL",
        iso_diff > 5 & iso_diff <= 10 ~ "> 5 - 10% or mg/dL",
        iso_diff > 10 & iso_diff <= 15 ~ "> 10 - 15% or mg/dL",
        iso_diff > 15 & iso_diff <= 20 ~ "> 15 - 20% mg/dL",
        iso_diff > 20 ~ "> 20% or 20 mg/dL"),

      risk_grade = dplyr::case_when(
        abs_risk >= 0.0 & abs_risk < 0.5 ~ "A",
        abs_risk >= 0.5 & abs_risk < 1.0 ~ "B",
        abs_risk >= 1.0 & abs_risk < 2.0 ~ "C",
        abs_risk >= 2.0 & abs_risk < 3.0 ~ "D",
        abs_risk >= 3.0 ~ "E"
      ),
      risk_grade_txt = dplyr::case_when(
        abs_risk >= 0.0 & abs_risk < 0.5 ~ "0 - 0.5",
        abs_risk >= 0.5 & abs_risk < 1.0 ~ "> 0.5 - 1.0",
        abs_risk >= 1.0 & abs_risk < 2.0 ~ "> 1.0 - 2.0",
        abs_risk >= 2.0 & abs_risk < 3.0 ~ "> 2.0 - 3.0",
        abs_risk >= 3.0 ~ "> 3.0"
      )
    )
}

Error

When I try this with the full_sample_repo, I get the following error:

segTable(dat = full_sample_repo)
#> Error in loadNamespace(x): there is no package called 'readr'

SEG Risk Category Columns

I’ll split segTable() into two parts to identify the bug:

seg_risk_cat_cols() imports the AppLookUpRiskCat.csv and AppRiskPairData.csv inside the function.

seg_risk_cat_cols <- function(df) {
  github_data_root <-
    "https://raw.githubusercontent.com/mjfrigaard/seg-shiny-data/master/Data/"

  # import LookUpRiskCat -----
  app_lookup_repo <- base::paste0(
    github_data_root,
    "AppLookUpRiskCat.csv"
  )
  LookUpRiskCat <- vroom::vroom(file = app_lookup_repo, delim = ",")

  # import RiskPairData -----
  app_riskpair_repo <- base::paste0(
    github_data_root,
    "AppRiskPairData.csv"
  )
  RiskPairData <- vroom::vroom(file = app_riskpair_repo, delim = ",")

  # directly manipulate df
  bgm_ref_tbl <- dplyr::mutate(df,
    BGM = as.double(BGM),
    REF = as.double(REF)
  )

  # create bgm_pair_cat ----
  bgm_pair_cat_tbl <- dplyr::mutate(bgm_ref_tbl,
    bgm_pair_cat =
      dplyr::case_when(
        BGM < REF ~ "BGM < REF",
        BGM == REF ~ "BGM = REF",
        BGM > REF ~ "BGM > REF"
      )
  )
  # create ref_pair_2cat ----
  ref_pair_2cat_tbl <- dplyr::mutate(bgm_pair_cat_tbl,
    ref_pair_2cat =
      dplyr::case_when(
        REF > 600 ~ "REF > 600: Excluded from SEG Analysis",
        REF < 21 & REF <= 600 ~ "REF <21: Included in SEG Analysis"
      )
  )
  # create included ----
  included_tbl <- dplyr::mutate(ref_pair_2cat_tbl,
    included =
      dplyr::case_when(
        REF <= 600 ~ "Total included in SEG Analysis",
        REF > 600 ~ "Total excluded in SEG Analysis"
      )
  )
  # join to RiskPairData ----
  risk_pair_tbl <- dplyr::inner_join(included_tbl,
    y = RiskPairData,
    by = c("BGM", "REF")
  )
  # Create risk_cat variable ----
  risk_cat_tbl <- dplyr::mutate(risk_pair_tbl,
    risk_cat =
      base::findInterval(
        x = abs_risk, # the abs_risk absolute value
        vec = LookUpRiskCat$ABSLB, # the lower bound absolute risk
        left.open = TRUE
      ) - 1
  )
  # Join to LookUpRiskCat data ----
  risk_cat_join_tbl <- dplyr::inner_join(
    x = risk_cat_tbl,
    y = LookUpRiskCat, # inner join to look-up
    by = "risk_cat"
  )
  # create text risk categories ----
  seg_risk_cat_cols_tbl <- dplyr::mutate(risk_cat_join_tbl,
      risk_cat_txt = 
      dplyr::case_when(
        abs_risk < 0.5 ~ "None",
        abs_risk >= 0.5 & abs_risk <= 1 ~ "Slight, Lower",
        abs_risk > 1 & abs_risk <= 1.5 ~ "Slight, Higher",
        abs_risk > 1.5 & abs_risk <= 2.0 ~ "Moderate, Lower",
        abs_risk > 2 & abs_risk <= 2.5 ~ "Moderate, Higher",
        abs_risk > 2.5 & abs_risk <= 3.0 ~ "Severe, Lower",
        abs_risk > 3.0 & abs_risk <= 3.5 ~ "Severe, Higher",
        abs_risk > 3.5 ~ "Extreme"
      )
    )
  return(seg_risk_cat_cols_tbl)
}

I test seg_risk_cat_cols() with test_vand_comp_data:

seg_risk_cat_cols(df = test_vand_comp_data)
#> # A tibble: 9,868 × 12
#>      BGM   REF bgm_pair_cat ref_pair_2cat included         RiskPairID RiskFactor
#>    <dbl> <dbl> <chr>        <chr>         <chr>                 <dbl>      <dbl>
#>  1   121   127 BGM < REF    NA            Total included …      72849    0.00254
#>  2   212   223 BGM < REF    NA            Total included …     127636    0.0280 
#>  3   161   166 BGM < REF    NA            Total included …      96928    0      
#>  4   191   205 BGM < REF    NA            Total included …     114997    0.206  
#>  5   189   210 BGM < REF    NA            Total included …     113800    0.209  
#>  6   104   100 BGM > REF    NA            Total included …      62605   -0.125  
#>  7   293   296 BGM < REF    NA            Total included …     176390    0      
#>  8   130   142 BGM < REF    NA            Total included …      78273    0.267  
#>  9   261   231 BGM > REF    NA            Total included …     157093   -0.285  
#> 10   147   148 BGM < REF    NA            Total included …      88496    0      
#> # ℹ 9,858 more rows
#> # ℹ 5 more variables: abs_risk <dbl>, risk_cat <dbl>, ABSLB <dbl>, ABSUB <dbl>,
#> #   risk_cat_txt <chr>

SEG ISO Range Columns

seg_iso_cols() creates the ISO columns:

seg_iso_cols <- function(risk_cat_cols) {
    iso_cols_tbl <- dplyr::mutate(risk_cat_cols, 
      rel_diff = (BGM - REF) / REF, # relative diff
      abs_rel_diff = abs(rel_diff), # abs relative diff
      sq_rel_diff = rel_diff^2,
      iso_diff =
      if_else(REF >= 100, # condition 1
        100 * abs(BGM - REF) / REF, # T 1
        if_else(REF < 100, # condition 2
          abs(BGM - REF), # T 2
          NA_real_
        ), # F 2
        NA_real_
      ), # F1
      iso_range = # # 4.3.16 create iso range variable ----
      dplyr::case_when(
        iso_diff <= 5 ~ "<= 5% or 5 mg/dL",
        iso_diff > 5 & iso_diff <= 10 ~ "> 5 - 10% or mg/dL",
        iso_diff > 10 & iso_diff <= 15 ~ "> 10 - 15% or mg/dL",
        iso_diff > 15 & iso_diff <= 20 ~ "> 15 - 20% mg/dL",
        iso_diff > 20 ~ "> 20% or 20 mg/dL"),

      risk_grade = dplyr::case_when(
        abs_risk >= 0.0 & abs_risk < 0.5 ~ "A",
        abs_risk >= 0.5 & abs_risk < 1.0 ~ "B",
        abs_risk >= 1.0 & abs_risk < 2.0 ~ "C",
        abs_risk >= 2.0 & abs_risk < 3.0 ~ "D",
        abs_risk >= 3.0 ~ "E"
      ),
      risk_grade_txt = dplyr::case_when(
        abs_risk >= 0.0 & abs_risk < 0.5 ~ "0 - 0.5",
        abs_risk >= 0.5 & abs_risk < 1.0 ~ "> 0.5 - 1.0",
        abs_risk >= 1.0 & abs_risk < 2.0 ~ "> 1.0 - 2.0",
        abs_risk >= 2.0 & abs_risk < 3.0 ~ "> 2.0 - 3.0",
        abs_risk >= 3.0 ~ "> 3.0"
      )
    )
    return(iso_cols_tbl)
}

seg_iso_cols() accepts the output from seg_risk_cat_cols() and returns a dataset with all the risk/ISO variables.

risk_cat_vars <- seg_risk_cat_cols(df = test_vand_comp_data)
iso_vars_tbl <- seg_iso_cols(risk_cat_cols = risk_cat_vars)
glimpse(iso_vars_tbl)
#> Rows: 9,868
#> Columns: 19
#> $ BGM            <dbl> 121, 212, 161, 191, 189, 104, 293, 130, 261, 147, 83, 1…
#> $ REF            <dbl> 127, 223, 166, 205, 210, 100, 296, 142, 231, 148, 81, 1…
#> $ bgm_pair_cat   <chr> "BGM < REF", "BGM < REF", "BGM < REF", "BGM < REF", "BG…
#> $ ref_pair_2cat  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ included       <chr> "Total included in SEG Analysis", "Total included in SE…
#> $ RiskPairID     <dbl> 72849, 127636, 96928, 114997, 113800, 62605, 176390, 78…
#> $ RiskFactor     <dbl> 0.0025445, 0.0279900, 0.0000000, 0.2061100, 0.2086500, …
#> $ abs_risk       <dbl> 0.0025445, 0.0279900, 0.0000000, 0.2061100, 0.2086500, …
#> $ risk_cat       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
#> $ ABSLB          <dbl> -0.001, -0.001, -0.001, -0.001, -0.001, -0.001, -0.001,…
#> $ ABSUB          <dbl> 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, …
#> $ risk_cat_txt   <chr> "None", "None", "None", "None", "None", "None", "None",…
#> $ rel_diff       <dbl> -0.047244094, -0.049327354, -0.030120482, -0.068292683,…
#> $ abs_rel_diff   <dbl> 0.047244094, 0.049327354, 0.030120482, 0.068292683, 0.1…
#> $ sq_rel_diff    <dbl> 2.232004e-03, 2.433188e-03, 9.072434e-04, 4.663891e-03,…
#> $ iso_diff       <dbl> 4.7244094, 4.9327354, 3.0120482, 6.8292683, 10.0000000,…
#> $ iso_range      <chr> "<= 5% or 5 mg/dL", "<= 5% or 5 mg/dL", "<= 5% or 5 mg/…
#> $ risk_grade     <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", …
#> $ risk_grade_txt <chr> "0 - 0.5", "0 - 0.5", "0 - 0.5", "0 - 0.5", "0 - 0.5", …

Combine SEG Risk Category and ISO Range columns

seg_risk_vars() is a wrapper around seg_risk_cat_cols() and seg_iso_cols():

seg_risk_vars <- function(df) {
  seg_risk_cat_cols <- function(df) {
  github_data_root <-
    "https://raw.githubusercontent.com/mjfrigaard/seg-shiny-data/master/Data/"

  # import LookUpRiskCat -----
  app_lookup_repo <- base::paste0(
    github_data_root,
    "AppLookUpRiskCat.csv"
  )
  LookUpRiskCat <- vroom::vroom(file = app_lookup_repo, delim = ",")

  # import RiskPairData -----
  app_riskpair_repo <- base::paste0(
    github_data_root,
    "AppRiskPairData.csv"
  )
  RiskPairData <- vroom::vroom(file = app_riskpair_repo, delim = ",")

  # directly manipulate df
  bgm_ref_tbl <- dplyr::mutate(df,
    BGM = as.double(BGM),
    REF = as.double(REF)
  )

  # create bgm_pair_cat ----
  bgm_pair_cat_tbl <- dplyr::mutate(bgm_ref_tbl,
    bgm_pair_cat =
      dplyr::case_when(
        BGM < REF ~ "BGM < REF",
        BGM == REF ~ "BGM = REF",
        BGM > REF ~ "BGM > REF"
      )
  )
  # create ref_pair_2cat ----
  ref_pair_2cat_tbl <- dplyr::mutate(bgm_pair_cat_tbl,
    ref_pair_2cat =
      dplyr::case_when(
        REF > 600 ~ "REF > 600: Excluded from SEG Analysis",
        REF < 21 & REF <= 600 ~ "REF <21: Included in SEG Analysis"
      )
  )
  # create included ----
  included_tbl <- dplyr::mutate(ref_pair_2cat_tbl,
    included =
      dplyr::case_when(
        REF <= 600 ~ "Total included in SEG Analysis",
        REF > 600 ~ "Total excluded in SEG Analysis"
      )
  )
  # join to RiskPairData ----
  risk_pair_tbl <- dplyr::inner_join(included_tbl,
    y = RiskPairData,
    by = c("BGM", "REF")
  )
  # Create risk_cat variable ----
  risk_cat_tbl <- dplyr::mutate(risk_pair_tbl,
    risk_cat =
      base::findInterval(
        x = abs_risk, # the abs_risk absolute value
        vec = LookUpRiskCat$ABSLB, # the lower bound absolute risk
        left.open = TRUE
      ) - 1
  )
  # Join to LookUpRiskCat data ----
  risk_cat_join_tbl <- dplyr::inner_join(
    x = risk_cat_tbl,
    y = LookUpRiskCat, # inner join to look-up
    by = "risk_cat"
  )
  # create text risk categories ----
  seg_risk_cat_cols_tbl <- dplyr::mutate(risk_cat_join_tbl,
      risk_cat_txt = 
      dplyr::case_when(
        abs_risk < 0.5 ~ "None",
        abs_risk >= 0.5 & abs_risk <= 1 ~ "Slight, Lower",
        abs_risk > 1 & abs_risk <= 1.5 ~ "Slight, Higher",
        abs_risk > 1.5 & abs_risk <= 2.0 ~ "Moderate, Lower",
        abs_risk > 2 & abs_risk <= 2.5 ~ "Moderate, Higher",
        abs_risk > 2.5 & abs_risk <= 3.0 ~ "Severe, Lower",
        abs_risk > 3.0 & abs_risk <= 3.5 ~ "Severe, Higher",
        abs_risk > 3.5 ~ "Extreme"
      )
    )
  return(seg_risk_cat_cols_tbl)
  }
  
  risk_cat_cols <- seg_risk_cat_cols(df = test_vand_comp_data)

  seg_iso_cols <- function(risk_cat_cols) {
    iso_cols_tbl <- dplyr::mutate(risk_cat_cols, 
      rel_diff = (BGM - REF) / REF, # relative diff
      abs_rel_diff = abs(rel_diff), # abs relative diff
      sq_rel_diff = rel_diff^2,
      iso_diff =
      if_else(REF >= 100, # condition 1
        100 * abs(BGM - REF) / REF, # T 1
        if_else(REF < 100, # condition 2
          abs(BGM - REF), # T 2
          NA_real_
        ), # F 2
        NA_real_
      ), # F1
      iso_range = # # 4.3.16 create iso range variable ----
      dplyr::case_when(
        iso_diff <= 5 ~ "<= 5% or 5 mg/dL",
        iso_diff > 5 & iso_diff <= 10 ~ "> 5 - 10% or mg/dL",
        iso_diff > 10 & iso_diff <= 15 ~ "> 10 - 15% or mg/dL",
        iso_diff > 15 & iso_diff <= 20 ~ "> 15 - 20% mg/dL",
        iso_diff > 20 ~ "> 20% or 20 mg/dL"),

      risk_grade = dplyr::case_when(
        abs_risk >= 0.0 & abs_risk < 0.5 ~ "A",
        abs_risk >= 0.5 & abs_risk < 1.0 ~ "B",
        abs_risk >= 1.0 & abs_risk < 2.0 ~ "C",
        abs_risk >= 2.0 & abs_risk < 3.0 ~ "D",
        abs_risk >= 3.0 ~ "E"
      ),
      risk_grade_txt = dplyr::case_when(
        abs_risk >= 0.0 & abs_risk < 0.5 ~ "0 - 0.5",
        abs_risk >= 0.5 & abs_risk < 1.0 ~ "> 0.5 - 1.0",
        abs_risk >= 1.0 & abs_risk < 2.0 ~ "> 1.0 - 2.0",
        abs_risk >= 2.0 & abs_risk < 3.0 ~ "> 2.0 - 3.0",
        abs_risk >= 3.0 ~ "> 3.0"
      )
    )
    return(iso_cols_tbl)
  }
  
  risk_vars <- seg_iso_cols(risk_cat_cols = risk_cat_cols)
  return(risk_vars)
}

Below I check the differences between seg_risk_vars() and the iso_vars_tbl output:

risk_vars_tbl <- seg_risk_vars(df = test_vand_comp_data)
waldo::compare(x = iso_vars_tbl, y = risk_vars_tbl)
#> ✔ No differences

Pair Type Table

The original pairtypeTable() function is below:

pairtypeTable <- function(dat) {
  # 2.1.1 - import data frame/define SampMeasData ----
  SampMeasData <- readr::read_csv(dat)
  # 2.1.2 - convert the columns to numeric  ----
  SampMeasData <- SampMeasData |> 
    dplyr::mutate(BGM = as.double(BGM),
                  REF = as.double(REF |> 
    # 2.2 create bgm_pair_cat ---- 
  dplyr::mutate(
    bgm_pair_cat =
      dplyr::case_when(
        BGM < REF ~ "BGM < REF",
        BGM == REF ~ "BGM = REF",
        BGM > REF ~ "BGM > REF"
      )
  ) |> 
    # 2.3 create excluded ---- 
  dplyr::mutate(
    excluded =
      dplyr::case_when(
        REF > 600 ~ "REF > 600: Excluded from SEG Analysis",
        TRUE ~ NA_character_
      )
  ) |> 
    # 2.4 create included ---- 
  dplyr::mutate(
    included =
      dplyr::case_when(
        REF <= 600 ~ "Total included in SEG Analysis",
        REF > 600 ~ "Total excluded in SEG Analysis"
      )
  )
  # 2.5 create BGMPairs ----
  BGMPairs <- SampMeasData  |> 
    dplyr::count(bgm_pair_cat)  |> 
    dplyr::rename(
      `Pair Type` = bgm_pair_cat,
      Count = n
    )
  # 2.6 create Excluded ----
  Excluded <- SampMeasData  |> 
    dplyr::count(excluded) |> 
    dplyr::rename(
      `Pair Type` = excluded,
      Count = n
    )  |> 
    dplyr::filter(!is.na(`Pair Type`))
  # 2.7 create Included ----
  Included <- SampMeasData  |> 
    dplyr::count(included)  |> %
    dplyr::rename(
      `Pair Type` = included,
      Count = n
    ) |> 
    dplyr::filter(`Pair Type` == "Total included in SEG Analysis")
  # 2.8 create PairTypeTable ----
  PairTypeTable <- dplyr::bind_rows(BGMPairs, 
                                    Excluded, 
                                    Included)
  # 2.9 add the Total row  ----
  PairTypeTable <- PairTypeTable |> tibble::add_row(
    `Pair Type` = "Total",
    Count = nrow(SampMeasData),
    .after = 0
  )
  return(PairTypeTable)
}

I’ve re-written this as seg_pair_type_tbl()

`seg_pair_type_tbl()`

seg_pair_type_tbl <- function(df) {
  # manipulate data directly 
  samp_meas_data <- dplyr::mutate(.data = df, 
      BGM = as.double(BGM),
      REF = as.double(REF),
    # create bgm_pair_cat ----
    bgm_pair_cat =
      dplyr::case_when(
        BGM < REF ~ "BGM < REF",
        BGM == REF ~ "BGM = REF",
        BGM > REF ~ "BGM > REF"
      ),
    # create excluded ---- 
    excluded =
      dplyr::case_when(
        REF > 600 ~ "REF > 600: Excluded from SEG Analysis",
        TRUE ~ NA_character_
      ),
    # create included ---- 
    included =
      dplyr::case_when(
        REF <= 600 ~ "Total included in SEG Analysis",
        REF > 600 ~ "Total excluded in SEG Analysis"
      )
  )
  
  # create bgm_pair_cnt_tbl ----
  bgm_pair_cnt_tbl <- dplyr::count(samp_meas_data, 
    bgm_pair_cat)
  # create bgm_pairs_tbl ----
  bgm_pairs_tbl <- dplyr::rename(.data = bgm_pair_cnt_tbl, 
                      `Pair Type` = bgm_pair_cat,
                      Count = n)
  
  # create excluded_cnts_tbl ----
  excluded_cnts_tbl <- dplyr::count(samp_meas_data, excluded) 
  # create excluded_pairs_tbl ----
  excluded_pairs_tbl <- dplyr::rename(excluded_cnts_tbl,
      `Pair Type` = excluded,
      Count = n) |> 
    dplyr::filter(!is.na(`Pair Type`))
  
  # create Included ----
  included_cnts_tbl <- dplyr::count(samp_meas_data, included)
  
  included_pairs_tbl <- dplyr::rename(included_cnts_tbl,
      `Pair Type` = included,
      Count = n
    ) |> 
    dplyr::filter(`Pair Type` == "Total included in SEG Analysis")
  
  # 2.8 create pair_types ----
  pair_types <- dplyr::bind_rows(bgm_pairs_tbl, 
                                    excluded_pairs_tbl, 
                                    included_pairs_tbl)
  # 2.9 add the Total row  ----
  pair_type_tbl <- tibble::add_row(pair_types,
    `Pair Type` = "Total",
    Count = nrow(samp_meas_data),
    .after = 0
  )
  return(pair_type_tbl)
}

When I test this function on the full_sample_repo data (VanderbiltComplete.csv), I see the following:

seg_pair_type_tbl(df = test_vand_comp_data)
#> # A tibble: 6 × 2
#>   `Pair Type`                           Count
#>   <chr>                                 <int>
#> 1 Total                                  9891
#> 2 BGM < REF                              4710
#> 3 BGM = REF                               479
#> 4 BGM > REF                              4702
#> 5 REF > 600: Excluded from SEG Analysis    23
#> 6 Total included in SEG Analysis         9868

Application `PairTypeTable`

After uploading VanderbiltComplete.csv, the first table in the Summary Tables tab is the pairs table:

I’ve also stored these as app_pairs_tbl:

app_pairs_tbl = tibble::as_tibble(data.frame(
  stringsAsFactors = FALSE,
  check.names = FALSE,
  `Pair Type` = c(
    "Total",
    "BGM < REF",
    "BGM = REF",
    "BGM > REF",
    "REF > 600: Excluded from SEG Analysis",
    "Total included in SEG Analysis"
  ),
  Count = c(9891L, 4710L, 479L, 4702L, 23L, 9868L)
))

Pair Type	Count
Total	9891
BGM < REF	4710
BGM = REF	479
BGM > REF	4702
REF > 600: Excluded from SEG Analysis	23
Total included in SEG Analysis	9868

Test

To test if the two tables are identical, I use testthat’s testthat::expect_equal() function, with object set to seg_pair_type_tbl(dat = test_vand_comp_data) and expected set to app_pairs_tbl.

testthat::test_that("Test seg_pair_type_tbl()", {
  testthat::expect_equal(
    # function table
    object = seg_pair_type_tbl(df = test_vand_comp_data), 
    # application table
    expected = app_pairs_tbl
  )
})
#> Test passed 🌈

MARD Table

The MARDTable is created in the server function of the application, starting here.

I’ve converted it into a function, seg_mard_tbl(), which takes the output from seg_risk_vars():

`seg_mard_tbl()`

seg_mard_tbl <- function(risk_vars) {
  mard_cols <- data.frame(
    Total = c(nrow(risk_vars)),
    Bias = c(mean(risk_vars$rel_diff)),
    MARD = c(mean(risk_vars$abs_rel_diff)),
    CV = c(sd(risk_vars$rel_diff)),
    stringsAsFactors = FALSE,
    check.names = FALSE
  )

  lower_tbl <- tibble::add_column(
    .data = mard_cols,
    `Lower 95% Limit of Agreement` = mard_cols$Bias - 1.96 * mard_cols$CV
  )
  upper_tbl <- tibble::add_column(
    .data = lower_tbl,
    `Upper 95% Limit of Agreement` = mard_cols$Bias + 1.96 * mard_cols$CV
  )
  mard_vars <- dplyr::mutate(
    .data = upper_tbl,
    Bias = base::paste0(base::round(
      100 * Bias,
      digits = 1
    ), "%"),
    MARD = base::paste0(base::round(
      100 * MARD,
      digits = 1
    ), "%"),
    CV = base::paste0(base::round(
      100 * CV,
      digits = 1
    ), "%"),
    `Lower 95% Limit of Agreement` = base::paste0(base::round(
      100 * `Lower 95% Limit of Agreement`,
      digits = 1
    ), "%"),
    `Upper 95% Limit of Agreement` = base::paste0(base::round(
      100 * `Upper 95% Limit of Agreement`,
      digits = 1
    ), "%")
  )
  mard_vars_tbl <- tibble::as_tibble(mard_vars)
  return(mard_vars_tbl)
}

Below I test the seg_mard_tbl() function

seg_mard_tbl(risk_vars = risk_vars_tbl)

Total	Bias	MARD	CV	Lower 95% Limit of Agreement	Upper 95% Limit of Agreement
9868	0.6%	7%	14.8%	-28.3%	29.6%

Application `MARDTable`

The MARD table from the application is below:

Once again, I will store the app’s MARD table into and object I can test using datapasta::tribble_paste() (as app_mard_tbl).

app_mard_tbl <- tibble::tibble(
    Total = 9868L,
    Bias = "0.6%",
    MARD = "7%",
    CV = "14.8%",
    `Lower 95% Limit of Agreement` = "-28.3%",
    `Upper 95% Limit of Agreement` = "29.6%")
app_mard_tbl

Total	Bias	MARD	CV	Lower 95% Limit of Agreement	Upper 95% Limit of Agreement
9868	0.6%	7%	14.8%	-28.3%	29.6%

Test

Below I test both MARD tables using testthat::expect_equal() again:

testthat::test_that("Test MARD table values", {
testthat::expect_equal(
  # function table
  object = seg_mard_tbl(risk_vars = risk_vars_tbl),
  # application table
  expected = app_mard_tbl
  )
})
#> Test passed 😸

Risk Grade Table

The RiskGradeTable3 is also created in the server, which I’ve wrapped into a function:

seg_risk_grade_tbl <- function(risk_vars) {
  risk_grade_cnts <- dplyr::count(risk_vars,
    risk_grade,
    sort = TRUE
  )

  lkpRiskGrade <- tibble::tibble(
      risk_grade_id = c(1, 2, 3, 4, 5),
      risk_grade = c("A", "B", "C", "D", "E"),
      REF = c("0 - 0.5", "> 0.5 - 1.0", "> 1.0 - 2.0", "> 2.0 - 3.0", "> 3.0"))
  
  risk_grade_joined <- dplyr::full_join(
    x = risk_grade_cnts,
    y = lkpRiskGrade,
    by = "risk_grade"
  )

  # change lkp table variables
  risk_grade_vars_tbl <- dplyr::mutate(
    .data = risk_grade_joined,
    risk_grade_id = as.numeric(risk_grade_id),
    Percent = base::paste0(
      base::round(n / nrow(risk_vars) * 100,
        digits = 1
      ),
      if_else(condition = is.na(n),
        true = "", false = "%"
      )
    )
  ) |> 
    # rename variables
    dplyr::select(
      ID = risk_grade_id,
      `Risk Grade` = risk_grade,
      `Number of Pairs` = n,
      Percent,
      # `REF Range` = REF
      `Risk Factor Range` = REF
    )
  
  return(risk_grade_vars_tbl)
}

seg_risk_grade_tbl(risk_vars = risk_vars_tbl)

ID	Risk Grade	Number of Pairs	Percent	Risk Factor Range
1	A	9474	96%	0 - 0.5
2	B	294	3%	> 0.5 - 1.0
3	C	79	0.8%	> 1.0 - 2.0
4	D	21	0.2%	> 2.0 - 3.0
5	E	NA	NA	> 3.0

I’ll compare this to the application output below:

Application `RiskGradeTable`

But I can build it manually and wrap the output tibble in the data.frame).

app_risk_grade_tbl <- as.data.frame(
  tibble::tibble(
    ID = c(1L, 2L, 3L, 4L, 5L),
    `Risk Grade` = c('A', 'B', 'C', 'D', 'E'),
    `Number of Pairs` = c(9474L, 349L, 35L, 10L, NA_integer_),
    Percent = c('96%', '3.5%', '0.4%', '0.1%', NA_character_),
    `Risk Factor Range` = c('0 - 0.5', '> 0.5 - 1.5', '> 1.5 - 2.5',
      '> 2.5 - 3.5', '> 3.5'),
  )
)

app_risk_grade_tbl

ID	Risk Grade	Number of Pairs	Percent	Risk Factor Range
1	A	9474	96%	0 - 0.5
2	B	349	3.5%	> 0.5 - 1.5
3	C	35	0.4%	> 1.5 - 2.5
4	D	10	0.1%	> 2.5 - 3.5
5	E	NA	NA	> 3.5

I can see there are some discrepancies here due to the risk_grade and risk_grade_txt in the seg_iso_cols() function:

      risk_grade = dplyr::case_when(
        abs_risk >= 0.0 & abs_risk < 0.5 ~ "A",
        abs_risk >= 0.5 & abs_risk < 1.5 ~ "B",
        abs_risk >= 1.5 & abs_risk < 2.5 ~ "C",
        abs_risk >= 2.5 & abs_risk < 3.5 ~ "D",
        abs_risk >= 3.5 ~ "E"
      ),
      risk_grade_txt = dplyr::case_when(
        abs_risk >= 0.0 & abs_risk < 0.5 ~ "0 - 0.5",
        abs_risk >= 0.5 & abs_risk < 1.0 ~ "> 0.5 - 1.0",
        abs_risk >= 1.0 & abs_risk < 2.0 ~ "> 1.0 - 2.0",
        abs_risk >= 2.0 & abs_risk < 3.0 ~ "> 2.0 - 3.0",
        abs_risk >= 3.0 ~ "> 3.0"
      )

Update `seg_risk_vars()`

I remedy this issue in an updated seg_risk_vars()

seg_risk_vars <- function(df) {
  
  seg_risk_cat_cols <- function(df) {
    LookUpRiskCat <- data.frame(
      risk_cat = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L),
      ABSLB = c(-0.001, 0.5, 1, 1.5, 2, 2.5, 3, 3.5),
      ABSUB = c(0.5, 1, 1.5, 2, 2.5, 3, 3.5, 10)
    )

    # import RiskPairData -----
    RiskPairData <- segtools::RiskPairData

    # directly manipulate df
    bgm_ref_tbl <- dplyr::mutate(df,
      BGM = as.double(BGM),
      REF = as.double(REF)
    )

    # create bgm_pair_cat ----
    bgm_pair_cat_tbl <- dplyr::mutate(bgm_ref_tbl,
      bgm_pair_cat =
        dplyr::case_when(
          BGM < REF ~ "BGM < REF",
          BGM == REF ~ "BGM = REF",
          BGM > REF ~ "BGM > REF"
        )
    )
    # create ref_pair_2cat ----
    ref_pair_2cat_tbl <- dplyr::mutate(bgm_pair_cat_tbl,
      ref_pair_2cat =
        dplyr::case_when(
          REF > 600 ~ "REF > 600: Excluded from SEG Analysis",
          REF < 21 & REF <= 600 ~ "REF <21: Included in SEG Analysis"
        )
    )
    # create included ----
    included_tbl <- dplyr::mutate(ref_pair_2cat_tbl,
      included =
        dplyr::case_when(
          REF <= 600 ~ "Total included in SEG Analysis",
          REF > 600 ~ "Total excluded in SEG Analysis"
        )
    )
    # join to RiskPairData ----
    risk_pair_tbl <- dplyr::inner_join(included_tbl,
      y = RiskPairData,
      by = c("BGM", "REF")
    )
    # Create risk_cat variable ----
    risk_cat_tbl <- dplyr::mutate(risk_pair_tbl,
      risk_cat =
        base::findInterval(
          x = abs_risk, # the abs_risk absolute value
          vec = LookUpRiskCat$ABSLB, # the lower bound absolute risk
          left.open = TRUE
        ) - 1
    )
    # Join to LookUpRiskCat data ----
    risk_cat_join_tbl <- dplyr::inner_join(
      x = risk_cat_tbl,
      y = LookUpRiskCat, # inner join to look-up
      by = "risk_cat"
    )
    # create text risk categories ----
    seg_risk_cat_vars_tbl <- dplyr::mutate(risk_cat_join_tbl,
      risk_cat_txt =
        dplyr::case_when(
          abs_risk < 0.5 ~ "None",
          abs_risk >= 0.5 & abs_risk <= 1 ~ "Slight, Lower",
          abs_risk > 1 & abs_risk <= 1.5 ~ "Slight, Higher",
          abs_risk > 1.5 & abs_risk <= 2.0 ~ "Moderate, Lower",
          abs_risk > 2 & abs_risk <= 2.5 ~ "Moderate, Higher",
          abs_risk > 2.5 & abs_risk <= 3.0 ~ "Severe, Lower",
          abs_risk > 3.0 & abs_risk <= 3.5 ~ "Severe, Higher",
          abs_risk > 3.5 ~ "Extreme"
        )
    )
    return(seg_risk_cat_vars_tbl)
  }

  risk_cat_vars_tbl <- seg_risk_cat_cols(df = df)

  seg_iso_cols <- function(risk_cat_vars) {
    iso_vars_tbl <- dplyr::mutate(risk_cat_vars,
      rel_diff = (BGM - REF) / REF, # relative diff
      abs_rel_diff = abs(rel_diff), # abs relative diff
      sq_rel_diff = rel_diff^2,
      iso_diff =
        if_else(REF >= 100, # condition 1
          100 * abs(BGM - REF) / REF, # T 1
          if_else(REF < 100, # condition 2
            abs(BGM - REF), # T 2
            NA_real_
          ), # F 2
          NA_real_
        ), # F1
      iso_range = # # 4.3.16 create iso range variable ----
        dplyr::case_when(
          iso_diff <= 5 ~ "<= 5% or 5 mg/dL",
          iso_diff > 5 & iso_diff <= 10 ~ "> 5 - 10% or mg/dL",
          iso_diff > 10 & iso_diff <= 15 ~ "> 10 - 15% or mg/dL",
          iso_diff > 15 & iso_diff <= 20 ~ "> 15 - 20% mg/dL",
          iso_diff > 20 ~ "> 20% or 20 mg/dL"
        ),
      risk_grade = dplyr::case_when(
        abs_risk >= 0.0 & abs_risk < 0.5 ~ "A",
        abs_risk >= 0.5 & abs_risk < 1.5 ~ "B",
        abs_risk >= 1.5 & abs_risk < 2.5 ~ "C",
        abs_risk >= 2.5 & abs_risk < 3.5 ~ "D",
        abs_risk >= 3.5 ~ "E"
      ),
      risk_grade_txt = dplyr::case_when(
        abs_risk >= 0.0 & abs_risk < 0.5 ~ "0 - 0.5",
        abs_risk >= 0.5 & abs_risk < 1.5 ~ "> 0.5 - 1.5",
        abs_risk >= 1.5 & abs_risk < 2.5 ~ "> 1.5 - 2.5",
        abs_risk >= 2.5 & abs_risk < 3.5 ~ "> 2.5 - 3.5",
        abs_risk >= 3.5 ~ "> 3.5"
      )
    )
    return(iso_vars_tbl)
  }

  iso_vars_tbl <- seg_iso_cols(risk_cat_vars = risk_cat_vars_tbl)

  return(iso_vars_tbl)
}

Now I check the seg_risk_vars() and seg_risk_grade_tbl() functions:

# risk_cat_vars_tbl <- seg_risk_cat_cols(df = test_vand_comp_data)
# risk_vars_tbl <- seg_iso_cols(risk_cat_vars = risk_cat_vars_tbl)
risk_vars_tbl <- seg_risk_vars(df = test_vand_comp_data)
risk_grade_tbl <- seg_risk_grade_tbl(risk_vars = risk_vars_tbl)
risk_grade_tbl
#> # A tibble: 5 × 5
#>      ID `Risk Grade` `Number of Pairs` Percent `Risk Factor Range`
#>   <dbl> <chr>                    <int> <chr>   <chr>              
#> 1     1 A                         9474 96%     0 - 0.5            
#> 2     2 B                          349 3.5%    > 0.5 - 1.0        
#> 3     3 C                           35 0.4%    > 1.0 - 2.0        
#> 4     4 D                           10 0.1%    > 2.0 - 3.0        
#> 5     5 E                           NA NA      > 3.0

Test

The risk grade table doesn’t necessarily require that we test the entire table structure–we can focus on the specific columns we care about in the output (like Number of Pairs)

testthat::expect_equal() uses the waldo package to make comparisons, so I’ll use that first here to check the Number of Pairs columns:

waldo::compare(
  x = risk_grade_tbl$`Number of Pairs`, 
  y = app_risk_grade_tbl$`Number of Pairs`
)
#> ✔ No differences

Now we can formally test the columns using testthat::expect_equal() and limit testing to only the Number of Pairs column:

testthat::test_that("Test risk grade table", {
testthat::expect_equal(
  # function table
  object = risk_grade_tbl$`Number of Pairs`, 
  # application table
  expected = app_risk_grade_tbl$`Number of Pairs`
  )
})
#> Test passed 🌈

Risk Category Table

The SEGRiskCategoryTable4 table is created in the app server function, and I’ve re-written it below as seg_risk_cat_tbl(risk_vars = )

seg_risk_cat_tbl <- function(risk_vars) {
  # count risk cats
risk_cat_cnts <- dplyr::count(risk_vars,
  risk_cat,
  sort = TRUE
)

# define lkpSEGRiskCat4 ----
lkpSEGRiskCat4 <- tibble::tibble(
  risk_cat = 0:7,
  risk_cat_txt = c(
    "None",
    "Slight, Lower",
    "Slight, Higher",
    "Moderate, Lower",
    "Moderate, Higher",
    "Severe, Lower",
    "Severe, Upper",
    "Extreme"
  ),
  ABSLB = c(-0.001, 0.5, 1, 1.5, 2, 2.5, 3, 3),
  ABSUB = c(0.5, 1, 1.5, 2, 2.5, 3, 3.5, 1000)
)

risk_cat_joined <- dplyr::full_join(
  x = risk_cat_cnts,
  y = lkpSEGRiskCat4,
  by = "risk_cat"
)

risk_cat_cols <- dplyr::mutate(
  risk_cat_joined,
  risk_cat = as.numeric(risk_cat),
  Percent = base::paste0(
    base::round(n / nrow(risk_vars) * 100,
      digits = 1
    ),
    if_else(
      condition = is.na(n),
      true = "",
      false = "%"
    )
  )
) |> 
  dplyr::arrange(desc(n))

risk_cat_vars <- dplyr::select(risk_cat_cols,
    `SEG Risk Level` = risk_cat,
    `SEG Risk Category` = risk_cat_txt,
    `Number of Pairs` = n,
    Percent
  )
return(risk_cat_vars)
}

seg_risk_cat_tbl(risk_vars_tbl)

SEG Risk Level	SEG Risk Category	Number of Pairs	Percent
0	None	9474	96%
1	Slight, Lower	294	3%
2	Slight, Higher	55	0.6%
3	Moderate, Lower	24	0.2%
4	Moderate, Higher	11	0.1%
5	Severe, Lower	10	0.1%
6	Severe, Upper	NA	NA
7	Extreme	NA	NA

Application `RiskCategoryTable`

Below is the app display of the risk level table.

The risk level table is also built with DT, and I’ve built it below as app_risk_level_tbl (for testing)

app_risk_level_tbl <- as.data.frame(
  tibble::tibble(
    `SEG Risk Level` = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L),
    `SEG Risk Category` = c(
      'None', 
      'Slight, Lower', 'Slight, Higher',
      'Moderate, Lower','Moderate, Higher',
      'Severe, Lower', 'Severe, Higher',
      'Extreme'
    ),
    `Number of Pairs` = c(9474L, 294L, 55L, 24L, 11L, 10L, NA_integer_, NA_integer_),
    Percent = c(
      '96%', '3%', '0.6%', '0.2%', '0.1%', '0.1%', NA_character_, NA_character_
    ),
  )
)

app_risk_level_tbl

SEG Risk Level	SEG Risk Category	Number of Pairs	Percent
0	None	9474	96%
1	Slight, Lower	294	3%
2	Slight, Higher	55	0.6%
3	Moderate, Lower	24	0.2%
4	Moderate, Higher	11	0.1%
5	Severe, Lower	10	0.1%
6	Severe, Higher	NA	NA
7	Extreme	NA	NA

Test

I’ll limit the test to the Number of Pairs column.

testthat::test_that("Test risk category pairs", {
testthat::expect_equal(
  # function table
  object = seg_risk_cat_tbl(risk_vars_tbl)$`Number of Pairs`, 
  # application table
  expected = app_risk_level_tbl$`Number of Pairs`
  )
})
#> Test passed 🥳

ISO Range Table

The ISORangeTable5 table is also created in the server function in the App.R file.

I’ve re-written this below as seg_iso_range_tbl():

seg_iso_range_tbl <- function(risk_vars) {

  lkpISORanges <- tibble::tribble(
       ~ID,                ~iso_range,
        1L,    "<= 5% or 5 mg/dL",
        2L,  "> 5 - 10% or mg/dL",
        3L, "> 10 - 15% or mg/dL",
        4L,    "> 15 - 20% mg/dL",
        5L,   "> 20% or 20 mg/dL")

  iso_range_cnts <- dplyr::count(risk_vars,
    iso_range,
    sort = TRUE
  )

  iso_range_joined <- dplyr::full_join(
    x = iso_range_cnts,
    y = lkpISORanges,
    by = "iso_range"
  )
    iso_range_vars <- dplyr::mutate(iso_range_joined,
    Percent = base::paste0(
      base::round(n / nrow(risk_vars) * 100,
        digits = 1
      ),
      dplyr::if_else(condition = is.na(n),
        true = "",
        false = "%"
      )
    )
  ) |>
    dplyr::arrange(desc(n))

  iso_range_vars_tbl <- dplyr::select(iso_range_vars, ID,
    `ISO range` = iso_range,
    N = n,
    Percent
  )
  return(iso_range_vars_tbl)
}

seg_iso_range_tbl(risk_vars_tbl)
#> # A tibble: 5 × 4
#>      ID `ISO range`             N Percent
#>   <int> <chr>               <int> <chr>  
#> 1     1 <= 5% or 5 mg/dL     5328 54%    
#> 2     2 > 5 - 10% or mg/dL   2842 28.8%  
#> 3     3 > 10 - 15% or mg/dL  1050 10.6%  
#> 4     4 > 15 - 20% mg/dL      340 3.4%   
#> 5     5 > 20% or 20 mg/dL     308 3.1%

Application `ISORangeTable`

Below is the output for the ISORangeTable in the application

I’ve re-created this as app_iso_range_tbl below:

app_iso_range_tbl <- tibble::tribble(
     ~ID,          ~`ISO range`,    ~N, ~Percent,
      1L,    "<= 5% or 5 mg/dL", 5328L,    "54%",
      2L,  "> 5 - 10% or mg/dL", 2842L,  "28.8%",
      3L, "> 10 - 15% or mg/dL", 1050L,  "10.6%",
      4L,    "> 15 - 20% mg/dL",  340L,   "3.4%",
      5L,   "> 20% or 20 mg/dL",  308L,   "3.1%"
     )
app_iso_range_tbl

ID	ISO range	N	Percent
1	<= 5% or 5 mg/dL	5328	54%
2	> 5 - 10% or mg/dL	2842	28.8%
3	> 10 - 15% or mg/dL	1050	10.6%
4	> 15 - 20% mg/dL	340	3.4%
5	> 20% or 20 mg/dL	308	3.1%

Test

Below we test the two ISORangeTable tables

testthat::test_that("Test risk grade table", {
testthat::expect_equal(
  # function table
  object = seg_iso_range_tbl(risk_vars_tbl),
  # application table
  expected = app_iso_range_tbl
  )
})
#> Test passed 😀

Compliant Pairs Table

The compliant pairs table is created with the binomialTable() function in the original application, and uses the output from segTable() (which I’ve replaced with seg_risk_vars()).

binomialTable <- function(dat) {

  dataset <- segTable(dat)

  CompliantPairs <- nrow(dataset) - base::nrow(dplyr::filter(
    dataset, iso_diff > 15)) %>%
  tibble(
    `Compliant Pairs` = .)
# CompliantPairs
# Then calculate the percent
CompliantPairs <- CompliantPairs %>%
  dplyr::mutate(
    `Compliant Pairs %` =
      base::paste0(base::round(
        100 * `Compliant Pairs` / nrow(dataset),
        1
      ), "%") )
# CompliantPairs
# create probability
prb <- 0.95
p_value <- 0.05
df_size <- nrow(dataset)
qbinom_tibble <- qbinom(
    p = p_value,
    size = df_size,
    prob = prb) %>%
    tibble(`value` = .) %>%
    # clean up this variable in the tibble for display
    dplyr::rename(`Lower Bound for Acceptance` = value)
# qbinom_tibble
QbinomTable <- qbinom_tibble %>%
  dplyr::mutate(
    `Lower Bound for Acceptance %` =
      base::paste0(base::round(
        100 * `Lower Bound for Acceptance` / nrow(dataset),
        1
      ), "%")
  )
BinomialTest6 <- bind_cols(CompliantPairs, QbinomTable)
BinomialTest6 <- BinomialTest6 %>% dplyr::mutate(
  Result =
    if_else(condition = `Compliant Pairs` < `Lower Bound for Acceptance`,
      true = paste0(
        BinomialTest6$`Compliant Pairs %`[1],
        " < ",
        BinomialTest6$`Lower Bound for Acceptance %`[1],
        " - Does not meet BGM Surveillance Study Accuracy Standard"
      ),
      false = paste0(
        BinomialTest6$`Compliant Pairs %`[1],
        " > ",
        BinomialTest6$`Lower Bound for Acceptance %`[1],
        " - Meets BGM Surveillance Study Accuracy Standard"
      )
    )
)
return(BinomialTest6)
}

I’ve re-written this below as seg_binom_tbl()

`seg_binom_tbl()`

seg_binom_tbl <- function(risk_vars) {
  
  compliant_pairs <- tibble(`Compliant Pairs` = 
        base::nrow(risk_vars) - base::nrow(dplyr::filter(risk_vars, iso_diff > 15)))

  # calculate the percent
  compliant_pairs_perc <-
    dplyr::mutate(compliant_pairs,
      `Compliant Pairs %` =
        base::paste0(base::round(
          100 * `Compliant Pairs` / nrow(risk_vars),
          1
        ), "%")
    )
  # create probability
  prb <- 0.95
  p_value <- 0.05
  df_size <- nrow(risk_vars)
  qbinom_vector <- qbinom(
    p = p_value,
    size = df_size,
    prob = prb
  )
    qbinom_tbl <- tibble(`Lower Bound for Acceptance` = qbinom_vector)
    # clean up this variable in the tibble for display
  # qbinom_tbl
    qbinom_tbl <- dplyr::mutate(qbinom_tbl,
      `Lower Bound for Acceptance %` =
        base::paste0(base::round(
          100 * `Lower Bound for Acceptance` / nrow(risk_vars),
          1
        ), "%")
    )

  binom_test_tbl <- dplyr::bind_cols(compliant_pairs_perc, qbinom_tbl)

  binom_table <- dplyr::mutate(binom_test_tbl,
    Result =
      dplyr::if_else(condition = `Compliant Pairs` < `Lower Bound for Acceptance`,
        true = paste0(
          binom_test_tbl$`Compliant Pairs %`[1],
          " < ",
          binom_test_tbl$`Lower Bound for Acceptance %`[1],
          " - Does not meet BGM Surveillance Study Accuracy Standard"
        ),
        false = paste0(
          binom_test_tbl$`Compliant Pairs %`[1],
          " > ",
          binom_test_tbl$`Lower Bound for Acceptance %`[1],
          " - Meets BGM Surveillance Study Accuracy Standard"
        )
      )
  )
  return(binom_table)
}

Below I check seg_binom_tbl() with the output from seg_risk_vars()

seg_binom_tbl(risk_vars = risk_vars_tbl)

Compliant Pairs	Compliant Pairs %	Lower Bound for Acceptance	Lower Bound for Acceptance %	Result
9220	93.4%	9339	94.6%	93.4% < 94.6% - Does not meet BGM Surveillance Study Accuracy Standard

Application `BinomialTest` table

Below is the binomial test table (aka the compliant pairs table) in the application.

I’ve re-created this table as app_binomial_tbl below:

app_binomial_tbl <- tibble::as_tibble(data.frame(
  stringsAsFactors = FALSE,
  check.names = FALSE,
  `Compliant Pairs` = c(9220L),
  `Compliant Pairs %` = c("93.4%"),
  `Lower Bound for Acceptance` = c(9339L),
  `Lower Bound for Acceptance %` = c("94.6%"),
  Result = c("93.4% < 94.6% - Does not meet BGM Surveillance Study Accuracy Standard")
))
app_binomial_tbl

Compliant Pairs	Compliant Pairs %	Lower Bound for Acceptance	Lower Bound for Acceptance %	Result
9220	93.4%	9339	94.6%	93.4% < 94.6% - Does not meet BGM Surveillance Study Accuracy Standard

Test

We’ll do a final test of the binomial test table.

testthat::test_that("Test compliant pairs table", {
testthat::expect_equal(
  # function table
  object = seg_binom_tbl(risk_vars = risk_vars_tbl),
  # application table
  expected = app_binomial_tbl
  )
})
#> Test passed 🌈

Load test data

Application (version 1.3.3) functions

SEG variables

Error

SEG Risk Category Columns

SEG ISO Range Columns

Combine SEG Risk Category and ISO Range columns

Pair Type Table

seg_pair_type_tbl()

Application PairTypeTable

Test

MARD Table

seg_mard_tbl()

Application MARDTable

Test

Risk Grade Table

Application RiskGradeTable

Update seg_risk_vars()

Test

Risk Category Table

Application RiskCategoryTable

Test

ISO Range Table

Application ISORangeTable

Test

Compliant Pairs Table

seg_binom_tbl()

Application BinomialTest table

Test

`seg_pair_type_tbl()`

Application `PairTypeTable`

`seg_mard_tbl()`

Application `MARDTable`

Application `RiskGradeTable`

Update `seg_risk_vars()`

Application `RiskCategoryTable`

Application `ISORangeTable`

`seg_binom_tbl()`

Application `BinomialTest` table