Graphs
graphs.Rmd
Below are examples of the data visualizations using
inspectdf
in the Shiny app.
Data
We’ll need data to demonstrate how the inspect_plot()
function works. We’ll use the load_tt_data()
function to
collect four lists of data from the TidyTuesday
repo.
"Netflix Titles"
has a single dataset:
ttd <- load_tt_data("Netflix Titles")
#> INFO [2025-07-24 06:20:46] Starting import for netflix_titles.csv from https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2021/2021-04-20/netflix_titles.csv
#> SUCCESS [2025-07-24 06:20:46] Successfully loaded netflix_titles.csv
purrr::map(.x = ttd, .f = attr, "clean_title")
#> $netflix_titles.csv
#> [1] "netflix_titles"
# for a list with two datasets, we'll use the `"Space Launches"` data
ttd2 <- load_tt_data("Space Launches")
#> INFO [2025-07-24 06:20:46] Starting import for agencies.csv from https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2019/2019-01-15/agencies.csv
#> SUCCESS [2025-07-24 06:20:47] Successfully loaded agencies.csv
#> INFO [2025-07-24 06:20:47] Starting import for launches.csv from https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2019/2019-01-15/launches.csv
#> SUCCESS [2025-07-24 06:20:47] Successfully loaded launches.csv
purrr::map(.x = ttd2, .f = attr, "clean_title")
#> $agencies.csv
#> [1] "space_launches"
#>
#> $launches.csv
#> [1] "space_launches"
# "Moore's Law" contains three datasets
ttd3 <- load_tt_data("Moore’s Law")
#> INFO [2025-07-24 06:20:47] Starting import for cpu.csv from https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2019/2019-09-03/cpu.csv
#> SUCCESS [2025-07-24 06:20:47] Successfully loaded cpu.csv
#> INFO [2025-07-24 06:20:47] Starting import for gpu.csv from https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2019/2019-09-03/gpu.csv
#> SUCCESS [2025-07-24 06:20:47] Successfully loaded gpu.csv
#> INFO [2025-07-24 06:20:47] Starting import for ram.csv from https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2019/2019-09-03/ram.csv
#> SUCCESS [2025-07-24 06:20:47] Successfully loaded ram.csv
purrr::map(.x = ttd3, .f = attr, "clean_title")
#> $cpu.csv
#> [1] "moores_law"
#>
#> $gpu.csv
#> [1] "moores_law"
#>
#> $ram.csv
#> [1] "moores_law"
# finally, we'll download the "LEGO database" data with 12 datasets into the
# ttd_many list:
ttd_many <- load_tt_data("LEGO database")
#> INFO [2025-07-24 06:20:47] Starting import for colors.csv.gz from https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2022/2022-09-06/colors.csv.gz
#> SUCCESS [2025-07-24 06:20:47] Successfully loaded colors.csv.gz
#> INFO [2025-07-24 06:20:47] Starting import for elements.csv.gz from https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2022/2022-09-06/elements.csv.gz
#> SUCCESS [2025-07-24 06:20:47] Successfully loaded elements.csv.gz
#> INFO [2025-07-24 06:20:47] Starting import for inventories.csv.gz from https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2022/2022-09-06/inventories.csv.gz
#> SUCCESS [2025-07-24 06:20:47] Successfully loaded inventories.csv.gz
#> INFO [2025-07-24 06:20:47] Starting import for inventory_minifigs.csv.gz from https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2022/2022-09-06/inventory_minifigs.csv.gz
#> SUCCESS [2025-07-24 06:20:47] Successfully loaded inventory_minifigs.csv.gz
#> INFO [2025-07-24 06:20:47] Starting import for inventory_parts.csv.gz from https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2022/2022-09-06/inventory_parts.csv.gz
#> SUCCESS [2025-07-24 06:20:48] Successfully loaded inventory_parts.csv.gz
#> INFO [2025-07-24 06:20:48] Starting import for inventory_sets.csv.gz from https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2022/2022-09-06/inventory_sets.csv.gz
#> SUCCESS [2025-07-24 06:20:48] Successfully loaded inventory_sets.csv.gz
#> INFO [2025-07-24 06:20:48] Starting import for minifigs.csv.gz from https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2022/2022-09-06/minifigs.csv.gz
#> SUCCESS [2025-07-24 06:20:48] Successfully loaded minifigs.csv.gz
#> INFO [2025-07-24 06:20:48] Starting import for part_categories.csv.gz from https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2022/2022-09-06/part_categories.csv.gz
#> SUCCESS [2025-07-24 06:20:48] Successfully loaded part_categories.csv.gz
#> INFO [2025-07-24 06:20:48] Starting import for part_relationships.csv.gz from https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2022/2022-09-06/part_relationships.csv.gz
#> SUCCESS [2025-07-24 06:20:48] Successfully loaded part_relationships.csv.gz
#> INFO [2025-07-24 06:20:48] Starting import for parts.csv.gz from https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2022/2022-09-06/parts.csv.gz
#> SUCCESS [2025-07-24 06:20:48] Successfully loaded parts.csv.gz
#> INFO [2025-07-24 06:20:48] Starting import for sets.csv.gz from https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2022/2022-09-06/sets.csv.gz
#> SUCCESS [2025-07-24 06:20:48] Successfully loaded sets.csv.gz
#> INFO [2025-07-24 06:20:48] Starting import for themes.csv.gz from https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2022/2022-09-06/themes.csv.gz
#> SUCCESS [2025-07-24 06:20:48] Successfully loaded themes.csv.gz
purrr::map(.x = ttd_many, .f = attr, "clean_title")
#> $colors.csv.gz
#> [1] "lego_database"
#>
#> $elements.csv.gz
#> [1] "lego_database"
#>
#> $inventories.csv.gz
#> [1] "lego_database"
#>
#> $inventory_minifigs.csv.gz
#> [1] "lego_database"
#>
#> $inventory_parts.csv.gz
#> [1] "lego_database"
#>
#> $inventory_sets.csv.gz
#> [1] "lego_database"
#>
#> $minifigs.csv.gz
#> [1] "lego_database"
#>
#> $part_categories.csv.gz
#> [1] "lego_database"
#>
#> $part_relationships.csv.gz
#> [1] "lego_database"
#>
#> $parts.csv.gz
#> [1] "lego_database"
#>
#> $sets.csv.gz
#> [1] "lego_database"
#>
#> $themes.csv.gz
#> [1] "lego_database"
Utility functions
The inspect_plot()
function has the following utility
functions:
Check numeric columns
Check if datasets have numerical columns:
check_ttd_num_cols(ttd = ttd3)
#> $cpu.csv
#> [1] "transistor_count" "date_of_introduction" "process"
#> [4] "area"
#>
#> $gpu.csv
#> [1] "transistor_count" "date_of_introduction" "process"
#> [4] "area"
#>
#> $ram.csv
#> [1] "capacity_bits" "transistor_count" "date_of_introduction"
#> [4] "process" "area"
- If so, do they have two or more?
Check categorical columns
Check if datasets have categorical (character) columns:
check_ttd_cat_cols(ttd3)
#> $cpu.csv
#> [1] "processor" "designer"
#>
#> $gpu.csv
#> [1] "processor" "designer_s" "manufacturer_s" "ref"
#>
#> $ram.csv
#> [1] "chip_name" "bit_units" "ram_type" "manufacturer_s"
#> [5] "ref"
Check list columns
Check list columns:
check_ttd_list_cols(ttd3)
#> $cpu.csv
#> [1] 0
#>
#> $gpu.csv
#> [1] 0
#>
#> $ram.csv
#> [1] 0
Check logical columns
Check logical columns:
check_ttd_log_cols(ttd3)
#> $cpu.csv
#> [1] 0
#>
#> $gpu.csv
#> [1] 0
#>
#> $ram.csv
#> [1] 0
Check date columns
Check date columns:
check_ttd_date_cols(ttd3)
#> $cpu.csv
#> [1] 0
#>
#> $gpu.csv
#> [1] 0
#>
#> $ram.csv
#> [1] 0
check_col_types()
These are wrapped in the check_col_types()
function:
identical( # check the
x = length(check_col_types(ttd = ttd3, type = "cat")),
y = length(check_ttd_cat_cols(ttd3))
)
#> [1] TRUE
identical(
x = length(check_col_types(ttd = ttd3, type = "date")),
y = length(check_ttd_date_cols(ttd3))
)
#> [1] TRUE
identical(
x = length(check_col_types(ttd = ttd3, type = "log")),
y = length(check_ttd_log_cols(ttd3))
)
#> [1] TRUE
identical(
x = length(check_col_types(ttd = ttd3, type = "num")),
y = length(check_ttd_num_cols(ttd3))
)
#> [1] TRUE
The functions above allow us to collect the following:
- Determine the number of datasets in the
ttd
list
- Determine the name of the datasets in the
ttd
list
- Determine the number of numerical columns in each dataset in the
ttd
list
- Determine the names of any intersecting numerical columns in each
dataset in the
ttd
list
- Determine the number of categorical columns in each dataset in the
ttd
list
- Determine the names of any intersecting categorical columns in each
dataset in the
ttd
list
- Determine the number of logical columns in each dataset in the
ttd
list
- Determine the number of date columns in each dataset in the
ttd
list
- Determine the names of any date columns in each dataset in the
ttd
list
- Determine the number of list columns in each dataset in the
ttd
list
- Determine the names of any list columns in each dataset in the
ttd
list
Function logic
Using the functions above, write the custom
inspect_plot()
function and have it adhere to the following
rules:
inspect_types()
if (length(ttd) == 1)
If one dataset in ttd
:
inspectdf::inspect_types(
df1 = ttd2[[1]],
df2 = NULL
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(ttd =
list("agencies.csv" = ttd2[[1]]),
plots = "types")
#> INFO [2025-07-24 06:20:49] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:20:49] Beginning plot type 'types'
#> SUCCESS [2025-07-24 06:20:50] Completed plot type 'types'
#> SUCCESS [2025-07-24 06:20:50] inspect_plot(): all requested plots finished
#> NULL
if (length(ttd) == 2)
If two datasets in ttd
:
check for intersecting columns
if columns intersect, store column names as
intersecting_columns
if (length(intersect(names(ttd2[[1]]), names(ttd2[[2]]))) > 0) {
intersecting_columns <- intersect(
x = names(ttd2[[1]]),
y = names(ttd2[[2]])
)
}
- Then use
intersecting_columns
to subsetttd
for theinspect_cat()
function:
inspectdf::inspect_types(
df1 = ttd2[[1]][intersecting_columns],
df2 = ttd2[[2]][intersecting_columns],
) |>
inspectdf::show_plot(text_labels = TRUE)
if (length(ttd) >= 3)
If more than two datasets in ttd
:
inspectdf::inspect_types(
df1 = ttd3[[1]],
df2 = NULL
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list("cpu.csv" = ttd3[[1]]),
plots = "types"
)
#> INFO [2025-07-24 06:20:53] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:20:53] Beginning plot type 'types'
#> SUCCESS [2025-07-24 06:20:53] Completed plot type 'types'
#> SUCCESS [2025-07-24 06:20:53] inspect_plot(): all requested plots finished
#> NULL
inspectdf::inspect_types(
df1 = ttd3[[2]],
df2 = NULL
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list("gpu.csv" = ttd3[[2]]),
plots = "types"
)
#> INFO [2025-07-24 06:20:55] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:20:55] Beginning plot type 'types'
#> SUCCESS [2025-07-24 06:20:56] Completed plot type 'types'
#> SUCCESS [2025-07-24 06:20:56] inspect_plot(): all requested plots finished
#> NULL
inspectdf::inspect_types(
df1 = ttd3[[3]],
df2 = NULL
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list("ram.csv" = ttd3[[3]]),
plots = "types"
)
#> INFO [2025-07-24 06:20:58] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:20:58] Beginning plot type 'types'
#> SUCCESS [2025-07-24 06:20:58] Completed plot type 'types'
#> SUCCESS [2025-07-24 06:20:58] inspect_plot(): all requested plots finished
#> NULL
inspect_mem()
if (length(ttd) == 1)
If one dataset in ttd
:
inspectdf::inspect_mem(
df1 = ttd[[1]],
df2 = NULL) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list("netflix_titles.csv" = ttd[[1]]),
plots = "mem"
)
#> INFO [2025-07-24 06:21:00] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:00] Beginning plot type 'mem'
#> SUCCESS [2025-07-24 06:21:00] Completed plot type 'mem'
#> SUCCESS [2025-07-24 06:21:00] inspect_plot(): all requested plots finished
#> NULL
if (length(ttd) == 2)
If two datasets in ttd
:
inspectdf::inspect_mem(
df1 = ttd2[[1]],
df2 = ttd2[[2]]) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list(
"agencies.csv" = ttd2[[1]],
"launches.csv" = ttd2[[2]]
),
plots = "mem")
#> INFO [2025-07-24 06:21:02] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:02] Beginning plot type 'mem'
#> SUCCESS [2025-07-24 06:21:03] Completed plot type 'mem'
#> SUCCESS [2025-07-24 06:21:03] inspect_plot(): all requested plots finished
#> NULL
if (length(ttd) >= 3)
If more than two datasets in ttd
:
inspectdf::inspect_mem(
df1 = ttd3[[1]],
df2 = NULL
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list("cpu.csv" = ttd3[[1]]),
plots = "mem"
)
#> INFO [2025-07-24 06:21:05] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:05] Beginning plot type 'mem'
#> SUCCESS [2025-07-24 06:21:05] Completed plot type 'mem'
#> SUCCESS [2025-07-24 06:21:05] inspect_plot(): all requested plots finished
#> NULL
inspectdf::inspect_mem(
df1 = ttd3[[2]],
df2 = NULL
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list("gpu.csv" = ttd3[[2]]),
plots = "mem"
)
#> INFO [2025-07-24 06:21:07] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:07] Beginning plot type 'mem'
#> SUCCESS [2025-07-24 06:21:07] Completed plot type 'mem'
#> SUCCESS [2025-07-24 06:21:07] inspect_plot(): all requested plots finished
#> NULL
inspectdf::inspect_mem(
df1 = ttd3[[3]],
df2 = NULL
) |>
inspectdf::show_plot(text_labels = TRUE)
# ... for all datasets in ttd list
inspect_plot(
ttd = list("ram.csv" = ttd3[[3]]),
plots = "mem"
)
#> INFO [2025-07-24 06:21:08] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:08] Beginning plot type 'mem'
#> SUCCESS [2025-07-24 06:21:09] Completed plot type 'mem'
#> SUCCESS [2025-07-24 06:21:09] inspect_plot(): all requested plots finished
#> NULL
inspect_cor()
if (length(ttd) == 1)
If one dataset in ttd
:
- check if there are at least 2 numeric columns in the datasets in the
ttd
list- Plot the dataset with at least two numeric columns
if (ttd_length(ttd2) == 2) { # check for two datasets
# check for two numerical columns in ttd[[1]]
if (length(check_ttd_num_cols(ttd = ttd2)[[1]]) == 2) {
# plot first dataset
inspectdf::inspect_cor(
df1 = ttd2[[1]],
df2 = NULL,
method = "pearson",
with_col = NULL,
alpha = 0.05
) |>
inspectdf::show_plot(text_labels = TRUE)
# check for two numerical columns in ttd[[2]]
} else if (length(check_ttd_num_cols(ttd = ttd2)[[2]]) >= 2) {
# plot second dataset
inspectdf::inspect_cor(
df1 = ttd2[[2]],
df2 = NULL,
method = "pearson",
with_col = NULL,
alpha = 0.05
) |>
inspectdf::show_plot(text_labels = TRUE)
} else {
logr_msg("Error", level = "INFO")
}
}
inspect_plot(
ttd = list(
"launches.csv" = ttd2[[2]]
),
plots = "cor"
)
#> INFO [2025-07-24 06:21:10] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:10] Beginning plot type 'cor'
#> SUCCESS [2025-07-24 06:21:10] Completed plot type 'cor'
#> SUCCESS [2025-07-24 06:21:10] inspect_plot(): all requested plots finished
#> NULL
if (length(ttd) == 2)
If two datasets in ttd
:
- check if there is at least 1 numeric column in each dataset from the
ttd
list
check_col_types(ttd2, type = "num")
#> $agencies.csv
#> [1] "count"
#>
#> $launches.csv
#> [1] "JD" "launch_year"
check_col_types(ttd3, type = "num")
#> $cpu.csv
#> [1] "transistor_count" "date_of_introduction" "process"
#> [4] "area"
#>
#> $gpu.csv
#> [1] "transistor_count" "date_of_introduction" "process"
#> [4] "area"
#>
#> $ram.csv
#> [1] "capacity_bits" "transistor_count" "date_of_introduction"
#> [4] "process" "area"
- Run
inspectdf::inspect_cor()
if at least one numerical column is in each dataset:
inspectdf::inspect_cor(
df1 = ttd3[[1]],
df2 = ttd3[[2]],
method = "pearson",
with_col = NULL,
alpha = 0.05
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list(
"cpu.csv" = ttd3[[1]],
"gpu.csv" = ttd3[[2]]
),
plots = "cor"
)
#> INFO [2025-07-24 06:21:12] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:12] Beginning plot type 'cor'
#> SUCCESS [2025-07-24 06:21:12] Completed plot type 'cor'
#> SUCCESS [2025-07-24 06:21:12] inspect_plot(): all requested plots finished
#> NULL
if (length(ttd) >= 3)
If more than two datasets in ttd
:
- check if there is at least 1 numeric column in each of the datasets
from the
ttd
list:
check_col_types(ttd3, type = "num")
#> $cpu.csv
#> [1] "transistor_count" "date_of_introduction" "process"
#> [4] "area"
#>
#> $gpu.csv
#> [1] "transistor_count" "date_of_introduction" "process"
#> [4] "area"
#>
#> $ram.csv
#> [1] "capacity_bits" "transistor_count" "date_of_introduction"
#> [4] "process" "area"
- Run the following on all datasets with at least two numerical columns.
inspectdf::inspect_cor(
df1 = ttd3[[1]],
df2 = NULL,
method = "pearson",
with_col = NULL,
alpha = 0.05
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list("cpu.csv" = ttd3[[1]]),
plots = "cor")
#> INFO [2025-07-24 06:21:14] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:14] Beginning plot type 'cor'
#> SUCCESS [2025-07-24 06:21:14] Completed plot type 'cor'
#> SUCCESS [2025-07-24 06:21:14] inspect_plot(): all requested plots finished
#> NULL
inspectdf::inspect_cor(
df1 = ttd3[[2]],
df2 = NULL,
method = "pearson",
with_col = NULL,
alpha = 0.05
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list("gpu.csv" = ttd3[[2]]),
plots = "cor")
#> INFO [2025-07-24 06:21:16] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:16] Beginning plot type 'cor'
#> SUCCESS [2025-07-24 06:21:16] Completed plot type 'cor'
#> SUCCESS [2025-07-24 06:21:16] inspect_plot(): all requested plots finished
#> NULL
inspectdf::inspect_cor(
df1 = ttd3[[3]],
df2 = NULL,
method = "pearson",
with_col = NULL,
alpha = 0.05
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list("ram.csv" = ttd3[[3]]),
plots = "cor")
#> INFO [2025-07-24 06:21:18] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:18] Beginning plot type 'cor'
#> SUCCESS [2025-07-24 06:21:18] Completed plot type 'cor'
#> SUCCESS [2025-07-24 06:21:18] inspect_plot(): all requested plots finished
#> NULL
# for all other dataset in the list
inspect_imb()
if (length(ttd) == 1)
If one dataset in ttd
:
inspectdf::inspect_imb(
df1 = ttd[[1]],
df2 = NULL,
include_na = FALSE
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list("netflix_titles.csv" = ttd[[1]]),
plots = "imb")
#> INFO [2025-07-24 06:21:20] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:20] Beginning plot type 'imb'
#> SUCCESS [2025-07-24 06:21:20] Completed plot type 'imb'
#> SUCCESS [2025-07-24 06:21:20] inspect_plot(): all requested plots finished
#> NULL
if (length(ttd) == 2)
If two datasets in ttd
:
inspectdf::inspect_imb(
df1 = ttd2[[1]],
df2 = ttd2[[2]],
include_na = FALSE
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list(
"agencies.csv" = ttd2[[1]],
"launches.csv" = ttd2[[2]]
),
plots = "imb")
#> INFO [2025-07-24 06:21:22] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:22] Beginning plot type 'imb'
#> SUCCESS [2025-07-24 06:21:23] Completed plot type 'imb'
#> SUCCESS [2025-07-24 06:21:23] inspect_plot(): all requested plots finished
#> NULL
if (length(ttd) >= 3)
If more than two datasets in ttd
:
inspectdf::inspect_imb(
df1 = ttd3[[1]],
df2 = NULL,
include_na = FALSE
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list("cpu.csv" = ttd3[[1]]),
plots = "imb")
#> INFO [2025-07-24 06:21:24] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:24] Beginning plot type 'imb'
#> SUCCESS [2025-07-24 06:21:24] Completed plot type 'imb'
#> SUCCESS [2025-07-24 06:21:24] inspect_plot(): all requested plots finished
#> NULL
inspectdf::inspect_imb(
df1 = ttd3[[2]],
df2 = NULL,
include_na = FALSE
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list("gpu.csv" = ttd3[[2]]),
plots = "imb")
#> INFO [2025-07-24 06:21:26] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:26] Beginning plot type 'imb'
#> SUCCESS [2025-07-24 06:21:26] Completed plot type 'imb'
#> SUCCESS [2025-07-24 06:21:26] inspect_plot(): all requested plots finished
#> NULL
inspectdf::inspect_imb(
df1 = ttd3[[3]],
df2 = NULL,
include_na = FALSE
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list("ram.csv" = ttd3[[3]]),
plots = "imb")
#> INFO [2025-07-24 06:21:28] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:28] Beginning plot type 'imb'
#> SUCCESS [2025-07-24 06:21:28] Completed plot type 'imb'
#> SUCCESS [2025-07-24 06:21:28] inspect_plot(): all requested plots finished
#> NULL
# ... for all datasets in ttd list
inspect_num()
if (length(ttd) == 1)
If one dataset in ttd
:
- check if there is at least 1 numeric column in the dataset in the
ttd
list
check_ttd_num_cols(ttd2)
#> $agencies.csv
#> [1] "count"
#>
#> $launches.csv
#> [1] "JD" "launch_year"
- if so, run:
inspectdf::inspect_num(
df1 = ttd2[[1]],
df2 = NULL,
breaks = 20,
include_int = TRUE
) |>
inspectdf::show_plot(text_labels = TRUE)
BUG: inspect_plot()
doesn’t seem to
identify when a list has a dataset with single numerical column.
inspect_plot(
ttd = list("agencies.csv" = ttd2[[1]]),
plots = "num")
#> INFO [2025-07-24 06:21:30] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:30] Beginning plot type 'num'
#> SUCCESS [2025-07-24 06:21:30] Completed plot type 'num'
#> SUCCESS [2025-07-24 06:21:30] inspect_plot(): all requested plots finished
#> NULL
If will work if the list has two numeric columns:
inspect_plot(
ttd = list("launches.csv" = ttd2[[2]]),
plots = "num")
#> INFO [2025-07-24 06:21:30] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:30] Beginning plot type 'num'
#> SUCCESS [2025-07-24 06:21:30] Completed plot type 'num'
#> SUCCESS [2025-07-24 06:21:30] inspect_plot(): all requested plots finished
#> NULL
if (length(ttd) == 2)
If two datasets in ttd
:
-
check if there is at least 1 numeric column in each of the two datasets in the
ttd
list- if so, run:
inspectdf::inspect_num(
df1 = ttd2[[1]],
df2 = ttd2[[2]],
breaks = 20,
include_int = TRUE
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list(
"agencies.csv" = ttd2[[1]],
"launches.csv" = ttd2[[2]]),
plots = "num")
#> INFO [2025-07-24 06:21:32] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:32] Beginning plot type 'num'
#> SUCCESS [2025-07-24 06:21:33] Completed plot type 'num'
#> SUCCESS [2025-07-24 06:21:33] inspect_plot(): all requested plots finished
#> NULL
- if there is only one numeric column, run the code above on the dataset with the numeric column.
if (length(ttd) >= 3)
If more than two datasets in ttd
:
inspectdf::inspect_num(
df1 = ttd3[[1]],
df2 = NULL,
breaks = 20,
include_int = TRUE
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list("cpu.csv" = ttd3[[1]]),
plots = "num")
#> INFO [2025-07-24 06:21:35] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:35] Beginning plot type 'num'
#> SUCCESS [2025-07-24 06:21:35] Completed plot type 'num'
#> SUCCESS [2025-07-24 06:21:35] inspect_plot(): all requested plots finished
#> NULL
inspectdf::inspect_num(
df1 = ttd3[[2]],
df2 = NULL,
breaks = 20,
include_int = TRUE
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list("gpu.csv" = ttd3[[2]]),
plots = "num")
#> INFO [2025-07-24 06:21:37] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:37] Beginning plot type 'num'
#> SUCCESS [2025-07-24 06:21:37] Completed plot type 'num'
#> SUCCESS [2025-07-24 06:21:37] inspect_plot(): all requested plots finished
#> NULL
inspectdf::inspect_num(
df1 = ttd3[[3]],
df2 = NULL,
breaks = 20,
include_int = TRUE
) |>
inspectdf::show_plot(text_labels = TRUE)
# ... for all datasets in ttd list
inspect_plot(
ttd = list("ram.csv" = ttd3[[3]]),
plots = "num")
#> INFO [2025-07-24 06:21:39] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:39] Beginning plot type 'num'
#> SUCCESS [2025-07-24 06:21:40] Completed plot type 'num'
#> SUCCESS [2025-07-24 06:21:40] inspect_plot(): all requested plots finished
#> NULL
inspect_cat()
if (length(ttd) == 1)
If one dataset in ttd
:
- check if there is at least 1 categorical column in the dataset in
the
ttd
list- if so, run:
inspectdf::inspect_cat(
df1 = ttd[[1]],
df2 = NULL,
include_int = FALSE
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list("netflix_titles.csv" = ttd[[1]]),
plots = "cat")
#> INFO [2025-07-24 06:21:43] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:43] Beginning plot type 'cat'
#> SUCCESS [2025-07-24 06:21:45] Completed plot type 'cat'
#> SUCCESS [2025-07-24 06:21:45] inspect_plot(): all requested plots finished
#> NULL
if (length(ttd) == 2)
If two datasets in ttd
:
- check for intersecting columns in the two datasets in the
ttd
list
length(
intersect(
x = check_ttd_cat_cols(ttd2)[[1]],
y = check_ttd_cat_cols(ttd2)[[2]]
)
) > 0
#> [1] TRUE
- If
TRUE
, storeintersecting_columns
- Use
intersecting_columns
to subset each dataset inttd
list.
inspectdf::inspect_cat(
df1 = ttd2[[1]][intersecting_columns],
df2 = ttd2[[2]][intersecting_columns],
include_int = FALSE
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list(
"agencies.csv" = ttd2[[1]][intersecting_columns],
"launches.csv" = ttd2[[2]][intersecting_columns]
),
plots = "cat")
#> INFO [2025-07-24 06:21:47] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:47] Beginning plot type 'cat'
#> SUCCESS [2025-07-24 06:21:48] Completed plot type 'cat'
#> SUCCESS [2025-07-24 06:21:48] inspect_plot(): all requested plots finished
#> NULL
- if there aren’t any intersecting categorical columns, run the
following on the dataset with the categorical column
ttd[[?]]
:
inspectdf::inspect_cat(
df1 = ttd[[1]],
df2 = NULL,
include_int = FALSE
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list("netflix_titles.csv" = ttd[[1]]),
plots = "cat")
#> INFO [2025-07-24 06:21:51] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:51] Beginning plot type 'cat'
#> SUCCESS [2025-07-24 06:21:53] Completed plot type 'cat'
#> SUCCESS [2025-07-24 06:21:53] inspect_plot(): all requested plots finished
#> NULL
if (length(ttd) >= 3)
If more than two datasets in ttd
:
inspectdf::inspect_cat(
df1 = ttd_many[[1]],
df2 = NULL,
include_int = FALSE
) |>
inspectdf::show_plot(text_labels = TRUE)
inspect_plot(
ttd = list("colors.csv.gz" = ttd_many[[1]]),
plots = "cat")
#> INFO [2025-07-24 06:21:55] inspect_plot(): starting analysis
#> INFO [2025-07-24 06:21:55] Beginning plot type 'cat'
#> SUCCESS [2025-07-24 06:21:55] Completed plot type 'cat'
#> SUCCESS [2025-07-24 06:21:55] inspect_plot(): all requested plots finished
#> NULL