Skip to contents

Process raw data

Usage

process_text(raw_data, fct = FALSE)

Arguments

raw_data

The raw dataset to process (for example, palmerpenguins::penguins_raw).

fct

Logical; include factors? Defaults to FALSE.

Value

A data frame with standardized (snake_case) column names, lowercase text values, etc.

Examples

require(palmerpenguins)
#> Loading required package: palmerpenguins
str(palmerpenguins::penguins_raw)
#> tibble [344 × 17] (S3: tbl_df/tbl/data.frame)
#>  $ studyName          : chr [1:344] "PAL0708" "PAL0708" "PAL0708" "PAL0708" ...
#>  $ Sample Number      : num [1:344] 1 2 3 4 5 6 7 8 9 10 ...
#>  $ Species            : chr [1:344] "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" ...
#>  $ Region             : chr [1:344] "Anvers" "Anvers" "Anvers" "Anvers" ...
#>  $ Island             : chr [1:344] "Torgersen" "Torgersen" "Torgersen" "Torgersen" ...
#>  $ Stage              : chr [1:344] "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" ...
#>  $ Individual ID      : chr [1:344] "N1A1" "N1A2" "N2A1" "N2A2" ...
#>  $ Clutch Completion  : chr [1:344] "Yes" "Yes" "Yes" "Yes" ...
#>  $ Date Egg           : Date[1:344], format: "2007-11-11" "2007-11-11" ...
#>  $ Culmen Length (mm) : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
#>  $ Culmen Depth (mm)  : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
#>  $ Flipper Length (mm): num [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
#>  $ Body Mass (g)      : num [1:344] 3750 3800 3250 NA 3450 ...
#>  $ Sex                : chr [1:344] "MALE" "FEMALE" "FEMALE" NA ...
#>  $ Delta 15 N (o/oo)  : num [1:344] NA 8.95 8.37 NA 8.77 ...
#>  $ Delta 13 C (o/oo)  : num [1:344] NA -24.7 -25.3 NA -25.3 ...
#>  $ Comments           : chr [1:344] "Not enough blood for isotopes." NA NA "Adult not sampled." ...
#>  - attr(*, "spec")=List of 3
#>   ..$ cols   :List of 17
#>   .. ..$ studyName          : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Sample Number      : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Species            : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Region             : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Island             : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Stage              : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Individual ID      : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Clutch Completion  : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Date Egg           :List of 1
#>   .. .. ..$ format: chr ""
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_date" "collector"
#>   .. ..$ Culmen Length (mm) : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Culmen Depth (mm)  : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Flipper Length (mm): list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Body Mass (g)      : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Sex                : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Delta 15 N (o/oo)  : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Delta 13 C (o/oo)  : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Comments           : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   ..$ default: list()
#>   .. ..- attr(*, "class")= chr [1:2] "collector_guess" "collector"
#>   ..$ skip   : num 1
#>   ..- attr(*, "class")= chr "col_spec"
# compare to
str(process_text(palmerpenguins::penguins_raw))
#> 'data.frame':	344 obs. of  17 variables:
#>  $ studyname        : chr  "pal0708" "pal0708" "pal0708" "pal0708" ...
#>  $ sample_number    : num  1 2 3 4 5 6 7 8 9 10 ...
#>  $ species          : chr  "adelie penguin (pygoscelis adeliae)" "adelie penguin (pygoscelis adeliae)" "adelie penguin (pygoscelis adeliae)" "adelie penguin (pygoscelis adeliae)" ...
#>  $ region           : chr  "anvers" "anvers" "anvers" "anvers" ...
#>  $ island           : chr  "torgersen" "torgersen" "torgersen" "torgersen" ...
#>  $ stage            : chr  "adult, 1 egg stage" "adult, 1 egg stage" "adult, 1 egg stage" "adult, 1 egg stage" ...
#>  $ individual_id    : chr  "n1a1" "n1a2" "n2a1" "n2a2" ...
#>  $ clutch_completion: chr  "yes" "yes" "yes" "yes" ...
#>  $ date_egg         : Date, format: "2007-11-11" "2007-11-11" ...
#>  $ culmen_length_mm : num  39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
#>  $ culmen_depth_mm  : num  18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
#>  $ flipper_length_mm: num  181 186 195 NA 193 190 181 195 193 190 ...
#>  $ body_mass_g      : num  3750 3800 3250 NA 3450 ...
#>  $ sex              : chr  "male" "female" "female" NA ...
#>  $ delta_15_n_o_oo  : num  NA 8.95 8.37 NA 8.77 ...
#>  $ delta_13_c_o_oo  : num  NA -24.7 -25.3 NA -25.3 ...
#>  $ comments         : chr  "not enough blood for isotopes." NA NA "adult not sampled." ...
#>  - attr(*, "spec")=List of 3
#>   ..$ cols   :List of 17
#>   .. ..$ studyName          : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Sample Number      : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Species            : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Region             : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Island             : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Stage              : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Individual ID      : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Clutch Completion  : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Date Egg           :List of 1
#>   .. .. ..$ format: chr ""
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_date" "collector"
#>   .. ..$ Culmen Length (mm) : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Culmen Depth (mm)  : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Flipper Length (mm): list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Body Mass (g)      : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Sex                : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Delta 15 N (o/oo)  : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Delta 13 C (o/oo)  : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Comments           : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   ..$ default: list()
#>   .. ..- attr(*, "class")= chr [1:2] "collector_guess" "collector"
#>   ..$ skip   : num 1
#>   ..- attr(*, "class")= chr "col_spec"