Process raw data
process_text.Rd
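Process raw data. As the example below shows, column names are converted to lower-case snake_case, character values are converted to lower case, and the result is returned as a plain data.frame.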
Examples
require(palmerpenguins)
#> Loading required package: palmerpenguins
str(palmerpenguins::penguins_raw)
#> tibble [344 × 17] (S3: tbl_df/tbl/data.frame)
#> $ studyName : chr [1:344] "PAL0708" "PAL0708" "PAL0708" "PAL0708" ...
#> $ Sample Number : num [1:344] 1 2 3 4 5 6 7 8 9 10 ...
#> $ Species : chr [1:344] "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" ...
#> $ Region : chr [1:344] "Anvers" "Anvers" "Anvers" "Anvers" ...
#> $ Island : chr [1:344] "Torgersen" "Torgersen" "Torgersen" "Torgersen" ...
#> $ Stage : chr [1:344] "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" ...
#> $ Individual ID : chr [1:344] "N1A1" "N1A2" "N2A1" "N2A2" ...
#> $ Clutch Completion : chr [1:344] "Yes" "Yes" "Yes" "Yes" ...
#> $ Date Egg : Date[1:344], format: "2007-11-11" "2007-11-11" ...
#> $ Culmen Length (mm) : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
#> $ Culmen Depth (mm) : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
#> $ Flipper Length (mm): num [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
#> $ Body Mass (g) : num [1:344] 3750 3800 3250 NA 3450 ...
#> $ Sex : chr [1:344] "MALE" "FEMALE" "FEMALE" NA ...
#> $ Delta 15 N (o/oo) : num [1:344] NA 8.95 8.37 NA 8.77 ...
#> $ Delta 13 C (o/oo) : num [1:344] NA -24.7 -25.3 NA -25.3 ...
#> $ Comments : chr [1:344] "Not enough blood for isotopes." NA NA "Adult not sampled." ...
#> - attr(*, "spec")=List of 3
#> ..$ cols :List of 17
#> .. ..$ studyName : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Sample Number : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Species : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Region : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Island : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Stage : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Individual ID : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Clutch Completion : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Date Egg :List of 1
#> .. .. ..$ format: chr ""
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_date" "collector"
#> .. ..$ Culmen Length (mm) : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Culmen Depth (mm) : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Flipper Length (mm): list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Body Mass (g) : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Sex : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Delta 15 N (o/oo) : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Delta 13 C (o/oo) : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Comments : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> ..$ default: list()
#> .. ..- attr(*, "class")= chr [1:2] "collector_guess" "collector"
#> ..$ skip : num 1
#> ..- attr(*, "class")= chr "col_spec"
# compare to the processed data: column names are now snake_case and character values are lower case
str(process_text(palmerpenguins::penguins_raw))
#> 'data.frame': 344 obs. of 17 variables:
#> $ studyname : chr "pal0708" "pal0708" "pal0708" "pal0708" ...
#> $ sample_number : num 1 2 3 4 5 6 7 8 9 10 ...
#> $ species : chr "adelie penguin (pygoscelis adeliae)" "adelie penguin (pygoscelis adeliae)" "adelie penguin (pygoscelis adeliae)" "adelie penguin (pygoscelis adeliae)" ...
#> $ region : chr "anvers" "anvers" "anvers" "anvers" ...
#> $ island : chr "torgersen" "torgersen" "torgersen" "torgersen" ...
#> $ stage : chr "adult, 1 egg stage" "adult, 1 egg stage" "adult, 1 egg stage" "adult, 1 egg stage" ...
#> $ individual_id : chr "n1a1" "n1a2" "n2a1" "n2a2" ...
#> $ clutch_completion: chr "yes" "yes" "yes" "yes" ...
#> $ date_egg : Date, format: "2007-11-11" "2007-11-11" ...
#> $ culmen_length_mm : num 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
#> $ culmen_depth_mm : num 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
#> $ flipper_length_mm: num 181 186 195 NA 193 190 181 195 193 190 ...
#> $ body_mass_g : num 3750 3800 3250 NA 3450 ...
#> $ sex : chr "male" "female" "female" NA ...
#> $ delta_15_n_o_oo : num NA 8.95 8.37 NA 8.77 ...
#> $ delta_13_c_o_oo : num NA -24.7 -25.3 NA -25.3 ...
#> $ comments : chr "not enough blood for isotopes." NA NA "adult not sampled." ...
#> - attr(*, "spec")=List of 3
#> ..$ cols :List of 17
#> .. ..$ studyName : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Sample Number : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Species : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Region : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Island : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Stage : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Individual ID : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Clutch Completion : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Date Egg :List of 1
#> .. .. ..$ format: chr ""
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_date" "collector"
#> .. ..$ Culmen Length (mm) : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Culmen Depth (mm) : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Flipper Length (mm): list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Body Mass (g) : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Sex : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Delta 15 N (o/oo) : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Delta 13 C (o/oo) : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Comments : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> ..$ default: list()
#> .. ..- attr(*, "class")= chr [1:2] "collector_guess" "collector"
#> ..$ skip : num 1
#> ..- attr(*, "class")= chr "col_spec"