Collapse the values of a character column into named groups using `forcats::fct_collapse()`.

categorize(data, col, ..., other_values = NULL, na_level = "(Missing)")

See also

Other forcats functions: recode_boolean(), recode_value()

Examples

library(tidyverse)
test_data <- tibble(
  Group = sample(c("Apple", "Pear"), size = 10, replace = TRUE),
  A = sample(c(NA_integer_, 1:3), size = 10, replace = TRUE),
  B = sample(c(NA_integer_, 4:6), size = 10, replace = TRUE),
  C = sample(c(NA_real_, seq(from = 6.01, to = 6.09, by = 0.01)), size = 10, replace = TRUE),
  D = sample(c(NA, TRUE, FALSE), size = 10, replace = TRUE)
) %>%
  dplyr::mutate(E = B)
categorize(data = test_data, col = A, Odd = as.character(seq(from = 1, to = 10, by = 2)), other_values = "Even")
#> Warning: Problem with `mutate()` input `A`. #> Unknown levels in `f`: 5, 7, 9 #> Input `A` is `(function (.f, ..., other_level = NULL, group_other = "DEPRECATED") ...`.
#> Warning: Unknown levels in `f`: 5, 7, 9
#> # A tibble: 10 x 6 #> Group A B C D E #> <chr> <chr> <int> <dbl> <lgl> <int> #> 1 Apple Odd 5 6.03 FALSE 5 #> 2 Apple NA 5 6.01 TRUE 5 #> 3 Pear NA 4 6.02 NA 4 #> 4 Apple NA 5 6.08 TRUE 5 #> 5 Apple Even NA 6.05 NA NA #> 6 Pear Odd NA 6.06 TRUE NA #> 7 Apple Even 6 6.04 FALSE 6 #> 8 Apple NA NA 6.05 NA NA #> 9 Apple Odd 4 6.08 TRUE 4 #> 10 Pear Odd 4 6.05 TRUE 4
categorize(data = test_data, col = A, Odd = as.character(seq(from = 1, to = 10, by = 2)), other_values = "Even", na_level = NULL)
#> Warning: Problem with `mutate()` input `A`. #> Unknown levels in `f`: 5, 7, 9 #> Input `A` is `(function (.f, ..., other_level = NULL, group_other = "DEPRECATED") ...`.
#> Warning: Unknown levels in `f`: 5, 7, 9
#> # A tibble: 10 x 6 #> Group A B C D E #> <chr> <chr> <int> <dbl> <lgl> <int> #> 1 Apple Odd 5 6.03 FALSE 5 #> 2 Apple NA 5 6.01 TRUE 5 #> 3 Pear NA 4 6.02 NA 4 #> 4 Apple NA 5 6.08 TRUE 5 #> 5 Apple Even NA 6.05 NA NA #> 6 Pear Odd NA 6.06 TRUE NA #> 7 Apple Even 6 6.04 FALSE 6 #> 8 Apple NA NA 6.05 NA NA #> 9 Apple Odd 4 6.08 TRUE 4 #> 10 Pear Odd 4 6.05 TRUE 4
categorize(data = test_data, col = A, Odd = as.character(seq(from = 1, to = 10, by = 2)), Even = as.character(seq(from = 2, to = 10, by = 2)), na_level = NULL)
#> Warning: Problem with `mutate()` input `A`. #> Unknown levels in `f`: 5, 7, 9, 4, 6, 8, 10 #> Input `A` is `(function (.f, ..., other_level = NULL, group_other = "DEPRECATED") ...`.
#> Warning: Unknown levels in `f`: 5, 7, 9, 4, 6, 8, 10
#> # A tibble: 10 x 6 #> Group A B C D E #> <chr> <chr> <int> <dbl> <lgl> <int> #> 1 Apple Odd 5 6.03 FALSE 5 #> 2 Apple NA 5 6.01 TRUE 5 #> 3 Pear NA 4 6.02 NA 4 #> 4 Apple NA 5 6.08 TRUE 5 #> 5 Apple Even NA 6.05 NA NA #> 6 Pear Odd NA 6.06 TRUE NA #> 7 Apple Even 6 6.04 FALSE 6 #> 8 Apple NA NA 6.05 NA NA #> 9 Apple Odd 4 6.08 TRUE 4 #> 10 Pear Odd 4 6.05 TRUE 4
categorize(data = test_data, col = A, Odd = as.character(seq(from = 1, to = 10, by = 2)), Even = as.character(seq(from = 2, to = 10, by = 2)))
#> Warning: Problem with `mutate()` input `A`. #> Unknown levels in `f`: 5, 7, 9, 4, 6, 8, 10 #> Input `A` is `(function (.f, ..., other_level = NULL, group_other = "DEPRECATED") ...`.
#> Warning: Unknown levels in `f`: 5, 7, 9, 4, 6, 8, 10
#> # A tibble: 10 x 6 #> Group A B C D E #> <chr> <chr> <int> <dbl> <lgl> <int> #> 1 Apple Odd 5 6.03 FALSE 5 #> 2 Apple NA 5 6.01 TRUE 5 #> 3 Pear NA 4 6.02 NA 4 #> 4 Apple NA 5 6.08 TRUE 5 #> 5 Apple Even NA 6.05 NA NA #> 6 Pear Odd NA 6.06 TRUE NA #> 7 Apple Even 6 6.04 FALSE 6 #> 8 Apple NA NA 6.05 NA NA #> 9 Apple Odd 4 6.08 TRUE 4 #> 10 Pear Odd 4 6.05 TRUE 4
# Recode
recode_value(data = test_data, col = A, One = as.character(1), Two = as.character(2))
#> # A tibble: 10 x 6 #> Group A B C D E #> <chr> <chr> <int> <dbl> <lgl> <int> #> 1 Apple One 5 6.03 FALSE 5 #> 2 Apple NA 5 6.01 TRUE 5 #> 3 Pear NA 4 6.02 NA 4 #> 4 Apple NA 5 6.08 TRUE 5 #> 5 Apple Two NA 6.05 NA NA #> 6 Pear 3 NA 6.06 TRUE NA #> 7 Apple Two 6 6.04 FALSE 6 #> 8 Apple NA NA 6.05 NA NA #> 9 Apple One 4 6.08 TRUE 4 #> 10 Pear One 4 6.05 TRUE 4
recode_boolean(data = test_data, col = Group, true_value = "Pear", false_value = "Apple")
#> # A tibble: 10 x 6 #> Group A B C D E #> <lgl> <int> <int> <dbl> <lgl> <int> #> 1 FALSE 1 5 6.03 FALSE 5 #> 2 FALSE NA 5 6.01 TRUE 5 #> 3 TRUE NA 4 6.02 NA 4 #> 4 FALSE NA 5 6.08 TRUE 5 #> 5 FALSE 2 NA 6.05 NA NA #> 6 TRUE 3 NA 6.06 TRUE NA #> 7 FALSE 2 6 6.04 FALSE 6 #> 8 FALSE NA NA 6.05 NA NA #> 9 FALSE 1 4 6.08 TRUE 4 #> 10 TRUE 1 4 6.05 TRUE 4