Normalize Character Column Values to `NA_character_`

mutate_all_na_character(data, blank = TRUE, empty = TRUE, na_str = TRUE)

Arguments

data

A dataframe or tibble.

blank

Should strings consisting only of one or more spaces be replaced with NA_character_?

empty

Should strings with a length of 0 be replaced with NA_character_?

na_str

Should the literal string "NA" be replaced with NA_character_?

See also

mutate_all, vars, reexports

Examples

library(tidyverse) test_data <- tibble(A = sample(c(NA_integer_, 1:3), size = 10, replace = TRUE), B = sample(c(NA_integer_, 4:6), size = 10, replace = TRUE), C = sample(c(NA_integer_, 7:9), size = 10, replace = TRUE), D = sample(c(NA_integer_, 10:12), size = 10, replace = TRUE), E = sample(c(NA_integer_, 13:15), size = 10, replace = TRUE), ) # Only operates on character columns mutate_all_na_character(data = test_data)
#> # A tibble: 10 x 5 #> A B C D E #> <int> <int> <int> <int> <int> #> 1 2 NA 8 10 15 #> 2 NA NA 9 NA 14 #> 3 3 6 8 12 13 #> 4 3 6 7 10 13 #> 5 1 4 8 11 14 #> 6 NA 6 7 11 14 #> 7 NA 6 NA 12 15 #> 8 2 5 7 10 15 #> 9 3 4 NA 12 NA #> 10 NA 6 7 10 15
test_data <- tibble(A = sample(c(NA_character_, "NA", " ", " ", "", 1:3), size = 10, replace = TRUE), B = sample(c(NA_character_, "NA", " ", " ", "", 4:6), size = 10, replace = TRUE), C = sample(c(NA_character_, "NA", " ", " ", "", 7:9), size = 10, replace = TRUE) ) # Results with all input character columns mutate_all_na_character(data = test_data)
#> # A tibble: 10 x 3 #> A B C #> <chr> <chr> <chr> #> 1 3 NA NA #> 2 2 NA 9 #> 3 NA NA 8 #> 4 NA NA NA #> 5 NA NA NA #> 6 NA NA NA #> 7 NA NA NA #> 8 1 NA 7 #> 9 NA NA NA #> 10 NA NA NA
mutate_all_na_character(data = test_data, blank = FALSE)
#> # A tibble: 10 x 3 #> A B C #> <chr> <chr> <chr> #> 1 "3" NA " " #> 2 "2" NA "9" #> 3 NA NA "8" #> 4 NA NA NA #> 5 NA NA " " #> 6 NA NA NA #> 7 " " " " NA #> 8 "1" NA "7" #> 9 NA NA " " #> 10 NA NA " "
mutate_all_na_character(data = test_data, blank = FALSE, empty = FALSE)
#> # A tibble: 10 x 3 #> A B C #> <chr> <chr> <chr> #> 1 "3" NA " " #> 2 "2" NA "9" #> 3 NA NA "8" #> 4 NA NA NA #> 5 NA NA " " #> 6 NA NA "" #> 7 " " " " "" #> 8 "1" NA "7" #> 9 "" NA " " #> 10 NA NA " "
mutate_all_na_character(data = test_data, na_str = FALSE)
#> # A tibble: 10 x 3 #> A B C #> <chr> <chr> <chr> #> 1 3 NA NA #> 2 2 NA 9 #> 3 NA NA 8 #> 4 NA NA NA #> 5 NA NA NA #> 6 NA NA NA #> 7 NA NA NA #> 8 1 NA 7 #> 9 NA NA NA #> 10 NA NA NA