Summarize the Variables of a Data Frame

summarize_variables(
  data,
  incl_num_calc = TRUE,
  names_to = "Variable",
  values_to = "Value",
  grouper
)

Arguments

data

A dataframe or tibble.

incl_num_calc

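If `TRUE`, returns a list with two elements: `SUMMARY` (the per-variable summary) and `NUMERIC_CALCULATIONS` (summary statistics for the numeric columns of `data`). If `FALSE`, returns only the per-variable summary tibble.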

names_to

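Name of the output column that holds the original column names (default `"Variable"`). Passed to the `names_to` argument of `tidyr::pivot_longer()`.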

values_to

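Name of the output column that holds the cell values (default `"Value"`). Passed to the `values_to` argument of `tidyr::pivot_longer()`.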

grouper

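(Optional) Unquoted name of a column in `data` to group by, e.g. `grouper = Group`. When supplied, the summaries are computed separately for each group.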

See also

Examples

library(tidyverse)
test_data <- tibble(
  Group = sample(c("Apple", "Pear"), size = 10, replace = TRUE),
  A = sample(c(NA_integer_, 1:3), size = 10, replace = TRUE),
  B = sample(c(NA_integer_, 4:6), size = 10, replace = TRUE),
  C = sample(c(NA_real_, seq(from = 6.01, to = 6.09, by = 0.01)), size = 10, replace = TRUE)
)
summarize_variables(data = test_data, incl_num_calc = FALSE)
#> # A tibble: 4 x 7 #> Variable COUNT DISTINCT_COUNT NA_COUNT NA_STR_COUNT BLANK_COUNT #> <chr> <int> <int> <int> <int> <int> #> 1 A 10 4 4 0 0 #> 2 B 10 4 1 0 0 #> 3 C 10 6 0 0 0 #> 4 Group 10 2 0 0 0 #> # … with 1 more variable: DISTINCT_VALUES <chr>
summarize_variables(data = test_data, incl_num_calc = TRUE)
#> $SUMMARY #> # A tibble: 4 x 7 #> Variable COUNT DISTINCT_COUNT NA_COUNT NA_STR_COUNT BLANK_COUNT #> <chr> <int> <int> <int> <int> <int> #> 1 A 10 4 4 0 0 #> 2 B 10 4 1 0 0 #> 3 C 10 6 0 0 0 #> 4 Group 10 2 0 0 0 #> # … with 1 more variable: DISTINCT_VALUES <chr> #> #> $NUMERIC_CALCULATIONS #> # A tibble: 3 x 17 #> Variable MEAN MEAN_NA MEDIAN MEDIAN_NA SD SD_NA MAX MAX_NA MIN #> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 A 2 NA 2 NA 0.632 NA 3 NA 1 #> 2 B 4.89 NA 5 NA 0.782 NA 6 NA 4 #> 3 C 6.05 6.05 6.04 6.04 0.0295 0.0295 6.09 6.09 6.01 #> # … with 7 more variables: MIN_NA <dbl>, SUM <dbl>, SUM_NA <dbl>, #> # DISTINCT_LENGTH <int>, NA_LENGTH <int>, BLANK_LENGTH <int>, #> # DISTINCT_STR <chr> #>
summarize_variables(data = test_data, incl_num_calc = TRUE, grouper = Group)
#> $SUMMARY #> # A tibble: 6 x 8 #> # Groups: Group [2] #> Group Variable COUNT DISTINCT_COUNT NA_COUNT NA_STR_COUNT BLANK_COUNT #> <chr> <chr> <int> <int> <int> <int> <int> #> 1 Apple A 4 2 2 0 0 #> 2 Apple B 4 3 0 0 0 #> 3 Apple C 4 3 0 0 0 #> 4 Pear A 6 4 2 0 0 #> 5 Pear B 6 3 1 0 0 #> 6 Pear C 6 4 0 0 0 #> # … with 1 more variable: DISTINCT_VALUES <chr> #> #> $NUMERIC_CALCULATIONS #> # A tibble: 6 x 18 #> # Groups: Group [2] #> Group Variable MEAN MEAN_NA MEDIAN MEDIAN_NA SD SD_NA MAX MAX_NA #> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 Apple A 2 NA 2 NA 0 NA 2 NA #> 2 Apple B 5.25 5.25 5.5 5.5 0.957 0.957 6 6 #> 3 Apple C 6.03 6.03 6.02 6.02 0.0386 0.0386 6.09 6.09 #> 4 Pear A 2 NA 2 NA 0.816 NA 3 NA #> 5 Pear B 4.6 NA 5 NA 0.548 NA 5 NA #> 6 Pear C 6.06 6.06 6.04 6.04 0.0207 0.0207 6.09 6.09 #> # … with 8 more variables: MIN <dbl>, MIN_NA <dbl>, SUM <dbl>, SUM_NA <dbl>, #> # DISTINCT_LENGTH <int>, NA_LENGTH <int>, BLANK_LENGTH <int>, #> # DISTINCT_STR <chr> #>
summarize_variables(data = test_data, incl_num_calc = FALSE, grouper = Group)
#> # A tibble: 6 x 8 #> # Groups: Group [2] #> Group Variable COUNT DISTINCT_COUNT NA_COUNT NA_STR_COUNT BLANK_COUNT #> <chr> <chr> <int> <int> <int> <int> <int> #> 1 Apple A 4 2 2 0 0 #> 2 Apple B 4 3 0 0 0 #> 3 Apple C 4 3 0 0 0 #> 4 Pear A 6 4 2 0 0 #> 5 Pear B 6 3 1 0 0 #> 6 Pear C 6 4 0 0 0 #> # … with 1 more variable: DISTINCT_VALUES <chr>
observation_count(data = test_data)
#> # A tibble: 10 x 5 #> Group A B C n #> <chr> <int> <int> <dbl> <int> #> 1 Apple 2 4 6.01 1 #> 2 Apple 2 6 6.01 1 #> 3 Apple NA 5 6.02 1 #> 4 Apple NA 6 6.09 1 #> 5 Pear 1 4 6.04 1 #> 6 Pear 2 4 6.04 1 #> 7 Pear 2 NA 6.04 1 #> 8 Pear 3 5 6.07 1 #> 9 Pear NA 5 6.05 1 #> 10 Pear NA 5 6.09 1
value_count(data = test_data)
#> # A tibble: 16 x 3 #> Variable Value n #> <chr> <chr> <int> #> 1 Group Pear 6 #> 2 A 2 4 #> 3 A NA 4 #> 4 B 5 4 #> 5 Group Apple 4 #> 6 B 4 3 #> 7 C 6.04 3 #> 8 B 6 2 #> 9 C 6.01 2 #> 10 C 6.09 2 #> 11 A 1 1 #> 12 A 3 1 #> 13 B NA 1 #> 14 C 6.02 1 #> 15 C 6.05 1 #> 16 C 6.07 1
value_count(data = test_data, grouper = Group)
#> # A tibble: 19 x 4 #> Group Variable Value n #> <chr> <chr> <chr> <int> #> 1 Pear B 5 3 #> 2 Pear C 6.04 3 #> 3 Apple A 2 2 #> 4 Apple A NA 2 #> 5 Apple B 6 2 #> 6 Apple C 6.01 2 #> 7 Pear A 2 2 #> 8 Pear A NA 2 #> 9 Pear B 4 2 #> 10 Apple B 4 1 #> 11 Apple B 5 1 #> 12 Apple C 6.02 1 #> 13 Apple C 6.09 1 #> 14 Pear A 1 1 #> 15 Pear A 3 1 #> 16 Pear B NA 1 #> 17 Pear C 6.05 1 #> 18 Pear C 6.07 1 #> 19 Pear C 6.09 1