Summarize Functions

Arguments

data	A dataframe or tibble.
grouper	(Optional) Column to group by.
names_to	Passed to `tidyr::pivot_longer()`.
values_to	Passed to `tidyr::pivot_longer()`.
na.rm	Set to TRUE to remove true NA values.
Examples

library(tidyverse)
test_data <- 
        tibble(
                Group = sample(c("Apple", "Pear"), size = 10, replace = TRUE),
                A     = sample(c(NA_integer_, 1:3), size = 10, replace = TRUE),
                B     = sample(c(NA_integer_, 4:6), size = 10, replace = TRUE),
                C     = sample(c(NA_real_, seq(from = 6.01, to = 6.09, by = 0.01)), size = 10, replace = TRUE)
        )


summarize_variables(data = test_data,
                    incl_num_calc = FALSE)
#> # A tibble: 4 x 7
#>   Variable COUNT DISTINCT_COUNT NA_COUNT NA_STR_COUNT BLANK_COUNT
#>   <chr>    <int>          <int>    <int>        <int>       <int>
#> 1 A           10              3        5            0           0
#> 2 B           10              3        3            0           0
#> 3 C           10              6        0            0           0
#> 4 Group       10              2        0            0           0
#> # … with 1 more variable: DISTINCT_VALUES <chr>

summarize_variables(data = test_data,
                    incl_num_calc = TRUE)
#> $SUMMARY
#> # A tibble: 4 x 7
#>   Variable COUNT DISTINCT_COUNT NA_COUNT NA_STR_COUNT BLANK_COUNT
#>   <chr>    <int>          <int>    <int>        <int>       <int>
#> 1 A           10              3        5            0           0
#> 2 B           10              3        3            0           0
#> 3 C           10              6        0            0           0
#> 4 Group       10              2        0            0           0
#> # … with 1 more variable: DISTINCT_VALUES <chr>
#> 
#> $NUMERIC_CALCULATIONS
#> # A tibble: 3 x 17
#>   Variable  MEAN MEAN_NA MEDIAN MEDIAN_NA     SD   SD_NA   MAX MAX_NA   MIN
#>   <chr>    <dbl>   <dbl>  <dbl>     <dbl>  <dbl>   <dbl> <dbl>  <dbl> <dbl>
#> 1 A         2.2    NA      3        NA    1.10   NA       3     NA     1   
#> 2 B         5.43   NA      5        NA    0.535  NA       6     NA     5   
#> 3 C         6.04    6.04   6.04      6.04 0.0200  0.0200  6.07   6.07  6.02
#> # … with 7 more variables: MIN_NA <dbl>, SUM <dbl>, SUM_NA <dbl>,
#> #   DISTINCT_LENGTH <int>, NA_LENGTH <int>, BLANK_LENGTH <int>,
#> #   DISTINCT_STR <chr>
#> 

summarize_variables(data = test_data,
                    incl_num_calc = TRUE,
                    grouper = Group)
#> $SUMMARY
#> # A tibble: 6 x 8
#> # Groups:   Group [2]
#>   Group Variable COUNT DISTINCT_COUNT NA_COUNT NA_STR_COUNT BLANK_COUNT
#>   <chr> <chr>    <int>          <int>    <int>        <int>       <int>
#> 1 Apple A            6              3        3            0           0
#> 2 Apple B            6              3        3            0           0
#> 3 Apple C            6              5        0            0           0
#> 4 Pear  A            4              2        2            0           0
#> 5 Pear  B            4              2        0            0           0
#> 6 Pear  C            4              4        0            0           0
#> # … with 1 more variable: DISTINCT_VALUES <chr>
#> 
#> $NUMERIC_CALCULATIONS
#> # A tibble: 6 x 18
#> # Groups:   Group [2]
#>   Group Variable  MEAN MEAN_NA MEDIAN MEDIAN_NA     SD   SD_NA   MAX MAX_NA
#>   <chr> <chr>    <dbl>   <dbl>  <dbl>     <dbl>  <dbl>   <dbl> <dbl>  <dbl>
#> 1 Apple A         1.67   NA      1        NA    1.15   NA       3     NA   
#> 2 Apple B         5.33   NA      5        NA    0.577  NA       6     NA   
#> 3 Apple C         6.04    6.04   6.04      6.04 0.0214  0.0214  6.07   6.07
#> 4 Pear  A         3      NA      3        NA    0      NA       3     NA   
#> 5 Pear  B         5.5     5.5    5.5       5.5  0.577   0.577   6      6   
#> 6 Pear  C         6.04    6.04   6.04      6.04 0.0208  0.0208  6.07   6.07
#> # … with 8 more variables: MIN <dbl>, MIN_NA <dbl>, SUM <dbl>, SUM_NA <dbl>,
#> #   DISTINCT_LENGTH <int>, NA_LENGTH <int>, BLANK_LENGTH <int>,
#> #   DISTINCT_STR <chr>
#> 

summarize_variables(data = test_data,
                    incl_num_calc = FALSE,
                    grouper = Group)
#> # A tibble: 6 x 8
#> # Groups:   Group [2]
#>   Group Variable COUNT DISTINCT_COUNT NA_COUNT NA_STR_COUNT BLANK_COUNT
#>   <chr> <chr>    <int>          <int>    <int>        <int>       <int>
#> 1 Apple A            6              3        3            0           0
#> 2 Apple B            6              3        3            0           0
#> 3 Apple C            6              5        0            0           0
#> 4 Pear  A            4              2        2            0           0
#> 5 Pear  B            4              2        0            0           0
#> 6 Pear  C            4              4        0            0           0
#> # … with 1 more variable: DISTINCT_VALUES <chr>


observation_count(data = test_data)
#> # A tibble: 10 x 5
#>    Group     A     B     C     n
#>    <chr> <int> <int> <dbl> <int>
#>  1 Apple     1     5  6.03     1
#>  2 Apple     1    NA  6.05     1
#>  3 Apple     3     5  6.02     1
#>  4 Apple    NA     6  6.06     1
#>  5 Apple    NA    NA  6.02     1
#>  6 Apple    NA    NA  6.07     1
#>  7 Pear      3     5  6.07     1
#>  8 Pear      3     6  6.02     1
#>  9 Pear     NA     5  6.05     1
#> 10 Pear     NA     6  6.04     1


value_count(data = test_data)
#> # A tibble: 14 x 3
#>    Variable Value     n
#>    <chr>    <chr> <int>
#>  1 Group    Apple     6
#>  2 A        NA        5
#>  3 B        5         4
#>  4 Group    Pear      4
#>  5 A        3         3
#>  6 B        6         3
#>  7 B        NA        3
#>  8 C        6.02      3
#>  9 A        1         2
#> 10 C        6.05      2
#> 11 C        6.07      2
#> 12 C        6.03      1
#> 13 C        6.04      1
#> 14 C        6.06      1
value_count(data = test_data,
            grouper = Group)
#> # A tibble: 19 x 4
#>    Group Variable Value     n
#>    <chr> <chr>    <chr> <int>
#>  1 Apple A        NA        3
#>  2 Apple B        NA        3
#>  3 Apple A        1         2
#>  4 Apple B        5         2
#>  5 Apple C        6.02      2
#>  6 Pear  A        3         2
#>  7 Pear  A        NA        2
#>  8 Pear  B        5         2
#>  9 Pear  B        6         2
#> 10 Apple A        3         1
#> 11 Apple B        6         1
#> 12 Apple C        6.03      1
#> 13 Apple C        6.05      1
#> 14 Apple C        6.06      1
#> 15 Apple C        6.07      1
#> 16 Pear  C        6.02      1
#> 17 Pear  C        6.04      1
#> 18 Pear  C        6.05      1
#> 19 Pear  C        6.07      1