Value Count for a Dataframe

Pivot a data frame to long format and count how many times each distinct value occurs in every column, optionally grouping the counts by a `grouper` column.

value_count(
  data,
  names_to = "Variable",
  values_to = "Value",
  desc = TRUE,
  grouper
)

Arguments

data	A dataframe or tibble.
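names_to	Name of the output column that holds the original column names. Passed to `tidyr::pivot_longer()`.
values_to	Name of the output column that holds the values. Passed to `tidyr::pivot_longer()`.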
desc	If TRUE, the output is arranged in descending order of the count column `n`. Otherwise it is arranged in ascending order.
grouper	(Optional) Column to group the counts by.

Examples

library(tidyverse)
test_data <- 
        tibble(
                Group = sample(c("Apple", "Pear"), size = 10, replace = TRUE),
                A     = sample(c(NA_integer_, 1:3), size = 10, replace = TRUE),
                B     = sample(c(NA_integer_, 4:6), size = 10, replace = TRUE),
                C     = sample(c(NA_real_, seq(from = 6.01, to = 6.09, by = 0.01)), size = 10, replace = TRUE)
        )


summarize_variables(data = test_data,
                    incl_num_calc = FALSE)
#> # A tibble: 4 x 7
#>   Variable COUNT DISTINCT_COUNT NA_COUNT NA_STR_COUNT BLANK_COUNT
#>   <chr>    <int>          <int>    <int>        <int>       <int>
#> 1 A           10              4        2            0           0
#> 2 B           10              4        1            0           0
#> 3 C           10              8        1            0           0
#> 4 Group       10              2        0            0           0
#> # … with 1 more variable: DISTINCT_VALUES <chr>

summarize_variables(data = test_data,
                    incl_num_calc = TRUE)
#> $SUMMARY
#> # A tibble: 4 x 7
#>   Variable COUNT DISTINCT_COUNT NA_COUNT NA_STR_COUNT BLANK_COUNT
#>   <chr>    <int>          <int>    <int>        <int>       <int>
#> 1 A           10              4        2            0           0
#> 2 B           10              4        1            0           0
#> 3 C           10              8        1            0           0
#> 4 Group       10              2        0            0           0
#> # … with 1 more variable: DISTINCT_VALUES <chr>
#> 
#> $NUMERIC_CALCULATIONS
#> # A tibble: 3 x 17
#>   Variable  MEAN MEAN_NA MEDIAN MEDIAN_NA     SD SD_NA   MAX MAX_NA   MIN MIN_NA
#>   <chr>    <dbl>   <dbl>  <dbl>     <dbl>  <dbl> <dbl> <dbl>  <dbl> <dbl>  <dbl>
#> 1 A         1.75      NA   1.5         NA 0.886     NA  3        NA  1        NA
#> 2 B         5.33      NA   6           NA 0.866     NA  6        NA  4        NA
#> 3 C         6.05      NA   6.04        NA 0.0307    NA  6.09     NA  6.01     NA
#> # … with 6 more variables: SUM <dbl>, SUM_NA <dbl>, DISTINCT_LENGTH <int>,
#> #   NA_LENGTH <int>, BLANK_LENGTH <int>, DISTINCT_STR <chr>
#> 

summarize_variables(data = test_data,
                    incl_num_calc = TRUE,
                    grouper = Group)
#> $SUMMARY
#> # A tibble: 6 x 8
#> # Groups:   Group [2]
#>   Group Variable COUNT DISTINCT_COUNT NA_COUNT NA_STR_COUNT BLANK_COUNT
#>   <chr> <chr>    <int>          <int>    <int>        <int>       <int>
#> 1 Apple A            6              3        1            0           0
#> 2 Apple B            6              4        1            0           0
#> 3 Apple C            6              6        1            0           0
#> 4 Pear  A            4              3        1            0           0
#> 5 Pear  B            4              2        0            0           0
#> 6 Pear  C            4              3        0            0           0
#> # … with 1 more variable: DISTINCT_VALUES <chr>
#> 
#> $NUMERIC_CALCULATIONS
#> # A tibble: 6 x 18
#> # Groups:   Group [2]
#>   Group Variable  MEAN MEAN_NA MEDIAN MEDIAN_NA     SD   SD_NA   MAX MAX_NA
#>   <chr> <chr>    <dbl>   <dbl>  <dbl>     <dbl>  <dbl>   <dbl> <dbl>  <dbl>
#> 1 Apple A         1.2    NA      1        NA    0.447  NA       2     NA   
#> 2 Apple B         5.2    NA      5        NA    0.837  NA       6     NA   
#> 3 Apple C         6.06   NA      6.07     NA    0.0311 NA       6.09  NA   
#> 4 Pear  A         2.67   NA      3        NA    0.577  NA       3     NA   
#> 5 Pear  B         5.5     5.5    6         6    1       1       6      6   
#> 6 Pear  C         6.04    6.04   6.04      6.04 0.0332  0.0332  6.09   6.09
#> # … with 8 more variables: MIN <dbl>, MIN_NA <dbl>, SUM <dbl>, SUM_NA <dbl>,
#> #   DISTINCT_LENGTH <int>, NA_LENGTH <int>, BLANK_LENGTH <int>,
#> #   DISTINCT_STR <chr>
#> 

summarize_variables(data = test_data,
                    incl_num_calc = FALSE,
                    grouper = Group)
#> # A tibble: 6 x 8
#> # Groups:   Group [2]
#>   Group Variable COUNT DISTINCT_COUNT NA_COUNT NA_STR_COUNT BLANK_COUNT
#>   <chr> <chr>    <int>          <int>    <int>        <int>       <int>
#> 1 Apple A            6              3        1            0           0
#> 2 Apple B            6              4        1            0           0
#> 3 Apple C            6              6        1            0           0
#> 4 Pear  A            4              3        1            0           0
#> 5 Pear  B            4              2        0            0           0
#> 6 Pear  C            4              3        0            0           0
#> # … with 1 more variable: DISTINCT_VALUES <chr>


observation_count(data = test_data)
#> # A tibble: 10 x 5
#>    Group     A     B     C     n
#>    <chr> <int> <int> <dbl> <int>
#>  1 Apple     1     5  6.02     1
#>  2 Apple     1     6  6.08     1
#>  3 Apple     1     6 NA        1
#>  4 Apple     1    NA  6.07     1
#>  5 Apple     2     4  6.03     1
#>  6 Apple    NA     5  6.09     1
#>  7 Pear      2     6  6.04     1
#>  8 Pear      3     4  6.04     1
#>  9 Pear      3     6  6.09     1
#> 10 Pear     NA     6  6.01     1


value_count(data = test_data)
#> # A tibble: 18 x 3
#>    Variable Value     n
#>    <chr>    <chr> <int>
#>  1 Group    Apple     6
#>  2 B        6         5
#>  3 A        1         4
#>  4 Group    Pear      4
#>  5 A        2         2
#>  6 A        3         2
#>  7 A        NA        2
#>  8 B        4         2
#>  9 B        5         2
#> 10 C        6.04      2
#> 11 C        6.09      2
#> 12 B        NA        1
#> 13 C        6.01      1
#> 14 C        6.02      1
#> 15 C        6.03      1
#> 16 C        6.07      1
#> 17 C        6.08      1
#> 18 C        NA        1
value_count(data = test_data,
            grouper = Group)
#> # A tibble: 21 x 4
#>    Group Variable Value     n
#>    <chr> <chr>    <chr> <int>
#>  1 Apple A        1         4
#>  2 Pear  B        6         3
#>  3 Apple B        5         2
#>  4 Apple B        6         2
#>  5 Pear  A        3         2
#>  6 Pear  C        6.04      2
#>  7 Apple A        2         1
#>  8 Apple A        NA        1
#>  9 Apple B        4         1
#> 10 Apple B        NA        1
#> # … with 11 more rows

Arguments

See also

Examples