Summarize Functions
Argument | Description |
---|---|
data | A data frame or tibble. |
grouper | (Optional) Column to group by. |
names_to | Passed to `tidyr::pivot_longer()`. |
values_to | Passed to `tidyr::pivot_longer()`. |
na.rm | If `TRUE`, true `NA` values are removed. |
```r
library(tidyverse)

test_data <- tibble(
  Group = sample(c("Apple", "Pear"), size = 10, replace = TRUE),
  A = sample(c(NA_integer_, 1:3), size = 10, replace = TRUE),
  B = sample(c(NA_integer_, 4:6), size = 10, replace = TRUE),
  C = sample(c(NA_real_, seq(from = 6.01, to = 6.09, by = 0.01)),
             size = 10, replace = TRUE)
)

summarize_variables(data = test_data, incl_num_calc = FALSE)
#> # A tibble: 4 x 7
#>   Variable COUNT DISTINCT_COUNT NA_COUNT NA_STR_COUNT BLANK_COUNT
#>   <chr>    <int>          <int>    <int>        <int>       <int>
#> 1 A           10              3        5            0           0
#> 2 B           10              3        3            0           0
#> 3 C           10              6        0            0           0
#> 4 Group       10              2        0            0           0
#> # … with 1 more variable: DISTINCT_VALUES <chr>

#> $SUMMARY
#> # A tibble: 4 x 7
#>   Variable COUNT DISTINCT_COUNT NA_COUNT NA_STR_COUNT BLANK_COUNT
#>   <chr>    <int>          <int>    <int>        <int>       <int>
#> 1 A           10              3        5            0           0
#> 2 B           10              3        3            0           0
#> 3 C           10              6        0            0           0
#> 4 Group       10              2        0            0           0
#> # … with 1 more variable: DISTINCT_VALUES <chr>
#>
#> $NUMERIC_CALCULATIONS
#> # A tibble: 3 x 17
#>   Variable  MEAN MEAN_NA MEDIAN MEDIAN_NA     SD  SD_NA   MAX MAX_NA   MIN
#>   <chr>    <dbl>   <dbl>  <dbl>     <dbl>  <dbl>  <dbl> <dbl>  <dbl> <dbl>
#> 1 A          2.2      NA      3        NA   1.10     NA     3     NA     1
#> 2 B         5.43      NA      5        NA  0.535     NA     6     NA     5
#> 3 C         6.04    6.04   6.04      6.04 0.0200 0.0200  6.07   6.07  6.02
#> # … with 7 more variables: MIN_NA <dbl>, SUM <dbl>, SUM_NA <dbl>,
#> #   DISTINCT_LENGTH <int>, NA_LENGTH <int>, BLANK_LENGTH <int>,
#> #   DISTINCT_STR <chr>
#>

#> $SUMMARY
#> # A tibble: 6 x 8
#> # Groups: Group [2]
#>   Group Variable COUNT DISTINCT_COUNT NA_COUNT NA_STR_COUNT BLANK_COUNT
#>   <chr> <chr>    <int>          <int>    <int>        <int>       <int>
#> 1 Apple A            6              3        3            0           0
#> 2 Apple B            6              3        3            0           0
#> 3 Apple C            6              5        0            0           0
#> 4 Pear  A            4              2        2            0           0
#> 5 Pear  B            4              2        0            0           0
#> 6 Pear  C            4              4        0            0           0
#> # … with 1 more variable: DISTINCT_VALUES <chr>
#>
#> $NUMERIC_CALCULATIONS
#> # A tibble: 6 x 18
#> # Groups: Group [2]
#>   Group Variable  MEAN MEAN_NA MEDIAN MEDIAN_NA     SD  SD_NA   MAX MAX_NA
#>   <chr> <chr>    <dbl>   <dbl>  <dbl>     <dbl>  <dbl>  <dbl> <dbl>  <dbl>
#> 1 Apple A         1.67      NA      1        NA   1.15     NA     3     NA
#> 2 Apple B         5.33      NA      5        NA  0.577     NA     6     NA
#> 3 Apple C         6.04    6.04   6.04      6.04 0.0214 0.0214  6.07   6.07
#> 4 Pear  A            3      NA      3        NA      0     NA     3     NA
#> 5 Pear  B          5.5     5.5    5.5       5.5  0.577  0.577     6      6
#> 6 Pear  C         6.04    6.04   6.04      6.04 0.0208 0.0208  6.07   6.07
#> # … with 8 more variables: MIN <dbl>, MIN_NA <dbl>, SUM <dbl>, SUM_NA <dbl>,
#> #   DISTINCT_LENGTH <int>, NA_LENGTH <int>, BLANK_LENGTH <int>,
#> #   DISTINCT_STR <chr>
#>

#> # A tibble: 6 x 8
#> # Groups: Group [2]
#>   Group Variable COUNT DISTINCT_COUNT NA_COUNT NA_STR_COUNT BLANK_COUNT
#>   <chr> <chr>    <int>          <int>    <int>        <int>       <int>
#> 1 Apple A            6              3        3            0           0
#> 2 Apple B            6              3        3            0           0
#> 3 Apple C            6              5        0            0           0
#> 4 Pear  A            4              2        2            0           0
#> 5 Pear  B            4              2        0            0           0
#> 6 Pear  C            4              4        0            0           0
#> # … with 1 more variable: DISTINCT_VALUES <chr>

#> # A tibble: 10 x 5
#>    Group     A     B     C     n
#>    <chr> <int> <int> <dbl> <int>
#>  1 Apple     1     5  6.03     1
#>  2 Apple     1    NA  6.05     1
#>  3 Apple     3     5  6.02     1
#>  4 Apple    NA     6  6.06     1
#>  5 Apple    NA    NA  6.02     1
#>  6 Apple    NA    NA  6.07     1
#>  7 Pear      3     5  6.07     1
#>  8 Pear      3     6  6.02     1
#>  9 Pear     NA     5  6.05     1
#> 10 Pear     NA     6  6.04     1

#> # A tibble: 14 x 3
#>    Variable Value     n
#>    <chr>    <chr> <int>
#>  1 Group    Apple     6
#>  2 A        NA        5
#>  3 B        5         4
#>  4 Group    Pear      4
#>  5 A        3         3
#>  6 B        6         3
#>  7 B        NA        3
#>  8 C        6.02      3
#>  9 A        1         2
#> 10 C        6.05      2
#> 11 C        6.07      2
#> 12 C        6.03      1
#> 13 C        6.04      1
#> 14 C        6.06      1

#> # A tibble: 19 x 4
#>    Group Variable Value     n
#>    <chr> <chr>    <chr> <int>
#>  1 Apple A        NA        3
#>  2 Apple B        NA        3
#>  3 Apple A        1         2
#>  4 Apple B        5         2
#>  5 Apple C        6.02      2
#>  6 Pear  A        3         2
#>  7 Pear  A        NA        2
#>  8 Pear  B        5         2
#>  9 Pear  B        6         2
#> 10 Apple A        3         1
#> 11 Apple B        6         1
#> 12 Apple C        6.03      1
#> 13 Apple C        6.05      1
#> 14 Apple C        6.06      1
#> 15 Apple C        6.07      1
#> 16 Pear  C        6.02      1
#> 17 Pear  C        6.04      1
#> 18 Pear  C        6.05      1
#> 19 Pear  C        6.07      1
```