Select only the columns that two data frames have in common before calling `setdiff()`, so that the set difference is computed on matching columns.

Compare the column names and column positions of two data frames.

setdiff_col_match(x, y)

compare_cols(x, y)

Arguments

x

A dataframe or tibble.

y

A dataframe or tibble.

See also

map, reduce, select, reexports

map, reduce, select, reexports

Other diff functions: setdiff_nrow()

Other diff functions: setdiff_nrow()

Examples

library(tidyverse)

test_data <- tibble(
  Group = sample(c("Apple", "Pear"), size = 10, replace = TRUE),
  A = sample(c(NA_integer_, 1:3), size = 10, replace = TRUE),
  B = sample(c(NA_integer_, 4:6), size = 10, replace = TRUE),
  C = sample(c(NA_real_, seq(from = 6.01, to = 6.09, by = 0.01)), size = 10, replace = TRUE),
  D = sample(c(NA, TRUE, FALSE), size = 10, replace = TRUE)
) %>%
  dplyr::mutate(E = B)

test_data2 <- dplyr::bind_rows(
  test_data[1:5,],
  tibble(
    Group = sample(c("Apple", "Pear"), size = 5, replace = TRUE),
    A = sample(c(NA_integer_, 1:3), size = 5, replace = TRUE),
    B = sample(c(NA_integer_, 4:6), size = 5, replace = TRUE),
    C = sample(c(NA_real_, seq(from = 6.01, to = 6.09, by = 0.01)), size = 5, replace = TRUE),
    D = sample(c(NA, TRUE, FALSE), size = 5, replace = TRUE)
  ) %>%
    dplyr::mutate(E = B)
)

# Rows difference
setdiff_nrow(x = test_data, y = test_data2)
#> [1] 5
setdiff_nrow(x = test_data2, y = test_data)
#> [1] 5
# Setdiff with a Column Match
setdiff_col_match(x = test_data, y = test_data2 %>% dplyr::mutate(F = 1:10) %>% dplyr::mutate(G = 11:20))
#> # A tibble: 5 x 6 #> Group A B C D E #> <chr> <int> <int> <dbl> <lgl> <int> #> 1 Apple NA 5 6.03 FALSE 5 #> 2 Apple 3 6 6.06 NA 6 #> 3 Apple 2 4 6.03 TRUE 4 #> 4 Pear 3 5 6.06 NA 5 #> 5 Pear 3 5 NA NA 5
setdiff_col_match(x = test_data2 %>% dplyr::mutate(F = 1:10) %>% dplyr::mutate(G = 11:20), y = test_data)
#> # A tibble: 5 x 6 #> Group A B C D E #> <chr> <int> <int> <dbl> <lgl> <int> #> 1 Apple 2 6 NA TRUE 6 #> 2 Pear 3 5 6.05 TRUE 5 #> 3 Apple 1 NA 6.09 NA NA #> 4 Apple NA 4 6.08 FALSE 4 #> 5 Apple NA 6 6.01 FALSE 6
setdiff_col_match(x = test_data, y = test_data2 %>% rename(F = E))
#> # A tibble: 5 x 5 #> Group A B C D #> <chr> <int> <int> <dbl> <lgl> #> 1 Apple NA 5 6.03 FALSE #> 2 Apple 3 6 6.06 NA #> 3 Apple 2 4 6.03 TRUE #> 4 Pear 3 5 6.06 NA #> 5 Pear 3 5 NA NA
setdiff_col_match(y = test_data, x = test_data2 %>% rename(F = E))
#> # A tibble: 5 x 5 #> Group A B C D #> <chr> <int> <int> <dbl> <lgl> #> 1 Apple 2 6 NA TRUE #> 2 Pear 3 5 6.05 TRUE #> 3 Apple 1 NA 6.09 NA #> 4 Apple NA 4 6.08 FALSE #> 5 Apple NA 6 6.01 FALSE
# Compare Columns
compare_cols(x = test_data, y = test_data2)
#> # A tibble: 6 x 5 #> position_x column_x x_and_y_columns position_y column_y #> <int> <chr> <chr> <int> <chr> #> 1 1 Group Group 1 Group #> 2 2 A A 2 A #> 3 3 B B 3 B #> 4 4 C C 4 C #> 5 5 D D 5 D #> 6 6 E E 6 E
compare_cols(x = test_data, y = test_data2 %>% dplyr::mutate(F = 1:10) %>% dplyr::mutate(G = 11:20))
#> # A tibble: 8 x 5 #> position_x column_x x_and_y_columns position_y column_y #> <int> <chr> <chr> <int> <chr> #> 1 1 Group Group 1 Group #> 2 2 A A 2 A #> 3 3 B B 3 B #> 4 4 C C 4 C #> 5 5 D D 5 D #> 6 6 E E 6 E #> 7 NA NA F 7 F #> 8 NA NA G 8 G
compare_cols(x = test_data2 %>% dplyr::mutate(F = 1:10) %>% dplyr::mutate(G = 11:20), y = test_data)
#> # A tibble: 8 x 5 #> position_x column_x x_and_y_columns position_y column_y #> <int> <chr> <chr> <int> <chr> #> 1 1 Group Group 1 Group #> 2 2 A A 2 A #> 3 3 B B 3 B #> 4 4 C C 4 C #> 5 5 D D 5 D #> 6 6 E E 6 E #> 7 7 F F NA NA #> 8 8 G G NA NA
compare_cols(x = test_data, y = test_data2 %>% rename(F = E))
#> # A tibble: 7 x 5 #> position_x column_x x_and_y_columns position_y column_y #> <int> <chr> <chr> <int> <chr> #> 1 1 Group Group 1 Group #> 2 2 A A 2 A #> 3 3 B B 3 B #> 4 4 C C 4 C #> 5 5 D D 5 D #> 6 6 E E NA NA #> 7 NA NA F 6 F
compare_cols(y = test_data, x = test_data2 %>% rename(F = E))
#> # A tibble: 7 x 5 #> position_x column_x x_and_y_columns position_y column_y #> <int> <chr> <chr> <int> <chr> #> 1 1 Group Group 1 Group #> 2 2 A A 2 A #> 3 3 B B 3 B #> 4 4 C C 4 C #> 5 5 D D 5 D #> 6 6 F F NA NA #> 7 NA NA E 6 E
library(tidyverse)

test_data <- tibble(
  Group = sample(c("Apple", "Pear"), size = 10, replace = TRUE),
  A = sample(c(NA_integer_, 1:3), size = 10, replace = TRUE),
  B = sample(c(NA_integer_, 4:6), size = 10, replace = TRUE),
  C = sample(c(NA_real_, seq(from = 6.01, to = 6.09, by = 0.01)), size = 10, replace = TRUE),
  D = sample(c(NA, TRUE, FALSE), size = 10, replace = TRUE)
) %>%
  dplyr::mutate(E = B)

test_data2 <- dplyr::bind_rows(
  test_data[1:5,],
  tibble(
    Group = sample(c("Apple", "Pear"), size = 5, replace = TRUE),
    A = sample(c(NA_integer_, 1:3), size = 5, replace = TRUE),
    B = sample(c(NA_integer_, 4:6), size = 5, replace = TRUE),
    C = sample(c(NA_real_, seq(from = 6.01, to = 6.09, by = 0.01)), size = 5, replace = TRUE),
    D = sample(c(NA, TRUE, FALSE), size = 5, replace = TRUE)
  ) %>%
    dplyr::mutate(E = B)
)

# Rows difference
setdiff_nrow(x = test_data, y = test_data2)
#> [1] 5
setdiff_nrow(x = test_data2, y = test_data)
#> [1] 5
# Setdiff with a Column Match
setdiff_col_match(x = test_data, y = test_data2 %>% dplyr::mutate(F = 1:10) %>% dplyr::mutate(G = 11:20))
#> # A tibble: 5 x 6 #> Group A B C D E #> <chr> <int> <int> <dbl> <lgl> <int> #> 1 Apple NA 5 6.07 NA 5 #> 2 Pear 1 NA 6.01 TRUE NA #> 3 Pear 1 6 NA TRUE 6 #> 4 Pear 3 4 6.02 NA 4 #> 5 Apple 1 4 6.07 FALSE 4
setdiff_col_match(x = test_data2 %>% dplyr::mutate(F = 1:10) %>% dplyr::mutate(G = 11:20), y = test_data)
#> # A tibble: 5 x 6 #> Group A B C D E #> <chr> <int> <int> <dbl> <lgl> <int> #> 1 Apple 2 4 6.02 NA 4 #> 2 Pear NA 4 6.07 FALSE 4 #> 3 Pear 1 5 6.03 FALSE 5 #> 4 Apple 2 4 NA TRUE 4 #> 5 Apple 1 5 6.06 TRUE 5
setdiff_col_match(x = test_data, y = test_data2 %>% rename(F = E))
#> # A tibble: 5 x 5 #> Group A B C D #> <chr> <int> <int> <dbl> <lgl> #> 1 Apple NA 5 6.07 NA #> 2 Pear 1 NA 6.01 TRUE #> 3 Pear 1 6 NA TRUE #> 4 Pear 3 4 6.02 NA #> 5 Apple 1 4 6.07 FALSE
setdiff_col_match(y = test_data, x = test_data2 %>% rename(F = E))
#> # A tibble: 5 x 5 #> Group A B C D #> <chr> <int> <int> <dbl> <lgl> #> 1 Apple 2 4 6.02 NA #> 2 Pear NA 4 6.07 FALSE #> 3 Pear 1 5 6.03 FALSE #> 4 Apple 2 4 NA TRUE #> 5 Apple 1 5 6.06 TRUE
# Compare Columns
compare_cols(x = test_data, y = test_data2)
#> # A tibble: 6 x 5 #> position_x column_x x_and_y_columns position_y column_y #> <int> <chr> <chr> <int> <chr> #> 1 1 Group Group 1 Group #> 2 2 A A 2 A #> 3 3 B B 3 B #> 4 4 C C 4 C #> 5 5 D D 5 D #> 6 6 E E 6 E
compare_cols(x = test_data, y = test_data2 %>% dplyr::mutate(F = 1:10) %>% dplyr::mutate(G = 11:20))
#> # A tibble: 8 x 5 #> position_x column_x x_and_y_columns position_y column_y #> <int> <chr> <chr> <int> <chr> #> 1 1 Group Group 1 Group #> 2 2 A A 2 A #> 3 3 B B 3 B #> 4 4 C C 4 C #> 5 5 D D 5 D #> 6 6 E E 6 E #> 7 NA NA F 7 F #> 8 NA NA G 8 G
compare_cols(x = test_data2 %>% dplyr::mutate(F = 1:10) %>% dplyr::mutate(G = 11:20), y = test_data)
#> # A tibble: 8 x 5 #> position_x column_x x_and_y_columns position_y column_y #> <int> <chr> <chr> <int> <chr> #> 1 1 Group Group 1 Group #> 2 2 A A 2 A #> 3 3 B B 3 B #> 4 4 C C 4 C #> 5 5 D D 5 D #> 6 6 E E 6 E #> 7 7 F F NA NA #> 8 8 G G NA NA
compare_cols(x = test_data, y = test_data2 %>% rename(F = E))
#> # A tibble: 7 x 5 #> position_x column_x x_and_y_columns position_y column_y #> <int> <chr> <chr> <int> <chr> #> 1 1 Group Group 1 Group #> 2 2 A A 2 A #> 3 3 B B 3 B #> 4 4 C C 4 C #> 5 5 D D 5 D #> 6 6 E E NA NA #> 7 NA NA F 6 F
compare_cols(y = test_data, x = test_data2 %>% rename(F = E))
#> # A tibble: 7 x 5 #> position_x column_x x_and_y_columns position_y column_y #> <int> <chr> <chr> <int> <chr> #> 1 1 Group Group 1 Group #> 2 2 A A 2 A #> 3 3 B B 3 B #> 4 4 C C 4 C #> 5 5 D D 5 D #> 6 6 F F NA NA #> 7 NA NA E 6 E