vignettes/explore-vocabulary-relationships-and-ancestors.Rmd
explore-vocabulary-relationships-and-ancestors.Rmd
There are 3 different types of relationships represented in the Concept Relationship table depending on whether they are hierarchical and if they define ancestry.
relationship_types <-
queryAthena(sql_statement =
"
SELECT DISTINCT
is_hierarchical,
defines_ancestry
FROM omop_vocabulary.relationship
")
#> [2021-05-26 16:09:43] Connected to 'athena'
#> [2021-05-26 16:09:43] Loading Cache...
#> [2021-05-26 16:09:43] Cached SQL:
#> SELECT DISTINCT
#> is_hierarchical,
#> defines_ancestry
#> FROM omop_vocabulary.relationship
#>
#> [2021-05-26 16:09:43] Cached resultset found...
#> [2021-05-26 16:09:43] Postgres connection to 'athena' closed
relationship_types
#> # A tibble: 4 x 2
#> is_hierarchical defines_ancestry
#> <chr> <chr>
#> 1 1 0
#> 2 0 1
#> 3 1 1
#> 4 0 0
The valid relationship types amongst each of the most commonly employed target controlled vocabularies used in OMOP are derived.
target_vocabularies <- c("SNOMED", "LOINC",
"RxNorm", "RxNorm Extension",
"HemOnc", "ATC")
SELECT DISTINCT
c.vocabulary_id,
c.concept_class_id AS concept_class_id_1,
c2.concept_class_id AS concept_class_id_2,
r.is_hierarchical,
r.defines_ancestry
FROM omop_vocabulary.concept c
INNER JOIN omop_vocabulary.concept_relationship cr
ON cr.concept_id_1 = c.concept_id
INNER JOIN omop_vocabulary.concept c2
ON cr.concept_id_2 = c2.concept_id
LEFT JOIN omop_vocabulary.relationship r
ON r.relationship_id = cr.relationship_id
WHERE
c.invalid_reason IS NULL
AND c.vocabulary_id IN ('@target_vocabulary')
AND c2.invalid_reason IS NULL
AND c2.vocabulary_id IN ('@target_vocabulary')
AND cr.invalid_reason IS NULL;
output <- list()
for (i in seq_along(target_vocabularies)) {
target_vocabulary <- target_vocabularies[i]
sql_statement <-
SqlRender::render(
"
SELECT DISTINCT
c.vocabulary_id,
c.concept_class_id AS concept_class_id_1,
c2.concept_class_id AS concept_class_id_2,
r.is_hierarchical,
r.defines_ancestry
FROM omop_vocabulary.concept c
INNER JOIN omop_vocabulary.concept_relationship cr
ON cr.concept_id_1 = c.concept_id
INNER JOIN omop_vocabulary.concept c2
ON cr.concept_id_2 = c2.concept_id
LEFT JOIN omop_vocabulary.relationship r
ON r.relationship_id = cr.relationship_id
WHERE
c.invalid_reason IS NULL
AND c.vocabulary_id IN ('@target_vocabulary')
AND c2.invalid_reason IS NULL
AND c2.vocabulary_id IN ('@target_vocabulary')
AND cr.invalid_reason IS NULL;
",
target_vocabulary = target_vocabulary
)
output[[i]] <-
queryAthena(
sql_statement = sql_statement
)
}
#> [2021-05-26 15:34:20] Connected to 'athena'
#> [2021-05-26 15:34:20] Loading Cache...
#> [2021-05-26 15:34:20] Cached SQL:
#> SELECT DISTINCT
#> c.vocabulary_id,
#> c.concept_class_id AS concept_class_id_1,
#> c2.concept_class_id AS concept_class_id_2,
#> r.is_hierarchical,
#> r.defines_ancestry
#> FROM omop_vocabulary.concept c
#> INNER JOIN omop_vocabulary.concept_relationship cr
#> ON cr.concept_id_1 = c.concept_id
#> INNER JOIN omop_vocabulary.concept c2
#> ON cr.concept_id_2 = c2.concept_id
#> LEFT JOIN omop_vocabulary.relationship r
#> ON r.relationship_id = cr.relationship_id
#> WHERE
#> c.invalid_reason IS NULL
#> AND c.vocabulary_id IN ('SNOMED')
#> AND c2.invalid_reason IS NULL
#> AND c2.vocabulary_id IN ('SNOMED')
#> AND cr.invalid_reason IS NULL;
#>
#> [2021-05-26 15:34:20] Cached resultset found...
#> [2021-05-26 15:34:20] Postgres connection to 'athena' closed
#> [2021-05-26 15:34:20] Connected to 'athena'
#> [2021-05-26 15:34:20] Loading Cache...
#> [2021-05-26 15:34:20] Cached SQL:
#> SELECT DISTINCT
#> c.vocabulary_id,
#> c.concept_class_id AS concept_class_id_1,
#> c2.concept_class_id AS concept_class_id_2,
#> r.is_hierarchical,
#> r.defines_ancestry
#> FROM omop_vocabulary.concept c
#> INNER JOIN omop_vocabulary.concept_relationship cr
#> ON cr.concept_id_1 = c.concept_id
#> INNER JOIN omop_vocabulary.concept c2
#> ON cr.concept_id_2 = c2.concept_id
#> LEFT JOIN omop_vocabulary.relationship r
#> ON r.relationship_id = cr.relationship_id
#> WHERE
#> c.invalid_reason IS NULL
#> AND c.vocabulary_id IN ('LOINC')
#> AND c2.invalid_reason IS NULL
#> AND c2.vocabulary_id IN ('LOINC')
#> AND cr.invalid_reason IS NULL;
#>
#> [2021-05-26 15:34:20] Cached resultset found...
#> [2021-05-26 15:34:20] Postgres connection to 'athena' closed
#> [2021-05-26 15:34:20] Connected to 'athena'
#> [2021-05-26 15:34:20] Loading Cache...
#> [2021-05-26 15:34:20] Cached SQL:
#> SELECT DISTINCT
#> c.vocabulary_id,
#> c.concept_class_id AS concept_class_id_1,
#> c2.concept_class_id AS concept_class_id_2,
#> r.is_hierarchical,
#> r.defines_ancestry
#> FROM omop_vocabulary.concept c
#> INNER JOIN omop_vocabulary.concept_relationship cr
#> ON cr.concept_id_1 = c.concept_id
#> INNER JOIN omop_vocabulary.concept c2
#> ON cr.concept_id_2 = c2.concept_id
#> LEFT JOIN omop_vocabulary.relationship r
#> ON r.relationship_id = cr.relationship_id
#> WHERE
#> c.invalid_reason IS NULL
#> AND c.vocabulary_id IN ('RxNorm')
#> AND c2.invalid_reason IS NULL
#> AND c2.vocabulary_id IN ('RxNorm')
#> AND cr.invalid_reason IS NULL;
#>
#> [2021-05-26 15:34:20] Cached resultset found...
#> [2021-05-26 15:34:20] Postgres connection to 'athena' closed
#> [2021-05-26 15:34:20] Connected to 'athena'
#> [2021-05-26 15:34:20] Loading Cache...
#> [2021-05-26 15:34:20] Cached SQL:
#> SELECT DISTINCT
#> c.vocabulary_id,
#> c.concept_class_id AS concept_class_id_1,
#> c2.concept_class_id AS concept_class_id_2,
#> r.is_hierarchical,
#> r.defines_ancestry
#> FROM omop_vocabulary.concept c
#> INNER JOIN omop_vocabulary.concept_relationship cr
#> ON cr.concept_id_1 = c.concept_id
#> INNER JOIN omop_vocabulary.concept c2
#> ON cr.concept_id_2 = c2.concept_id
#> LEFT JOIN omop_vocabulary.relationship r
#> ON r.relationship_id = cr.relationship_id
#> WHERE
#> c.invalid_reason IS NULL
#> AND c.vocabulary_id IN ('RxNorm Extension')
#> AND c2.invalid_reason IS NULL
#> AND c2.vocabulary_id IN ('RxNorm Extension')
#> AND cr.invalid_reason IS NULL;
#>
#> [2021-05-26 15:34:20] No cached resultset found... querying Athena...
#> [2021-05-26 15:34:21] ✓ Open connection
#> [2021-05-26 15:34:21] ✓ JDBC connection
#> [2021-05-26 15:34:21] SQL: SELECT DISTINCT c.vocabulary_id, c.concept_class_id AS concept_class_id_1, c2.concept_class_id AS concept_class_id_2, r.is_hierarchical, r.defines_ancestry FROM omop_vocabulary.concept c INNER JOIN omop_vocabulary.concept_relationship cr ON cr.concept_id_1 = c.concept_id INNER JOIN omop_vocabulary.concept c2 ON cr.concept_id_2 = c2.concept_id LEFT JOIN omop_vocabulary.relationship r ON r.relationship_id = cr.relationship_id WHERE c.invalid_reason IS NULL AND c.vocabulary_id IN ('RxNorm Extension') AND c2.invalid_reason IS NULL AND c2.vocabulary_id IN ('RxNorm Extension') AND cr.invalid_reason IS NULL;
#> [2021-05-26 15:34:21] Querying...
#> [2021-05-26 15:36:58] Querying...complete
#> [2021-05-26 15:36:58] ✓ Returned data has more than 0 rows
#> [2021-05-26 15:36:58] Caching resultset...
#> [2021-05-26 15:36:58] Postgres connection to 'athena' closed
#> [2021-05-26 15:36:58] Connected to 'athena'
#> [2021-05-26 15:36:58] Loading Cache...
#> [2021-05-26 15:36:58] Cached SQL:
#> SELECT DISTINCT
#> c.vocabulary_id,
#> c.concept_class_id AS concept_class_id_1,
#> c2.concept_class_id AS concept_class_id_2,
#> r.is_hierarchical,
#> r.defines_ancestry
#> FROM omop_vocabulary.concept c
#> INNER JOIN omop_vocabulary.concept_relationship cr
#> ON cr.concept_id_1 = c.concept_id
#> INNER JOIN omop_vocabulary.concept c2
#> ON cr.concept_id_2 = c2.concept_id
#> LEFT JOIN omop_vocabulary.relationship r
#> ON r.relationship_id = cr.relationship_id
#> WHERE
#> c.invalid_reason IS NULL
#> AND c.vocabulary_id IN ('HemOnc')
#> AND c2.invalid_reason IS NULL
#> AND c2.vocabulary_id IN ('HemOnc')
#> AND cr.invalid_reason IS NULL;
#>
#> [2021-05-26 15:36:58] No cached resultset found... querying Athena...
#> [2021-05-26 15:36:59] ✓ Open connection
#> [2021-05-26 15:36:59] ✓ JDBC connection
#> [2021-05-26 15:36:59] SQL: SELECT DISTINCT c.vocabulary_id, c.concept_class_id AS concept_class_id_1, c2.concept_class_id AS concept_class_id_2, r.is_hierarchical, r.defines_ancestry FROM omop_vocabulary.concept c INNER JOIN omop_vocabulary.concept_relationship cr ON cr.concept_id_1 = c.concept_id INNER JOIN omop_vocabulary.concept c2 ON cr.concept_id_2 = c2.concept_id LEFT JOIN omop_vocabulary.relationship r ON r.relationship_id = cr.relationship_id WHERE c.invalid_reason IS NULL AND c.vocabulary_id IN ('HemOnc') AND c2.invalid_reason IS NULL AND c2.vocabulary_id IN ('HemOnc') AND cr.invalid_reason IS NULL;
#> [2021-05-26 15:36:59] Querying...
#> [2021-05-26 15:37:18] Querying...complete
#> [2021-05-26 15:37:18] ✓ Returned data has more than 0 rows
#> [2021-05-26 15:37:18] Caching resultset...
#> [2021-05-26 15:37:18] Postgres connection to 'athena' closed
#> [2021-05-26 15:37:18] Connected to 'athena'
#> [2021-05-26 15:37:18] Loading Cache...
#> [2021-05-26 15:37:18] Cached SQL:
#> SELECT DISTINCT
#> c.vocabulary_id,
#> c.concept_class_id AS concept_class_id_1,
#> c2.concept_class_id AS concept_class_id_2,
#> r.is_hierarchical,
#> r.defines_ancestry
#> FROM omop_vocabulary.concept c
#> INNER JOIN omop_vocabulary.concept_relationship cr
#> ON cr.concept_id_1 = c.concept_id
#> INNER JOIN omop_vocabulary.concept c2
#> ON cr.concept_id_2 = c2.concept_id
#> LEFT JOIN omop_vocabulary.relationship r
#> ON r.relationship_id = cr.relationship_id
#> WHERE
#> c.invalid_reason IS NULL
#> AND c.vocabulary_id IN ('ATC')
#> AND c2.invalid_reason IS NULL
#> AND c2.vocabulary_id IN ('ATC')
#> AND cr.invalid_reason IS NULL;
#>
#> [2021-05-26 15:37:18] No cached resultset found... querying Athena...
#> [2021-05-26 15:37:19] ✓ Open connection
#> [2021-05-26 15:37:19] ✓ JDBC connection
#> [2021-05-26 15:37:19] SQL: SELECT DISTINCT c.vocabulary_id, c.concept_class_id AS concept_class_id_1, c2.concept_class_id AS concept_class_id_2, r.is_hierarchical, r.defines_ancestry FROM omop_vocabulary.concept c INNER JOIN omop_vocabulary.concept_relationship cr ON cr.concept_id_1 = c.concept_id INNER JOIN omop_vocabulary.concept c2 ON cr.concept_id_2 = c2.concept_id LEFT JOIN omop_vocabulary.relationship r ON r.relationship_id = cr.relationship_id WHERE c.invalid_reason IS NULL AND c.vocabulary_id IN ('ATC') AND c2.invalid_reason IS NULL AND c2.vocabulary_id IN ('ATC') AND cr.invalid_reason IS NULL;
#> [2021-05-26 15:37:19] Querying...
#> [2021-05-26 15:37:34] Querying...complete
#> [2021-05-26 15:37:34] ✓ Returned data has more than 0 rows
#> [2021-05-26 15:37:34] Caching resultset...
#> [2021-05-26 15:37:34] Postgres connection to 'athena' closed