vignettes/taxonomy-and-relationships.Rmd
taxonomy-and-relationships.Rmd
3 types of relationships can be derived from the OMOP Vocabulary tables:
Here, I am using “[V] [S] 4115276 Non-small cell lung cancer [SNOMED 254637007] [Condition] [Clinical Finding]” as an example.
The ancestry of 4115276 is derived and ordered in a top-to-bottom format. The CONCEPT_ANCESTOR table provides 2 types of paths: the min_levels_of_separation
and max_levels_of_separation
that represents the minimum and maximum distances between the concepts within the hierarchy, respectively. Here, the minimum is used.
-- The ancestors are ordered from top-to-bottom
SELECT c.concept_name, ca.min_levels_of_separation
FROM omop_vocabulary.concept_ancestor ca
LEFT JOIN omop_vocabulary.concept c
ON c.concept_id = ca.ancestor_concept_id
WHERE
ca.descendant_concept_id = 4115276
AND c.invalid_reason IS NULL
ORDER BY min_levels_of_separation DESC
UNION
-- The descendants ordered inverse to maintain top-to-bottom format
SELECT c.concept_name, ca.min_levels_of_separation
FROM omop_vocabulary.concept_ancestor ca
LEFT JOIN omop_vocabulary.concept c
ON c.concept_id = ca.descendant_concept_id
WHERE ca.ancestor_concept_id = 4115276
AND c.invalid_reason IS NULL
ORDER BY min_levels_of_separation;
taxonomy <-
queryAthena(
"
WITH a AS (
SELECT c.concept_name, ca.min_levels_of_separation
FROM omop_vocabulary.concept_ancestor ca
LEFT JOIN omop_vocabulary.concept c
ON c.concept_id = ca.ancestor_concept_id
WHERE
ca.descendant_concept_id = 4115276
AND c.invalid_reason IS NULL
ORDER BY min_levels_of_separation DESC
),
d AS (
SELECT c.concept_name, ca.min_levels_of_separation
FROM omop_vocabulary.concept_ancestor ca
LEFT JOIN omop_vocabulary.concept c
ON c.concept_id = ca.descendant_concept_id
WHERE ca.ancestor_concept_id = 4115276
AND c.invalid_reason IS NULL
ORDER BY min_levels_of_separation
)
SELECT *
FROM a
UNION
SELECT *
FROM d
",
verbose = FALSE,
render_sql = FALSE
)
The hierarchy seen above can also be derived from the CONCEPT_RELATIONSHIP table by filtering for the Is a and Subsumes relationships.
SELECT relationship_id, c.*
FROM omop_vocabulary.concept_relationship cr
LEFT JOIN omop_vocabulary.concept c
ON c.concept_id = cr.concept_id_2
WHERE
cr.concept_id_1 = 4115276 AND
cr.invalid_reason IS NULL AND
relationship_id IN
('Subsumes', 'Is a');
taxonomy_relationship <-
queryAthena(
"
SELECT relationship_id, c.*
FROM omop_vocabulary.concept_relationship cr
LEFT JOIN omop_vocabulary.concept c
ON c.concept_id = cr.concept_id_2
WHERE
cr.concept_id_1 = 4115276 AND
cr.invalid_reason IS NULL AND
relationship_id IN
('Subsumes', 'Is a');
",
verbose = FALSE,
render_sql = FALSE
)
However, the CONCEPT_RELATIONSHIP table can only represent direct relationships that are 1 degree of separation between a concept. The CONCEPT_ANCESTOR table is used in situations where traversing the taxonomy beyond a single level is required.
The Maps to
and Mapped from
relationship can be used to either find the new concept assigned to a deprecated concept or the mapping between a non-standard vocabulary such as an ICD billing code to SNOMED.
For 4115276, a Mapped from relationship exists from MeSH, ICD-O-3, HemOnc, other SNOMED concepts, etc…. These relationships allows one to leverage existing codes in source systems to route them to the correct mapping in the OMOP CDM during an ETL.
SELECT relationship_id, c.*
FROM omop_vocabulary.concept_relationship cr
LEFT JOIN omop_vocabulary.concept c
ON c.concept_id = cr.concept_id_2
WHERE
cr.concept_id_1 = 4115276 AND
cr.invalid_reason IS NULL AND
relationship_id IN
('Mapped from', 'Maps to');
df <-
queryAthena(
"
SELECT relationship_id, c.*
FROM omop_vocabulary.concept_relationship cr
LEFT JOIN omop_vocabulary.concept c
ON c.concept_id = cr.concept_id_2
WHERE
cr.concept_id_1 = 4115276 AND
cr.invalid_reason IS NULL AND
relationship_id IN
('Mapped from', 'Maps to');
",
verbose = FALSE,
render_sql = FALSE
)
Another example of this type of relationship are mappings between NDC codes and RxNorm. Here, I find all the NDC concepts for acetaminophen.
SELECT c.*
FROM omop_vocabulary.concept_synonym cs
INNER JOIN omop_vocabulary.concept c
ON c.concept_id = cs.concept_id
WHERE
cs.concept_synonym_name LIKE '%acetaminophen%'
AND c.vocabulary_id = 'NDC'
AND c.invalid_reason IS NULL
;
df <-
queryAthena(
"
SELECT c.*
FROM omop_vocabulary.concept_synonym cs
INNER JOIN omop_vocabulary.concept c
ON c.concept_id = cs.concept_id
WHERE
cs.concept_synonym_name LIKE '%acetaminophen%'
AND c.vocabulary_id = 'NDC'
AND c.invalid_reason IS NULL
;
",
verbose = FALSE,
render_sql = FALSE
)
The CONCEPT_RELATIONSHIP table is used to map NDC codes to RxNorm, which is often the case in an ETL from a source drug orders table to the OMOP DRUG_EXPOSURES table.
SELECT
c1.concept_code AS ndc_code,
c1.concept_name AS ndc_drug,
cr.relationship_id,
c2.concept_code AS target_code,
c2.concept_name AS target_name,
c2.vocabulary_id AS target_vocabulary
FROM omop_vocabulary.concept_relationship cr
LEFT JOIN omop_vocabulary.concept c1
ON c1.concept_id = cr.concept_id_1
LEFT JOIN omop_vocabulary.concept c2
ON c2.concept_id = cr.concept_id_2
WHERE cr.concept_id_1 IN (
SELECT DISTINCT c.concept_id
FROM omop_vocabulary.concept_synonym cs
INNER JOIN omop_vocabulary.concept c
ON c.concept_id = cs.concept_id
WHERE
cs.concept_synonym_name LIKE '%acetaminophen%'
AND c.vocabulary_id = 'NDC'
AND c.invalid_reason IS NULL
)
;
df <-
queryAthena(
"
SELECT
c1.concept_code AS ndc_code,
c1.concept_name AS ndc_drug,
cr.relationship_id,
c2.concept_code AS target_code,
c2.concept_name AS target_name,
c2.vocabulary_id AS target_vocabulary
FROM omop_vocabulary.concept_relationship cr
LEFT JOIN omop_vocabulary.concept c1
ON c1.concept_id = cr.concept_id_1
LEFT JOIN omop_vocabulary.concept c2
ON c2.concept_id = cr.concept_id_2
WHERE cr.concept_id_1 IN (
SELECT DISTINCT c.concept_id
FROM omop_vocabulary.concept_synonym cs
INNER JOIN omop_vocabulary.concept c
ON c.concept_id = cs.concept_id
WHERE
cs.concept_synonym_name LIKE '%acetaminophen%'
AND c.vocabulary_id = 'NDC'
AND c.invalid_reason IS NULL
)
;
",
verbose = FALSE,
render_sql = FALSE
)
In addition to the use case above, concepts that are deprecated usually have a Maps to relationship to a valid replacement.
Since a search for NSCLC does not return any deprecated concepts, a search for diabetes is used to exemplify instead.
SELECT DISTINCT c.*
FROM omop_vocabulary.concept_synonym cs
INNER JOIN omop_vocabulary.concept c
ON c.concept_id = cs.concept_id
WHERE
cs.concept_synonym_name LIKE '%diabetes%'
AND c.invalid_reason IS NOT NULL;
df <-
queryAthena(
"
SELECT DISTINCT c.*
FROM omop_vocabulary.concept_synonym cs
INNER JOIN omop_vocabulary.concept c
ON c.concept_id = cs.concept_id
WHERE
cs.concept_synonym_name LIKE '%diabetes%'
AND c.invalid_reason IS NOT NULL;
",
verbose = FALSE,
render_sql = FALSE
)
For these deprecated diabetes concepts, the Maps to relationship exposes each of their valid counterparts.
SELECT
c1.concept_code AS deprecated_concept_code,
c1.concept_name AS deprecated_concept_name,
c1.vocabulary_id AS deprecated_vocabulary_id,
cr.relationship_id,
c2.concept_code,
c2.concept_name,
c2.vocabulary_id,
c2.invalid_reason
FROM omop_vocabulary.concept_relationship cr
INNER JOIN omop_vocabulary.concept c1
ON cr.concept_id_1 = c1.concept_id
INNER JOIN omop_vocabulary.concept c2
ON cr.concept_id_2 = c2.concept_id
WHERE
cr.concept_id_1 IN (
SELECT DISTINCT c.concept_id
FROM omop_vocabulary.concept_synonym cs
INNER JOIN omop_vocabulary.concept c
ON c.concept_id = cs.concept_id
WHERE
cs.concept_synonym_name LIKE '%diabetes%'
AND c.invalid_reason IS NOT NULL
)
AND cr.invalid_reason IS NULL
AND cr.relationship_id = 'Maps to'
;
df <-
queryAthena(
"
SELECT
c1.concept_code AS deprecated_concept_code,
c1.concept_name AS deprecated_concept_name,
c1.vocabulary_id AS deprecated_vocabulary_id,
cr.relationship_id,
c2.concept_code,
c2.concept_name,
c2.vocabulary_id,
c2.invalid_reason
FROM omop_vocabulary.concept_relationship cr
INNER JOIN omop_vocabulary.concept c1
ON cr.concept_id_1 = c1.concept_id
INNER JOIN omop_vocabulary.concept c2
ON cr.concept_id_2 = c2.concept_id
WHERE
cr.concept_id_1 IN (
SELECT DISTINCT c.concept_id
FROM omop_vocabulary.concept_synonym cs
INNER JOIN omop_vocabulary.concept c
ON c.concept_id = cs.concept_id
WHERE
cs.concept_synonym_name LIKE '%diabetes%'
AND c.invalid_reason IS NOT NULL
)
AND cr.invalid_reason IS NULL
AND cr.relationship_id = 'Maps to'
;
",
verbose = FALSE,
render_sql = FALSE
)
These relationships are all other relationships that are non-hierarchical, meaning that a corresponding entry would not be found in the CONCEPT_ANCESTOR table.
SELECT relationship_id, c.*
FROM omop_vocabulary.concept_relationship cr
LEFT JOIN omop_vocabulary.concept c
ON c.concept_id = cr.concept_id_2
WHERE
cr.concept_id_1 = 4115276 AND
cr.invalid_reason IS NULL AND
relationship_id NOT IN
('Subsumes', 'Is a', 'Mapped from', 'Maps to');
df <-
queryAthena(
"
SELECT relationship_id, c.*
FROM omop_vocabulary.concept_relationship cr
LEFT JOIN omop_vocabulary.concept c
ON c.concept_id = cr.concept_id_2
WHERE
cr.concept_id_1 = 4115276 AND
cr.invalid_reason IS NULL AND
relationship_id NOT IN
('Subsumes', 'Is a', 'Mapped from', 'Maps to');
",
verbose = FALSE,
render_sql = FALSE
)