tidyOhdsiSolutions is a lightweight R package of
utilities for working with OMOP CDM data in the
OHDSI ecosystem. It is
intentionally dependency-light: the only hard runtime dependency beyond
base R is jsonlite.
The package provides four main capabilities:
| Area | What it does |
|---|---|
| Functional helpers | Base-R reimplementations of purrr functions (map, walk, imap, pluck, …) — no purrr dependency |
| Concept set builders | Convert plain data.frames into CIRCE concept set expression lists |
| SQL generators | Build SQL to resolve concept sets against an OMOP vocabulary schema — no Java / CirceR required |
| Cohort builders | Create CirceR-compatible cohort definition objects programmatically |
# install.packages("remotes")
remotes::install_github("<owner>/tidyOhdsiSolutions")

library(tidyOhdsiSolutions)

data.frame to a concept set expression

concepts <- data.frame(
concept_id = c(201826L, 442793L),
concept_name = c("Type 2 diabetes mellitus", "Type 1 diabetes mellitus"),
domain_id = "Condition",
vocabulary_id = "SNOMED",
concept_class_id = "Clinical Finding",
standard_concept = "S",
concept_code = c("44054006", "46635009"),
invalid_reason = "V",
excluded = FALSE,
descendants = TRUE,
mapped = FALSE
)
cs_expr <- toConceptSet(concepts, name = "Diabetes")
str(cs_expr, max.level = 2)
#> List of 1
#> $ items:List of 2
#> ..$ :List of 4
#> ..$ :List of 4

Multiple concept sets at once:
cs_list <- toConceptSets(
list(
diabetes = concepts,
hypertension = data.frame(concept_id = 316866L)
)
)
names(cs_list)
#> [1] "diabetes" "hypertension"

sql <- buildConceptSetQuery(cs_expr, vocabularyDatabaseSchema = "cdm")
cat(sql)
#> select distinct I.concept_id FROM
#> (
#> select concept_id from cdm.CONCEPT where (concept_id in (201826,442793))
#> UNION
#> select c.concept_id
#> from cdm.CONCEPT c
#> join cdm.CONCEPT_ANCESTOR ca on c.concept_id = ca.descendant_concept_id
#> WHERE c.invalid_reason is null
#> and (ca.ancestor_concept_id in (201826,442793))
#> ) I

Resolve multiple concept sets at once:
sql_list <- buildConceptSetQueries(cs_list, vocabularyDatabaseSchema = "cdm")

cohort <- createConceptSetCohort(
conceptSetExpression = cs_expr,
name = "Diabetes Cohort",
limit = "first",
requiredObservation = c(365L, 0L),
end = "observation_period_end_date"
)
# Serialise to CirceR-compatible JSON
json <- cohortToJson(cohort)
cat(substr(json, 1, 300))
#> {
#> "ConceptSets": [
#> {
#> "id": 0,
#> "name": "Diabetes Cohort",
#> "expression": {
#> "items": [
#> {
#> "concept": {
#> "CONCEPT_ID": 201826,
#> "CONCEPT_NAME": "Type 2 diabetes mellitus",
#> "STANDARD_CONCEPT": "S",
#>

cohortFromConceptSet() accepts a named list of concept
set expressions and builds a single cohort with all of them:
drug_df <- data.frame(
concept_id = 1503297L,
concept_name = "Metformin",
domain_id = "Drug",
vocabulary_id = "RxNorm",
standard_concept = "S",
descendants = TRUE
)
multi_cs <- toConceptSets(list(
diabetes = concepts,
metformin = drug_df
))
multi_cohort <- cohortFromConceptSet(
conceptSetList = multi_cs,
limit = "earliest",
requiredObservation = c(365L, 0L),
end = "observation_period_end_date"
)
# Each concept set gets its own id
vapply(multi_cohort$ConceptSets, `[[`, character(1), "name")
#> [1] "diabetes" "metformin"

# Continuous drug era
cohort_drug <- createConceptSetCohort(
cs_expr,
end = "drug_exit",
endArgs = list(persistenceWindow = 30, surveillanceWindow = 0)
)
# Fixed offset from index
cohort_fixed <- createConceptSetCohort(
cs_expr,
end = "fixed_exit",
endArgs = list(index = "startDate", offsetDays = 365)
)

# cohort_def is a list produced by e.g. CirceR::cohortExpressionFromJson()
concept_sets <- collectCsFromCohort(cohort_def)
# Returns a named list keyed by lowerCamelCase concept set names

# map / map_chr / map_dbl / map_int / map_lgl
tidyOhdsiSolutions:::map(1:4, ~ .x^2)
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] 4
#>
#> [[3]]
#> [1] 9
#>
#> [[4]]
#> [1] 16
# map2
tidyOhdsiSolutions:::map2_chr(c("hello", "foo"), c("world", "bar"), paste)
#> hello foo
#> "hello world" "foo bar"
# pluck — safely extract from nested structures
nested <- list(a = list(b = list(c = 42)))
tidyOhdsiSolutions:::pluck(nested, "a", "b", "c")
#> [1] 42
tidyOhdsiSolutions:::pluck(nested, "a", "missing", .default = 0)
#> [1] 0
# walk — side-effects only, returns .x invisibly
tidyOhdsiSolutions:::walk(1:3, ~ message("item ", .x))
#> item 1
#> item 2
#> item 3
# imap — index-aware map
tidyOhdsiSolutions:::imap(c(a = 10, b = 20), ~ paste(.y, "=", .x))
#> $a
#> [1] "a = 10"
#>
#> $b
#> [1] "b = 20"

createConceptSetCohort(),
cohortFromConceptSet(), and
buildConceptSetQuery() support the following domains:
Condition, Drug, Procedure,
Observation, Measurement, Visit,
Device
No Java, CirceR, or JVM is
required.
No purrr dependency — all functional
helpers are self-contained base-R wrappers; the API is intentionally
compatible with purrr so pipelines can be migrated with
minimal changes.
toConceptSets() / toConceptSet() can resolve
concept metadata from an OMOP vocabulary schema when a
DatabaseConnector connection is supplied, but work fine
offline with data already in the input data.frame.