From 97915db1bd4f29539b75b97009df2a930ed28c53 Mon Sep 17 00:00:00 2001 From: b_falquet Date: Mon, 29 Sep 2025 15:43:04 +0200 Subject: [PATCH 1/7] add new `mode` argument --- DESCRIPTION | 2 +- NEWS.md | 2 ++ R/filter.R | 26 ++++++++++++++++---------- R/reformat.R | 4 ++++ man/get_arg.Rd | 2 +- man/log_filter.Rd | 10 ++++++---- man/reformat.Rd | 3 ++- tests/testthat/test-filter.R | 17 +++++++++++++++++ 8 files changed, 49 insertions(+), 17 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9b8dcc8f..6e9a1418 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -50,4 +50,4 @@ Encoding: UTF-8 Language: en-US LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 diff --git a/NEWS.md b/NEWS.md index 871bc4b7..9879da7e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # dunlin 0.1.11.9000 +* New `mode` argument in `log_filter` determining whether all tables should be filtered to retain only the rows whose identifiers are present in the filtered data set selected with the `table` argument. (Previously, the behavior was triggered automatically when the `table` name was `adsl`) + # dunlin 0.1.11 * Added `subject_level_flag()` function. diff --git a/R/filter.R b/R/filter.R index 7543cad0..f5069b49 100644 --- a/R/filter.R +++ b/R/filter.R @@ -5,11 +5,10 @@ #' @param ... further arguments to be passed to or from other methods. #' @returns a `data.frame` or `list` of `data.frame` filtered for the provided conditions. #' @details -#' `log_filter` will filter the data/named list of data according to the `condition`. +#' `log_filter` will filter the `data.frame` /named list of `data.frame` according to the `condition`. #' All the variables in `condition` must exist in the data (as variables) or in the parent #' frame(e.g., in global environment). -#' For named list of data, if `ADSL` is available, `log_filter` will also try to subset all -#' other datasets with `USUBJID`. 
+#' For named list of `data.frame`, #' @export log_filter <- function(data, condition, ...) { UseMethod("log_filter") @@ -44,29 +43,36 @@ log_filter.data.frame <- function(data, condition, suffix = NULL, ...) { #' @rdname log_filter #' @param table (`string`) table name. -#' @param by (`character`) variable names shared by `adsl` and other datasets for filtering. +#' @param by (`character`) variable names shared by `table` and other datasets for filtering when `mode == "all"`. #' @param verbose (`flag`) whether to print a report about the filtering. +#' @param mode (`string`) one of `all` or `unique` whether the other tables should be filtered based on the rows retained in `table`. Default value is `"all"` is `table == "adsl"` and `"unique"` otherwise. #' @export #' @examples #' log_filter(list(iris = iris), Sepal.Length >= 7, "iris", character(0)) -log_filter.list <- function(data, condition, table, by = c("USUBJID", "STUDYID"), suffix = NULL, verbose = FALSE, ...) { +log_filter.list <- function(data, condition, table, by = c("USUBJID", "STUDYID"), suffix = NULL, verbose = FALSE, mode = ifelse(table == "adsl", "all", "unique"), ...) 
{ checkmate::assert_list(data, types = "data.frame", names = "unique") assert_all_tablenames(data, table) checkmate::assert_names(colnames(data[[table]]), must.include = by) + checkmate::assert_character(by, null.ok = TRUE) + checkmate::assert_string(suffix, null.ok = TRUE) + checkmate::assert_flag(verbose) + checkmate::assert_subset(mode, c("all", "unique")) + condition <- match.call()$condition data[[table]] <- eval(bquote(log_filter(data[[table]], .(condition), .(suffix)))) - if (identical(table, "adsl")) { - for (k in setdiff(names(data), "adsl")) { + + if (mode == "all") { + for (k in setdiff(names(data), table)) { if (all(by %in% names(data[[k]]))) { - if (length(by) == 0) by <- intersect(names(data[[k]]), names(data$adsl)) + if (length(by) == 0) by <- intersect(names(data[[k]]), names(data[[table]])) ori_n <- nrow(data[[k]]) ori_att <- attr(data[[k]], "rows") - data[[k]] <- dplyr::semi_join(data[[k]], data$adsl, by = by) + data[[k]] <- dplyr::semi_join(data[[k]], data[[table]], by = by) rows <- list(list(init = ori_n, final = nrow(data[[k]]), suffix = suffix)) - names(rows) <- paste0("Filtered by adsl: ", deparse(condition), collapse = "") + names(rows) <- paste0(sprintf("Filtered by %s: ", table), deparse(condition), collapse = "") attr(data[[k]], "rows") <- c(ori_att, rows) } } diff --git a/R/reformat.R b/R/reformat.R index 278e4109..41a27269 100644 --- a/R/reformat.R +++ b/R/reformat.R @@ -1,4 +1,8 @@ #' Reformat Values +#' +#' Replaces substitute values in `vectors` or `list` of `data.frame` using used defined [`rule`]. +#' See \code{\link{vignette}("Reformatting", package = "dunlin")} for a detailed guide on using this function. +#' #' @param obj (`character`, `factor` or `list of data.frame`) to reformat. #' @param format (`rule`) or (`list`) of `rule` depending on the class of obj. #' @param ... for compatibility between methods and pass additional special mapping to transform rules. 
diff --git a/man/get_arg.Rd b/man/get_arg.Rd index d180dcc8..7cede776 100644 --- a/man/get_arg.Rd +++ b/man/get_arg.Rd @@ -23,7 +23,7 @@ if defined, the value of the option (\code{opt}), a \code{character} from the en Getting Argument From System, Option or Default } \examples{ -\dontshow{if (require("withr")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("withr")) withAutoprint(\{ # examplesIf} get_arg("my.option", "MY_ARG", "default") withr::with_envvar(c(MY_ARG = "x;y"), get_arg("my.option", "MY_ARG", "default")) withr::with_options(c(my.option = "y"), get_arg("my.option", "MY_ARG", "default")) diff --git a/man/log_filter.Rd b/man/log_filter.Rd index 015c2b8c..0ca342ed 100644 --- a/man/log_filter.Rd +++ b/man/log_filter.Rd @@ -17,6 +17,7 @@ log_filter(data, condition, ...) by = c("USUBJID", "STUDYID"), suffix = NULL, verbose = FALSE, + mode = ifelse(table == "adsl", "all", "unique"), ... ) } @@ -31,9 +32,11 @@ log_filter(data, condition, ...) \item{table}{(\code{string}) table name.} -\item{by}{(\code{character}) variable names shared by \code{adsl} and other datasets for filtering.} +\item{by}{(\code{character}) variable names shared by \code{table} and other datasets for filtering when \code{mode == "all"}.} \item{verbose}{(\code{flag}) whether to print a report about the filtering.} + +\item{mode}{(\code{string}) one of \code{all} or \code{unique} whether the other tables should be filtered based on the rows retained in \code{table}. Default value is \code{"all"} is \code{table == "adsl"} and \code{"unique"} otherwise.} } \value{ a \code{data.frame} or \code{list} of \code{data.frame} filtered for the provided conditions. @@ -42,11 +45,10 @@ a \code{data.frame} or \code{list} of \code{data.frame} filtered for the provide Filter Data with Log } \details{ -\code{log_filter} will filter the data/named list of data according to the \code{condition}. 
+\code{log_filter} will filter the \code{data.frame} /named list of \code{data.frame} according to the \code{condition}. All the variables in \code{condition} must exist in the data (as variables) or in the parent frame(e.g., in global environment). -For named list of data, if \code{ADSL} is available, \code{log_filter} will also try to subset all -other datasets with \code{USUBJID}. +For named list of \code{data.frame}, } \examples{ data <- iris diff --git a/man/reformat.Rd b/man/reformat.Rd index 99f1e366..1026cd6e 100644 --- a/man/reformat.Rd +++ b/man/reformat.Rd @@ -44,7 +44,8 @@ the rule. (\code{character}, \code{factor} or \verb{list of data.frame}) with remapped values. } \description{ -Reformat Values +Replaces substitute values in \code{vectors} or \code{list} of \code{data.frame} using used defined \code{\link{rule}}. +See \code{\link{vignette}("Reformatting", package = "dunlin")} for a detailed guide on using this function. } \note{ When the rule is empty rule or when values subject to reformatting are absent from the object, no error is diff --git a/tests/testthat/test-filter.R b/tests/testthat/test-filter.R index 06667206..03589994 100644 --- a/tests/testthat/test-filter.R +++ b/tests/testthat/test-filter.R @@ -145,6 +145,23 @@ test_that("log_filter works with long conditions", { expect_identical(df1, df2, ignore_attr = TRUE) }) +test_that("log_filters works with custom `mode` argument", { + dfa <- data.frame(USUBJID = letters[5:14], b = 1:10) + dfb <- data.frame(USUBJID = letters[1:10], c = 1:10) + + attr(dfa$USUBJID, "label") <- "usubjid_dfa" + attr(dfb$USUBJID, "label") <- "usubjid_dfb" + + df_raw <- list(adsl = dfa, dfb = dfb) + res <- expect_silent(log_filter(df_raw, c >= 7, "dfb", by = "USUBJID", mode = "all")) + expect_equal(nrow(res$dfb), 4) + expect_equal(nrow(res$adsl), 4) + + res <- expect_silent(log_filter(df_raw, c >= 7, "dfb", by = "USUBJID", mode = "unique")) + expect_equal(nrow(res$dfb), 4) + expect_equal(nrow(res$adsl), 10) +}) + # 
get_log ---- test_that("get_log works as expected", { From d18a93b21f47c0b799a5ce2d3bb8a38a056d3837 Mon Sep 17 00:00:00 2001 From: b_falquet Date: Wed, 1 Oct 2025 15:02:49 +0200 Subject: [PATCH 2/7] update doc link --- R/reformat.R | 2 +- man/reformat.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/reformat.R b/R/reformat.R index 41a27269..5d6d7809 100644 --- a/R/reformat.R +++ b/R/reformat.R @@ -1,7 +1,7 @@ #' Reformat Values #' #' Replaces substitute values in `vectors` or `list` of `data.frame` using used defined [`rule`]. -#' See \code{\link{vignette}("Reformatting", package = "dunlin")} for a detailed guide on using this function. +#' See `vignette("Reformatting", package = "dunlin")` for a detailed guide on using this function. #' #' @param obj (`character`, `factor` or `list of data.frame`) to reformat. #' @param format (`rule`) or (`list`) of `rule` depending on the class of obj. diff --git a/man/reformat.Rd b/man/reformat.Rd index 1026cd6e..4680081c 100644 --- a/man/reformat.Rd +++ b/man/reformat.Rd @@ -45,7 +45,7 @@ the rule. } \description{ Replaces substitute values in \code{vectors} or \code{list} of \code{data.frame} using used defined \code{\link{rule}}. -See \code{\link{vignette}("Reformatting", package = "dunlin")} for a detailed guide on using this function. +See \code{vignette("Reformatting", package = "dunlin")} for a detailed guide on using this function. 
} \note{ When the rule is empty rule or when values subject to reformatting are absent from the object, no error is From fa6442ad48d2ca44840f5eb6f4e3711429d8a54a Mon Sep 17 00:00:00 2001 From: b_falquet Date: Thu, 2 Oct 2025 09:50:14 +0200 Subject: [PATCH 3/7] update documentation --- R/filter.R | 20 +++++++++++++++----- man/log_filter.Rd | 5 +++-- tests/testthat/test-filter.R | 2 +- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/R/filter.R b/R/filter.R index f5069b49..f257a578 100644 --- a/R/filter.R +++ b/R/filter.R @@ -8,7 +8,8 @@ #' `log_filter` will filter the `data.frame` /named list of `data.frame` according to the `condition`. #' All the variables in `condition` must exist in the data (as variables) or in the parent #' frame(e.g., in global environment). -#' For named list of `data.frame`, +#' For a named list of `data.frame`, set `mode = "all"`` to filter other tables by keys retained +#' in table (using by), or `mode = "unique"` to leave other tables unchanged. #' @export log_filter <- function(data, condition, ...) { UseMethod("log_filter") @@ -45,23 +46,32 @@ log_filter.data.frame <- function(data, condition, suffix = NULL, ...) { #' @param table (`string`) table name. #' @param by (`character`) variable names shared by `table` and other datasets for filtering when `mode == "all"`. #' @param verbose (`flag`) whether to print a report about the filtering. -#' @param mode (`string`) one of `all` or `unique` whether the other tables should be filtered based on the rows retained in `table`. Default value is `"all"` is `table == "adsl"` and `"unique"` otherwise. +#' @param mode (`string`) one of `all` or `unique` whether the other tables should be filtered based on the rows retained in `table`. +#' Default value is `"all"` is `table == "adsl"` and `"unique"` otherwise. 
#' @export #' @examples #' log_filter(list(iris = iris), Sepal.Length >= 7, "iris", character(0)) -log_filter.list <- function(data, condition, table, by = c("USUBJID", "STUDYID"), suffix = NULL, verbose = FALSE, mode = ifelse(table == "adsl", "all", "unique"), ...) { +log_filter.list <- function(data, + condition, + table, + by = c("USUBJID", "STUDYID"), + suffix = NULL, + verbose = FALSE, + mode = ifelse(table == "adsl", "all", "unique"), + ...) { checkmate::assert_list(data, types = "data.frame", names = "unique") assert_all_tablenames(data, table) checkmate::assert_names(colnames(data[[table]]), must.include = by) - checkmate::assert_character(by, null.ok = TRUE) checkmate::assert_string(suffix, null.ok = TRUE) checkmate::assert_flag(verbose) - checkmate::assert_subset(mode, c("all", "unique")) + checkmate::assert_choice(mode, c("all", "unique")) condition <- match.call()$condition data[[table]] <- eval(bquote(log_filter(data[[table]], .(condition), .(suffix)))) if (mode == "all") { + checkmate::assert_character(by, null.ok = TRUE) + for (k in setdiff(names(data), table)) { if (all(by %in% names(data[[k]]))) { if (length(by) == 0) by <- intersect(names(data[[k]]), names(data[[table]])) diff --git a/man/log_filter.Rd b/man/log_filter.Rd index 0ca342ed..cd67ecde 100644 --- a/man/log_filter.Rd +++ b/man/log_filter.Rd @@ -36,7 +36,8 @@ log_filter(data, condition, ...) \item{verbose}{(\code{flag}) whether to print a report about the filtering.} -\item{mode}{(\code{string}) one of \code{all} or \code{unique} whether the other tables should be filtered based on the rows retained in \code{table}. Default value is \code{"all"} is \code{table == "adsl"} and \code{"unique"} otherwise.} +\item{mode}{(\code{string}) one of \code{all} or \code{unique} whether the other tables should be filtered based on the rows retained in \code{table}. 
+Default value is \code{"all"} is \code{table == "adsl"} and \code{"unique"} otherwise.} } \value{ a \code{data.frame} or \code{list} of \code{data.frame} filtered for the provided conditions. @@ -48,7 +49,7 @@ Filter Data with Log \code{log_filter} will filter the \code{data.frame} /named list of \code{data.frame} according to the \code{condition}. All the variables in \code{condition} must exist in the data (as variables) or in the parent frame(e.g., in global environment). -For named list of \code{data.frame}, +For a named list of \code{data.frame}, set \verb{mode = "all"`` to filter other tables by keys retained in table (using by), or }mode = "unique"` to leave other tables unchanged. } \examples{ data <- iris diff --git a/tests/testthat/test-filter.R b/tests/testthat/test-filter.R index 03589994..482c0968 100644 --- a/tests/testthat/test-filter.R +++ b/tests/testthat/test-filter.R @@ -145,7 +145,7 @@ test_that("log_filter works with long conditions", { expect_identical(df1, df2, ignore_attr = TRUE) }) -test_that("log_filters works with custom `mode` argument", { +test_that("log_filter works with custom `mode` argument", { dfa <- data.frame(USUBJID = letters[5:14], b = 1:10) dfb <- data.frame(USUBJID = letters[1:10], c = 1:10) From bbdbb67399eb9c22b5f8e0df764f3b71bd6ba13a Mon Sep 17 00:00:00 2001 From: b_falquet Date: Thu, 2 Oct 2025 09:55:25 +0200 Subject: [PATCH 4/7] correct typo --- R/reformat.R | 2 +- man/reformat.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/reformat.R b/R/reformat.R index 5d6d7809..51e80898 100644 --- a/R/reformat.R +++ b/R/reformat.R @@ -1,6 +1,6 @@ #' Reformat Values #' -#' Replaces substitute values in `vectors` or `list` of `data.frame` using used defined [`rule`]. +#' Replaces values in `vectors` or `list` of `data.frame` using user-defined [`rule`] or list of [`rule`]. #' See `vignette("Reformatting", package = "dunlin")` for a detailed guide on using this function. 
#' #' @param obj (`character`, `factor` or `list of data.frame`) to reformat. diff --git a/man/reformat.Rd b/man/reformat.Rd index 4680081c..48c562ff 100644 --- a/man/reformat.Rd +++ b/man/reformat.Rd @@ -44,7 +44,7 @@ the rule. (\code{character}, \code{factor} or \verb{list of data.frame}) with remapped values. } \description{ -Replaces substitute values in \code{vectors} or \code{list} of \code{data.frame} using used defined \code{\link{rule}}. +Replaces values in \code{vectors} or \code{list} of \code{data.frame} using user-defined \code{\link{rule}} or list of \code{\link{rule}}. See \code{vignette("Reformatting", package = "dunlin")} for a detailed guide on using this function. } \note{ From 37d96e5fba578f87249ee68c8a9483d6d30340f3 Mon Sep 17 00:00:00 2001 From: b_falquet Date: Thu, 2 Oct 2025 10:32:56 +0200 Subject: [PATCH 5/7] linter --- R/filter.R | 3 ++- man/log_filter.Rd | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/filter.R b/R/filter.R index f257a578..77bad2bd 100644 --- a/R/filter.R +++ b/R/filter.R @@ -46,7 +46,8 @@ log_filter.data.frame <- function(data, condition, suffix = NULL, ...) { #' @param table (`string`) table name. #' @param by (`character`) variable names shared by `table` and other datasets for filtering when `mode == "all"`. #' @param verbose (`flag`) whether to print a report about the filtering. -#' @param mode (`string`) one of `all` or `unique` whether the other tables should be filtered based on the rows retained in `table`. +#' @param mode (`string`) one of `all` or `unique` whether the other tables should be filtered based on the rows +#' retained in `table`. #' Default value is `"all"` is `table == "adsl"` and `"unique"` otherwise. #' @export #' @examples diff --git a/man/log_filter.Rd b/man/log_filter.Rd index cd67ecde..04bd9f78 100644 --- a/man/log_filter.Rd +++ b/man/log_filter.Rd @@ -36,7 +36,8 @@ log_filter(data, condition, ...) 
\item{verbose}{(\code{flag}) whether to print a report about the filtering.} -\item{mode}{(\code{string}) one of \code{all} or \code{unique} whether the other tables should be filtered based on the rows retained in \code{table}. +\item{mode}{(\code{string}) one of \code{all} or \code{unique} whether the other tables should be filtered based on the rows +retained in \code{table}. Default value is \code{"all"} is \code{table == "adsl"} and \code{"unique"} otherwise.} } \value{ From 3e4d55d5f22f2a6d860822291a799a93e80de5de Mon Sep 17 00:00:00 2001 From: b_falquet Date: Mon, 6 Oct 2025 16:39:08 +0200 Subject: [PATCH 6/7] Davide's suggestion --- R/filter.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/filter.R b/R/filter.R index 77bad2bd..81ee11a7 100644 --- a/R/filter.R +++ b/R/filter.R @@ -3,13 +3,16 @@ #' @param condition (`call`) of subset condition. Must evaluate as logical. #' @param suffix (`string`) optional argument describing the filter. #' @param ... further arguments to be passed to or from other methods. +#' #' @returns a `data.frame` or `list` of `data.frame` filtered for the provided conditions. +#' #' @details #' `log_filter` will filter the `data.frame` /named list of `data.frame` according to the `condition`. #' All the variables in `condition` must exist in the data (as variables) or in the parent #' frame(e.g., in global environment). #' For a named list of `data.frame`, set `mode = "all"`` to filter other tables by keys retained #' in table (using by), or `mode = "unique"` to leave other tables unchanged. +#' #' @export log_filter <- function(data, condition, ...) 
{ UseMethod("log_filter") From 1042bfc1f8968d77f299247b0312ccb24d318ecc Mon Sep 17 00:00:00 2001 From: b_falquet Date: Mon, 6 Oct 2025 16:43:33 +0200 Subject: [PATCH 7/7] styler --- R/filter.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/filter.R b/R/filter.R index 81ee11a7..c1053abd 100644 --- a/R/filter.R +++ b/R/filter.R @@ -3,16 +3,16 @@ #' @param condition (`call`) of subset condition. Must evaluate as logical. #' @param suffix (`string`) optional argument describing the filter. #' @param ... further arguments to be passed to or from other methods. -#' +#' #' @returns a `data.frame` or `list` of `data.frame` filtered for the provided conditions. -#' +#' #' @details #' `log_filter` will filter the `data.frame` /named list of `data.frame` according to the `condition`. #' All the variables in `condition` must exist in the data (as variables) or in the parent #' frame(e.g., in global environment). #' For a named list of `data.frame`, set `mode = "all"`` to filter other tables by keys retained #' in table (using by), or `mode = "unique"` to leave other tables unchanged. -#' +#' #' @export log_filter <- function(data, condition, ...) { UseMethod("log_filter")