Title: | Implement the Group Walk Algorithm |
---|---|
Description: | A procedure that uses target-decoy competition (or knockoffs) to reject multiple hypotheses in the presence of group structure. The procedure controls the false discovery rate (FDR) at a user-specified threshold. |
Authors: | Jack Freestone [aut, cre, cph], Uri Keich [aut, cph] |
Maintainer: | Jack Freestone <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.1.2 |
Built: | 2025-03-04 04:47:31 UTC |
Source: | https://github.com/freejstone/groupwalk |
This function returns a list of q-values corresponding to hypotheses that have been partitioned into groups. For FDR control, users should report the target-hypotheses with q-values less than or equal to their choice of threshold, alpha. For further details about how group-walk works, see: https://www.biorxiv.org/content/10.1101/2022.01.30.478144v1
group_walk( winning_scores, labels, all_group_ids, K = 40, return_frontier = FALSE, correction = 1 )
group_walk( winning_scores, labels, all_group_ids, K = 40, return_frontier = FALSE, correction = 1 )
winning_scores |
A numerical vector of winning scores generated from the target-decoy competitions for each hypothesis. |
labels |
A vector of winning labels indicating whether it was a target (= 1) or a decoy (!= 1) for each hypothesis. |
all_group_ids |
A vector of group IDs associated to each hypothesis (can be recorded as integers, factors, characters). |
K |
A window size parameter (integer). |
return_frontier |
A boolean indicating whether the function should return the complete sequence of frontiers. |
correction |
A correction factor used to in the numerator of the estimated false discovery rate (FDR) (Use 1 for FDR control). |
A sequence of q-values for each hypothesis. If return_frontier = T, additionally the sequence of frontiers will be returned.
create_uncalibrated_hypotheses <- function(m_vec, pi_0_vec, mus, sds) { total <- sum(m_vec) g_total <- length(m_vec) data <- matrix(0, ncol = 4, nrow = total) for (g in 1:length(m_vec)){ m <- m_vec[g] pi_0 <- pi_0_vec[g] mu <- mus[g] sd <- sds[g] if (g == 1) { start <- 0 } else { start <- sum(m_vec[1:(g - 1)]) } targets_nonnull <- rnorm(floor(m*pi_0), mean = mu, sd = sd) targets_null <- rnorm(m - floor(m*pi_0), mean = 0, sd = 1) decoys <- rnorm(m, mean = 0, sd = 1) targets <- c(targets_nonnull, targets_null) W <- pmax(targets, decoys) data[(start + 1):(start + m), 1] <- W data[(start + 1):(start + m), 2] <- g decoy_inds <- which(decoys > targets) inc_native_inds <- (which(targets_null > decoys[(floor(m*pi_0) + 1):m])) + floor(m*pi_0) X <- rep(0, m) X[decoy_inds] <- -1 X[inc_native_inds] <- 1 Y <- X X[X == 0] <- 1 data[(start + 1):(start + m), 3] <- Y data[(start + 1):(start + m), 4] <- X } return(data) } data <- create_uncalibrated_hypotheses(m_vec = rep(1000, 3), pi_0_vec = rep(0.6, 3), mus = c(2.5, 3, 3.5), sds = rep(1, 3)) winning_scores <- data[, 1] all_group_ids <- data[, 2] labels <- data[, 4] q_vals <- group_walk(winning_scores, labels, all_group_ids)
create_uncalibrated_hypotheses <- function(m_vec, pi_0_vec, mus, sds) { total <- sum(m_vec) g_total <- length(m_vec) data <- matrix(0, ncol = 4, nrow = total) for (g in 1:length(m_vec)){ m <- m_vec[g] pi_0 <- pi_0_vec[g] mu <- mus[g] sd <- sds[g] if (g == 1) { start <- 0 } else { start <- sum(m_vec[1:(g - 1)]) } targets_nonnull <- rnorm(floor(m*pi_0), mean = mu, sd = sd) targets_null <- rnorm(m - floor(m*pi_0), mean = 0, sd = 1) decoys <- rnorm(m, mean = 0, sd = 1) targets <- c(targets_nonnull, targets_null) W <- pmax(targets, decoys) data[(start + 1):(start + m), 1] <- W data[(start + 1):(start + m), 2] <- g decoy_inds <- which(decoys > targets) inc_native_inds <- (which(targets_null > decoys[(floor(m*pi_0) + 1):m])) + floor(m*pi_0) X <- rep(0, m) X[decoy_inds] <- -1 X[inc_native_inds] <- 1 Y <- X X[X == 0] <- 1 data[(start + 1):(start + m), 3] <- Y data[(start + 1):(start + m), 4] <- X } return(data) } data <- create_uncalibrated_hypotheses(m_vec = rep(1000, 3), pi_0_vec = rep(0.6, 3), mus = c(2.5, 3, 3.5), sds = rep(1, 3)) winning_scores <- data[, 1] all_group_ids <- data[, 2] labels <- data[, 4] q_vals <- group_walk(winning_scores, labels, all_group_ids)