Supervised Principal Components (SuperPC) for Cox regression with high-dimensional data. This method first screens features using univariate Cox regression, then applies PCA to selected features, and finally fits a Cox model using the principal components. Particularly effective for genomics data where many features are correlated.
Usage
superpc(
data,
time,
event,
features,
threshold = 0.1,
n_components = 5,
cv_folds = 10,
standardize = TRUE,
screening_method = "univariate_cox",
pca_method = "standard",
validation_method = "cv",
plot_screening = TRUE,
plot_pca = TRUE,
plot_survival = TRUE,
export_components = FALSE
)

Arguments
- data
the data as a data frame
- time
survival time variable
- event
event indicator (1=event, 0=censored)
- features
high-dimensional feature variables (e.g., gene expression)
- threshold
p-value threshold for initial feature screening
- n_components
number of principal components to extract
- cv_folds
number of folds for cross-validation
- standardize
standardize features before analysis
- screening_method
method for initial feature screening
- pca_method
principal component analysis method
- validation_method
model validation approach
- plot_screening
display feature screening results
- plot_pca
display principal component analysis plots
- plot_survival
display survival curves by risk groups
- export_components
export principal component scores to data
Value
A results object containing:
results$instructions | a html
results$analysis_summary | a table
results$feature_screening | a table
results$principal_components | a table
results$component_loadings | a table
results$model_performance | a table
results$risk_groups | a table
results$cross_validation_results | a table
results$feature_screening_plot | an image
results$pca_biplot | an image
results$variance_explained_plot | an image
results$survival_curves_plot | an image
results$component_heatmap | an image
results$clinical_interpretation | a html
Tables can be converted to data frames with asDF or as.data.frame. For example:
results$analysis_summary$asDF
as.data.frame(results$analysis_summary)
Examples
superpc(
data = data,
time = "time",
event = "event",
features = c("gene1", "gene2", "gene3"),
threshold = 0.1,
n_components = 5
)
#> Error in initialize(...): unused arguments (options = <environment>, data = function (..., list = character(), package = NULL, lib.loc = NULL, verbose = getOption("verbose"), envir = .GlobalEnv, overwrite = TRUE)
#> {
#> fileExt <- function(x) {
#> db <- grepl("\\.[^.]+\\.(gz|bz2|xz)$", x)
#> ans <- sub(".*\\.", "", x)
#> ans[db] <- sub(".*\\.([^.]+\\.)(gz|bz2|xz)$", "\\1\\2", x[db])
#> ans
#> }
#> my_read_table <- function(...) {
#> lcc <- Sys.getlocale("LC_COLLATE")
#> on.exit(Sys.setlocale("LC_COLLATE", lcc))
#> Sys.setlocale("LC_COLLATE", "C")
#> read.table(...)
#> }
#> stopifnot(is.character(list))
#> names <- c(as.character(substitute(list(...))[-1]), list)
#> if (!is.null(package)) {
#> if (!is.character(package)) stop("'package' must be a character vector or NULL")
#> }
#> paths <- find.package(package, lib.loc, verbose = verbose)
#> if (is.null(lib.loc)) paths <- c(path.package(package, TRUE), if (!length(package)) getwd(), paths)
#> paths <- unique(normalizePath(paths[file.exists(paths)]))
#> paths <- paths[dir.exists(file.path(paths, "data"))]
#> dataExts <- tools:::.make_file_exts("data")
#> if (length(names) == 0) {
#> db <- matrix(character(), nrow = 0, ncol = 4)
#> for (path in paths) {
#> entries <- NULL
#> packageName <- if (file_test("-f", file.path(path, "DESCRIPTION"))) basename(path) else "."
#> if (file_test("-f", INDEX <- file.path(path, "Meta", "data.rds"))) {
#> entries <- readRDS(INDEX)
#> } else {
#> dataDir <- file.path(path, "data")
#> entries <- tools::list_files_with_type(dataDir, "data")
#> if (length(entries)) {
#> entries <- unique(tools::file_path_sans_ext(basename(entries)))
#> entries <- cbind(entries, "")
#> }
#> }
#> if (NROW(entries)) {
#> if (is.matrix(entries) && ncol(entries) == 2) db <- rbind(db, cbind(packageName, dirname(path), entries)) else warning(gettextf("data index for package %s is invalid and will be ignored", sQuote(packageName)), domain = NA, call. = FALSE)
#> }
#> }
#> colnames(db) <- c("Package", "LibPath", "Item", "Title")
#> footer <- if (missing(package)) paste0("Use ", sQuote(paste("data(package =", ".packages(all.available = TRUE))")), "\n", "to list the data sets in all *available* packages.") else NULL
#> y <- list(title = "Data sets", header = NULL, results = db, footer = footer)
#> class(y) <- "packageIQR"
#> return(y)
#> }
#> paths <- file.path(paths, "data")
#> for (name in names) {
#> found <- FALSE
#> for (p in paths) {
#> tmp_env <- if (overwrite) envir else new.env()
#> if (file_test("-f", file.path(p, "Rdata.rds"))) {
#> rds <- readRDS(file.path(p, "Rdata.rds"))
#> if (name %in% names(rds)) {
#> found <- TRUE
#> if (verbose) message(sprintf("name=%s:\t found in Rdata.rds", name), domain = NA)
#> objs <- rds[[name]]
#> lazyLoad(file.path(p, "Rdata"), envir = tmp_env, filter = function(x) x %in% objs)
#> break
#> } else if (verbose) message(sprintf("name=%s:\t NOT found in names() of Rdata.rds, i.e.,\n\t%s\n", name, paste(names(rds), collapse = ",")), domain = NA)
#> }
#> files <- list.files(p, full.names = TRUE)
#> files <- files[grep(name, files, fixed = TRUE)]
#> if (length(files) > 1) {
#> o <- match(fileExt(files), dataExts, nomatch = 100)
#> paths0 <- dirname(files)
#> paths0 <- factor(paths0, levels = unique(paths0))
#> files <- files[order(paths0, o)]
#> }
#> if (length(files)) {
#> for (file in files) {
#> if (verbose) message("name=", name, ":\t file= ...", .Platform$file.sep, basename(file), "::\t", appendLF = FALSE, domain = NA)
#> ext <- fileExt(file)
#> if (basename(file) != paste0(name, ".", ext)) found <- FALSE else {
#> found <- TRUE
#> switch(ext, R = , r = {
#> library("utils")
#> sys.source(file, chdir = TRUE, envir = tmp_env)
#> }, RData = , rdata = , rda = load(file, envir = tmp_env), TXT = , txt = , tab = , tab.gz = , tab.bz2 = , tab.xz = , txt.gz = , txt.bz2 = , txt.xz = assign(name, my_read_table(file, header = TRUE, as.is = FALSE), envir = tmp_env), CSV = , csv = , csv.gz = , csv.bz2 = , csv.xz = assign(name, my_read_table(file, header = TRUE, sep = ";", as.is = FALSE), envir = tmp_env), found <- FALSE)
#> }
#> if (found) break
#> }
#> if (verbose) message(if (!found) "*NOT* ", "found", domain = NA)
#> }
#> if (found) break
#> }
#> if (!found) {
#> warning(gettextf("data set %s not found", sQuote(name)), domain = NA)
#> } else if (!overwrite) {
#> for (o in ls(envir = tmp_env, all.names = TRUE)) {
#> if (exists(o, envir = envir, inherits = FALSE)) warning(gettextf("an object named %s already exists and will not be overwritten", sQuote(o))) else assign(o, get(o, envir = tmp_env, inherits = FALSE), envir = envir)
#> }
#> rm(tmp_env)
#> }
#> }
#> invisible(names)
#> })