Skip to contents

Supervised Principal Components (SuperPC) for Cox regression with high-dimensional data. This method first screens features using univariate Cox regression, then applies PCA to selected features, and finally fits a Cox model using the principal components. Particularly effective for genomics data where many features are correlated.

Usage

superpc(
  data,
  time,
  event,
  features,
  threshold = 0.1,
  n_components = 5,
  cv_folds = 10,
  standardize = TRUE,
  screening_method = "univariate_cox",
  pca_method = "standard",
  validation_method = "cv",
  plot_screening = TRUE,
  plot_pca = TRUE,
  plot_survival = TRUE,
  export_components = FALSE
)

Arguments

data

the data as a data frame

time

survival time variable

event

event indicator (1=event, 0=censored)

features

high-dimensional feature variables (e.g., gene expression)

threshold

p-value threshold for initial feature screening

n_components

number of principal components to extract

cv_folds

number of folds for cross-validation

standardize

standardize features before analysis

screening_method

method for initial feature screening

pca_method

principal component analysis method

validation_method

model validation approach

plot_screening

display feature screening results

plot_pca

display principal component analysis plots

plot_survival

display survival curves by risk groups

export_components

export principal component scores to data

Value

A results object containing:

results$instructionsa html
results$analysis_summarya table
results$feature_screeninga table
results$principal_componentsa table
results$component_loadingsa table
results$model_performancea table
results$risk_groupsa table
results$cross_validation_resultsa table
results$feature_screening_plotan image
results$pca_biplotan image
results$variance_explained_plotan image
results$survival_curves_plotan image
results$component_heatmapan image
results$clinical_interpretationa html

Tables can be converted to data frames with asDF or as.data.frame. For example:

results$analysis_summary$asDF

as.data.frame(results$analysis_summary)

Examples

superpc(
    data = data,
    time = "time",
    event = "event",
    features = c("gene1", "gene2", "gene3"),
    threshold = 0.1,
    n_components = 5
)
#> Error in initialize(...): unused arguments (options = <environment>, data = function (..., list = character(), package = NULL, lib.loc = NULL, verbose = getOption("verbose"), envir = .GlobalEnv, overwrite = TRUE) 
#> {
#>     fileExt <- function(x) {
#>         db <- grepl("\\.[^.]+\\.(gz|bz2|xz)$", x)
#>         ans <- sub(".*\\.", "", x)
#>         ans[db] <- sub(".*\\.([^.]+\\.)(gz|bz2|xz)$", "\\1\\2", x[db])
#>         ans
#>     }
#>     my_read_table <- function(...) {
#>         lcc <- Sys.getlocale("LC_COLLATE")
#>         on.exit(Sys.setlocale("LC_COLLATE", lcc))
#>         Sys.setlocale("LC_COLLATE", "C")
#>         read.table(...)
#>     }
#>     stopifnot(is.character(list))
#>     names <- c(as.character(substitute(list(...))[-1]), list)
#>     if (!is.null(package)) {
#>         if (!is.character(package)) stop("'package' must be a character vector or NULL")
#>     }
#>     paths <- find.package(package, lib.loc, verbose = verbose)
#>     if (is.null(lib.loc)) paths <- c(path.package(package, TRUE), if (!length(package)) getwd(), paths)
#>     paths <- unique(normalizePath(paths[file.exists(paths)]))
#>     paths <- paths[dir.exists(file.path(paths, "data"))]
#>     dataExts <- tools:::.make_file_exts("data")
#>     if (length(names) == 0) {
#>         db <- matrix(character(), nrow = 0, ncol = 4)
#>         for (path in paths) {
#>             entries <- NULL
#>             packageName <- if (file_test("-f", file.path(path, "DESCRIPTION"))) basename(path) else "."
#>             if (file_test("-f", INDEX <- file.path(path, "Meta", "data.rds"))) {
#>                 entries <- readRDS(INDEX)
#>             } else {
#>                 dataDir <- file.path(path, "data")
#>                 entries <- tools::list_files_with_type(dataDir, "data")
#>                 if (length(entries)) {
#>                   entries <- unique(tools::file_path_sans_ext(basename(entries)))
#>                   entries <- cbind(entries, "")
#>                 }
#>             }
#>             if (NROW(entries)) {
#>                 if (is.matrix(entries) && ncol(entries) == 2) db <- rbind(db, cbind(packageName, dirname(path), entries)) else warning(gettextf("data index for package %s is invalid and will be ignored", sQuote(packageName)), domain = NA, call. = FALSE)
#>             }
#>         }
#>         colnames(db) <- c("Package", "LibPath", "Item", "Title")
#>         footer <- if (missing(package)) paste0("Use ", sQuote(paste("data(package =", ".packages(all.available = TRUE))")), "\n", "to list the data sets in all *available* packages.") else NULL
#>         y <- list(title = "Data sets", header = NULL, results = db, footer = footer)
#>         class(y) <- "packageIQR"
#>         return(y)
#>     }
#>     paths <- file.path(paths, "data")
#>     for (name in names) {
#>         found <- FALSE
#>         for (p in paths) {
#>             tmp_env <- if (overwrite) envir else new.env()
#>             if (file_test("-f", file.path(p, "Rdata.rds"))) {
#>                 rds <- readRDS(file.path(p, "Rdata.rds"))
#>                 if (name %in% names(rds)) {
#>                   found <- TRUE
#>                   if (verbose) message(sprintf("name=%s:\t found in Rdata.rds", name), domain = NA)
#>                   objs <- rds[[name]]
#>                   lazyLoad(file.path(p, "Rdata"), envir = tmp_env, filter = function(x) x %in% objs)
#>                   break
#>                 } else if (verbose) message(sprintf("name=%s:\t NOT found in names() of Rdata.rds, i.e.,\n\t%s\n", name, paste(names(rds), collapse = ",")), domain = NA)
#>             }
#>             files <- list.files(p, full.names = TRUE)
#>             files <- files[grep(name, files, fixed = TRUE)]
#>             if (length(files) > 1) {
#>                 o <- match(fileExt(files), dataExts, nomatch = 100)
#>                 paths0 <- dirname(files)
#>                 paths0 <- factor(paths0, levels = unique(paths0))
#>                 files <- files[order(paths0, o)]
#>             }
#>             if (length(files)) {
#>                 for (file in files) {
#>                   if (verbose) message("name=", name, ":\t file= ...", .Platform$file.sep, basename(file), "::\t", appendLF = FALSE, domain = NA)
#>                   ext <- fileExt(file)
#>                   if (basename(file) != paste0(name, ".", ext)) found <- FALSE else {
#>                     found <- TRUE
#>                     switch(ext, R = , r = {
#>                       library("utils")
#>                       sys.source(file, chdir = TRUE, envir = tmp_env)
#>                     }, RData = , rdata = , rda = load(file, envir = tmp_env), TXT = , txt = , tab = , tab.gz = , tab.bz2 = , tab.xz = , txt.gz = , txt.bz2 = , txt.xz = assign(name, my_read_table(file, header = TRUE, as.is = FALSE), envir = tmp_env), CSV = , csv = , csv.gz = , csv.bz2 = , csv.xz = assign(name, my_read_table(file, header = TRUE, sep = ";", as.is = FALSE), envir = tmp_env), found <- FALSE)
#>                   }
#>                   if (found) break
#>                 }
#>                 if (verbose) message(if (!found) "*NOT* ", "found", domain = NA)
#>             }
#>             if (found) break
#>         }
#>         if (!found) {
#>             warning(gettextf("data set %s not found", sQuote(name)), domain = NA)
#>         } else if (!overwrite) {
#>             for (o in ls(envir = tmp_env, all.names = TRUE)) {
#>                 if (exists(o, envir = envir, inherits = FALSE)) warning(gettextf("an object named %s already exists and will not be overwritten", sQuote(o))) else assign(o, get(o, envir = tmp_env, inherits = FALSE), envir = envir)
#>             }
#>             rm(tmp_env)
#>         }
#>     }
#>     invisible(names)
#> })