library("tidyverse")
library("tibble")
library("msigdbr")
library("ggplot2")
library("TCGAbiolinks")
library("RNAseqQC")
library("DESeq2")
library("ensembldb")
library("purrr")
library("magrittr")
library("vsn")
library("matrixStats")
library("dplyr")
library("grex")
library("survminer")
library("survival")
Create a function for downloading TCGA gene expression data.
For more detailed documentation, refer to
2. Differential Gene Expression Analysis - TCGA.Rmd
.
query_and_filter_samples <- function(project) {
query_tumor <- GDCquery(
project = project,
data.category = "Transcriptome Profiling",
data.type = "Gene Expression Quantification",
experimental.strategy = "RNA-Seq",
workflow.type = "STAR - Counts",
access = "open",
sample.type = "Primary Tumor"
)
tumor <- getResults(query_tumor)
query_normal <- GDCquery(
project = project,
data.category = "Transcriptome Profiling",
data.type = "Gene Expression Quantification",
experimental.strategy = "RNA-Seq",
workflow.type = "STAR - Counts",
access = "open",
sample.type = "Solid Tissue Normal"
)
normal <- getResults(query_normal)
submitter_ids <- inner_join(tumor, normal, by = "cases.submitter_id") %>%
dplyr::select(cases.submitter_id)
tumor <- tumor %>%
dplyr::filter(cases.submitter_id %in% submitter_ids$cases.submitter_id)
normal <- normal %>%
dplyr::filter(cases.submitter_id %in% submitter_ids$cases.submitter_id)
samples <- rbind(tumor, normal)
unique(samples$sample_type)
query_project <- GDCquery(
project = project,
data.category = "Transcriptome Profiling",
data.type = "Gene Expression Quantification",
experimental.strategy = "RNA-Seq",
workflow.type = "STAR - Counts",
access = "open",
sample.type = c("Solid Tissue Normal", "Primary Tumor"),
barcode = as.list(samples$sample.submitter_id)
)
# If this is your first time running this notebook (i.e., you have not yet downloaded the results of the query in the previous block),
# uncomment the line below
# GDCdownload(query_project)
return(list(samples = samples, query_project = query_project))
}
Download the TCGA gene expression data for colorectal cancer (TCGA-COAD).
projects <- c("TCGA-COAD")
with_results_projects <- c()
samples <- list()
project_data <- list()
for (project in projects) {
result <- tryCatch(
{
result <- query_and_filter_samples(project)
samples[[project]] <- result$samples
project_data[[project]] <- result$query_project
with_results_projects <- c(with_results_projects, project)
},
error = function(e) {
}
)
}
Running the code block above should generate and populate a directory
named GDCdata
.
Construct the RNA-seq count matrix for each cancer type.
tcga_data <- list()
tcga_matrix <- list()
projects <- with_results_projects
for (project in projects) {
tcga_data[[project]] <- GDCprepare(project_data[[project]], summarizedExperiment = TRUE)
}
for (project in projects) {
count_matrix <- assay(tcga_data[[project]], "unstranded")
# Remove duplicate entries
count_matrix_df <- data.frame(count_matrix)
count_matrix_df <- count_matrix_df[!duplicated(count_matrix_df), ]
count_matrix <- data.matrix(count_matrix_df)
rownames(count_matrix) <- cleanid(rownames(count_matrix))
count_matrix <- count_matrix[!(duplicated(rownames(count_matrix)) | duplicated(rownames(count_matrix), fromLast = TRUE)), ]
tcga_matrix[[project]] <- count_matrix
}
Format the samples
table so that it can be fed as input
to DESeq2.
for (project in projects) {
rownames(samples[[project]]) <- samples[[project]]$cases
samples[[project]] <- samples[[project]] %>%
dplyr::select(case = "cases.submitter_id", type = "sample_type")
samples[[project]]$type <- str_replace(samples[[project]]$type, "Solid Tissue Normal", "normal")
samples[[project]]$type <- str_replace(samples[[project]]$type, "Primary Tumor", "tumor")
}
DESeq2 requires the row names of samples
should be
identical to the column names of count_matrix
.
for (project in projects) {
colnames(tcga_matrix[[project]]) <- gsub(x = colnames(tcga_matrix[[project]]), pattern = "\\.", replacement = "-")
tcga_matrix[[project]] <- tcga_matrix[[project]][, rownames(samples[[project]])]
# Sanity check
print(all(colnames(tcga_matrix[[project]]) == rownames(samples[[project]])))
}
For more detailed documentation on obtaining the gene set, refer to
7. Differential Gene Expression Analysis - TCGA - Pan-cancer - Unique Genes.Rmd
.
RCDdb <- "temp/unique_genes/necroptosis_ferroptosis_pyroptosis/"
Write utility functions for filtering the gene sets, performing differential gene expression analysis, plotting the results, and performing variance-stabilizing transformation.
filter_gene_set_and_perform_dgea <- function(genes) {
tcga_rcd <- list()
for (project in projects) {
rownames(genes) <- genes$gene_id
tcga_rcd[[project]] <- tcga_matrix[[project]][rownames(tcga_matrix[[project]]) %in% genes$gene_id, ]
tcga_rcd[[project]] <- tcga_rcd[[project]][, rownames(samples[[project]])]
}
dds_rcd <- list()
res_rcd <- list()
for (project in projects) {
print(project)
print("=============")
dds <- DESeqDataSetFromMatrix(
countData = tcga_rcd[[project]],
colData = samples[[project]],
design = ~type
)
dds <- filter_genes(dds, min_count = 10)
dds$type <- relevel(dds$type, ref = "normal")
dds_rcd[[project]] <- DESeq(dds)
res_rcd[[project]] <- results(dds_rcd[[project]])
}
deseq.bbl.data <- list()
for (project in projects) {
deseq.results <- res_rcd[[project]]
deseq.bbl.data[[project]] <- data.frame(
row.names = rownames(deseq.results),
baseMean = deseq.results$baseMean,
log2FoldChange = deseq.results$log2FoldChange,
lfcSE = deseq.results$lfcSE,
stat = deseq.results$stat,
pvalue = deseq.results$pvalue,
padj = deseq.results$padj,
cancer_type = project,
gene_symbol = genes[rownames(deseq.results), "gene"]
)
}
deseq.bbl.data.combined <- bind_rows(deseq.bbl.data)
deseq.bbl.data.combined <- dplyr::filter(deseq.bbl.data.combined, abs(log2FoldChange) >= 1.5 & padj < 0.05)
return(deseq.bbl.data.combined)
}
plot_dgea <- function(deseq.bbl.data.combined) {
sizes <- c("<10^-15" = 4, "10^-10" = 3, "10^-5" = 2, "0.05" = 1)
deseq.bbl.data.combined <- deseq.bbl.data.combined %>%
mutate(fdr_category = cut(padj,
breaks = c(-Inf, 1e-15, 1e-10, 1e-5, 0.05),
labels = c("<10^-15", "10^-10", "10^-5", "0.05"),
right = FALSE
))
top_genes <- deseq.bbl.data.combined %>%
group_by(cancer_type) %>%
mutate(rank = rank(-abs(log2FoldChange))) %>%
dplyr::filter(rank <= 10) %>%
ungroup()
ggplot(top_genes, aes(y = cancer_type, x = gene_symbol, size = fdr_category, fill = log2FoldChange)) +
geom_point(alpha = 0.5, shape = 21, color = "black") +
scale_size_manual(values = sizes) +
scale_fill_gradient2(low = "blue", mid = "white", high = "red", limits = c(min(deseq.bbl.data.combined$log2FoldChange), max(deseq.bbl.data.combined$log2FoldChange))) +
theme_minimal() +
theme(
axis.text.x = element_text(size = 9, angle = 90, hjust = 1)
) +
theme(legend.position = "bottom") +
theme(legend.position = "bottom") +
labs(size = "Adjusted p-value", fill = "log2 FC", y = "Cancer type", x = "Gene")
}
perform_vsd <- function(genes) {
tcga_rcd <- list()
for (project in projects) {
rownames(genes) <- genes$gene_id
tcga_rcd[[project]] <- tcga_matrix[[project]][rownames(tcga_matrix[[project]]) %in% genes$gene_id, ]
tcga_rcd[[project]] <- tcga_rcd[[project]][, rownames(samples[[project]])]
}
vsd_rcd <- list()
for (project in projects) {
print(project)
print("=============")
dds <- DESeqDataSetFromMatrix(
countData = tcga_rcd[[project]],
colData = samples[[project]],
design = ~type
)
dds <- filter_genes(dds, min_count = 10)
# Perform variance stabilization
dds <- estimateSizeFactors(dds)
nsub <- sum(rowMeans(counts(dds, normalized = TRUE)) > 10)
vsd <- vst(dds, nsub = nsub)
vsd_rcd[[project]] <- assay(vsd)
}
return(vsd_rcd)
}
Fetch the gene set of interest.
genes <- read.csv(paste0(RCDdb, "Pyroptosis.csv"))
print(genes)
genes$gene_id <- cleanid(genes$gene_id)
genes <- distinct(genes, gene_id, .keep_all = TRUE)
genes <- subset(genes, gene_id != "")
genes
Filter the genes to include only those in the gene set of interest, and then perform differential gene expression analysis.
deseq.bbl.data.combined <- filter_gene_set_and_perform_dgea(genes)
[1] "TCGA-COAD"
[1] "============="
Warning: some variables in design formula are characters, converting to factorsestimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
-- replacing outliers and refitting for 3 genes
-- DESeq argument 'minReplicatesForReplace' = 7
-- original counts are preserved in counts(dds)
estimating dispersions
fitting model and testing
deseq.bbl.data.combined
Plot the results.
plot_dgea(deseq.bbl.data.combined)
Perform variance-stabilizing transformation for further downstream analysis (i.e., for survival analysis).
vsd <- perform_vsd(genes)
[1] "TCGA-COAD"
[1] "============="
Download clinical data from TCGA, and perform some preprocessing: -
The deceased
column should be FALSE
if the
patient is alive and TRUE
otherwise - The
overall_survival
column should reflect the follow-up time
if the patient is alive and the days to death otherwise
download_clinical_data <- function(project) {
clinical_data <- GDCquery_clinic(project)
clinical_data$deceased <- ifelse(clinical_data$vital_status == "Alive", FALSE, TRUE)
clinical_data$overall_survival <- ifelse(clinical_data$vital_status == "Alive",
clinical_data$days_to_last_follow_up,
clinical_data$days_to_death
)
return(clinical_data)
}
tcga_clinical <- list()
for (project in projects) {
tcga_clinical[[project]] <- download_clinical_data(project)
}
Write utility functions for performing survival analysis.
construct_gene_df <- function(gene_of_interest, project) {
gene_df <- vsd[[project]] %>%
as.data.frame() %>%
rownames_to_column(var = "gene_id") %>%
gather(key = "case_id", value = "counts", -gene_id) %>%
left_join(., genes, by = "gene_id") %>%
dplyr::filter(gene == gene_of_interest) %>%
dplyr::filter(case_id %in% rownames(samples[[project]] %>% dplyr::filter(type == "normal")))
q1 <- quantile(gene_df$counts, probs = 0.25)
q3 <- quantile(gene_df$counts, probs = 0.75)
gene_df$strata <- ifelse(gene_df$counts >= q3, "HIGH", ifelse(gene_df$counts <= q1, "LOW", "MIDDLE"))
gene_df <- gene_df %>% dplyr::filter(strata %in% c("LOW", "HIGH"))
gene_df$case_id <- paste0(sapply(strsplit(as.character(gene_df$case_id), "-"), `[`, 1), '-',
sapply(strsplit(as.character(gene_df$case_id), "-"), `[`, 2), '-',
sapply(strsplit(as.character(gene_df$case_id), "-"), `[`, 3))
gene_df <- merge(gene_df, tcga_clinical[[project]], by.x = "case_id", by.y = "submitter_id")
return(gene_df)
}
compute_surival_fit <- function(gene_df) {
return (survfit(Surv(overall_survival, deceased) ~ strata, data = gene_df))
}
compute_cox <- function(gene_df) {
return (coxph(Surv(overall_survival, deceased) ~ strata, data=gene_df))
}
plot_survival <- function(fit) {
return(ggsurvplot(fit,
data = gene_df,
pval = T,
risk.table = T,
risk.table.height = 0.3
))
}
compute_survival_diff <- function(gene_df) {
return(survdiff(Surv(overall_survival, deceased) ~ strata, data = gene_df))
}
Perform survival analysis by testing for the difference in the Kaplan-Meier curves using the G-rho family of Harrington and Fleming tests: https://rdrr.io/cran/survival/man/survdiff.html
Our genes of interest are GSDMD (the primary executor of pyroptosis) and the differentially expressed genes.
significant_projects <- c()
significant_genes <- c()
ctr <- 1
for (project in projects) {
for (gene in c("GSDMD", genes$gene)) {
cat(project, gene, "\n\n")
error <- tryCatch (
{
gene_df <- construct_gene_df(gene, project)
},
error = function(e) {
cat("\n\n============================\n\n")
e
}
)
if(inherits(error, "error")) next
if (nrow(gene_df) > 0) {
fit <- compute_surival_fit(gene_df)
tryCatch (
{
survival <- compute_survival_diff(gene_df)
cox <- compute_cox(gene_df)
print(ctr)
ctr <- ctr + 1
print(survival)
cat("\n")
print(cox)
print(plot_survival(fit))
if (pchisq(survival$chisq, length(survival$n)-1, lower.tail = FALSE) < 0.05) {
significant_projects <- c(significant_projects, project)
significant_genes <- c(significant_genes, gene)
}
},
error = function(e) {
}
)
}
cat("\n\n============================\n\n")
}
}
TCGA-COAD GSDMD
[1] 1
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 3 2.4 0.152 0.403
strata=LOW 11 2 2.6 0.140 0.403
Chisq= 0.4 on 1 degrees of freedom, p= 0.5
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW -0.7327 0.4806 1.1773 -0.622 0.534
Likelihood ratio test=0.43 on 1 df, p=0.5139
n= 22, number of events= 5
============================
TCGA-COAD CHMP7
[1] 2
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 4 2.73 0.596 0.984
strata=LOW 16 3 4.27 0.380 0.984
Chisq= 1 on 1 degrees of freedom, p= 0.3
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW -0.7437 0.4754 0.7668 -0.97 0.332
Likelihood ratio test=0.95 on 1 df, p=0.3291
n= 27, number of events= 7
============================
TCGA-COAD GSDMC
[1] 3
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 4 4.04 0.000366 0.00148
strata=LOW 11 2 1.96 0.000754 0.00148
Chisq= 0 on 1 degrees of freedom, p= 1
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW 0.03851 1.03926 1.00056 0.038 0.969
Likelihood ratio test=0 on 1 df, p=0.9693
n= 22, number of events= 6
============================
TCGA-COAD ELANE
[1] 4
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 5 3.34 0.824 1.59
strata=LOW 11 3 4.66 0.591 1.59
Chisq= 1.6 on 1 degrees of freedom, p= 0.2
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW -1.0177 0.3614 0.8404 -1.211 0.226
Likelihood ratio test=1.63 on 1 df, p=0.2013
n= 22, number of events= 8
============================
TCGA-COAD IRF1
[1] 5
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 3 3.14 0.00627 0.018
strata=LOW 11 2 1.86 0.01058 0.018
Chisq= 0 on 1 degrees of freedom, p= 0.9
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW 0.1270 1.1355 0.9463 0.134 0.893
Likelihood ratio test=0.02 on 1 df, p=0.8936
n= 22, number of events= 5
============================
TCGA-COAD CYCS
[1] 6
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 1 3.7 1.97 5.2
strata=LOW 11 5 2.3 3.15 5.2
Chisq= 5.2 on 1 degrees of freedom, p= 0.02
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW 2.112 8.267 1.101 1.919 0.055
Likelihood ratio test=5.21 on 1 df, p=0.02242
n= 22, number of events= 6
============================
TCGA-COAD GSDMA
[1] 7
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 3 3.1 0.00327 0.00685
strata=LOW 11 5 4.9 0.00207 0.00685
Chisq= 0 on 1 degrees of freedom, p= 0.9
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW 0.06796 1.07032 0.82132 0.083 0.934
Likelihood ratio test=0.01 on 1 df, p=0.9341
n= 22, number of events= 8
============================
TCGA-COAD CASP4
[1] 8
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 2 2.7 0.183 0.336
strata=LOW 11 5 4.3 0.115 0.336
Chisq= 0.3 on 1 degrees of freedom, p= 0.6
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW 0.4982 1.6458 0.8683 0.574 0.566
Likelihood ratio test=0.34 on 1 df, p=0.5572
n= 22, number of events= 7
============================
TCGA-COAD BAK1
[1] 9
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 2 4.21 1.16 2.98
strata=LOW 11 5 2.79 1.75 2.98
Chisq= 3 on 1 degrees of freedom, p= 0.08
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW 1.358 3.890 0.845 1.607 0.108
Likelihood ratio test=2.92 on 1 df, p=0.08733
n= 22, number of events= 7
============================
TCGA-COAD NOD1
[1] 10
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 4 2.94 0.382 0.762
strata=LOW 11 2 3.06 0.367 0.762
Chisq= 0.8 on 1 degrees of freedom, p= 0.4
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW -0.7457 0.4744 0.8730 -0.854 0.393
Likelihood ratio test=0.78 on 1 df, p=0.3786
n= 22, number of events= 6
============================
TCGA-COAD NLRP7
[1] 11
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 2 2.62 0.145 0.26
strata=LOW 11 4 3.38 0.112 0.26
Chisq= 0.3 on 1 degrees of freedom, p= 0.6
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW 0.4409 1.5541 0.8713 0.506 0.613
Likelihood ratio test=0.27 on 1 df, p=0.6055
n= 22, number of events= 6
============================
TCGA-COAD CASP3
[1] 12
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 2 3.7 0.779 1.66
strata=LOW 11 5 3.3 0.871 1.66
Chisq= 1.7 on 1 degrees of freedom, p= 0.2
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW 1.0334 2.8105 0.8383 1.233 0.218
Likelihood ratio test=1.7 on 1 df, p=0.1929
n= 22, number of events= 7
============================
TCGA-COAD GSDMB
[1] 13
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 2 2.071 0.00246 0.0103
strata=LOW 11 1 0.929 0.00549 0.0103
Chisq= 0 on 1 degrees of freedom, p= 0.9
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW 0.1438 1.1547 1.4179 0.101 0.919
Likelihood ratio test=0.01 on 1 df, p=0.9192
n= 22, number of events= 3
============================
TCGA-COAD GZMB
[1] 14
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 3 3.25 0.0193 0.0428
strata=LOW 11 4 3.75 0.0168 0.0428
Chisq= 0 on 1 degrees of freedom, p= 0.8
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW 0.1698 1.1851 0.8220 0.207 0.836
Likelihood ratio test=0.04 on 1 df, p=0.8364
n= 22, number of events= 7
============================
TCGA-COAD GSDME
[1] 15
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 4 3.7 0.0244 0.0764
strata=LOW 11 2 2.3 0.0392 0.0764
Chisq= 0.1 on 1 degrees of freedom, p= 0.8
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW -0.2576 0.7729 0.9341 -0.276 0.783
Likelihood ratio test=0.08 on 1 df, p=0.7814
n= 22, number of events= 6
============================
TCGA-COAD CHMP3
[1] 16
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 3 2.79 0.0155 0.0259
strata=LOW 11 4 4.21 0.0103 0.0259
Chisq= 0 on 1 degrees of freedom, p= 0.9
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW -0.1233 0.8840 0.7663 -0.161 0.872
Likelihood ratio test=0.03 on 1 df, p=0.8726
n= 22, number of events= 7
============================
TCGA-COAD DPP9
[1] 17
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 6 4.61 0.418 1.11
strata=LOW 11 2 3.39 0.569 1.11
Chisq= 1.1 on 1 degrees of freedom, p= 0.3
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW -0.8568 0.4245 0.8384 -1.022 0.307
Likelihood ratio test=1.15 on 1 df, p=0.2835
n= 22, number of events= 8
============================
TCGA-COAD NOD2
[1] 18
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 3 2.52 0.0928 0.162
strata=LOW 11 3 3.48 0.0670 0.162
Chisq= 0.2 on 1 degrees of freedom, p= 0.7
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW -0.3296 0.7192 0.8223 -0.401 0.689
Likelihood ratio test=0.16 on 1 df, p=0.6892
n= 22, number of events= 6
============================
TCGA-COAD NLRC4
[1] 19
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 3 2.01 0.486 0.758
strata=LOW 11 4 4.99 0.196 0.758
Chisq= 0.8 on 1 degrees of freedom, p= 0.4
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW -0.7124 0.4905 0.8335 -0.855 0.393
Likelihood ratio test=0.72 on 1 df, p=0.397
n= 22, number of events= 7
============================
TCGA-COAD GSDMD
[1] 20
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 3 2.4 0.152 0.403
strata=LOW 11 2 2.6 0.140 0.403
Chisq= 0.4 on 1 degrees of freedom, p= 0.5
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW -0.7327 0.4806 1.1773 -0.622 0.534
Likelihood ratio test=0.43 on 1 df, p=0.5139
n= 22, number of events= 5
============================
TCGA-COAD TIRAP
[1] 21
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 2 1.95 0.001517 0.00251
strata=LOW 11 3 3.05 0.000966 0.00251
Chisq= 0 on 1 degrees of freedom, p= 1
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW -0.0459 0.9551 0.9171 -0.05 0.96
Likelihood ratio test=0 on 1 df, p=0.9601
n= 22, number of events= 5
============================
TCGA-COAD SCAF11
[1] 22
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 4 3.29 0.151 0.377
strata=LOW 11 3 3.71 0.134 0.377
Chisq= 0.4 on 1 degrees of freedom, p= 0.5
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW -0.5182 0.5956 0.8517 -0.608 0.543
Likelihood ratio test=0.37 on 1 df, p=0.544
n= 22, number of events= 7
============================
TCGA-COAD NLRP6
[1] 23
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 3 3.19 0.01184 0.0225
strata=LOW 11 5 4.81 0.00787 0.0225
Chisq= 0 on 1 degrees of freedom, p= 0.9
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW 0.1164 1.1234 0.7759 0.15 0.881
Likelihood ratio test=0.02 on 1 df, p=0.8805
n= 22, number of events= 8
============================
TCGA-COAD AIM2
[1] 24
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 3 4.3 0.394 1
strata=LOW 11 5 3.7 0.458 1
Chisq= 1 on 1 degrees of freedom, p= 0.3
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW 0.8305 2.2945 0.8504 0.977 0.329
Likelihood ratio test=1.05 on 1 df, p=0.3066
n= 22, number of events= 8
============================
TCGA-COAD CASP6
[1] 25
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 2 4.06 1.04 2.5
strata=LOW 11 6 3.94 1.08 2.5
Chisq= 2.5 on 1 degrees of freedom, p= 0.1
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW 1.2475 3.4817 0.8393 1.486 0.137
Likelihood ratio test=2.49 on 1 df, p=0.1143
n= 22, number of events= 8
============================
TCGA-COAD NLRP2
[1] 26
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 2 3.89 0.917 2.09
strata=LOW 11 5 3.11 1.145 2.09
Chisq= 2.1 on 1 degrees of freedom, p= 0.1
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW 1.1538 3.1704 0.8415 1.371 0.17
Likelihood ratio test=2.11 on 1 df, p=0.1463
n= 22, number of events= 7
============================
TCGA-COAD IRF2
[1] 27
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 3 2.33 0.1918 0.315
strata=LOW 11 5 5.67 0.0789 0.315
Chisq= 0.3 on 1 degrees of freedom, p= 0.6
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW -0.4560 0.6338 0.8189 -0.557 0.578
Likelihood ratio test=0.31 on 1 df, p=0.5792
n= 22, number of events= 8
============================
TCGA-COAD PJVK
[1] 28
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 2 2.46 0.0868 0.149
strata=LOW 11 4 3.54 0.0604 0.149
Chisq= 0.1 on 1 degrees of freedom, p= 0.7
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW 0.3355 1.3987 0.8719 0.385 0.7
Likelihood ratio test=0.15 on 1 df, p=0.6957
n= 22, number of events= 6
============================
TCGA-COAD CASP5
[1] 29
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 3 2.42 0.136 0.352
strata=LOW 11 1 1.58 0.210 0.352
Chisq= 0.4 on 1 degrees of freedom, p= 0.6
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW -0.6780 0.5076 1.1631 -0.583 0.56
Likelihood ratio test=0.37 on 1 df, p=0.5406
n= 22, number of events= 4
============================
TCGA-COAD NLRP1
[1] 30
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 3 3.69 0.131 0.329
strata=LOW 11 4 3.31 0.146 0.329
Chisq= 0.3 on 1 degrees of freedom, p= 0.6
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW 0.4944 1.6395 0.8707 0.568 0.57
Likelihood ratio test=0.34 on 1 df, p=0.5614
n= 22, number of events= 7
============================
TCGA-COAD CASP9
[1] 31
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 5 3.51 0.628 1.17
strata=LOW 11 4 5.49 0.402 1.17
Chisq= 1.2 on 1 degrees of freedom, p= 0.3
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW -0.7944 0.4519 0.7508 -1.058 0.29
Likelihood ratio test=1.17 on 1 df, p=0.2798
n= 22, number of events= 9
============================
TCGA-COAD PLCG1
[1] 32
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 4 4.63 0.0861 0.233
strata=LOW 11 4 3.37 0.1184 0.233
Chisq= 0.2 on 1 degrees of freedom, p= 0.6
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW 0.3705 1.4484 0.7717 0.48 0.631
Likelihood ratio test=0.23 on 1 df, p=0.6291
n= 22, number of events= 8
============================
TCGA-COAD IL18
[1] 33
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 3 2.66 0.0432 0.0971
strata=LOW 11 3 3.34 0.0344 0.0971
Chisq= 0.1 on 1 degrees of freedom, p= 0.8
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW -0.2899 0.7483 0.9335 -0.311 0.756
Likelihood ratio test=0.1 on 1 df, p=0.7546
n= 22, number of events= 6
============================
TCGA-COAD DPP8
[1] 34
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 4 3.76 0.0147 0.0372
strata=LOW 11 3 3.24 0.0171 0.0372
Chisq= 0 on 1 degrees of freedom, p= 0.8
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW -0.1574 0.8543 0.8174 -0.193 0.847
Likelihood ratio test=0.04 on 1 df, p=0.8474
n= 22, number of events= 7
============================
Display the results only for genes where a significant difference in survival has been reported.
significant_genes
[1] "CYCS"
num_significant_genes <- length(significant_genes)
if (num_significant_genes > 0) {
for (i in 1 : num_significant_genes) {
project <- significant_projects[[i]]
gene <- significant_genes[[i]]
cat(project, gene, "\n\n")
gene_df <- construct_gene_df(gene, project)
fit <- compute_surival_fit(gene_df)
survival <- compute_survival_diff(gene_df)
cox <- compute_cox(gene_df)
print(survival)
cat("\n")
print(cox)
print(plot_survival(fit))
cat("\n\n============================\n\n")
}
}
TCGA-COAD CYCS
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata,
data = gene_df)
N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11 1 3.7 1.97 5.2
strata=LOW 11 5 2.3 3.15 5.2
Chisq= 5.2 on 1 degrees of freedom, p= 0.02
Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)
coef exp(coef) se(coef) z p
strataLOW 2.112 8.267 1.101 1.919 0.055
Likelihood ratio test=5.21 on 1 df, p=0.02242
n= 22, number of events= 6
============================
De La Salle University, Manila, Philippines, gonzales.markedward@gmail.com↩︎
De La Salle University, Manila, Philippines, anish.shrestha@dlsu.edu.ph↩︎