Chapter 10 Differential expression with edgeR (QLF) - Dataset 2

In this chapter we run edgeR using the recommended quasi-likelihood pipeline:

  1. Start from raw counts in a DGEList
  2. Estimate dispersions
  3. Fit a QL GLM
  4. Perform QL F-tests for contrasts of interest

We use the same additive design:

\[ \text{counts} \sim \text{status} + \text{patient} \]

obj <- readRDS("data/d2_qc_objects.rds")
dge_f <- obj$dge_f
meta <- obj$meta
mappings <- obj$mappings

design <- obj$design

# Contrasts
contr <- limma::makeContrasts(
  `status0vs1` = status0 - (status1),
  levels = design
)

# Estimate dispersion and fit QL model
edge <- edgeR::estimateDisp(dge_f, design)
fit <- edgeR::glmQLFit(edge, design)

10.1 Plot Biological coefficient of variance plot

plotBCV(edge)

10.2 Test: status 0 vs status 1

qlf_0_vs_1 <- edgeR::glmQLFTest(fit, contrast = contr[, "status0vs1"])

edgeR::topTags(qlf_0_vs_1)
## Coefficient:  1*status0 -1*status1 
##                         logFC   logCPM        F       PValue        FDR
## ENSG00000240429.1  -1.1832705 3.671905 29.34794 1.145213e-05 0.06015600
## ENSG00000138829.11 -1.9731998 3.991667 34.33254 1.308962e-05 0.06015600
## ENSG00000221867.8   3.1695213 3.122014 78.27375 1.742877e-05 0.06015600
## ENSG00000196604.12 -1.6168936 3.867533 28.54729 2.170176e-05 0.06015600
## ENSG00000151729.10  0.9484458 3.946041 27.69842 2.306418e-05 0.06015600
## ENSG00000157429.15 -0.7927040 3.495731 22.82475 3.820839e-05 0.08271539
## ENSG00000162998.4  -1.5853343 3.385425 26.06719 4.439903e-05 0.08271539
## ENSG00000240382.3   4.4279973 5.238228 31.24078 7.216175e-05 0.10844245
## ENSG00000013297.11  1.7036739 3.566455 22.57187 7.483951e-05 0.10844245
## ENSG00000211956.2   3.4714536 4.407648 24.55495 1.140482e-04 0.14168075

10.3 Extract full results

tab_0_vs_1 <- edgeR::topTags(qlf_0_vs_1, n = Inf)$table %>% rownames_to_column("ensembl")


head(tab_0_vs_1)
##              ensembl      logFC   logCPM        F       PValue        FDR
## 1  ENSG00000240429.1 -1.1832705 3.671905 29.34794 1.145213e-05 0.06015600
## 2 ENSG00000138829.11 -1.9731998 3.991667 34.33254 1.308962e-05 0.06015600
## 3  ENSG00000221867.8  3.1695213 3.122014 78.27375 1.742877e-05 0.06015600
## 4 ENSG00000196604.12 -1.6168936 3.867533 28.54729 2.170176e-05 0.06015600
## 5 ENSG00000151729.10  0.9484458 3.946041 27.69842 2.306418e-05 0.06015600
## 6 ENSG00000157429.15 -0.7927040 3.495731 22.82475 3.820839e-05 0.08271539

10.4 Volcano plot of EdgeR results

volcano_df <- tab_0_vs_1 %>%
  mutate(sig = FDR < 0.05)

ggplot(volcano_df, aes(x = logFC, y = -log10(PValue), color = sig)) +
  geom_point(alpha = 0.6) +
  theme_minimal() +
  labs(
    title = "EdgeR: 0 vs 1",
    x = "log2 fold-change",
    y = "-log10(p-value)"
  )

10.5 Try and run glmFit

10.6 Test: status 0 vs status 1

glmfit_0_vs_1 <- edgeR::glmFit(edge, design)
glmtest_0_vs_1 <- edgeR::glmLRT(fit, contrast = contr[, "status0vs1"])

edgeR::topTags(glmtest_0_vs_1)
## Coefficient:  1*status0 -1*status1 
##                        logFC    logCPM       LR       PValue          FDR
## ENSG00000240382.3   4.427997  5.238228 56.76214 4.918460e-14 6.414164e-10
## ENSG00000211947.2   3.587843  5.992715 55.36579 1.000628e-13 6.524593e-10
## ENSG00000211663.2   3.466166  4.997326 49.14025 2.382995e-12 1.035888e-08
## ENSG00000224650.2   3.397552  4.693907 39.47905 3.315983e-10 9.979335e-07
## ENSG00000211956.2   3.471454  4.407648 39.19962 3.826139e-10 9.979335e-07
## ENSG00000283029.1  -1.902125 16.790777 37.62136 8.589875e-10 1.867009e-06
## ENSG00000243290.3   3.048243  4.133752 36.58053 1.464875e-09 2.729062e-06
## ENSG00000276775.1   2.787666  5.156642 36.04396 1.929157e-09 3.144766e-06
## ENSG00000112936.18  2.768605  5.273166 35.43942 2.631083e-09 3.812439e-06
## ENSG00000102837.6  -4.896848  4.754747 34.84953 3.561951e-09 4.645140e-06
glm_tab_0_vs_1 <- edgeR::topTags(glmtest_0_vs_1, n = Inf)$table %>% rownames_to_column("ensembl")

tab_glm <- glm_tab_0_vs_1 %>%
  rownames_to_column("gene") %>%
  mutate(
    neglog10P = -log10(PValue),
    direction = case_when(
      FDR < 0.05 & logFC > 0 ~ "Up",
      FDR < 0.05 & logFC < 0 ~ "Down",
      TRUE ~ "Not sig"
    )
  )

ggplot(tab, aes(x = logFC, y = neglog10P)) +
  geom_point(aes(color = direction), alpha = 0.7, size = 1.2) +
  scale_color_manual(values = c(
    "Up" = "red",
    "Down" = "blue",
    "Not sig" = "grey70"
  )) +
  geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "steelblue") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "steelblue") +
  theme_minimal() +
  labs(
    title = "edgeR volcano: 0 vs rest",
    x = "log2 fold change (logFC)",
    y = "-log10(p-value)",
    color = NULL
  )

10.7 Save edgeR results

saveRDS(
  list(
    meta = meta,
    mappings = mappings,
    fit = fit,
    qlf_0_vs_1  = qlf_0_vs_1 ,
    glm_tab_0_vs_1  = glm_tab_0_vs_1 
  ),
  file = "data/d2_edger_results.rds"
)