Chapter 3 Setting up GEOmetadb

GEOmetadb provides a SQLite mirror of GEO metadata that you can query from R. It mirrors GEO as-is (no curation). Download the current SQLite file once and reuse it.

3.1 Install packages

if (!requireNamespace("BiocManager", quietly = TRUE))
  install.packages("BiocManager")
if (!requireNamespace("GEOmetadb", quietly = TRUE))
  BiocManager::install("GEOmetadb")
if (!requireNamespace("DBI", quietly = TRUE))
  install.packages("DBI")
if (!requireNamespace("RSQLite", quietly = TRUE))
  install.packages("RSQLite")
if (!requireNamespace("dplyr", quietly = TRUE))
  install.packages("dplyr")
if (!requireNamespace("ggplot2", quietly = TRUE))
  install.packages("ggplot2")

3.2 Get the SQLite database

library(GEOmetadb)
if (!file.exists('GEOmetadb.sqlite')) getSQLiteFile()
file.info('GEOmetadb.sqlite')
##                         size isdir mode               mtime               ctime               atime  uid  gid   uname
## GEOmetadb.sqlite 19576775680 FALSE  644 2026-01-18 01:34:48 2026-01-19 16:25:09 2026-01-19 16:28:41 1000 1000 rstudio
##                   grname
## GEOmetadb.sqlite rstudio

3.3 Connect to the database

library(DBI)
library(RSQLite)
con <- dbConnect(SQLite(), 'GEOmetadb.sqlite')

3.4 Explore tables and fields

dbListTables(con)
##  [1] "gds"               "gds_subset"        "geoConvert"        "geodb_column_desc" "gpl"              
##  [6] "gse"               "gse_gpl"           "gse_gsm"           "gsm"               "metaInfo"         
## [11] "sMatrix"
dbListFields(con, 'gse')
##  [1] "ID"                   "title"                "gse"                  "status"               "submission_date"     
##  [6] "last_update_date"     "pubmed_id"            "summary"              "type"                 "contributor"         
## [11] "web_link"             "overall_design"       "repeats"              "repeats_sample_list"  "variable"            
## [16] "variable_description" "contact"              "supplementary_file"