Chapter 3 Setting up GEOmetadb

GEOmetadb provides a SQLite mirror of GEO metadata that you can query from R. It mirrors GEO as-is (no curation). Download the current SQLite file once and reuse it.

3.1 Install packages

if (!requireNamespace("BiocManager", quietly = TRUE))
  install.packages("BiocManager")
if (!requireNamespace("GEOmetadb", quietly = TRUE))
  BiocManager::install("GEOmetadb")
if (!requireNamespace("DBI", quietly = TRUE))
  install.packages("DBI")
if (!requireNamespace("RSQLite", quietly = TRUE))
  install.packages("RSQLite")
if (!requireNamespace("dplyr", quietly = TRUE))
  install.packages("dplyr")
if (!requireNamespace("ggplot2", quietly = TRUE))
  install.packages("ggplot2")

3.2 Get the SQLite database

library(GEOmetadb)
## Loading required package: R.utils
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.27.1 (2025-05-02 21:00:05 UTC) successfully loaded. See ?R.oo for help.
## 
## Attaching package: 'R.oo'
## The following object is masked from 'package:R.methodsS3':
## 
##     throw
## The following objects are masked from 'package:methods':
## 
##     getClasses, getMethods
## The following objects are masked from 'package:base':
## 
##     attach, detach, load, save
## R.utils v2.13.0 (2025-02-24 21:20:02 UTC) successfully loaded. See ?R.utils for help.
## 
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
## 
##     timestamp
## The following objects are masked from 'package:base':
## 
##     cat, commandArgs, getOption, isOpen, nullfile, parse, use, warnings
## Loading required package: RSQLite
if (!file.exists('GEOmetadb.sqlite')) getSQLiteFile()
## Unzipping...
## Metadata associate with downloaded file:
##                 name               value
## 1     schema version                 1.0
## 2 creation timestamp 2024-03-06 03:05:59
## [1] "/home/rstudio/projects/in_class_exercises/using_geometadb/GEOmetadb.sqlite"
file.info('GEOmetadb.sqlite')
##                         size isdir mode               mtime               ctime               atime  uid  gid   uname
## GEOmetadb.sqlite 19576775680 FALSE  644 2026-01-19 18:18:44 2026-01-19 18:18:45 2026-01-19 18:18:45 1000 1000 rstudio
##                   grname
## GEOmetadb.sqlite rstudio

3.3 Connect to the database

library(DBI)
library(RSQLite)
con <- dbConnect(SQLite(), 'GEOmetadb.sqlite')

3.4 Explore tables and fields

dbListTables(con)
##  [1] "gds"               "gds_subset"        "geoConvert"        "geodb_column_desc" "gpl"              
##  [6] "gse"               "gse_gpl"           "gse_gsm"           "gsm"               "metaInfo"         
## [11] "sMatrix"
dbListFields(con, 'gse')
##  [1] "ID"                   "title"                "gse"                  "status"               "submission_date"     
##  [6] "last_update_date"     "pubmed_id"            "summary"              "type"                 "contributor"         
## [11] "web_link"             "overall_design"       "repeats"              "repeats_sample_list"  "variable"            
## [16] "variable_description" "contact"              "supplementary_file"