Chapter 3 Setting up GEOmetadb

GEOmetadb provides a SQLite mirror of GEO metadata that you can query from R. It mirrors GEO as-is (no curation). Download the current SQLite file once and reuse it.

3.1 Install packages

if (!requireNamespace("BiocManager", quietly = TRUE))
  install.packages("BiocManager")
if (!requireNamespace("GEOmetadb", quietly = TRUE))
  BiocManager::install("GEOmetadb")
if (!requireNamespace("DBI", quietly = TRUE))
  install.packages("DBI")
if (!requireNamespace("RSQLite", quietly = TRUE))
  install.packages("RSQLite")
if (!requireNamespace("dplyr", quietly = TRUE))
  install.packages("dplyr")
if (!requireNamespace("ggplot2", quietly = TRUE))
  install.packages("ggplot2")

3.2 Get the SQLite database

library(GEOmetadb)
if (!file.exists('GEOmetadb.sqlite')) getSQLiteFile()
## Unzipping...
## Metadata associate with downloaded file:
##                 name               value
## 1     schema version                 1.0
## 2 creation timestamp 2024-03-06 03:05:59
## [1] "/home/rstudio/projects/in_class_exercises/using_geometadb/GEOmetadb.sqlite"
file.info('GEOmetadb.sqlite')
##                         size isdir mode               mtime               ctime
## GEOmetadb.sqlite 19576775680 FALSE  644 2026-02-24 16:37:29 2026-02-24 16:37:29
##                                atime  uid  gid   uname  grname
## GEOmetadb.sqlite 2026-02-24 16:37:29 1000 1000 rstudio rstudio

3.3 Connect to the database

library(DBI)
library(RSQLite)
con <- dbConnect(SQLite(), 'GEOmetadb.sqlite')

3.4 Explore tables and fields

dbListTables(con)
##  [1] "gds"               "gds_subset"        "geoConvert"        "geodb_column_desc"
##  [5] "gpl"               "gse"               "gse_gpl"           "gse_gsm"          
##  [9] "gsm"               "metaInfo"          "sMatrix"
dbListFields(con, 'gse')
##  [1] "ID"                   "title"                "gse"                 
##  [4] "status"               "submission_date"      "last_update_date"    
##  [7] "pubmed_id"            "summary"              "type"                
## [10] "contributor"          "web_link"             "overall_design"      
## [13] "repeats"              "repeats_sample_list"  "variable"            
## [16] "variable_description" "contact"              "supplementary_file"