Chapter 3 Data Processing

Outline of this Chapter:

3.1 Loading Packages

library(dplyr)
library(tibble)
library(phyloseq)
library(SummarizedExperiment)
library(MicrobiomeAnalysis)

3.2 Importing Data

  • Converting the output of dada2 into phyloseq object
# Read the example DADA2 outputs bundled in the "extdata" folder of
# MicrobiomeAnalysis: two RDS files (sequence table, taxonomy table) and a
# tab-separated sample table whose first column supplies the row names.
seq_tab <- system.file(
  "extdata", "dada2_seqtab.rds",
  package = "MicrobiomeAnalysis"
) %>%
  readRDS()
tax_tab <- system.file(
  "extdata", "dada2_taxtab.rds",
  package = "MicrobiomeAnalysis"
) %>%
  readRDS()
sam_tab <- system.file(
  "extdata", "dada2_samdata.txt",
  package = "MicrobiomeAnalysis"
) %>%
  read.table(sep = "\t", header = TRUE, row.names = 1)

# Combine the three tables into a single phyloseq object.
ps_dada2 <- import_dada2(
  seq_tab = seq_tab,
  tax_tab = tax_tab,
  sam_tab = sam_tab
)

# Print a summary of the imported object.
ps_dada2
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 232 taxa and 20 samples ]
## sample_data() Sample Data:       [ 20 samples by 4 sample variables ]
## tax_table()   Taxonomy Table:    [ 232 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 232 reference sequences ]
  • Converting the qiime2 output of dada2 into phyloseq object
# Paths to the example QIIME 2 artifacts (.qza) and the sample metadata (.tsv)
# bundled in the "extdata" folder of MicrobiomeAnalysis.
otuqza_file <- system.file(
  "extdata", "table.qza",
  package = "MicrobiomeAnalysis"
)
taxaqza_file <- system.file(
  "extdata", "taxonomy.qza",
  package = "MicrobiomeAnalysis"
)
sample_file <- system.file(
  "extdata", "sample-metadata.tsv",
  package = "MicrobiomeAnalysis"
)
treeqza_file <- system.file(
  "extdata", "tree.qza",
  package = "MicrobiomeAnalysis"
)

# Build the phyloseq object from the four input files.
ps_qiime2 <- import_qiime2(
  otu_qza = otuqza_file,
  taxa_qza = taxaqza_file,
  sam_tab = sample_file,
  tree_qza = treeqza_file
)

# Print a summary of the imported object.
ps_qiime2
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 770 taxa and 34 samples ]
## sample_data() Sample Data:       [ 34 samples by 9 sample variables ]
## tax_table()   Taxonomy Table:    [ 770 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 770 tips and 768 internal nodes ]
  • Converting inputs into a SummarizedExperiment object
# Load the example protein dataset.
data("Zeybel_2022_protein")

# Extract the three components of the SummarizedExperiment as data frames.
# The variable names deliberately differ from the SummarizedExperiment
# accessors (assay(), rowData(), colData(), metadata()) so that those
# functions are not masked by data objects in the workspace.
assay_df <- SummarizedExperiment::assay(Zeybel_2022_protein) %>%
  data.frame()
row_df <- SummarizedExperiment::rowData(Zeybel_2022_protein) %>%
  data.frame()
col_df <- SummarizedExperiment::colData(Zeybel_2022_protein) %>%
  data.frame()
meta_list <- list(lab = "hua", type = "protein")

# Keep only the first 10 rows and first 10 columns for a small example.
assay_df <- assay_df[1:10, 1:10]

# Assemble a SummarizedExperiment from the separate pieces.
se_protein <- import_SE(
  object = assay_df,
  rowdata = row_df,
  coldata = col_df,
  metadata = meta_list
)

# Print a summary of the assembled object.
se_protein
## class: SummarizedExperiment 
## dim: 10 10 
## metadata(2): lab type
## assays(1): ''
## rownames(10): IL8 VEGFA ... uPA IL6
## rowData names(3): ProteinID LOD prop
## colnames(10): P101001 P101003 ... P101013 P101016
## colData names(47): PatientID Gender ... Right_leg_fat_free_mass Right_leg_total_body_water

3.3 Extracting specific levels

  • Extracting a phyloseq object at the “Genus” level
# Aggregate the DADA2-derived phyloseq object at the "Genus" level.
ps_genus <- aggregate_taxa(x = ps_dada2, level = "Genus")

# Print a summary of the aggregated object.
ps_genus
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 66 taxa and 20 samples ]
## sample_data() Sample Data:       [ 20 samples by 4 sample variables ]
## tax_table()   Taxonomy Table:    [ 66 taxa by 7 taxonomic ranks ]

3.4 Summarizing specific levels

  • The phyloseq object contains the taxonomic levels from Kingdom down to the specified level (e.g., Phylum; the example below uses Genus)
# Summarize the DADA2-derived phyloseq object with level = "Genus".
ps_summarize_genus <- summarize_taxa(ps = ps_dada2, level = "Genus")

# Print a summary of the result.
ps_summarize_genus
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 66 taxa and 20 samples ]
## sample_data() Sample Data:       [ 20 samples by 4 sample variables ]
## tax_table()   Taxonomy Table:    [ 66 taxa by 1 taxonomic ranks ]

3.5 Data Transformation

7 methods to transform individual values (by individual value).

  • “log10”, the transformation is log10(object), and if the data contains zeros the transformation is log10(1 + object).

  • “log10p”, the transformation is log10(1 + object).

  • “log2”, the transformation is log2(object), and if the data contains zeros the transformation is log2(1 + object).

  • “log2p”, the transformation is log2(1 + object).

  • “SquareRoot”, the transformation is Square Root.

  • “CubicRoot”, the transformation is Cubic Root.

  • “logit”, the transformation is Zero-inflated Logit Transformation (Does not work well for microbiome data).

  • Here is an example for a phyloseq-class object.

# Load the example gut dataset (a phyloseq object).
data("Zeybel_2022_gut")

# Apply the "log10p" transformation with level = "Phylum".
ps_transform <- transform_abundances(
  object = Zeybel_2022_gut,
  level = "Phylum",
  transform = "log10p"
)

# Show the first 3 rows of the first 5 columns of the transformed OTU table.
ps_transform@otu_table@.Data[, 1:5] %>%
  head(3)
##                     P101003    P101007    P101010    P101012    P101018
## p__Actinobacteria -1.651096 -2.0986943 -2.5452009 -1.6773074 -1.8931057
## p__Bacteroidetes  -0.330115 -0.1437857 -0.3379679 -0.1574846 -0.1295813
## p__Chloroflexi     0.000000  0.0000000  0.0000000  0.0000000  0.0000000
  • Here is for SummarizedExperiment-class.
# Load the example protein dataset (a SummarizedExperiment object).
data("Zeybel_2022_protein")

# Apply the "SquareRoot" transformation.
se_transform <- transform_abundances(
  object = Zeybel_2022_protein,
  transform = "SquareRoot"
)

# Show the first 3 rows of the first 5 columns of the transformed assay.
assay(se_transform)[, 1:5] %>%
  head(3)
##        P101001  P101003  P101004  P101007  P101009
## IL8   2.246691 2.063092 2.170020 2.400327 2.168238
## VEGFA 3.344589 3.302819 3.246115 3.321395 3.258289
## CD8A  3.398494 3.185073 3.132900 3.262700 3.075131

3.6 Data Imputation

11 methods to impute missing values (NAs or zeros): the ten listed below plus the default “none”.

  • “LOD”: a specific Limit Of Detection provided by the user.

  • “half_min”: half minimal values across samples except zero.

  • “median”: median values across samples except zero.

  • “mean”: mean values across samples except zero.

  • “min”: minimal values across samples except zero.

  • “knn”: k-nearest neighbors samples.

  • “rf”: nonparametric missing value imputation using Random Forest.

  • “global_mean”: a normal distribution with a mean that is down-shifted from the sample mean and a standard deviation that is a fraction of the standard deviation of the sample distribution.

  • “svd”: missing value imputation based on singular value decomposition.

  • “QRILC”: missing value imputation based on quantile regression. (default: “none”).

phyloseq-class as inputs

# Load the example gut dataset (a phyloseq object).
data("Zeybel_2022_gut")

# Impute with the "knn" method with level = "Phylum", using the
# "LiverFatClass" sample variable as the group.
ps_impute <- impute_abundance(
  object = Zeybel_2022_gut,
  level = "Phylum",
  group = "LiverFatClass",
  ZerosAsNA = TRUE,
  RemoveNA = TRUE,
  cutoff = 20,
  method = "knn"
)

# Show the first 3 rows of the first 5 columns of the imputed OTU table.
ps_impute@otu_table@.Data[, 1:5] %>%
  head(3)
##                     P101003   P101007   P101010   P101012   P101018
## p__Actinobacteria 0.0223308 0.0079672 0.0028497 0.0210229 0.0127907
## p__Bacteroidetes  0.4676113 0.7181486 0.4592320 0.6958497 0.7420252
## p__Firmicutes     0.4818712 0.2715033 0.4577191 0.2712132 0.1701021
  • Inputs are from SummarizedExperiment-class.
# Load the example protein dataset (a SummarizedExperiment object).
data("Zeybel_2022_protein")

# Impute with the "knn" method, using "LiverFatClass" as the group.
se_impute <- impute_abundance(
  object = Zeybel_2022_protein,
  group = "LiverFatClass",
  ZerosAsNA = TRUE,
  RemoveNA = TRUE,
  cutoff = 20,
  method = "knn"
)

# Show the first 3 rows of the first 5 columns of the imputed assay.
assay(se_impute)[, 1:5] %>%
  head(3)
##         P101001  P101003  P101004  P101007  P101009
## IL8    5.047325  4.25600  4.70867  5.76131  4.70094
## VEGFA 11.186140 10.90848 10.53712 11.03153 10.61631
## CD8A  11.549635 10.14454  9.81491 10.64507  9.45627

3.7 Data Normalization

Normalizing the OTU_table in phyloseq-class object sample by sample to reduce the effects of systematic differences such as library size (by sample).

  • “rarefy”: random subsampling counts to the smallest library size in the data set.

  • “TSS”: total sum scaling, also referred to as “relative abundance”, the abundances were normalized by dividing the corresponding sample library size.

  • “TMM”: trimmed mean of m-values. First, a sample is chosen as reference. The scaling factor is then derived using a weighted trimmed mean over the differences of the log-transformed gene-count fold-change between the sample and the reference.

  • “RLE”, relative log expression, RLE uses a pseudo-reference calculated using the geometric mean of the gene-specific abundances over all samples. The scaling factors are then calculated as the median of the gene counts ratios between the samples and the reference.

  • “CSS”: cumulative sum scaling, calculates scaling factors as the cumulative sum of gene abundances up to a data-derived threshold.

  • “CLR”: centered log-ratio normalization.

  • “CPM”: per-sample normalization of the sum of the values to 1e+06.

# Load the example "caporaso" dataset.
data("caporaso")

# Normalize with total sum scaling ("TSS").
ps_norm <- normalize(object = caporaso, method = "TSS")

# Show the first 3 rows of the first 5 columns of the normalized OTU table.
ps_norm@otu_table@.Data[, 1:5] %>%
  head(3)
##                             L1S140       L1S208 L1S8 L1S281 L3S242
## New.CleanUp.ReferenceOTU647      0 0.0000000000    0      0      0
## 14030                            0 0.0000000000    0      0      0
## New.CleanUp.ReferenceOTU858      0 0.0001013993    0      0      0

3.8 Data Scaling

Data scaling adjusts each variable/feature by a scaling factor computed based on the dispersion of the variable (by variable/feature).

  • “mean_center”: values minus mean statistic.

  • “zscore”: mean-centered and divided by the standard deviation of each variable.

  • “pareto”: mean-centered and divided by the square root of the standard deviation of each variable.

  • “range”: mean-centered and divided by the range of each variable. (default: “none”).

phyloseq-class as inputs

# Load the example "enterotypes_arumugam" dataset.
data("enterotypes_arumugam")

# Apply "range" scaling with level = "Phylum".
ps_scale <- scale_variables(
  object = enterotypes_arumugam,
  level = "Phylum",
  method = "range"
)

# Show the first 3 rows of the first 5 columns of the scaled OTU table.
ps_scale@otu_table@.Data[, 1:5] %>%
  head(3)
##                    AM-AD-1     AM-AD-2  AM-F10-T1   AM-F10-T2    DA-AD-1
## Acidobacteria   0.00000000  0.00000000  0.0000000 0.000000000  0.0000000
## Actinobacteria  0.03379937 -0.05187261  0.0180014 0.882710081 -0.1132282
## Bacteroidetes  -0.21758868 -0.21769150 -0.0870175 0.004106405  0.4083952

Inputs are from SummarizedExperiment-class.

# Load the example protein dataset (a SummarizedExperiment object).
data("Zeybel_2022_protein")

# Impute first (same settings as in the imputation section), then scale the
# imputed object.
se_impute <- impute_abundance(
  object = Zeybel_2022_protein,
  group = "LiverFatClass",
  ZerosAsNA = TRUE,
  RemoveNA = TRUE,
  cutoff = 20,
  method = "knn"
)

# Apply "zscore" scaling to the imputed data.
se_scale <- scale_variables(se_impute, method = "zscore")

# Show the first 3 rows of the first 5 columns of the scaled assay.
assay(se_scale)[, 1:5] %>%
  head(3)
##         P101001     P101003    P101004   P101007    P101009
## IL8   0.1612462 -1.49334575 -0.5468520 1.6541269 -0.5630148
## VEGFA 0.8041564  0.08413878 -0.8788582 0.4032275 -0.6735057
## CD8A  2.8143331  0.18882517 -0.4271091 1.1240968 -1.0972504

3.9 Data Trimming

Trimming samples or features whose prevalence is less than the threshold:

  • “both”, prevalence of features and samples more than cutoff.

  • “feature”, prevalence of features more than cutoff.

  • “feature_group”, prevalence of features more than cutoff by groups.

  • “sample”, prevalence of samples more than cutoff.

phyloseq-class as inputs

# Load the example gut dataset (a phyloseq object).
data("Zeybel_2022_gut")

# Trim with trim = "feature_group" and a cutoff of 0.1, with level = "Phylum"
# and "LiverFatClass" as the group.
ps_trim <- trim_prevalence(
  Zeybel_2022_gut,
  group = "LiverFatClass",
  level = "Phylum",
  cutoff = 0.1,
  trim = "feature_group"
)

# Print a summary of the trimmed object.
ps_trim
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 5 taxa and 42 samples ]
## sample_data() Sample Data:       [ 42 samples by 46 sample variables ]
## tax_table()   Taxonomy Table:    [ 5 taxa by 3 taxonomic ranks ]

Inputs are from SummarizedExperiment-class.

# Load the example protein dataset (a SummarizedExperiment object).
data("Zeybel_2022_protein")

# Trim with trim = "both" and a cutoff of 0.99.
se_trim <- trim_prevalence(Zeybel_2022_protein, cutoff = 0.99, trim = "both")

# Print a summary of the trimmed object.
se_trim
## class: SummarizedExperiment 
## dim: 66 54 
## metadata(0):
## assays(1): ''
## rownames(66): IL8 VEGFA ... TNFB CSF_1
## rowData names(3): ProteinID LOD prop
## colnames(54): P101001 P101003 ... P101095 P101096
## colData names(47): PatientID Gender ... Right_leg_fat_free_mass Right_leg_total_body_water

3.10 Data Filtering

Filtering out features that have low relative abundance or are unclassified (Ref: Thingholm et al. 2019):

  • The mean relative abundance of the feature across all samples is more than the threshold (cutoff_mean);

  • The relative abundance of the feature is more than the minimum threshold (cutoff_one) in at least one sample.

phyloseq-class as inputs

# Load the example gut dataset (a phyloseq object).
data("Zeybel_2022_gut")

# Multiply every value by 10^7 and round, sample by sample, to obtain
# count-like values for the absolute-abundance example.
Zeybel_2022_gut_counts <- phyloseq::transform_sample_counts(
  Zeybel_2022_gut,
  function(x) {
    round(x * 10^7)
  }
)

# absolute abundance
ps_filter_absolute <- filter_abundance(
  object = Zeybel_2022_gut_counts,
  level = "Genus",
  cutoff_mean = 100,
  cutoff_one = 1000,
  unclass = FALSE
)

# Print a summary of the filtered object.
ps_filter_absolute
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 94 taxa and 42 samples ]
## sample_data() Sample Data:       [ 42 samples by 46 sample variables ]
## tax_table()   Taxonomy Table:    [ 94 taxa by 7 taxonomic ranks ]
# relative abundance: filter the original (unscaled) dataset with
# correspondingly smaller cutoffs.
ps_filter_relative <- filter_abundance(
  object = Zeybel_2022_gut,
  level = "Genus",
  cutoff_mean = 1e-04,
  cutoff_one = 1e-03,
  unclass = TRUE
)

# Print a summary of the filtered object.
ps_filter_relative
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 67 taxa and 42 samples ]
## sample_data() Sample Data:       [ 42 samples by 46 sample variables ]
## tax_table()   Taxonomy Table:    [ 67 taxa by 7 taxonomic ranks ]

Inputs are from SummarizedExperiment-class.

# Load the example protein dataset (a SummarizedExperiment object).
data("Zeybel_2022_protein")

# Filter with cutoff_mean = 5 and cutoff_one = 8.
se_filter <- filter_abundance(
  object = Zeybel_2022_protein,
  cutoff_mean = 5,
  cutoff_one = 8
)

# Print a summary of the filtered object.
se_filter
## class: SummarizedExperiment 
## dim: 39 54 
## metadata(0):
## assays(1): ''
## rownames(39): VEGFA CD8A ... STAMBP CSF_1
## rowData names(3): ProteinID LOD prop
## colnames(54): P101001 P101003 ... P101095 P101096
## colData names(47): PatientID Gender ... Right_leg_fat_free_mass Right_leg_total_body_water

3.11 Session Information

# Record the R version, platform and loaded package versions used to render
# this chapter, for reproducibility.
devtools::session_info()
## ─ Session info ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.1.3 (2022-03-10)
##  os       macOS Monterey 12.2.1
##  system   x86_64, darwin17.0
##  ui       RStudio
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       Asia/Shanghai
##  date     2023-08-16
##  rstudio  2023.06.1+524 Mountain Hydrangea (desktop)
##  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
## 
## ─ Packages ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
##  package              * version    date (UTC) lib source
##  ade4                   1.7-22     2023-02-06 [2] CRAN (R 4.1.2)
##  ANCOMBC                1.4.0      2021-10-26 [2] Bioconductor
##  annotate               1.72.0     2021-10-26 [2] Bioconductor
##  AnnotationDbi          1.60.2     2023-03-10 [2] Bioconductor
##  ape                    5.7-1      2023-03-13 [2] CRAN (R 4.1.2)
##  backports              1.4.1      2021-12-13 [2] CRAN (R 4.1.0)
##  base64enc              0.1-3      2015-07-28 [2] CRAN (R 4.1.0)
##  Biobase              * 2.54.0     2021-10-26 [2] Bioconductor
##  BiocGenerics         * 0.40.0     2021-10-26 [2] Bioconductor
##  BiocParallel           1.28.3     2021-12-09 [2] Bioconductor
##  biomformat             1.22.0     2021-10-26 [2] Bioconductor
##  Biostrings             2.62.0     2021-10-26 [2] Bioconductor
##  bit                    4.0.5      2022-11-15 [2] CRAN (R 4.1.2)
##  bit64                  4.0.5      2020-08-30 [2] CRAN (R 4.1.0)
##  bitops                 1.0-7      2021-04-24 [2] CRAN (R 4.1.0)
##  blob                   1.2.4      2023-03-17 [2] CRAN (R 4.1.2)
##  bookdown               0.34       2023-05-09 [2] CRAN (R 4.1.2)
##  broom                  1.0.5      2023-06-09 [2] CRAN (R 4.1.3)
##  bslib                  0.5.0      2023-06-09 [2] CRAN (R 4.1.3)
##  cachem                 1.0.8      2023-05-01 [2] CRAN (R 4.1.2)
##  callr                  3.7.3      2022-11-02 [2] CRAN (R 4.1.2)
##  caret                * 6.0-94     2023-03-21 [2] CRAN (R 4.1.2)
##  caTools                1.18.2     2021-03-28 [2] CRAN (R 4.1.0)
##  cellranger             1.1.0      2016-07-27 [2] CRAN (R 4.1.0)
##  checkmate              2.2.0      2023-04-27 [2] CRAN (R 4.1.2)
##  class                  7.3-22     2023-05-03 [2] CRAN (R 4.1.2)
##  cli                    3.6.1      2023-03-23 [2] CRAN (R 4.1.2)
##  cluster                2.1.4      2022-08-22 [2] CRAN (R 4.1.2)
##  codetools              0.2-19     2023-02-01 [2] CRAN (R 4.1.2)
##  colorspace             2.1-0      2023-01-23 [2] CRAN (R 4.1.2)
##  cowplot                1.1.1      2020-12-30 [2] CRAN (R 4.1.0)
##  crayon                 1.5.2      2022-09-29 [2] CRAN (R 4.1.2)
##  crosstalk              1.2.0      2021-11-04 [2] CRAN (R 4.1.0)
##  data.table           * 1.14.8     2023-02-17 [2] CRAN (R 4.1.2)
##  DBI                    1.1.3      2022-06-18 [2] CRAN (R 4.1.2)
##  DelayedArray           0.20.0     2021-10-26 [2] Bioconductor
##  DESeq2                 1.34.0     2021-10-26 [2] Bioconductor
##  devtools               2.4.5      2022-10-11 [2] CRAN (R 4.1.2)
##  digest                 0.6.33     2023-07-07 [1] CRAN (R 4.1.3)
##  dplyr                * 1.1.2      2023-04-20 [2] CRAN (R 4.1.2)
##  DT                     0.28       2023-05-18 [2] CRAN (R 4.1.3)
##  e1071                  1.7-13     2023-02-01 [2] CRAN (R 4.1.2)
##  ellipsis               0.3.2      2021-04-29 [2] CRAN (R 4.1.0)
##  evaluate               0.21       2023-05-05 [2] CRAN (R 4.1.2)
##  fansi                  1.0.4      2023-01-22 [2] CRAN (R 4.1.2)
##  farver                 2.1.1      2022-07-06 [2] CRAN (R 4.1.2)
##  fastmap                1.1.1      2023-02-24 [2] CRAN (R 4.1.2)
##  forcats              * 1.0.0      2023-01-29 [2] CRAN (R 4.1.2)
##  foreach                1.5.2      2022-02-02 [2] CRAN (R 4.1.2)
##  foreign                0.8-84     2022-12-06 [2] CRAN (R 4.1.2)
##  Formula                1.2-5      2023-02-24 [2] CRAN (R 4.1.2)
##  fs                     1.6.2      2023-04-25 [2] CRAN (R 4.1.2)
##  future                 1.33.0     2023-07-01 [2] CRAN (R 4.1.3)
##  future.apply           1.11.0     2023-05-21 [2] CRAN (R 4.1.3)
##  geepack              * 1.3.9      2022-08-16 [1] CRAN (R 4.1.2)
##  genefilter             1.76.0     2021-10-26 [2] Bioconductor
##  geneplotter            1.72.0     2021-10-26 [2] Bioconductor
##  generics               0.1.3      2022-07-05 [2] CRAN (R 4.1.2)
##  GenomeInfoDb         * 1.30.1     2022-01-30 [2] Bioconductor
##  GenomeInfoDbData       1.2.7      2022-03-09 [2] Bioconductor
##  GenomicRanges        * 1.46.1     2021-11-18 [2] Bioconductor
##  ggplot2              * 3.4.2      2023-04-03 [2] CRAN (R 4.1.2)
##  glmnet                 4.1-7      2023-03-23 [2] CRAN (R 4.1.2)
##  globals                0.16.2     2022-11-21 [2] CRAN (R 4.1.2)
##  glue                   1.6.2      2022-02-24 [2] CRAN (R 4.1.2)
##  gower                  1.0.1      2022-12-22 [2] CRAN (R 4.1.2)
##  gplots                 3.1.3      2022-04-25 [2] CRAN (R 4.1.2)
##  gridExtra              2.3        2017-09-09 [2] CRAN (R 4.1.0)
##  gtable                 0.3.3      2023-03-21 [2] CRAN (R 4.1.2)
##  gtools                 3.9.4      2022-11-27 [2] CRAN (R 4.1.2)
##  hardhat                1.3.0      2023-03-30 [2] CRAN (R 4.1.2)
##  here                   1.0.1      2020-12-13 [2] CRAN (R 4.1.0)
##  highr                  0.10       2022-12-22 [2] CRAN (R 4.1.2)
##  Hmisc                * 5.1-0      2023-05-08 [2] CRAN (R 4.1.2)
##  hms                    1.1.3      2023-03-21 [2] CRAN (R 4.1.2)
##  htmlTable              2.4.1      2022-07-07 [2] CRAN (R 4.1.2)
##  htmltools              0.5.5      2023-03-23 [2] CRAN (R 4.1.2)
##  htmlwidgets            1.6.2      2023-03-17 [2] CRAN (R 4.1.2)
##  httpuv                 1.6.11     2023-05-11 [2] CRAN (R 4.1.3)
##  httr                   1.4.6      2023-05-08 [2] CRAN (R 4.1.2)
##  igraph                 1.5.0      2023-06-16 [1] CRAN (R 4.1.3)
##  impute                 1.68.0     2021-10-26 [2] Bioconductor
##  ipred                  0.9-14     2023-03-09 [2] CRAN (R 4.1.2)
##  IRanges              * 2.28.0     2021-10-26 [2] Bioconductor
##  iterators              1.0.14     2022-02-05 [2] CRAN (R 4.1.2)
##  jquerylib              0.1.4      2021-04-26 [2] CRAN (R 4.1.0)
##  jsonlite               1.8.7      2023-06-29 [2] CRAN (R 4.1.3)
##  KEGGREST               1.34.0     2021-10-26 [2] Bioconductor
##  KernSmooth             2.23-22    2023-07-10 [2] CRAN (R 4.1.3)
##  knitr                  1.43       2023-05-25 [2] CRAN (R 4.1.3)
##  labeling               0.4.2      2020-10-20 [2] CRAN (R 4.1.0)
##  later                  1.3.1      2023-05-02 [2] CRAN (R 4.1.2)
##  lattice              * 0.21-8     2023-04-05 [2] CRAN (R 4.1.2)
##  lava                   1.7.2.1    2023-02-27 [2] CRAN (R 4.1.2)
##  lifecycle              1.0.3      2022-10-07 [2] CRAN (R 4.1.2)
##  limma                  3.50.3     2022-04-07 [2] Bioconductor
##  listenv                0.9.0      2022-12-16 [2] CRAN (R 4.1.2)
##  locfit                 1.5-9.8    2023-06-11 [2] CRAN (R 4.1.3)
##  lubridate            * 1.9.2      2023-02-10 [2] CRAN (R 4.1.2)
##  magrittr               2.0.3      2022-03-30 [2] CRAN (R 4.1.2)
##  MASS                   7.3-60     2023-05-04 [2] CRAN (R 4.1.2)
##  Matrix                 1.6-0      2023-07-08 [2] CRAN (R 4.1.3)
##  MatrixGenerics       * 1.6.0      2021-10-26 [2] Bioconductor
##  MatrixModels           0.5-2      2023-07-10 [2] CRAN (R 4.1.3)
##  matrixStats          * 1.0.0      2023-06-02 [2] CRAN (R 4.1.3)
##  memoise                2.0.1      2021-11-26 [2] CRAN (R 4.1.0)
##  metagenomeSeq          1.36.0     2021-10-26 [2] Bioconductor
##  mgcv                   1.8-42     2023-03-02 [2] CRAN (R 4.1.2)
##  microbiome             1.16.0     2021-10-26 [2] Bioconductor
##  MicrobiomeAnalysis   * 1.0.3      2023-08-16 [1] Bioconductor
##  mime                   0.12       2021-09-28 [2] CRAN (R 4.1.0)
##  miniUI                 0.1.1.1    2018-05-18 [2] CRAN (R 4.1.0)
##  ModelMetrics           1.2.2.2    2020-03-17 [2] CRAN (R 4.1.0)
##  multcomp               1.4-25     2023-06-20 [2] CRAN (R 4.1.3)
##  multtest               2.50.0     2021-10-26 [2] Bioconductor
##  munsell                0.5.0      2018-06-12 [2] CRAN (R 4.1.0)
##  mvtnorm                1.2-2      2023-06-08 [2] CRAN (R 4.1.3)
##  nlme                 * 3.1-162    2023-01-31 [2] CRAN (R 4.1.2)
##  nloptr                 2.0.3      2022-05-26 [2] CRAN (R 4.1.2)
##  nnet                   7.3-19     2023-05-03 [2] CRAN (R 4.1.2)
##  parallelly             1.36.0     2023-05-26 [2] CRAN (R 4.1.3)
##  permute                0.9-7      2022-01-27 [2] CRAN (R 4.1.2)
##  phyloseq             * 1.38.0     2021-10-26 [2] Bioconductor
##  pillar                 1.9.0      2023-03-22 [2] CRAN (R 4.1.2)
##  pkgbuild               1.4.2      2023-06-26 [2] CRAN (R 4.1.3)
##  pkgconfig              2.0.3      2019-09-22 [2] CRAN (R 4.1.0)
##  pkgload                1.3.2.1    2023-07-08 [2] CRAN (R 4.1.3)
##  plyr                   1.8.8      2022-11-11 [2] CRAN (R 4.1.2)
##  png                    0.1-8      2022-11-29 [2] CRAN (R 4.1.2)
##  polspline              1.1.23     2023-06-29 [1] CRAN (R 4.1.3)
##  prettyunits            1.1.1      2020-01-24 [2] CRAN (R 4.1.0)
##  pROC                 * 1.18.4     2023-07-06 [2] CRAN (R 4.1.3)
##  processx               3.8.2      2023-06-30 [2] CRAN (R 4.1.3)
##  prodlim                2023.03.31 2023-04-02 [2] CRAN (R 4.1.2)
##  profvis                0.3.8      2023-05-02 [2] CRAN (R 4.1.2)
##  promises               1.2.0.1    2021-02-11 [2] CRAN (R 4.1.0)
##  proxy                  0.4-27     2022-06-09 [2] CRAN (R 4.1.2)
##  ps                     1.7.5      2023-04-18 [2] CRAN (R 4.1.2)
##  purrr                * 1.0.1      2023-01-10 [2] CRAN (R 4.1.2)
##  quantreg               5.95       2023-04-08 [2] CRAN (R 4.1.2)
##  R6                     2.5.1      2021-08-19 [2] CRAN (R 4.1.0)
##  randomForest         * 4.7-1.1    2022-05-23 [2] CRAN (R 4.1.2)
##  rbibutils              2.2.13     2023-01-13 [2] CRAN (R 4.1.2)
##  RColorBrewer           1.1-3      2022-04-03 [2] CRAN (R 4.1.2)
##  Rcpp                   1.0.11     2023-07-06 [1] CRAN (R 4.1.3)
##  RCurl                  1.98-1.12  2023-03-27 [2] CRAN (R 4.1.2)
##  Rdpack                 2.4        2022-07-20 [2] CRAN (R 4.1.2)
##  readr                * 2.1.4      2023-02-10 [2] CRAN (R 4.1.2)
##  readxl                 1.4.3      2023-07-06 [2] CRAN (R 4.1.3)
##  recipes                1.0.6      2023-04-25 [2] CRAN (R 4.1.2)
##  remotes                2.4.2      2021-11-30 [2] CRAN (R 4.1.0)
##  reshape2               1.4.4      2020-04-09 [2] CRAN (R 4.1.0)
##  reticulate             1.30       2023-06-09 [2] CRAN (R 4.1.3)
##  rhdf5                  2.38.1     2022-03-10 [2] Bioconductor
##  rhdf5filters           1.6.0      2021-10-26 [2] Bioconductor
##  Rhdf5lib               1.16.0     2021-10-26 [2] Bioconductor
##  rlang                  1.1.1      2023-04-28 [1] CRAN (R 4.1.2)
##  rmarkdown              2.23       2023-07-01 [2] CRAN (R 4.1.3)
##  rms                  * 6.7-0      2023-05-08 [1] CRAN (R 4.1.2)
##  ropls                * 1.26.4     2022-01-11 [2] Bioconductor
##  rpart                  4.1.19     2022-10-21 [2] CRAN (R 4.1.2)
##  rprojroot              2.0.3      2022-04-02 [2] CRAN (R 4.1.2)
##  RSQLite                2.3.1      2023-04-03 [2] CRAN (R 4.1.2)
##  rstudioapi             0.15.0     2023-07-07 [2] CRAN (R 4.1.3)
##  Rtsne                  0.16       2022-04-17 [2] CRAN (R 4.1.2)
##  S4Vectors            * 0.32.4     2022-03-29 [2] Bioconductor
##  sandwich               3.0-2      2022-06-15 [2] CRAN (R 4.1.2)
##  sass                   0.4.6      2023-05-03 [2] CRAN (R 4.1.2)
##  scales                 1.2.1      2022-08-20 [2] CRAN (R 4.1.2)
##  sessioninfo            1.2.2      2021-12-06 [2] CRAN (R 4.1.0)
##  shape                  1.4.6      2021-05-19 [2] CRAN (R 4.1.0)
##  shiny                  1.7.4.1    2023-07-06 [2] CRAN (R 4.1.3)
##  SparseM                1.81       2021-02-18 [2] CRAN (R 4.1.0)
##  stringi                1.7.12     2023-01-11 [2] CRAN (R 4.1.2)
##  stringr              * 1.5.0      2022-12-02 [2] CRAN (R 4.1.2)
##  SummarizedExperiment * 1.24.0     2021-10-26 [2] Bioconductor
##  survival               3.5-5      2023-03-12 [2] CRAN (R 4.1.2)
##  TH.data                1.1-2      2023-04-17 [2] CRAN (R 4.1.2)
##  tibble               * 3.2.1      2023-03-20 [2] CRAN (R 4.1.2)
##  tidyr                * 1.3.0      2023-01-24 [2] CRAN (R 4.1.2)
##  tidyselect             1.2.0      2022-10-10 [2] CRAN (R 4.1.2)
##  tidyverse            * 2.0.0      2023-02-22 [1] CRAN (R 4.1.2)
##  timechange             0.2.0      2023-01-11 [2] CRAN (R 4.1.2)
##  timeDate               4022.108   2023-01-07 [2] CRAN (R 4.1.2)
##  tinytex                0.45       2023-04-18 [2] CRAN (R 4.1.2)
##  tzdb                   0.4.0      2023-05-12 [2] CRAN (R 4.1.3)
##  urlchecker             1.0.1      2021-11-30 [2] CRAN (R 4.1.0)
##  usethis                2.2.2      2023-07-06 [2] CRAN (R 4.1.3)
##  utf8                   1.2.3      2023-01-31 [2] CRAN (R 4.1.2)
##  vctrs                  0.6.3      2023-06-14 [1] CRAN (R 4.1.3)
##  vegan                  2.6-4      2022-10-11 [2] CRAN (R 4.1.2)
##  viridisLite            0.4.2      2023-05-02 [2] CRAN (R 4.1.2)
##  withr                  2.5.0      2022-03-03 [2] CRAN (R 4.1.2)
##  Wrench                 1.12.0     2021-10-26 [2] Bioconductor
##  xfun                   0.39       2023-04-20 [2] CRAN (R 4.1.2)
##  XML                    3.99-0.14  2023-03-19 [2] CRAN (R 4.1.2)
##  xtable                 1.8-4      2019-04-21 [2] CRAN (R 4.1.0)
##  XVector                0.34.0     2021-10-26 [2] Bioconductor
##  yaml                   2.3.7      2023-01-23 [2] CRAN (R 4.1.2)
##  zlibbioc               1.40.0     2021-10-26 [2] Bioconductor
##  zoo                    1.8-12     2023-04-13 [2] CRAN (R 4.1.2)
## 
##  [1] /Users/zouhua/Library/R/x86_64/4.1/library
##  [2] /Library/Frameworks/R.framework/Versions/4.1/Resources/library
## 
## ─ Python configuration ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
##  python:         /Users/zouhua/opt/anaconda3/bin/python3.9
##  libpython:      /Users/zouhua/opt/anaconda3/lib/libpython3.9.dylib
##  pythonhome:     /Users/zouhua/opt/anaconda3:/Users/zouhua/opt/anaconda3
##  version:        3.9.16 | packaged by conda-forge | (main, Feb  1 2023, 21:42:20)  [Clang 14.0.6 ]
##  numpy:          /Users/zouhua/opt/anaconda3/lib/python3.9/site-packages/numpy
##  numpy_version:  1.23.3
##  
##  NOTE: Python version was forced by use_python function
## 
## ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

References

Thingholm, Louise B, Malte C Rühlemann, Manja Koch, Brie Fuqua, Guido Laucke, Ruwen Boehm, Corinna Bang, et al. 2019. “Obese Individuals with and Without Type 2 Diabetes Show Different Gut Microbial Functional Capacity and Composition.” Cell Host & Microbe 26 (2): 252–64.