knitr::opts_chunk$set(echo = TRUE)

Advanced markdown script example


Setup

Load packages

list.of.packages <- c("patchwork", "ggpubr", "tidyverse", "data.table", "knitr", "markdown", "rmarkdown", "dplyr", "ggplot2", "moments", "car")

# install packages if new
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

# load packages
invisible(lapply(list.of.packages, library, character.only = TRUE))

Download OASIS dataset

We’ll be downloading the data from http://www.oasis-brains.org/pdf/oasis_longitudinal.csv

A full explanation of the data can be found in:

Open Access Series of Imaging Studies (OASIS): Longitudinal MRI Data in Nondemented and Demented Older Adults. Marcus, DS, Fotenos, AF, Csernansky, JG, Morris, JC, Buckner, RL, 2010. Journal of Cognitive Neuroscience, 22, 2677-2684. doi: 10.1162/jocn.2009.21407

df <- read.csv("http://www.oasis-brains.org/pdf/oasis_longitudinal.csv") 

# print out top 5 rows of the DataFrame
kable(head(df, n = 5L))
Subject.ID MRI.ID Group Visit MR.Delay M.F Hand Age EDUC SES MMSE CDR eTIV nWBV ASF
OAS2_0001 OAS2_0001_MR1 Nondemented 1 0 M R 87 14 2 27 0.0 1987 0.696 0.883
OAS2_0001 OAS2_0001_MR2 Nondemented 2 457 M R 88 14 2 30 0.0 2004 0.681 0.876
OAS2_0002 OAS2_0002_MR1 Demented 1 0 M R 75 12 NA 23 0.5 1678 0.736 1.046
OAS2_0002 OAS2_0002_MR2 Demented 2 560 M R 76 12 NA 28 0.5 1738 0.713 1.010
OAS2_0002 OAS2_0002_MR3 Demented 3 1895 M R 80 12 NA 22 0.5 1698 0.701 1.034

Pre-processing the Data

Dementia status from CDR

We’re told in the associated paper (linked above) that the CDR values correspond to the following levels of severity:

  • 0 = no dementia
  • 0.5 = very mild AD
  • 1 = mild AD
  • 2 = moderate AD

So let’s derive this “Status” from the CDR values:

#create an ordered factor and map CDR values to status 
df <- df %>%
  #leave numeric version for later
  dplyr::mutate(CDR.num = CDR) %>%
  #Normally, mutate works iteratively within a command. However, I have experienced situations 
  #where the newly created variables could not be reused in the same mutate, so here is a second mutate function.
  dplyr::mutate(CDR = dplyr::case_when(CDR.num == 0 ~ 'no dementia',
                                       CDR.num == 0.5 ~ 'very mild AD',
                                       CDR.num == 1 ~ 'mild AD',
                                       CDR.num == 2 ~ 'moderate AD'), # if no cases match NA is returned
                CDR = forcats::fct_reorder(CDR, CDR.num))

Now we can look at this status, and how many records there are both overall, and at each of the visits (or time points).

# Let's look at how many records we have of each severity
kable(df %>%
        dplyr::group_by(CDR) %>%
        dplyr::tally())
CDR n
no dementia 206
very mild AD 123
mild AD 41
moderate AD 3
# How many of each status do we have at each visit?
kable(df %>%
        dplyr::group_by(Visit,CDR) %>%
        dplyr::tally())
Visit CDR n
1 no dementia 85
1 very mild AD 52
1 mild AD 13
2 no dementia 73
2 very mild AD 49
2 mild AD 19
2 moderate AD 3
3 no dementia 34
3 very mild AD 18
3 mild AD 6
4 no dementia 10
4 very mild AD 3
4 mild AD 2
5 no dementia 4
5 very mild AD 1
5 mild AD 1

Variable Types

str(df)
'data.frame':   373 obs. of  16 variables:
 $ Subject.ID: Factor w/ 150 levels "OAS2_0001","OAS2_0002",..: 1 1 2 2 2 3 3 4 4 4 ...
 $ MRI.ID    : Factor w/ 373 levels "OAS2_0001_MR1",..: 1 2 3 4 5 6 7 8 9 10 ...
 $ Group     : Factor w/ 3 levels "Converted","Demented",..: 3 3 2 2 2 3 3 3 3 3 ...
 $ Visit     : int  1 2 1 2 3 1 2 1 2 3 ...
 $ MR.Delay  : int  0 457 0 560 1895 0 538 0 1010 1603 ...
 $ M.F       : Factor w/ 2 levels "F","M": 2 2 2 2 2 1 1 2 2 2 ...
 $ Hand      : Factor w/ 1 level "R": 1 1 1 1 1 1 1 1 1 1 ...
 $ Age       : int  87 88 75 76 80 88 90 80 83 85 ...
 $ EDUC      : int  14 14 12 12 12 18 18 12 12 12 ...
 $ SES       : int  2 2 NA NA NA 3 3 4 4 4 ...
 $ MMSE      : int  27 30 23 28 22 28 27 28 29 30 ...
 $ CDR       : Factor w/ 4 levels "no dementia",..: 1 1 2 2 2 1 1 1 2 1 ...
 $ eTIV      : int  1987 2004 1678 1738 1698 1215 1200 1689 1701 1699 ...
 $ nWBV      : num  0.696 0.681 0.736 0.713 0.701 0.71 0.718 0.712 0.711 0.705 ...
 $ ASF       : num  0.883 0.876 1.046 1.01 1.034 ...
 $ CDR.num   : num  0 0 0.5 0.5 0.5 0 0 0 0.5 0 ...

Change variable names

df <- df %>%
  dplyr::rename(Sex = M.F,
                Education = EDUC)

Coerche types? Not always needed but good practice.

Change “Visit” from numeric values to ordered factor (1-5).

df <- df %>%
  dplyr::mutate(Visit = factor(Visit, 
                               levels=c(1,2,3,4,5),
                               ordered=TRUE))

Change “SES” from numeric values to ordered factor (1 = highest to 5 = lowest status).

df <- df %>%
  dplyr::mutate(SES = factor(SES, 
                             levels=c(1,2,3,4,5),
                             ordered=TRUE))

Visits & Baseline Data

kable(df %>%
        dplyr::group_by(Visit) %>%
        dplyr::tally(), 
      caption = "Participant number per visit")
Participant number per visit
Visit n
1 150
2 144
3 58
4 15
5 6

Filter for baseline data, and look at their dementia status on arrival

df_baseline <- df %>% 
  dplyr::filter(Visit == 1)

kable(df_baseline %>%
        dplyr::group_by(CDR) %>%
        dplyr::tally(), 
      caption = "CDR status of participants in first visit")
CDR status of participants in first visit
CDR n
no dementia 85
very mild AD 52
mild AD 13

Data Exploration

Summarise Data (Non-numerical Variables)

With the exception of CDR, as this has already been done above.

numeric.var <- df %>%
  dplyr::select(where(is.numeric), -CDR.num) %>% 
  names()

kable(head(df %>% 
             dplyr::select(-all_of(numeric.var))) %>% 
        data.frame(),
      caption = "Head of all non-numeric variables in the dataset.")
Head of all non-numeric variables in the dataset.
Subject.ID MRI.ID Group Visit Sex Hand SES CDR CDR.num
OAS2_0001 OAS2_0001_MR1 Nondemented 1 M R 2 no dementia 0.0
OAS2_0001 OAS2_0001_MR2 Nondemented 2 M R 2 no dementia 0.0
OAS2_0002 OAS2_0002_MR1 Demented 1 M R NA very mild AD 0.5
OAS2_0002 OAS2_0002_MR2 Demented 2 M R NA very mild AD 0.5
OAS2_0002 OAS2_0002_MR3 Demented 3 M R NA very mild AD 0.5
OAS2_0004 OAS2_0004_MR1 Nondemented 1 F R 3 no dementia 0.0
kable(df %>%
        dplyr::group_by(Group) %>%
        dplyr::tally(), 
      caption = "Factor levels and frequency of variable 'Group'")
Factor levels and frequency of variable ‘Group’
Group n
Converted 37
Demented 146
Nondemented 190
kable(df %>%
        dplyr::group_by(Visit) %>%
        dplyr::tally(), 
      caption = "Factor levels and frequency of variable 'Visit'")
Factor levels and frequency of variable ‘Visit’
Visit n
1 150
2 144
3 58
4 15
5 6
kable(df %>%
        dplyr::group_by(Sex) %>%
        dplyr::tally(), 
      caption = "Factor levels and frequency of variable 'Sex'")
Factor levels and frequency of variable ‘Sex’
Sex n
F 213
M 160
kable(df %>%
        dplyr::group_by(Hand) %>%
        dplyr::tally(), 
      caption = "Factor levels and frequency of variable 'Hand'")
Factor levels and frequency of variable ‘Hand’
Hand n
R 373
kable(df %>%
        dplyr::group_by(SES) %>%
        dplyr::tally(), 
      caption = "Factor levels and frequency of variable 'SES'")
Factor levels and frequency of variable ‘SES’
SES n
1 88
2 103
3 82
4 74
5 7
NA 19

Summarise Data (Numerical Variables)

kable(df %>% 
        dplyr::select(all_of(numeric.var)) %>%
        summary() %>% 
        data.frame() %>%
        dplyr::select(-Var1) %>%
        dplyr::rename(Variable = Var2) %>%
        dplyr::mutate(Freq = gsub(" ", "", Freq, fixed = TRUE),
                      Freq = gsub("\'", "", Freq, fixed = TRUE)) %>%
        dplyr::filter(!is.na(Freq)) %>%
        tidyr::separate(Freq, c("V1", "V2"), sep= ":") %>%
        dplyr::group_by(Variable) %>%
        tidyr::spread(V1, V2),
      caption = "Summary of all numeric variables in the dataset")
Summary of all numeric variables in the dataset
Variable 1stQu. 3rdQu. Max. Mean Median Min. NAs
MR.Delay 0.0 873.0 2639.0 595.1 552.0 0.0 NA
Age 71.00 82.00 98.00 77.01 77.00 60.00 NA
Education 12.0 16.0 23.0 14.6 15.0 6.0 NA
MMSE 27.00 30.00 30.00 27.34 29.00 4.00 2
eTIV 1357 1597 2004 1488 1470 1106 NA
nWBV 0.7000 0.7560 0.8370 0.7296 0.7290 0.6440 NA
ASF 1.099 1.293 1.587 1.195 1.194 0.876 NA

Visualize Data (numerical variables)

par(mar = c(4, 4, 1, .1))
#distribution
distribution_plots <- function(variable) {
  df.plot <- df %>% 
    dplyr::filter(!is.na(.[[variable]])) 
  
  hist(df.plot[[variable]], 
       breaks = 50, 
       xlab = paste0(variable),
       main = paste0("Histogram of ", variable))
}

invisible(lapply(numeric.var, distribution_plots))

rm(distribution_plots)
par(mar = c(4, 4, 1, .1))
numeric.var <- c("Age", "Education", "MMSE")
#distribution
distribution_plots <- function(variable) {
  df.plot <- df %>% 
    dplyr::filter(!is.na(.[[variable]])) 
  
  plot <- ggplot(data = df.plot, 
                 aes_(x = as.name(variable), fill = ~ CDR)) +
    geom_histogram()
  print(plot)
}

invisible(lapply(numeric.var, distribution_plots))

rm(numeric.var)
rm(distribution_plots)

Sample Analysis: Table 1

Create a typical table one. Show it in this Markdown and save it also as csv for integrating it, for example, into a manuscript.

# 1. Select variables for table
myVars <- names(df %>%
                  dplyr::select(-Subject.ID, -MRI.ID, -CDR.num))


# 2. Make a list of all categorical variables from the myVars list for this table
catVars <- df %>% 
  dplyr::select(all_of(myVars)) %>%
  dplyr::select_if(purrr::negate(is.numeric)) %>% 
  names()

# 3. Check visually for non-normally distributed numerical variables 
#hist() or histograms
nonnormaldist <- c("MR.Delay", "MMSE")

# 4. Create dataset for table 1
table1df <- df %>%
  dplyr::select(all_of(myVars))

# 5. Create table
tab1 <- tableone::CreateTableOne(data = table1df, 
                                 vars = myVars, 
                                 factorVars = catVars)

# 6. Modify list of table to get a data frame
tab2 <- print(tab1, 
              showAllLevels = TRUE, 
              printToggle = FALSE, 
              noSpaces = TRUE, 
              explain = FALSE, 
              test = TRUE, 
              contDigits = 1, 
              catDigits = 1, 
              missing = TRUE,
              nonnormal = nonnormaldist) %>%
  as.data.frame() %>% 
  data.table::setDT(., keep.rownames = TRUE)
#summary(tab1) #gives more information regarding missing values

# 7. Rename column and rownames (of rn) to create a nice table that can be used in a manuscript
tab3 <- tab2 %>%
  dplyr::mutate(
    rn = ifelse(grepl("^X", rn), "", rn)) %>%
  dplyr::mutate(
    rn = dplyr::recode(rn,
                       "Group" = "Group, N (%)",
                       "Visit" = "Number of participants per visit, N (%)",
                       "MR.Delay" = "Time delay for MR imaging, median [IQR]",
                       "Sex" = "Sex, N (%)",
                       "Hand" = "Handedness, N (%)",
                       "Age" = "Age (years), M (SD)",
                       "Education" = "Education (years), M (SD)",
                       "SES" = "Socioeconomic status, N (%)",
                       "MMSE" = "Mini mental state examination, median [IQR]",
                       "CDR" = "Clinical Dementia Rating, N (%)",
                       "eTIV" = "Estimated total intracranial volume (cm3), M (SD)",
                       "nWBV" = "Normalized whole-brain volume, M (SD)",
                       "ASF" = "Atlas scaling factor, M (SD)"),
    level = dplyr::recode(level,
                          "F" = "women",
                          "M" = "men",
                          "R" = "right")) %>%
  dplyr::rename("Characteristic" = "rn") 

# 8. Print table in markdown
kable(tab3)
Characteristic level Overall Missing
n 373
Group, N (%) Converted 37 (9.9) 0.0
Demented 146 (39.1)
Nondemented 190 (50.9)
Number of participants per visit, N (%) 1 150 (40.2) 0.0
2 144 (38.6)
3 58 (15.5)
4 15 (4.0)
5 6 (1.6)
Time delay for MR imaging, median [IQR] 552.0 [0.0, 873.0] 0.0
Sex, N (%) women 213 (57.1) 0.0
men 160 (42.9)
Handedness, N (%) right 373 (100.0) 0.0
Age (years), M (SD) 77.0 (7.6) 0.0
Education (years), M (SD) 14.6 (2.9) 0.0
Socioeconomic status, N (%) 1 88 (24.9) 5.1
2 103 (29.1)
3 82 (23.2)
4 74 (20.9)
5 7 (2.0)
Mini mental state examination, median [IQR] 29.0 [27.0, 30.0] 0.5
Clinical Dementia Rating, N (%) no dementia 206 (55.2) 0.0
very mild AD 123 (33.0)
mild AD 41 (11.0)
moderate AD 3 (0.8)
Estimated total intracranial volume (cm3), M (SD) 1488.1 (176.1) 0.0
Normalized whole-brain volume, M (SD) 0.7 (0.0) 0.0
Atlas scaling factor, M (SD) 1.2 (0.1) 0.0
# 9. Save table for manuscript
write.csv(tab3, file = "Table_1.csv", row.names = FALSE) 

rm(tab1); rm(tab2); rm(tab3); rm(table1df); rm(catVars); rm(myVars); rm(nonnormaldist)

Data Analysis: MMSE Trajectories over Time

df %>% 
  dplyr::select(Subject.ID, MR.Delay, MMSE, CDR) %>%
  dplyr::filter(complete.cases(.)) %>%
  ggplot(aes(x = MR.Delay, y = MMSE, color = CDR, group = Subject.ID)) +
  geom_line()

Data Analysis: Regression Analysis

Simple Linear Regression

# To be able to create a multitude of plots with different y-axis values I created a function for the plots and used a loop (lapply) to create them. 
# We have to add numeric sex and CDR variables to get regression lines in ggplot
df <- df %>%
  dplyr::mutate(Sex.num = ifelse(Sex == "M", 1, 
                                 ifelse(Sex == "F", 2,
                                        NA)))

plotAgeAssociation <- function(xaxis) {
  # create complete cases dataset
  df.plot <- df %>%
    dplyr::select(MMSE, as.name(xaxis), CDR) %>%
    filter(complete.cases(.))
  
  print(ggplot(data=df.plot, aes_(x=as.name(xaxis), y=~MMSE)) + 
          geom_point(aes_(color=~CDR)) +
          geom_smooth(method = lm) +
          theme(legend.position = "bottom"))
  regr <- lm(as.formula(paste0("MMSE ~ ", xaxis)), 
             data = df.plot %>%
               # We recode CDR back as an unordered factor to have more interpretable results.
               dplyr::mutate(CDR = factor(CDR, ordered=FALSE)))
  result <- broom::tidy(regr, conf.int = TRUE, conf.level = 0.95)
  result$N <- regr$residuals %>% length()
  result$r.squared <- summary(regr)$r.squared
  result <- result %>%
    dplyr::mutate(across(where(is.numeric), round, 3))
  print(kable(result))
}

# run functions for education and MMSE on the y-axis
invisible(lapply(c("Age", "Sex.num", "Education", "CDR.num"), plotAgeAssociation))



|term        | estimate| std.error| statistic| p.value| conf.low| conf.high|   N| r.squared|
|:-----------|--------:|---------:|---------:|-------:|--------:|---------:|---:|---------:|
|(Intercept) |   25.283|     1.934|    13.072|   0.000|   21.480|    29.086| 371|     0.003|
|Age         |    0.027|     0.025|     1.070|   0.285|   -0.022|     0.076| 371|     0.003|



|term        | estimate| std.error| statistic| p.value| conf.low| conf.high|   N| r.squared|
|:-----------|--------:|---------:|---------:|-------:|--------:|---------:|---:|---------:|
|(Intercept) |   25.398|     0.627|    40.500|   0.000|   24.165|    26.631| 371|     0.028|
|Sex.num     |    1.239|     0.381|     3.251|   0.001|    0.490|     1.989| 371|     0.028|



|term        | estimate| std.error| statistic| p.value| conf.low| conf.high|   N| r.squared|
|:-----------|--------:|---------:|---------:|-------:|--------:|---------:|---:|---------:|
|(Intercept) |   23.698|     0.973|    24.350|       0|   21.784|    25.611| 371|     0.038|
|Education   |    0.249|     0.065|     3.817|       0|    0.121|     0.378| 371|     0.038|



|term        | estimate| std.error| statistic| p.value| conf.low| conf.high|   N| r.squared|
|:-----------|--------:|---------:|---------:|-------:|--------:|---------:|---:|---------:|
|(Intercept) |   29.294|     0.176|   166.473|       0|   28.948|    29.640| 371|     0.471|
|CDR.num     |   -6.799|     0.375|   -18.137|       0|   -7.536|    -6.062| 371|     0.471|
rm(plotAgeAssociation)

Then we can do the same again, but this time stratify by the dementia status (not including CDR this time):

par(mar = c(4, 4, 1, .1))
plotAgeAssociation <- function(xaxis) {
  # create complete cases dataset
  df.plot <- df %>%
    dplyr::select(MMSE, as.name(xaxis), CDR) %>%
    dplyr::filter(complete.cases(.))
  
  print(ggplot(data=df.plot, aes_(x=as.name(xaxis), y=~MMSE, color=~CDR)) + 
          geom_point() +
          geom_smooth(method = lm, se = FALSE) +
          theme(legend.position = "bottom"))
}

# run functions for education and MMSE on the y-axis
invisible(lapply(c("Age", "Sex.num", "Education"), plotAgeAssociation))

rm(plotAgeAssociation)

Multiple Linear Regression

We create a regression model with MMSE as the outcome and age, sex, education and CDR as the independents. Use the no dementia group as reference group for CDR.

model <- lm(MMSE ~ Age + Sex + Education + relevel(factor(CDR, ordered=FALSE), ref = "no dementia"), 
            data = df_baseline)

Assumption check

kable(moments::skewness(model$residuals) %>%
        data.frame() %>%
        dplyr::rename("Skewness of residuals" = "."), 
      caption = "Skewness of model residuals")
Skewness of model residuals
Skewness of residuals
-0.6230355
kable(car::vif(model), 
      caption = "Variance-inflation factors to assess multicolinearity of model independents")
Variance-inflation factors to assess multicolinearity of model independents
GVIF Df GVIF^(1/(2*Df))
Age 1.013001 1 1.006480
Sex 1.096688 1 1.047228
Education 1.109618 1 1.053384
relevel(factor(CDR, ordered = FALSE), ref = “no dementia”) 1.180960 2 1.042459

Plots to check other regression model assumptions

par(mfrow = c(2,2)) # Change the panel layout to 2 x 2
plot(model)

Result table

# Creating data frame including all results
result<- broom::tidy(model, conf.int = TRUE, conf.level = 0.95)
result$N <- model$residuals %>% length()
result$r.squared <- summary(model)$r.squared

# Modify outcome for export
result %>%
  dplyr::rename(independent = term) %>%
  dplyr::mutate(
    independent = dplyr::recode(independent,
                                SexM = 'Sex (ref. women)',
                                Education = 'Education (in years)',
                                'relevel(factor(CDR, ordered = FALSE), ref = \"no dementia\")mild AD' = 'CDR (ref. no dementia) mild AD',
                                'relevel(factor(CDR, ordered = FALSE), ref = \"no dementia\")very mild AD' = 'CDR (ref. no dementia) very mild AD')) %>%
  dplyr::mutate(across(where(is.numeric), round, 3)) %>%
  kable(., caption = "Outcome: MMSE")
Outcome: MMSE
independent estimate std.error statistic p.value conf.low conf.high N r.squared
(Intercept) 29.739 2.113 14.072 0.000 25.562 33.916 150 0.493
Age -0.019 0.024 -0.825 0.411 -0.066 0.027 150 0.493
Sex (ref. women) -0.564 0.374 -1.511 0.133 -1.303 0.174 150 0.493
Education (in years) 0.073 0.065 1.137 0.257 -0.054 0.201 150 0.493
CDR (ref. no dementia) very mild AD -2.960 0.411 -7.209 0.000 -3.772 -2.148 150 0.493
CDR (ref. no dementia) mild AD -6.038 0.649 -9.305 0.000 -7.321 -4.756 150 0.493

References R session information

citation()

To cite R in publications use:

  R Core Team (2020). R: A language and environment for statistical
  computing. R Foundation for Statistical Computing, Vienna, Austria.
  URL https://www.R-project.org/.

A BibTeX entry for LaTeX users is

  @Manual{,
    title = {R: A Language and Environment for Statistical Computing},
    author = {{R Core Team}},
    organization = {R Foundation for Statistical Computing},
    address = {Vienna, Austria},
    year = {2020},
    url = {https://www.R-project.org/},
  }

We have invested a lot of time and effort in creating R, please cite it
when using it for data analysis. See also 'citation("pkgname")' for
citing R packages.
sessionInfo() 
R version 3.6.3 (2020-02-29)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Catalina 10.15.7

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib

locale:
[1] de_DE.UTF-8/de_DE.UTF-8/de_DE.UTF-8/C/de_DE.UTF-8/de_DE.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] car_3.0-10        carData_3.0-4     moments_0.14      rmarkdown_2.6    
 [5] markdown_1.1      knitr_1.30        data.table_1.13.6 forcats_0.5.1    
 [9] stringr_1.4.0     dplyr_1.0.7       purrr_0.3.4       readr_1.4.0      
[13] tidyr_1.1.3       tibble_3.1.2      tidyverse_1.3.1   ggpubr_0.4.0     
[17] ggplot2_3.3.3     patchwork_1.1.1  

loaded via a namespace (and not attached):
 [1] httr_1.4.2       jsonlite_1.7.2   splines_3.6.3    modelr_0.1.8    
 [5] assertthat_0.2.1 highr_0.8        cellranger_1.1.0 yaml_2.2.1      
 [9] pillar_1.6.1     backports_1.2.1  lattice_0.20-41  glue_1.4.2      
[13] digest_0.6.27    ggsignif_0.6.0   rvest_1.0.0      colorspace_2.0-0
[17] htmltools_0.5.1  Matrix_1.2-18    survey_4.0       pkgconfig_2.0.3 
[21] labelled_2.7.0   broom_0.7.7      haven_2.3.1      scales_1.1.1    
[25] openxlsx_4.2.3   rio_0.5.16       mgcv_1.8-31      generics_0.1.0  
[29] farver_2.0.3     ellipsis_0.3.2   withr_2.4.0      cli_2.5.0       
[33] survival_3.1-8   magrittr_2.0.1   crayon_1.4.1     readxl_1.3.1    
[37] evaluate_0.14    fs_1.5.0         fansi_0.4.2      nlme_3.1-144    
[41] class_7.3-15     rstatix_0.6.0    xml2_1.3.2       foreign_0.8-75  
[45] tableone_0.12.0  tools_3.6.3      mitools_2.4      hms_1.0.0       
[49] lifecycle_1.0.0  munsell_0.5.0    reprex_2.0.0     zip_2.1.1       
[53] e1071_1.7-4      compiler_3.6.3   rlang_0.4.10     grid_3.6.3      
[57] rstudioapi_0.13  labeling_0.4.2   gtable_0.3.0     abind_1.4-5     
[61] DBI_1.1.1        curl_4.3         R6_2.5.0         zoo_1.8-8       
[65] lubridate_1.7.10 utf8_1.1.4       stringi_1.5.3    Rcpp_1.0.6      
[69] vctrs_0.3.8      dbplyr_2.1.1     tidyselect_1.1.0 xfun_0.20       
lapply(list.of.packages, citation)
[[1]]

To cite package 'patchwork' in publications use:

  Thomas Lin Pedersen (2020). patchwork: The Composer of Plots. R
  package version 1.1.1. https://CRAN.R-project.org/package=patchwork

A BibTeX entry for LaTeX users is

  @Manual{,
    title = {patchwork: The Composer of Plots},
    author = {Thomas Lin Pedersen},
    year = {2020},
    note = {R package version 1.1.1},
    url = {https://CRAN.R-project.org/package=patchwork},
  }


[[2]]

To cite package 'ggpubr' in publications use:

  Alboukadel Kassambara (2020). ggpubr: 'ggplot2' Based Publication
  Ready Plots. R package version 0.4.0.
  https://CRAN.R-project.org/package=ggpubr

A BibTeX entry for LaTeX users is

  @Manual{,
    title = {ggpubr: 'ggplot2' Based Publication Ready Plots},
    author = {Alboukadel Kassambara},
    year = {2020},
    note = {R package version 0.4.0},
    url = {https://CRAN.R-project.org/package=ggpubr},
  }


[[3]]

  Wickham et al., (2019). Welcome to the tidyverse. Journal of Open
  Source Software, 4(43), 1686, https://doi.org/10.21105/joss.01686

A BibTeX entry for LaTeX users is

  @Article{,
    title = {Welcome to the {tidyverse}},
    author = {Hadley Wickham and Mara Averick and Jennifer Bryan and Winston Chang and Lucy D'Agostino McGowan and Romain François and Garrett Grolemund and Alex Hayes and Lionel Henry and Jim Hester and Max Kuhn and Thomas Lin Pedersen and Evan Miller and Stephan Milton Bache and Kirill Müller and Jeroen Ooms and David Robinson and Dana Paige Seidel and Vitalie Spinu and Kohske Takahashi and Davis Vaughan and Claus Wilke and Kara Woo and Hiroaki Yutani},
    year = {2019},
    journal = {Journal of Open Source Software},
    volume = {4},
    number = {43},
    pages = {1686},
    doi = {10.21105/joss.01686},
  }


[[4]]

To cite package 'data.table' in publications use:

  Matt Dowle and Arun Srinivasan (2020). data.table: Extension of
  `data.frame`. R package version 1.13.6.
  https://CRAN.R-project.org/package=data.table

A BibTeX entry for LaTeX users is

  @Manual{,
    title = {data.table: Extension of `data.frame`},
    author = {Matt Dowle and Arun Srinivasan},
    year = {2020},
    note = {R package version 1.13.6},
    url = {https://CRAN.R-project.org/package=data.table},
  }


[[5]]

To cite the 'knitr' package in publications use:

  Yihui Xie (2020). knitr: A General-Purpose Package for Dynamic Report
  Generation in R. R package version 1.30.

  Yihui Xie (2015) Dynamic Documents with R and knitr. 2nd edition.
  Chapman and Hall/CRC. ISBN 978-1498716963

  Yihui Xie (2014) knitr: A Comprehensive Tool for Reproducible
  Research in R. In Victoria Stodden, Friedrich Leisch and Roger D.
  Peng, editors, Implementing Reproducible Computational Research.
  Chapman and Hall/CRC. ISBN 978-1466561595

To see these entries in BibTeX format, use 'print(<citation>,
bibtex=TRUE)', 'toBibtex(.)', or set
'options(citation.bibtex.max=999)'.


[[6]]

To cite package 'markdown' in publications use:

  JJ Allaire, Jeffrey Horner, Yihui Xie, Vicent Marti and Natacha Porte
  (2019). markdown: Render Markdown with the C Library 'Sundown'. R
  package version 1.1. https://CRAN.R-project.org/package=markdown

A BibTeX entry for LaTeX users is

  @Manual{,
    title = {markdown: Render Markdown with the C Library 'Sundown'},
    author = {JJ Allaire and Jeffrey Horner and Yihui Xie and Vicent Marti and Natacha Porte},
    year = {2019},
    note = {R package version 1.1},
    url = {https://CRAN.R-project.org/package=markdown},
  }


[[7]]

To cite the 'rmarkdown' package in publications, please use:

  JJ Allaire and Yihui Xie and Jonathan McPherson and Javier Luraschi
  and Kevin Ushey and Aron Atkins and Hadley Wickham and Joe Cheng and
  Winston Chang and Richard Iannone (2020). rmarkdown: Dynamic
  Documents for R. R package version 2.6. URL
  https://rmarkdown.rstudio.com.

  Yihui Xie and J.J. Allaire and Garrett Grolemund (2018). R Markdown:
  The Definitive Guide. Chapman and Hall/CRC. ISBN 9781138359338. URL
  https://bookdown.org/yihui/rmarkdown.

  Yihui Xie and Christophe Dervieux and Emily Riederer (2020). R
  Markdown Cookbook. Chapman and Hall/CRC. ISBN 9780367563837. URL
  https://bookdown.org/yihui/rmarkdown-cookbook.

To see these entries in BibTeX format, use 'print(<citation>,
bibtex=TRUE)', 'toBibtex(.)', or set
'options(citation.bibtex.max=999)'.


[[8]]

To cite package 'dplyr' in publications use:

  Hadley Wickham, Romain François, Lionel Henry and Kirill Müller
  (2021). dplyr: A Grammar of Data Manipulation. R package version
  1.0.7. https://CRAN.R-project.org/package=dplyr

A BibTeX entry for LaTeX users is

  @Manual{,
    title = {dplyr: A Grammar of Data Manipulation},
    author = {Hadley Wickham and Romain François and Lionel Henry and Kirill Müller},
    year = {2021},
    note = {R package version 1.0.7},
    url = {https://CRAN.R-project.org/package=dplyr},
  }


[[9]]

To cite ggplot2 in publications, please use:

  H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
  Springer-Verlag New York, 2016.

A BibTeX entry for LaTeX users is

  @Book{,
    author = {Hadley Wickham},
    title = {ggplot2: Elegant Graphics for Data Analysis},
    publisher = {Springer-Verlag New York},
    year = {2016},
    isbn = {978-3-319-24277-4},
    url = {https://ggplot2.tidyverse.org},
  }


[[10]]

To cite package 'moments' in publications use:

  Lukasz Komsta and Frederick Novomestky (2015). moments: Moments,
  cumulants, skewness, kurtosis and related tests. R package version
  0.14. https://CRAN.R-project.org/package=moments

A BibTeX entry for LaTeX users is

  @Manual{,
    title = {moments: Moments, cumulants, skewness, kurtosis and related tests},
    author = {Lukasz Komsta and Frederick Novomestky},
    year = {2015},
    note = {R package version 0.14},
    url = {https://CRAN.R-project.org/package=moments},
  }

ATTENTION: This citation information has been auto-generated from the
package DESCRIPTION file and may need manual editing, see
'help("citation")'.


[[11]]

To cite the car package in publications use:

  John Fox and Sanford Weisberg (2019). An {R} Companion to Applied
  Regression, Third Edition. Thousand Oaks CA: Sage. URL:
  https://socialsciences.mcmaster.ca/jfox/Books/Companion/

A BibTeX entry for LaTeX users is

  @Book{,
    title = {An {R} Companion to Applied Regression},
    edition = {Third},
    author = {John Fox and Sanford Weisberg},
    year = {2019},
    publisher = {Sage},
    address = {Thousand Oaks {CA}},
    url = {https://socialsciences.mcmaster.ca/jfox/Books/Companion/},
  }
#RStudio.Version()[-2] #both functions are not working in 
#rstudioapi::versionInfo()[-2] 
## both functions for the R Studio version are not working in R markdown. Run this code in the console and enter information manually. 

To cite RStudio in publications use: RStudio Team (2019). RStudio: Integrated Development for R. RStudio, Inc., Boston, MA URL http://www.rstudio.com/.version Version: 1.2.5033 Release name: “Orange Blossom”