init
This commit is contained in:
@@ -0,0 +1,148 @@
|
||||
# =============================================================================
|
||||
# Configuration file
|
||||
# =============================================================================
|
||||
# This file defines the variables that are used throughout the whole process
# and loads all required libraries.
|
||||
# =============================================================================
|
||||
|
||||
# ---- Helper functions -------------------------------------------------------
|
||||
|
||||
#' Install CRAN packages that cannot be loaded yet
#'
#' Checks every entry of `pkgs` with `requireNamespace()` and installs the
#' ones that fail the check from the cloud CRAN mirror. Packages that are
#' already available are left untouched.
#'
#' @param pkgs Character vector of CRAN package names.
#' @return Invisibly `NULL`; the function is called for its side effect.
install_if_missing <- function(pkgs) {
  # requireNamespace() returns FALSE (instead of erroring) for a package
  # whose namespace cannot be loaded, which is used here as "not installed".
  is_available <- vapply(
    pkgs,
    requireNamespace,
    FUN.VALUE = logical(1),
    quietly = TRUE
  )
  to_install <- pkgs[!is_available]

  if (length(to_install) > 0) {
    message(
      "Installing missing CRAN packages: ",
      paste(to_install, collapse = ", ")
    )
    install.packages(to_install, repos = "https://cloud.r-project.org/")
  }

  invisible(NULL)
}
|
||||
|
||||
#' Install a package from GitHub unless it is already available
#'
#' Does nothing when `pkg` can be loaded with `requireNamespace()`. Otherwise
#' it makes sure the `remotes` package is installed (from the cloud CRAN
#' mirror) and then installs `repo` via `remotes::install_github()`.
#'
#' @param pkg Name of the package as it is loaded in R.
#' @param repo GitHub repository in the form passed on to
#'   `remotes::install_github()`, e.g. `"owner/repository"`.
#' @return Invisibly `NULL` when `pkg` is already available; otherwise the
#'   value of `remotes::install_github()`.
install_github_if_missing <- function(pkg, repo) {
  # Nothing to do when the package can already be loaded.
  if (requireNamespace(pkg, quietly = TRUE)) {
    return(invisible(NULL))
  }

  # remotes is only needed for the GitHub installation itself.
  if (!requireNamespace("remotes", quietly = TRUE)) {
    install.packages("remotes", repos = "https://cloud.r-project.org/")
  }

  message("Installing GitHub package ", pkg, " from ", repo)
  remotes::install_github(repo)
}
|
||||
|
||||
# =============================================================================
|
||||
# Install & Load
|
||||
# =============================================================================
|
||||
|
||||
# Define all required packages with comments
|
||||
# CRAN packages required by the project, grouped by purpose.
pkgs <- c(
  # Data handling & plotting
  "tidyverse",    # data manipulation, ggplot2, etc.
  "patchwork",    # combine multiple plots
  "ggthemes",
  "ggpmisc",
  "scales",

  # Tables & summaries
  "summarytools",
  "gtsummary",
  "gt",
  "knitr",
  "kableExtra",

  # Misc helpers
  "rlang",
  "rmarkdown",
  "qs2",          # presumably for the .qs2 data files defined in this file
  "presize",      # sample size calculations
  "survey"
)
|
||||
|
||||
|
||||
# Install any CRAN packages that are not available yet.
# install_if_missing() already installs every package that cannot be loaded,
# so no separate installed.packages() pre-check is needed (the former
# pre-check also created a global variable named `missing`, which masks the
# name of base::missing()).
install_if_missing(pkgs)

# Attach all CRAN packages; character.only = TRUE because the names are
# passed as strings.
invisible(lapply(pkgs, library, character.only = TRUE))
|
||||
|
||||
# ---- GitHub packages --------------------------------------------------------
|
||||
|
||||
# Serosurvey: install from GitHub if it is not available yet, then attach.
install_github_if_missing("serosurvey", "avallecam/serosurvey")
library(serosurvey)

# ggthemr: install from GitHub if it is not available yet, then attach.
install_github_if_missing("ggthemr", "Mikata-Project/ggthemr")
library(ggthemr)
|
||||
|
||||
# =============================================================================
|
||||
# DIRECTORY PATHS
|
||||
# =============================================================================
|
||||
|
||||
# Base directories (relative paths)
dir_data <- "data"
dir_output <- "output"
dir_logs <- "logs"
dir_models <- "models"
dir_code <- "code"
dir_docs <- "docs"
dir_renv <- "renv"
dir_freeze <- "_freeze"
dir_output_quarto <- "_output" # Quarto's rendered output

# Data subdirectories
dir_data_meta <- file.path(dir_data, "meta")
dir_data_crossref <- file.path(dir_data_meta, "crossref") # nested in meta
dir_data_fulltext <- file.path(dir_data, "fulltext")
dir_data_keywords <- file.path(dir_data, "keyword_dicts")

# Output subdirectories
dir_output_plots <- file.path(dir_output, "pp") # presentation plots
dir_output_tables <- file.path(dir_output, "tables")
|
||||
|
||||
# =============================================================================
|
||||
# Files
|
||||
# =============================================================================
|
||||
|
||||
# Primary data files
file_journals <- file.path(dir_data_meta, "journals.qs2")
file_meta_combined <- file.path(dir_data_meta, "meta_combined.qs2")
file_meta_final <- file.path(dir_data_meta, "meta_final.qs2")
file_sample_final <- file.path(dir_data_meta, "sample_final.qs2")
# statistical inference classification sample
file_sample_stat_final <- file.path(dir_data_meta, "sample_stat_final.qs2")
# final sample, ready for analysis
file_sample_analysis <- file.path(dir_data, "sample_analysis.qs2")

# Additional data files (not used yet)
file_dicts <- file.path(dir_data, "dicts.csv")

# Statistical inference sample files
# statistical inference classification training sample
file_train_stat <- file.path(dir_data, "train_stat.csv")
# statistical inference classification hand coded training sample
file_train_stat_coded <- file.path(dir_data, "train_stat_coded.csv")
# statistical inference downloader metadata (containing file_path)
file_train_stat_downloader_meta <- file.path(
  dir_data, "train_stat_downloader_meta_export.csv"
)

# statistical inference classification, hand coded
file_train_stat_coded_hand <- file.path(dir_data, "train_stat_coded_hand.csv")
# statistical inference classification, hand coded, corrected
file_train_stat_coded_hand_corrected <- file.path(
  dir_data, "train_stat_coded_hand_corrected.csv"
)
# statistical inference classification, hand coded, corrected, gpt variant.
# This previously pointed at the same file as
# file_train_stat_coded_hand_corrected; the "_gpt" suffix follows the naming
# of file_train_coded_hand_corrected_gpt below.
# NOTE(review): confirm the file name on disk.
file_train_stat_coded_hand_corrected_gpt <- file.path(
  dir_data, "train_stat_coded_hand_corrected_gpt.csv"
)

# OSP Classification files
# full sample of all years
file_sample <- file.path(dir_data, "full-sample.csv")
# full sample of statistical inference papers
file_sample_statistical <- file.path(dir_data, "sample_statistical.csv")
# training subset of statistical inference papers for osp classification
file_train <- file.path(dir_data, "train.csv")
# training subset (osp), hand-coded
file_train_coded_hand <- file.path(dir_data, "train_coded_hand.csv")
# training subset (osp), hand-coded, corrected
file_train_coded_hand_corrected <- file.path(
  dir_data, "train_coded_hand_corrected.csv"
)
# training subset (osp), corrected, hand-coded + remaining cases coded with gpt
file_train_coded_hand_corrected_gpt <- file.path(
  dir_data, "train_coded_hand_corrected_gpt.csv"
)
# downloader metadata export.
# NOTE(review): the variable is named "sample" but the file is named "train";
# confirm which one is intended.
file_sample_downloader_meta <- file.path(
  dir_data, "train_downloader_meta_export.csv"
)

# Classified data
file_classified_stat <- file.path(dir_data, "classified_stat.csv") # statistical inference
file_classified_oa <- file.path(dir_data, "classified_oa.csv") # open access
file_classified_osp <- file.path(dir_data, "classified_osp.csv") # other osp's
# other osp's; presumably the classification probabilities (TODO confirm)
file_classified_osp_probs <- file.path(dir_data, "classified_osp_probs.csv")

# combination of the above
file_sample_fully_classified <- file.path(
  dir_data, "full-sample_classified.csv"
)

# Downloader import CSV files
file_downloader_full <- file.path(dir_data, "download_full.csv")
file_downloader_sample <- file.path(dir_data, "download_sample.csv")

# Codebook
file_codebook <- file.path(dir_output, "codebook.docx")
|
||||
Reference in New Issue
Block a user