init

2025-11-29 15:11:13 +01:00
commit 9b3618478f
293 changed files with 113456 additions and 0 deletions
@@ -0,0 +1,148 @@
+# =============================================================================
+# Configuration file
+# =============================================================================
+# This file contains necessary variables that will be used in the whole process
+# and all necessary library imports.
+# =============================================================================
+
+# ---- Helper functions -------------------------------------------------------
+
+# Install from CRAN every package in `pkgs` whose namespace cannot be loaded.
+#
+# pkgs: character vector of package names.
+# Returns NULL invisibly when nothing has to be installed; otherwise the
+# result of install.packages().
+install_if_missing <- function(pkgs) {
+  # requireNamespace() yields FALSE for packages that cannot be loaded
+  is_available <- vapply(
+    pkgs,
+    function(pkg) requireNamespace(pkg, quietly = TRUE),
+    FUN.VALUE = logical(1)
+  )
+  absent <- pkgs[!is_available]
+
+  if (length(absent) == 0) {
+    return(invisible(NULL))
+  }
+
+  message("Installing missing CRAN packages: ", paste(absent, collapse = ", "))
+  install.packages(absent, repos = "https://cloud.r-project.org/")
+}
+
+# Install a package from GitHub unless its namespace can already be loaded.
+#
+# pkg:  name of the package to look for.
+# repo: repository specification handed to remotes::install_github().
+# Returns NULL invisibly when `pkg` is already available; otherwise the
+# result of remotes::install_github().
+install_github_if_missing <- function(pkg, repo) {
+  if (requireNamespace(pkg, quietly = TRUE)) {
+    return(invisible(NULL))
+  }
+
+  # remotes is only needed on this path, so it is fetched on demand
+  have_remotes <- requireNamespace("remotes", quietly = TRUE)
+  if (!have_remotes) {
+    install.packages("remotes", repos = "https://cloud.r-project.org/")
+  }
+
+  message("Installing GitHub package ", pkg, " from ", repo)
+  remotes::install_github(repo)
+}
+
+# =============================================================================
+# Install & Load
+# =============================================================================
+
+# CRAN packages the project needs; installed if missing and attached in this order
+pkgs <- c(
+  # Data handling & plotting
+  "tidyverse",    # data manipulation, ggplot2, etc.
+  "patchwork",    # combine multiple plots
+  "ggthemes",
+  "ggpmisc",
+  "scales",
+
+  # Tables & summaries
+  "summarytools",
+  "gtsummary",
+  "gt",
+  "knitr",
+  "kableExtra",
+
+  # Misc helpers
+  "rlang",
+  "rmarkdown",
+  "qs2",        # presumably reads/writes the .qs2 data files defined under "Files" - TODO confirm
+  "presize",    # sample size calculations
+  "survey"
+)
+
+
+# Install any CRAN packages that aren't already present, then attach them all.
+# install_if_missing() is the single place where missing packages are detected.
+# installed.packages() is deliberately not used: it scans every library, and
+# R's documentation advises against it for checking whether a named package
+# is installed.
+install_if_missing(pkgs)
+
+# Attach in the order given by `pkgs`; library() stops with an error if a
+# package is still unavailable after the install step.
+invisible(lapply(pkgs, library, character.only = TRUE))
+
+# ---- GitHub packages --------------------------------------------------------
+
+# serosurvey: install from GitHub (avallecam/serosurvey) if absent, then attach
+install_github_if_missing("serosurvey", "avallecam/serosurvey")
+library(serosurvey)
+
+# ggthemr: install from GitHub (Mikata-Project/ggthemr) if absent, then attach
+install_github_if_missing("ggthemr", "Mikata-Project/ggthemr")
+library(ggthemr)
+
+# =============================================================================
+# DIRECTORY PATHS
+# =============================================================================
+
+# Base directories (relative paths, so they resolve against the working directory)
+dir_data <- "data"
+dir_output <- "output"
+dir_logs <- "logs"
+dir_models <- "models"
+dir_code <- "code"
+dir_docs <- "docs"
+dir_renv <- "renv"
+dir_freeze <- "_freeze"  # NOTE(review): presumably Quarto's freeze cache - confirm
+dir_output_quarto <- "_output"  # Quarto's rendered output
+
+# Data subdirectories (built on dir_data)
+dir_data_meta <- file.path(dir_data, "meta")  # data/meta
+dir_data_crossref <- file.path(dir_data_meta, "crossref")  # data/meta/crossref
+dir_data_fulltext <- file.path(dir_data, "fulltext")  # data/fulltext
+dir_data_keywords <- file.path(dir_data, "keyword_dicts")  # data/keyword_dicts
+
+# Output subdirectories (built on dir_output)
+dir_output_plots <- file.path(dir_output, "pp")  # presentation plots
+dir_output_tables <- file.path(dir_output, "tables")  # output/tables
+
+# =============================================================================
+# Files
+# =============================================================================
+
+# Primary data files (.qs2); all under data/meta except file_sample_analysis (data/)
+file_journals <- file.path(dir_data_meta, "journals.qs2")
+file_meta_combined <- file.path(dir_data_meta, "meta_combined.qs2")
+file_meta_final <- file.path(dir_data_meta, "meta_final.qs2")
+file_sample_final <- file.path(dir_data_meta, "sample_final.qs2")
+file_sample_stat_final <- file.path(dir_data_meta, "sample_stat_final.qs2") # statistical inference classification sample
+file_sample_analysis <- file.path(dir_data, "sample_analysis.qs2") # final sample, ready for analysis
+
+# Additional data files (not used yet)
+file_dicts <- file.path(dir_data, "dicts.csv")
+
+# Statistical inference sample files
+file_train_stat <- file.path(dir_data, "train_stat.csv") # statistical inference classification training sample
+file_train_stat_coded <- file.path(dir_data, "train_stat_coded.csv") # statistical inference classification hand coded training sample
+file_train_stat_downloader_meta <- file.path(dir_data, "train_stat_downloader_meta_export.csv") # statistical inference downloader metadata (containing file_path)
+
+# Hand-coded variants: every stage has its own file, so writing a later stage
+# never overwrites an earlier one.
+file_train_stat_coded_hand <- file.path(dir_data, "train_stat_coded_hand.csv") # statistical inference classification, hand coded
+file_train_stat_coded_hand_corrected <- file.path(dir_data, "train_stat_coded_hand_corrected.csv") # statistical inference classification, hand coded, corrected
+# Carries the "_gpt" suffix (same naming as file_train_coded_hand_corrected_gpt)
+# so that it does not alias file_train_stat_coded_hand_corrected.
+file_train_stat_coded_hand_corrected_gpt <- file.path(dir_data, "train_stat_coded_hand_corrected_gpt.csv") # statistical inference classification, hand coded, corrected, gpt variant
+
+# OSP classification files (all directly under dir_data)
+file_sample <- file.path(dir_data, "full-sample.csv") # full sample of all years
+file_sample_statistical <- file.path(dir_data, "sample_statistical.csv") # full sample of statistical inference papers
+file_train <- file.path(dir_data, "train.csv") # training subset of statistical inference papers for osp classification
+file_train_coded_hand <- file.path(dir_data, "train_coded_hand.csv") # training subset (osp), hand-coded
+file_train_coded_hand_corrected <- file.path(dir_data, "train_coded_hand_corrected.csv") # training subset (osp), hand-coded, corrected
+file_train_coded_hand_corrected_gpt <- file.path(dir_data, "train_coded_hand_corrected_gpt.csv") # training subset (osp), corrected, hand-coded + remaining cases coded with gpt
+file_sample_downloader_meta <- file.path(dir_data, "train_downloader_meta_export.csv") # NOTE(review): presumably downloader metadata for the osp training subset (cf. file_train_stat_downloader_meta) - confirm
+
+# Classified data
+file_classified_stat <- file.path(dir_data, "classified_stat.csv") # statistical inference
+file_classified_oa <- file.path(dir_data, "classified_oa.csv") # open access
+file_classified_osp <- file.path(dir_data, "classified_osp.csv") # other osp's
+file_classified_osp_probs <- file.path(dir_data, "classified_osp_probs.csv") # other osp's; NOTE(review): presumably predicted probabilities - confirm
+
+file_sample_fully_classified <- file.path(dir_data, "full-sample_classified.csv") # combination of the above
+
+# Downloader import CSV files
+file_downloader_full <- file.path(dir_data, "download_full.csv")
+file_downloader_sample <- file.path(dir_data, "download_sample.csv")
+
+# Codebook (written to dir_output, unlike the data files above)
+file_codebook <- file.path(dir_output, "codebook.docx")