diff --git a/.gitignore b/.gitignore index b866619..4a4158e 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,10 @@ /index_files/ *_bak + +*.tex +*.log +*.toc +*.aux + +index.pdf \ No newline at end of file diff --git a/Makefile b/Makefile index ce32812..c6288d0 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,6 @@ QUARTO ?= quarto +PROFILE ?= default +PROFILE_FLAG := $(if $(PROFILE),--profile $(PROFILE),) # Allow overriding the list of docx outputs via env var `docx`. # Example: `make docx docx="index Supplements"` or `make docx docx=index`. @@ -18,10 +20,10 @@ docx-main: index.docx # Pattern rules for either format %.pdf: %.qmd - OUTPUT_FORMAT=pdf/tex $(QUARTO) render $< --to pdf + OUTPUT_FORMAT=pdf/tex $(QUARTO) render $< --to pdf $(PROFILE_FLAG) %.docx: %.qmd - OUTPUT_FORMAT=docx $(QUARTO) render $< --to docx + OUTPUT_FORMAT=docx $(QUARTO) render $< --to docx $(PROFILE_FLAG) clean: rm -f index.pdf Supplements.pdf index.docx Supplements.docx diff --git a/_quarto-anon.yml b/_quarto-anon.yml new file mode 100644 index 0000000..5bcf115 --- /dev/null +++ b/_quarto-anon.yml @@ -0,0 +1,8 @@ +project: + output-dir: _output/anon + +authors: + - name: Anonymous + corresponding: false + +affiliations: [] diff --git a/_quarto-default -preprint.yml b/_quarto-default -preprint.yml new file mode 100644 index 0000000..dbef835 --- /dev/null +++ b/_quarto-default -preprint.yml @@ -0,0 +1,38 @@ +project: + output-dir: _output/normal + +authors: + - name: Michael Beck + affiliations: + - ref: die + corresponding: true + email: michaeljbeck@proton.me + orcid: 0009-0005-4622-4717 + +affiliations: + - id: die + name: German Institute for Adult Education - Leibniz Centre for Lifelong Learning (DIE), Bonn, Germany + +include-before: | + \captionsetup[figure]{format=plain, labelsep=period-newline, justification=centering, margin=2cm, font=footnotesize, singlelinecheck=false} + \captionsetup[table]{format=plain, labelsep=period-newline, justification=centering, margin=2cm, 
font=footnotesize, singlelinecheck=false} + + \begin{titlepage} + \thispagestyle{empty} + \begin{center} + \Large{\textbf{Michael Beck\footnote{German Institute for Adult Education - Leibniz Centre for Lifelong Learning (DIE), Bonn, Germany; ORCID: \href{https://orcid.org/0009-0005-4622-4717}{0009-0005-4622-4717}}}} \\ + \vspace*{1.5cm} + \end{center} + + \begin{abstract} + This pilot study addresses the current lack of systematic, large-scale evidence on Open Science Practices (OSPs) adoption in criminology and legal psychology. A scalable, machine-learning-based text classification pipeline is introduced to map the prevalence of Open Access (OA), Open Data (OD), Open Materials (OM), and Preregistration (PR). The analysis is based on publication metadata and a year-stratified sample of full texts from the top 100 journals in Criminology \& Penology, Law, and Psychology (2013-2023). After identifying articles containing statistical inference (SI) via a high-performing classifier, the author utilized GPT-assisted coding and supervised learning to train specific classifiers for OD, OM, and PR. OA was classified using publicly available metadata. Among 1,763 SI articles with usable full text, design-based estimates reveal a significant disparity in OSP adoption. OA is relatively common (40.9\%, 95\% CI: 38.8-43.1) and has steadily increased from approximately 20\% in 2013 to 50\% in 2023. By sharp contrast, trends for OD, OM, and PR cannot be reliably quantified. Extreme class imbalance and the minimal number of positive cases indicate a very low underlying true prevalence for these practices in the assessed field. Methodologically, the study confirms that GPT-assisted coding supports accurate SI detection, but robust prevalence estimation for extremely low-frequency OSPs remains challenging for downstream classifiers. 
Overall, this project establishes a transparent and reproducible pipeline and provides critical baseline estimates for future, larger-scale assessments of research transparency in crime-related fields. + \end{abstract} + + \vspace{1cm} + \noindent\textbf{Keywords:} Open Science Practices; Open Access; Open Data; Open Materials; Preregistration; Text Mining; Machine Learning Classification; AI-assisted Coding; Replication + + \vspace{0.75cm} + \noindent\textbf{Date:} \today + \end{titlepage} + \newpage + \thispagestyle{empty} diff --git a/_quarto-default.yml b/_quarto-default.yml new file mode 100644 index 0000000..a178303 --- /dev/null +++ b/_quarto-default.yml @@ -0,0 +1,14 @@ +project: + output-dir: _output/normal + +authors: + - name: Michael Beck + affiliations: + - ref: die + corresponding: true + email: michaeljbeck@proton.me + orcid: 0009-0005-4622-4717 + +affiliations: + - id: die + name: German Institute for Adult Education - Leibniz Centre for Lifelong Learning (DIE), Bonn, Germany diff --git a/_quarto.yml b/_quarto.yml index 2c148ea..7c9c0bb 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -1,27 +1,26 @@ project: type: default - output-dir: _output lang: en-US -authors: - - name: Michael Beck - affiliations: - - ref: die - corresponding: true - email: michaeljbeck@proton.me - orcid: 0009-0005-4622-4717 +profile: + default: default + group: + - [default, anon] -affiliations: - - id: die - name: German Institute for Adult Education - Leibniz Centre for Lifelong Learning (DIE), Bonn, Germany abstract: | - This pilot study addresses the current lack of systematic, large-scale evidence on Open Science Practices (OSPs) adoption in criminology and legal psychology. A scalable, machine-learning-based text classification pipeline is introduced to map the prevalence of Open Access (OA), Open Data (OD), Open Materials (OM), and Preregistration (PR). 
The analysis is based on publication metadata and a year-stratified sample of full texts from the top 100 journals in Criminology & Penology, Law, and Psychology (2013-2023). After identifying articles containing statistical inference (SI) via a high-performing classifier, I utilized GPT-assisted coding and supervised learning to train specific classifiers for OD, OM, and PR. OA was classified using publicly available metadata. Among 1,763 SI articles with usable full text, design-based estimates reveal a significant disparity in OSP adoption. OA is relatively common (40.9%, 95% CI: 38.8-43.1) and has steadily increased from approximately 20% in 2013 to 50% in 2023. By sharp contrast, trends for OD, OM, and PR cannot be reliably quantified. - | Extreme class imbalance and the minimal number of positive cases indicate a very low underlying true prevalence for these practices in the assessed field. Methodologically, the study confirms that GPT-assisted coding supports accurate SI detection, but robust prevalence estimation for extremely low-frequency OSPs remains challenging for downstream classifiers. Overall, this project establishes a transparent and reproducible pipeline and provides critical baseline estimates for future, larger-scale assessments of research transparency in crime-related fields. + This pilot study addresses the current lack of systematic, large-scale evidence on Open Science Practices (OSPs) adoption in criminology and legal psychology. A scalable, machine-learning-based text classification pipeline is introduced to map the prevalence of Open Access (OA), Open Data (OD), Open Materials (OM), and Preregistration (PR). The analysis is based on publication metadata and a year-stratified sample of full texts from the top 100 journals in Criminology & Penology, Law, and Psychology (2013-2023). 
After identifying articles containing statistical inference (SI) via a high-performing classifier, the author utilized GPT-assisted coding and supervised learning to train specific classifiers for OD, OM, and PR. OA was classified using publicly available metadata. Among 1,763 SI articles with usable full text, design-based estimates reveal a significant disparity in OSP adoption. OA is relatively common (40.9%, 95% CI: 38.8-43.1) and has steadily increased from approximately 20% in 2013 to 50% in 2023. By sharp contrast, trends for OD, OM, and PR cannot be reliably quantified. Extreme class imbalance and the minimal number of positive cases indicate a very low underlying true prevalence for these practices in the assessed field. Methodologically, the study confirms that GPT-assisted coding supports accurate SI detection, but robust prevalence estimation for extremely low-frequency OSPs remains challenging for downstream classifiers. Overall, this project establishes a transparent and reproducible pipeline and provides critical baseline estimates for future, larger-scale assessments of research transparency in crime-related fields. 
keywords: - - Metaphysics - - String Theory -date: 2025-12-14 + - Open Science Practices + - Open Access + - Open Data + - Open Materials + - Preregistration + - Text Mining + - Machine Learning Classification + - AI-assisted Coding + - Replication +date: 2025-12-15 format: aog-article-pdf: @@ -49,6 +48,7 @@ format: lot: false lof: false reference-doc: custom-reference-doc.docx + fig-dpi: 300 filters: - authors-block @@ -79,28 +79,4 @@ header-includes: | \DeclareCaptionLabelSeparator{period-newline}{.\par} \usepackage{setspace} \usepackage{float} % enables [H] - \usepackage[section]{placeins} % <— add [section] - -include-before: | - \captionsetup[figure]{format=plain, labelsep=period-newline, justification=centering, margin=2cm, font=footnotesize, singlelinecheck=false} - \captionsetup[table]{format=plain, labelsep=period-newline, justification=centering, margin=2cm, font=footnotesize, singlelinecheck=false} - - \begin{center} - \Large{\textbf{Michael Beck}} \\ - \vspace*{2cm} - Masterarbeit \\ - \vspace*{2cm} - Themensteller: \\ - \textbf{Dr. Alexander Trinidad} \\ - \vspace*{5cm} - Vorgelegt in der Masterprüfung im Studiengang \\ - \vspace*{0.5cm} - \textbf{Master of Science\\Sociology and Social Research} \\ - \vspace*{0.5cm} - der Wirtschafts- und Sozialwissenschaftlichen Fakultät der Universität zu Köln \\ - \vspace*{4cm} - Köln, 09.09.2025 - \end{center} - \thispagestyle{empty} - \newpage - \thispagestyle{empty} + \usepackage[section]{placeins} % <- add [section] diff --git a/deps.R b/deps.R index 41d7549..be15c57 100644 --- a/deps.R +++ b/deps.R @@ -141,7 +141,7 @@ dir_data_fulltext <- file.path(dir_data, "fulltext") dir_data_keywords <- file.path(dir_data, "keyword_dicts") # Output subdirectories -dir_output_plots <- file.path(dir_output, "pp") # presentation plots +dir_output_plots <- file.path(dir_output, "plots") dir_output_tables <- file.path(dir_output, "tables") # =============================================================================