Files
2025-11-29 15:11:13 +01:00

12 lines
1.0 KiB
CSV

"","step","before","after","dropped","logic"
"1","deduplicate",95042,55904,39138,"~!duplicated(doi)"
"2","NA published_date",55904,55904,0,"~!is.na(published_date)"
"3","drop published_date > date_from",55904,47309,8595,"~published_date > date_from"
"4","drop published_date < date_to",47309,45922,1387,"~published_date < date_to"
"5","filter_reviews_simple",45922,45557,365,"~!str_detect(title, regex(str_c(kws, collapse = ""|""), ignore_case = TRUE))"
"6","filter_corrections",45557,45144,413,"~!str_detect(title, regex(str_c(kws, collapse = ""|""), ignore_case = TRUE))"
"7","filter_front_back",45144,42320,2824,"~!str_detect(title, regex(str_c(kws, collapse = ""|""), ignore_case = TRUE))"
"8","filter_announcements",42320,42279,41,"~!str_detect(title, regex(str_c(kws, collapse = ""|""), ignore_case = TRUE))"
"9","filter_reviews_full",42279,41908,371,"~!str_detect(title, regex(str_c(kws, collapse = ""|""), ignore_case = TRUE))"
"10","filter_other",41908,40860,1048,"~!str_detect(title, regex(str_c(kws, collapse = ""|""), ignore_case = TRUE))"