From fb7a70cf668f39895a4fb6181cccb690a32f8069 Mon Sep 17 00:00:00 2001
From: Michael Beck
Date: Fri, 23 Jun 2023 17:04:08 +0200
Subject: [PATCH] adds missing file report

---
Review notes (text between the '---' line and the diff is not committed):
* Build the expected slice names and the report lines with f-strings; the
  plain string literals never substituted {handle}, {suffix} or {file}.
* Open the missing-file report in "w" mode; the default mode is read-only,
  so the write (or the open itself, for a new file) would fail.
* The "index" blob id below still refers to the unfixed post-image; a plain
  `git apply` / `git am` does not depend on it.
* NOTE(review): whitespace on context and removed lines was reconstructed
  from a collapsed copy of this patch - confirm it applies cleanly.
* NOTE(review): timeStartScrape is interpolated unformatted; presumably it
  is a datetime like timeEndScrape - confirm the resulting file name is
  valid on the target file system.

 collect.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/collect.py b/collect.py
index 0f70226..256902c 100644
--- a/collect.py
+++ b/collect.py
@@ -211,13 +211,17 @@ print(timeEndScrape.strftime("%Y-%m-%d_%H-%M-%S"))
 os.chdir(path_to_tweetdfs)
 # At first check, whether all slices are present.
 tweetfiles = glob.glob("*.csv") # get list of all csv files in folder - before: "*.{}".format("csv")
-for handle
-for tweetfile in tweetfiles:
-
+AllFilesList = []
+for handle in accounts:
+    for suffix in time_slices:
+        AllFilesList.append(f"Tweets-{handle}{suffix}.csv")
+with open(f"{logfile}missing-{timeStartScrape}", "w") as fout:
+    for file in AllFilesList:
+        if file not in tweetfiles:
+            fout.write(f'Missing: {file}.\n') # expected slice file was not found in the folder: record it in the report.
 # check if file_alltweets (previously scraped tweets that have been merged into one file) exists, if it exists, remove from list to not include it in the following merge
 if file_alltweets in tweetfiles:
     tweetfiles.remove(file_alltweets)
-
 # Go through all csv files and merge them into file_alltweets
 with open(file_alltweets, "wb") as fout:
     # first file (because of the header):