diff --git a/collect.py b/collect.py
index 0f70226..256902c 100644
--- a/collect.py
+++ b/collect.py
@@ -211,13 +211,19 @@ print(timeEndScrape.strftime("%Y-%m-%d_%H-%M-%S"))
 os.chdir(path_to_tweetdfs)
 # At first check, whether all slices are present.
 tweetfiles = glob.glob("*.csv") # get list of all csv files in folder - before: "*.{}".format("csv")
-for handle
-for tweetfile in tweetfiles:
-
+# Build the file name expected for every combination of account handle and time-slice suffix.
+AllFilesList = []
+for handle in accounts:
+    for suffix in time_slices:
+        AllFilesList.append(f"Tweets-{handle}{suffix}.csv")
+# Write one "Missing" line per expected file that glob did not find; "w" mode is needed because this log is created here.
+with open(f"{logfile}missing-{timeStartScrape}", "w") as fout:
+    for file in AllFilesList:
+        if file not in tweetfiles:
+            fout.write(f'Missing: {file}.\n')
 # check if file_alltweets (previously scraped tweets that have been merged into one file) exists, if it exists, remove from list to not include it in the following merge
 if file_alltweets in tweetfiles:
     tweetfiles.remove(file_alltweets)
-
 # Go through all csv files and merge them into file_alltweets
 with open(file_alltweets, "wb") as fout:
     # first file (because of the header):