adds missing file report

This commit is contained in:
Michael Beck 2023-06-23 17:04:08 +02:00
parent 1a19fd407a
commit fb7a70cf66

View File

@ -211,13 +211,17 @@ print(timeEndScrape.strftime("%Y-%m-%d_%H-%M-%S"))
os.chdir(path_to_tweetdfs) os.chdir(path_to_tweetdfs)
# At first check, whether all slices are present. # At first check, whether all slices are present.
tweetfiles = glob.glob("*.csv") # get list of all csv files in folder - before: "*.{}".format("csv") tweetfiles = glob.glob("*.csv") # get list of all csv files in folder - before: "*.{}".format("csv")
# Build the names of every slice file the scrape is expected to have
# produced: one CSV per (account handle, time-slice suffix) pair.
AllFilesList = []
for handle in accounts:
    for suffix in time_slices:
        # f-string is required here; without the prefix every entry would be
        # the literal text "Tweets-{handle}{suffix}.csv".
        AllFilesList.append(f"Tweets-{handle}{suffix}.csv")
# Write a report listing each expected slice file that is absent from the
# folder. The file must be opened in write mode: the default mode is
# read-only, which fails for a report that does not exist yet and rejects
# write() otherwise.
# NOTE(review): timeStartScrape is interpolated as-is; if it is a datetime the
# file name will contain spaces and colons - confirm that is acceptable on the
# target filesystem.
with open(f"{logfile}missing-{timeStartScrape}", "w") as fout:
    for file in AllFilesList:
        if file not in tweetfiles:
            fout.write(f"Missing: {file}.\n")  # if file is not in tweetfiles, record it in the report.
# check if file_alltweets (previously scraped tweets that have been merged into one file) exists, if it exists, remove from list to not include it in the following merge # check if file_alltweets (previously scraped tweets that have been merged into one file) exists, if it exists, remove from list to not include it in the following merge
if file_alltweets in tweetfiles: if file_alltweets in tweetfiles:
tweetfiles.remove(file_alltweets) tweetfiles.remove(file_alltweets)
# Go through all csv files and merge them into file_alltweets # Go through all csv files and merge them into file_alltweets
with open(file_alltweets, "wb") as fout: with open(file_alltweets, "wb") as fout:
# first file (because of the header): # first file (because of the header):