adds missing file report
This commit is contained in:
parent
1a19fd407a
commit
fb7a70cf66
12
collect.py
12
collect.py
@ -211,13 +211,17 @@ print(timeEndScrape.strftime("%Y-%m-%d_%H-%M-%S"))
|
||||
os.chdir(path_to_tweetdfs)
|
||||
# First, check whether all slices are present.
|
||||
tweetfiles = glob.glob("*.csv") # get list of all csv files in folder - before: "*.{}".format("csv")
|
||||
for handle
|
||||
for tweetfile in tweetfiles:
|
||||
|
||||
# Build the name of every CSV file the scrape is expected to have produced:
# one file per account handle and time-slice suffix. The f-prefix is required,
# otherwise each entry is the literal text "Tweets-{handle}{suffix}.csv".
AllFilesList = [
    f"Tweets-{handle}{suffix}.csv"
    for handle in accounts
    for suffix in time_slices
]

# Write a report listing every expected file that is absent from the folder.
# The report is opened in "w" mode: open() defaults to read-only, which would
# fail on a file that does not exist yet and could not be written to anyway.
# NOTE(review): timeStartScrape is interpolated with its default str() form; if
# it is a datetime the file name will contain spaces and colons - confirm.
present_files = set(tweetfiles)  # built once for O(1) membership checks
with open(f"{logfile}missing-{timeStartScrape}", "w") as fout:
    for file in AllFilesList:
        if file not in present_files:
            # Record the missing file by name in the report.
            fout.write(f"Missing: {file}.\n")
|
||||
# file_alltweets holds previously scraped tweets that were already merged into
# one file. Drop it from the list so it is not included in the following merge;
# if it is not in the list, there is nothing to do.
try:
    tweetfiles.remove(file_alltweets)
except ValueError:
    pass
|
||||
|
||||
# Go through all csv files and merge them into file_alltweets
|
||||
with open(file_alltweets, "wb") as fout:
|
||||
# first file (because of the header):
|
||||
|
Loading…
x
Reference in New Issue
Block a user