adds missing file report
This commit is contained in:
parent
1a19fd407a
commit
fb7a70cf66
12
collect.py
12
collect.py
@ -211,13 +211,17 @@ print(timeEndScrape.strftime("%Y-%m-%d_%H-%M-%S"))
|
|||||||
os.chdir(path_to_tweetdfs)
|
os.chdir(path_to_tweetdfs)
|
||||||
# At first check, whether all slices are present.
|
# At first check, whether all slices are present.
|
||||||
tweetfiles = glob.glob("*.csv") # get list of all csv files in folder - before: "*.{}".format("csv")
|
tweetfiles = glob.glob("*.csv") # get list of all csv files in folder - before: "*.{}".format("csv")
|
||||||
for handle
|
AllFilesList = []
|
||||||
for tweetfile in tweetfiles:
|
for handle in accounts:
|
||||||
|
for suffix in time_slices:
|
||||||
|
AllFilesList.append(f"Tweets-{handle}{suffix}.csv")  # f-string: interpolate handle and suffix into the expected file name
|
||||||
|
with open(f"{logfile}missing-{timeStartScrape}") as fout:
|
||||||
|
for file in AllFilesList:
|
||||||
|
if file not in tweetfiles:
|
||||||
|
fout.write(f'Missing: {file}.\n') # f-string: record each expected file that is absent from tweetfiles in the report
|
||||||
# check if file_alltweets (previously scraped tweets that have been merged into one file) exists, if it exists, remove from list to not include it in the following merge
|
# check if file_alltweets (previously scraped tweets that have been merged into one file) exists, if it exists, remove from list to not include it in the following merge
|
||||||
if file_alltweets in tweetfiles:
|
if file_alltweets in tweetfiles:
|
||||||
tweetfiles.remove(file_alltweets)
|
tweetfiles.remove(file_alltweets)
|
||||||
|
|
||||||
# Go through all csv files and merge them into file_alltweets
|
# Go through all csv files and merge them into file_alltweets
|
||||||
with open(file_alltweets, "wb") as fout:
|
with open(file_alltweets, "wb") as fout:
|
||||||
# first file (because of the header):
|
# first file (because of the header):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user