Adds a retry loop for API rate-limit (HTTP 429) errors
This commit is contained in:
parent
81db25a8b8
commit
2e70d960a5
34
collect.py
34
collect.py
@ -115,7 +115,12 @@ for handle in accounts:
|
|||||||
msg = f'trying to fetch tweets for {handle}{suffix}'
|
msg = f'trying to fetch tweets for {handle}{suffix}'
|
||||||
print(msg)
|
print(msg)
|
||||||
|
|
||||||
# Fetch tweets using tweepy Twitter API v2 pagination
|
# Fetch tweets using tweepy Twitter API v2 pagination with retry mechanism
|
||||||
|
max_attempts = 3 # maximum number of attempts to fetch tweets for a slice
|
||||||
|
attempt = 1
|
||||||
|
|
||||||
|
while attempt <= max_attempts:
|
||||||
|
try:
|
||||||
tweets = tweepy.Paginator(client.search_all_tweets,
|
tweets = tweepy.Paginator(client.search_all_tweets,
|
||||||
query=query,
|
query=query,
|
||||||
tweet_fields=tweet_fields,
|
tweet_fields=tweet_fields,
|
||||||
@ -128,6 +133,29 @@ for handle in accounts:
|
|||||||
# ... add that tweet to tweetlist
|
# ... add that tweet to tweetlist
|
||||||
tweetlist.append(tweet)
|
tweetlist.append(tweet)
|
||||||
|
|
||||||
|
break # exit the retry loop if tweets are successfully fetched
|
||||||
|
|
||||||
|
except tweepy.TweepError as e:
|
||||||
|
# handle rate limit exceeded error
|
||||||
|
if e.response.status_code == 429:
|
||||||
|
# get the rate limit reset time from the response headers
|
||||||
|
reset_time = int(e.response.headers['x-rate-limit-reset'])
|
||||||
|
current_time = int(time.time())
|
||||||
|
|
||||||
|
# calculate the sleep time until the rate limit resets
|
||||||
|
sleep_time = reset_time - current_time + 1 # add an extra second
|
||||||
|
|
||||||
|
# sleep until the rate limit resets
|
||||||
|
time.sleep(sleep_time)
|
||||||
|
|
||||||
|
attempt += 1 # increment the attempt counter
|
||||||
|
continue # retry the API call
|
||||||
|
|
||||||
|
else:
|
||||||
|
# handle other types of Tweepy errors
|
||||||
|
print(f'Error occurred: {e}')
|
||||||
|
break
|
||||||
|
|
||||||
# Check if no tweets fetched for the current time slice. If there are no tweets, skip to next time_slices loop iteration
|
# Check if no tweets fetched for the current time slice. If there are no tweets, skip to next time_slices loop iteration
|
||||||
if len(tweetlist) == 0:
|
if len(tweetlist) == 0:
|
||||||
msg = f'return empty in {handle}{suffix} - from {start_time} to {end_time}'
|
msg = f'return empty in {handle}{suffix} - from {start_time} to {end_time}'
|
||||||
@ -176,9 +204,11 @@ for handle in accounts:
|
|||||||
tweet_df = tweet_df.drop(['context_annotations', 'entities', 'referenced_tweets'], axis=1)
|
tweet_df = tweet_df.drop(['context_annotations', 'entities', 'referenced_tweets'], axis=1)
|
||||||
# save short csv
|
# save short csv
|
||||||
tweet_df.to_csv(csv_path)
|
tweet_df.to_csv(csv_path)
|
||||||
# sleep 1 second to not get over 1sec api limit
|
|
||||||
|
# sleep 1 second to not exceed the API rate limit
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
|
|
||||||
# Merge CSV-Files
|
# Merge CSV-Files
|
||||||
# (it would also have been a possibility to build a dataframe with all senators' tweets but i found the other way around more useful)
|
# (it would also have been a possibility to build a dataframe with all senators' tweets but i found the other way around more useful)
|
||||||
path_to_tweetdfs = wd + td
|
path_to_tweetdfs = wd + td
|
||||||
|
Loading…
x
Reference in New Issue
Block a user