adds retry loop mechanism for API rate limit

Michael Beck 2023-06-07 20:42:47 +02:00
parent 81db25a8b8
commit 2e70d960a5


@@ -115,18 +115,46 @@ for handle in accounts:
         msg = f'trying to fetch tweets for {handle}{suffix}'
         print(msg)
-        # Fetch tweets using tweepy Twitter API v2 pagination
-        tweets = tweepy.Paginator(client.search_all_tweets,
-                                  query=query,
-                                  tweet_fields=tweet_fields,
-                                  start_time=start_time,
-                                  end_time=end_time,
-                                  max_results=20).flatten(20)
-        # for each tweet returned...
-        for tweet in tweets:
-            # ... add that tweet to tweetlist
-            tweetlist.append(tweet)
+        # Fetch tweets using tweepy Twitter API v2 pagination with retry mechanism
+        max_attempts = 3  # maximum number of attempts to fetch tweets for a slice
+        attempt = 1
+        while attempt <= max_attempts:
+            try:
+                tweets = tweepy.Paginator(client.search_all_tweets,
+                                          query=query,
+                                          tweet_fields=tweet_fields,
+                                          start_time=start_time,
+                                          end_time=end_time,
+                                          max_results=20).flatten(20)
+                # for each tweet returned...
+                for tweet in tweets:
+                    # ... add that tweet to tweetlist
+                    tweetlist.append(tweet)
+                break  # exit the retry loop once tweets are fetched successfully
+            except tweepy.HTTPException as e:
+                # handle rate limit exceeded error
+                # (tweepy >= 4.0 raises HTTPException; TweepError no longer exists)
+                if e.response.status_code == 429:
+                    # get the rate limit reset time from the response headers
+                    reset_time = int(e.response.headers['x-rate-limit-reset'])
+                    current_time = int(time.time())
+                    # calculate the sleep time until the rate limit resets
+                    sleep_time = reset_time - current_time + 1  # add an extra second
+                    # sleep until the rate limit resets
+                    time.sleep(sleep_time)
+                    attempt += 1  # increment the attempt counter
+                    continue  # retry the API call
+                else:
+                    # handle other types of Tweepy errors
+                    print(f'Error occurred: {e}')
+                    break
         # If no tweets were fetched for the current time slice, skip to the next time_slices iteration
         if len(tweetlist) == 0:
@@ -176,8 +204,10 @@ for handle in accounts:
     tweet_df = tweet_df.drop(['context_annotations', 'entities', 'referenced_tweets'], axis=1)
     # save short csv
     tweet_df.to_csv(csv_path)
-    # sleep 1 second to not get over 1sec api limit
-    time.sleep(1)
+    # sleep 1 second to not exceed the API rate limit
+    time.sleep(1)
 # Merge CSV-Files
 # (it would also have been possible to build a dataframe with all senators' tweets, but I found the other way around more useful)
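
Note: below is a standalone sketch of the retry pattern this commit adds, factored into a reusable helper. It assumes tweepy >= 4.0, where TweepError was replaced by HTTPException and its TooManyRequests subclass for 429 responses; fetch_with_retry is a hypothetical name and not part of this commit.

import time

import tweepy


def fetch_with_retry(fetch, max_attempts=3):
    """Call fetch() and retry on HTTP 429, sleeping until the rate limit resets."""
    for _ in range(max_attempts):
        try:
            # fetch is a zero-argument callable returning an iterable of tweets
            return list(fetch())
        except tweepy.TooManyRequests as e:
            # sleep until the reset time from the response headers, plus one second
            reset_time = int(e.response.headers.get('x-rate-limit-reset', int(time.time()) + 60))
            time.sleep(max(reset_time - int(time.time()) + 1, 1))
        except tweepy.HTTPException as e:
            # any other API error: report it and give up on this slice
            print(f'Error occurred: {e}')
            break
    return []

# usage, with the same Paginator call as in the commit:
# tweets = fetch_with_retry(lambda: tweepy.Paginator(client.search_all_tweets,
#                                                    query=query,
#                                                    tweet_fields=tweet_fields,
#                                                    start_time=start_time,
#                                                    end_time=end_time,
#                                                    max_results=20).flatten(20))

Alternatively, tweepy.Client accepts wait_on_rate_limit=True, which makes the client sleep through 429 responses on its own and would make a manual retry loop unnecessary.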