adds retry loop mechanism for api limit
This commit is contained in:
parent
81db25a8b8
commit
2e70d960a5
56
collect.py
56
collect.py
@ -115,18 +115,46 @@ for handle in accounts:
|
||||
msg = f'trying to fetch tweets for {handle}{suffix}'
|
||||
print(msg)
|
||||
|
||||
# Fetch tweets using tweepy Twitter API v2 pagination
|
||||
tweets = tweepy.Paginator(client.search_all_tweets,
|
||||
query=query,
|
||||
tweet_fields=tweet_fields,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
max_results=20).flatten(20)
|
||||
# Fetch tweets using tweepy Twitter API v2 pagination with retry mechanism
|
||||
max_attempts = 3 # maximum number of attempts to fetch tweets for a slice
|
||||
attempt = 1
|
||||
|
||||
# for each tweet returned...
|
||||
for tweet in tweets:
|
||||
# ... add that tweet to tweetlist
|
||||
tweetlist.append(tweet)
|
||||
# Fetch the tweets for this time slice, retrying when the API rate limit is hit.
# Relies on `attempt` and `max_attempts` being initialised just above.
# NOTE: tweepy.Paginator is lazy -- the HTTP requests (and therefore any API
# errors) happen while the result is iterated, so the iteration must stay
# inside the try block.
while attempt <= max_attempts:
    try:
        tweets = tweepy.Paginator(client.search_all_tweets,
                                  query=query,
                                  tweet_fields=tweet_fields,
                                  start_time=start_time,
                                  end_time=end_time,
                                  max_results=20).flatten(20)

        # Materialise this attempt first, so that a failure part-way through
        # does not leave partial results in tweetlist which a retry would
        # then append a second time.
        fetched = list(tweets)

    except tweepy.TooManyRequests as e:
        # HTTP 429: rate limit exceeded. (tweepy.TweepError no longer exists
        # in Tweepy v4, the version that provides Client and Paginator.)
        current_time = int(time.time())

        # The reset time is a Unix timestamp in the response headers; fall
        # back to "now" if the header is missing.
        reset_time = int(e.response.headers.get('x-rate-limit-reset', current_time))

        # Sleep until the limit resets plus one extra second; never pass a
        # negative value to time.sleep() if the reset time already passed.
        sleep_time = max(reset_time - current_time + 1, 1)
        time.sleep(sleep_time)

        attempt += 1  # count this attempt, then retry the API call

    except tweepy.TweepyException as e:
        # any other Tweepy error is not retried
        print(f'Error occurred: {e}')
        break

    else:
        # success: add every fetched tweet to tweetlist and leave the retry loop
        tweetlist.extend(fetched)
        break
else:
    # the loop ended without a break: every attempt hit the rate limit
    print(f'Error occurred: rate limit still exceeded after {max_attempts} attempts')
|
||||
|
||||
# Check if no tweets fetched for the current time slice. If there are no tweets, skip to next time_slices loop iteration
|
||||
if len(tweetlist) == 0:
|
||||
@ -176,8 +204,10 @@ for handle in accounts:
|
||||
tweet_df = tweet_df.drop(['context_annotations', 'entities', 'referenced_tweets'], axis=1)
|
||||
# save short csv
|
||||
tweet_df.to_csv(csv_path)
|
||||
# sleep 1 second to not get over 1sec api limit
|
||||
time.sleep(1)
|
||||
|
||||
# sleep 1 second to not exceed the API rate limit
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
# Merge CSV-Files
|
||||
# (building a single dataframe with all senators' tweets would also have been possible, but I found the other way around more useful)
|
||||
|
Loading…
x
Reference in New Issue
Block a user