adds retry loop mechanism for api limit

2023-06-07 20:42:47 +02:00
parent 81db25a8b8
commit 2e70d960a5
1 changed files with 43 additions and 13 deletions
--- a/collect.py
+++ b/collect.py
@@ -115,7 +115,12 @@ for handle in accounts:
        msg = f'trying to fetch tweets for {handle}{suffix}'
        print(msg)
-        # Fetch tweets using tweepy Twitter API v2 pagination
+        # Fetch tweets using tweepy Twitter API v2 pagination with retry mechanism
        max_attempts = 3  # maximum number of attempts to fetch tweets for a slice
        attempt = 1
        while attempt <= max_attempts:
            try:
                tweets = tweepy.Paginator(client.search_all_tweets,
                                          query=query,
                                          tweet_fields=tweet_fields,
@@ -128,6 +133,29 @@ for handle in accounts:
                    # ... add that tweet to tweetlist
                    tweetlist.append(tweet)
                break  # exit the retry loop if tweets are successfully fetched
            except tweepy.TweepError as e:
                # handle rate limit exceeded error
                if e.response.status_code == 429:
                    # get the rate limit reset time from the response headers
                    reset_time = int(e.response.headers['x-rate-limit-reset'])
                    current_time = int(time.time())
                    # calculate the sleep time until the rate limit resets
                    sleep_time = reset_time - current_time + 1  # add an extra second
                    # sleep until the rate limit resets
                    time.sleep(sleep_time)
                    attempt += 1  # increment the attempt counter
                    continue  # retry the API call
                else:
                    # handle other types of Tweepy errors
                    print(f'Error occurred: {e}')
                    break
        # Check if no tweets fetched for the current time slice. If there are no tweets, skip to next time_slices loop iteration
        if len(tweetlist) == 0:
            msg = f'return empty in {handle}{suffix} - from {start_time} to {end_time}'
@@ -176,9 +204,11 @@ for handle in accounts:
            tweet_df = tweet_df.drop(['context_annotations', 'entities', 'referenced_tweets'], axis=1)
        # save short csv
        tweet_df.to_csv(csv_path)
-        # sleep 1 second to not get over 1sec api limit
+        
        # sleep 1 second to not exceed the API rate limit
        time.sleep(1)
 # Merge CSV-Files
 # (building a single dataframe with all senators' tweets would also have been possible, but I found this approach more useful)
 path_to_tweetdfs = wd + td