Add a retry loop for API rate-limit errors (HTTP 429)

2023-06-07 20:42:47 +02:00 · 2023-06-07 20:42:47 +02:00 · 2e70d960a5
commit 2e70d960a5
parent 81db25a8b8
1 changed file with 43 additions and 13 deletions
--- a/collect.py
+++ b/collect.py
@@ -115,18 +115,46 @@ for handle in accounts:
        msg = f'trying to fetch tweets for {handle}{suffix}'
        print(msg)
        
-        # Fetch tweets using tweepy Twitter API v2 pagination
-        tweets = tweepy.Paginator(client.search_all_tweets,
-                                      query=query,
-                                      tweet_fields=tweet_fields,
-                                      start_time=start_time,
-                                      end_time=end_time,
-                                      max_results=20).flatten(20)
+        # Fetch tweets using tweepy Twitter API v2 pagination with retry mechanism
+        max_attempts = 3  # maximum number of attempts to fetch tweets for a slice
+        attempt = 1
        
-        # for each tweet returned...
-        for tweet in tweets:
-            # ... add that tweet to tweetlist
-            tweetlist.append(tweet)
+        while attempt <= max_attempts:
+            try:
+                tweets = tweepy.Paginator(client.search_all_tweets,
+                                          query=query,
+                                          tweet_fields=tweet_fields,
+                                          start_time=start_time,
+                                          end_time=end_time,
+                                          max_results=20).flatten(20)
+                
+                # for each tweet returned...
+                for tweet in tweets:
+                    # ... add that tweet to tweetlist
+                    tweetlist.append(tweet)
+                
+                break  # exit the retry loop if tweets are successfully fetched
+            
+            except tweepy.TweepError as e:
+                # handle rate limit exceeded error
+                if e.response.status_code == 429:
+                    # get the rate limit reset time from the response headers
+                    reset_time = int(e.response.headers['x-rate-limit-reset'])
+                    current_time = int(time.time())
+                    
+                    # calculate the sleep time until the rate limit resets
+                    sleep_time = reset_time - current_time + 1  # add an extra second
+                    
+                    # sleep until the rate limit resets
+                    time.sleep(sleep_time)
+                    
+                    attempt += 1  # increment the attempt counter
+                    continue  # retry the API call
+                
+                else:
+                    # handle other types of Tweepy errors
+                    print(f'Error occurred: {e}')
+                    break
        
        # Check if no tweets fetched for the current time slice. If there are no tweets, skip to next time_slices loop iteration
        if len(tweetlist) == 0:
@@ -176,8 +204,10 @@ for handle in accounts:
            tweet_df = tweet_df.drop(['context_annotations', 'entities', 'referenced_tweets'], axis=1)
        # save short csv
        tweet_df.to_csv(csv_path)
-        # sleep 1 second to not get over 1sec api limit
-        time.sleep(1) 
+        
+        # sleep 1 second to not exceed the API rate limit
+        time.sleep(1)
+

 # Merge CSV-Files
 # (it would also have been a possibility to build a dataframe with all senators' tweets but i found the other way around more useful)