Merge remote-tracking branch 'origin/master'

This commit is contained in:
Michael Beck 2023-06-23 20:42:58 +02:00
commit d73da8db98

View File

@ -1,960 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "15573d92-f6a7-49d4-9c01-fff33d23be8e",
"metadata": {},
"source": [
"# Tweet Collecting\n",
"## Requirements\n",
"- tweepy-4.14.0\n",
"- pandas-2.0\n",
"- numpy-1.24.3\n",
"\n",
"## Preparations & Config\n"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "3290c840-961c-4e2c-a107-4ccd541d151b",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import os\n",
"import tweepy\n",
"import pandas as pd\n",
"import numpy as np\n",
"import glob\n",
"import time\n",
"\n",
"# Define time period of interest\n",
"time_slices = [\n",
" {\n",
" \"start_time\": \"2020-01-01T00:00:00Z\",\n",
" \"end_time\": \"2020-06-01T00:00:00Z\",\n",
" \"suffix\": \"-slice1\"\n",
" },\n",
" {\n",
" \"start_time\": \"2020-06-01T00:00:01Z\",\n",
" \"end_time\": \"2021-01-01T00:00:00Z\",\n",
" \"suffix\": \"-slice2\"\n",
" },\n",
" {\n",
" \"start_time\": \"2021-01-01T00:00:01Z\",\n",
" \"end_time\": \"2021-06-01T00:00:00Z\",\n",
" \"suffix\": \"-slice3\"\n",
" },\n",
" {\n",
" \"start_time\": \"2021-06-01T00:00:01Z\",\n",
" \"end_time\": \"2023-01-03T00:00:00Z\",\n",
" \"suffix\": \"-slice4\"\n",
" }\n",
"]\n",
"\n",
"tweet_fields = [\n",
"\t\"id\",\n",
"\t\"text\",\n",
"\t\"attachments\",\n",
"\t\"author_id\",\n",
"\t\"context_annotations\",\n",
"\t\"conversation_id\",\n",
"\t\"created_at\",\n",
"\t\"entities\",\n",
"\t\"geo\",\n",
"\t\"lang\",\n",
"\t\"possibly_sensitive\",\n",
"\t\"public_metrics\",\n",
"\t\"referenced_tweets\",\n",
"\t\"reply_settings\",\n",
"\t\"source\",\n",
"\t\"withheld\",\n",
"\t]\n",
"\n",
"## Setup directories\n",
"# WD Michael\n",
"# wd = \"/home/michael/Documents/PS/Data/collectTweets/\"\n",
"\n",
"# WD Server\n",
"wd = \"/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection\"\n",
"\n",
"# WD Josie\n",
"# wd = \"/home/michael/Documents/PS/Data/\"\n",
"\n",
"# WD Sam\n",
"# wd = \"/home/michael/Documents/PS/Data/\"\n",
"\n",
"# Tweet-datafile directory\n",
"td = \"data/tweets/\""
]
},
{
"cell_type": "markdown",
"id": "6782290c-7e14-4393-8caa-c78a2b326d85",
"metadata": {},
"source": [
"# Authenticate to Twitter"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "7ac9b603-e638-4ebb-95df-e0f8678f298e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"## Setup Api-connection\n",
"bearer_token = \"AAAAAAAAAAAAAAAAAAAAAMVDlQEAAAAAal9f5uZrM12CVPA4f4jr4mGH5Oc%3DuTg1Vd0YKYMwraA7ibX6LiGyd337OXkm3JwudEX7vatruswmoc\"\n",
"client = tweepy.Client(bearer_token, return_type = dict, wait_on_rate_limit = True)"
]
},
{
"cell_type": "markdown",
"id": "e81c4d49-242c-4b51-8e2a-e2bbfdae6877",
"metadata": {},
"source": [
"## Import Keywords\n",
"Keywords from:\n",
"* Chen, E., Lerman, K., & Ferrara, E. (2020). Tracking Social Media Discourse About the COVID-19 Pandemic: Development of a Public Coronavirus Twitter Data Set. JMIR Public Health and Surveillance, 6(2), e19273. https://doi.org/10.2196/19273\n",
"Line 80 and following:\n",
"* Lamsal, R. (2020). Coronavirus (COVID-19) Tweets Dataset [Data set]. IEEE. https://ieee-dataport.org/open-access/coronavirus-covid-19-tweets-dataset"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1d4af102-30ae-4c73-ae9c-333efb34e3f1",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"['Coronavirus',\n",
" 'Koronavirus',\n",
" 'Corona',\n",
" 'CDC',\n",
" 'Wuhancoronavirus',\n",
" 'Wuhanlockdown',\n",
" 'Ncov',\n",
" 'Wuhan',\n",
" 'N95',\n",
" 'Kungflu',\n",
" 'Epidemic',\n",
" 'outbreak',\n",
" 'Sinophobia',\n",
" 'China',\n",
" 'covid-19',\n",
" 'corona virus',\n",
" 'covid',\n",
" 'covid19',\n",
" 'sars-cov-2',\n",
" 'COVIDー19',\n",
" 'COVD',\n",
" 'pandemic',\n",
" 'coronapocalypse',\n",
" 'canceleverything',\n",
" 'Coronials',\n",
" 'SocialDistancingNow',\n",
" 'Social Distancing',\n",
" 'SocialDistancing',\n",
" 'panicbuy',\n",
" 'panic buy',\n",
" 'panicbuying',\n",
" 'panic buying',\n",
" '14DayQuarantine',\n",
" 'DuringMy14DayQuarantine',\n",
" 'panic shop',\n",
" 'panic shopping',\n",
" 'panicshop',\n",
" 'InMyQuarantineSurvivalKit',\n",
" 'panic-buy',\n",
" 'panic-shop',\n",
" 'coronakindness',\n",
" 'quarantinelife',\n",
" 'chinese virus',\n",
" 'chinesevirus',\n",
" 'stayhomechallenge',\n",
" 'stay home challenge',\n",
" 'sflockdown',\n",
" 'DontBeASpreader',\n",
" 'lockdown',\n",
" 'lock down',\n",
" 'shelteringinplace',\n",
" 'sheltering in place',\n",
" 'staysafestayhome',\n",
" 'stay safe stay home',\n",
" 'trumppandemic',\n",
" 'trump pandemic',\n",
" 'flattenthecurve',\n",
" 'flatten the curve',\n",
" 'china virus',\n",
" 'chinavirus',\n",
" 'quarentinelife',\n",
" 'PPEshortage',\n",
" 'saferathome',\n",
" 'stayathome',\n",
" 'stay at home',\n",
" 'stay home',\n",
" 'stayhome',\n",
" 'GetMePPE',\n",
" 'covidiot',\n",
" 'epitwitter',\n",
" 'pandemie',\n",
" 'wear a mask',\n",
" 'wearamask',\n",
" 'kung flu',\n",
" 'covididiot',\n",
" 'COVID__19',\n",
" 'omicron',\n",
" 'variant',\n",
" 'vaccine',\n",
" 'travel ban',\n",
" 'corona',\n",
" 'corona',\n",
" 'coronavirus',\n",
" 'coronavirus',\n",
" 'covid',\n",
" 'covid',\n",
" 'covid19',\n",
" 'covid19',\n",
" 'covid-19',\n",
" 'covid-19',\n",
" 'sarscov2',\n",
" 'sarscov2',\n",
" 'sars cov2',\n",
" 'sars cov 2',\n",
" 'covid_19',\n",
" 'covid_19',\n",
" 'ncov',\n",
" 'ncov',\n",
" 'ncov2019',\n",
" 'ncov2019',\n",
" '2019-ncov',\n",
" '2019-ncov',\n",
" 'pandemic',\n",
" 'pandemic 2019ncov',\n",
" '2019ncov',\n",
" 'quarantine',\n",
" 'quarantine',\n",
" 'flatten the curve',\n",
" 'flattening the curve',\n",
" 'flatteningthecurve',\n",
" 'flattenthecurve',\n",
" 'hand sanitizer',\n",
" 'handsanitizer',\n",
" 'lockdown',\n",
" 'lockdown',\n",
" 'social distancing',\n",
" 'socialdistancing',\n",
" 'work from home',\n",
" 'workfromhome',\n",
" 'working from home',\n",
" 'workingfromhome',\n",
" 'ppe',\n",
" 'n95',\n",
" 'ppe',\n",
" 'n95',\n",
" 'covidiots',\n",
" 'covidiots',\n",
" 'herd immunity',\n",
" 'herdimmunity',\n",
" 'pneumonia',\n",
" 'pneumonia',\n",
" 'chinese virus',\n",
" 'chinesevirus',\n",
" 'wuhan virus',\n",
" 'wuhanvirus',\n",
" 'kung flu',\n",
" 'kungflu',\n",
" 'wearamask',\n",
" 'wearamask',\n",
" 'wear a mask',\n",
" 'vaccine',\n",
" 'vaccines',\n",
" 'vaccine',\n",
" 'vaccines',\n",
" 'corona vaccine',\n",
" 'corona vaccines',\n",
" 'coronavaccine',\n",
" 'coronavaccines',\n",
" 'face shield',\n",
" 'faceshield',\n",
" 'face shields',\n",
" 'faceshields',\n",
" 'health worker',\n",
" 'healthworker',\n",
" 'health workers',\n",
" 'healthworkers',\n",
" 'stayhomestaysafe',\n",
" 'coronaupdate',\n",
" 'frontlineheroes',\n",
" 'coronawarriors',\n",
" 'homeschool',\n",
" 'homeschooling',\n",
" 'hometasking',\n",
" 'masks4all',\n",
" 'wfh',\n",
" 'wash ur hands',\n",
" 'wash your hands',\n",
" 'washurhands',\n",
" 'washyourhands',\n",
" 'stayathome',\n",
" 'stayhome',\n",
" 'selfisolating',\n",
" 'self isolating']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"keywords = []\n",
"\n",
"# Read the keywords from a file\n",
"with open(\"data/keywords.txt\", \"r\") as file:\n",
" lines = file.readlines()\n",
" for line in lines:\n",
" keyword = line.strip() # Remove the newline character\n",
" keywords.append(keyword)\n",
"\n",
"keywords"
]
},
{
"cell_type": "markdown",
"id": "9f190608-c0a2-4e7e-9560-a03a57aa4132",
"metadata": {},
"source": [
"## Import Accounts"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a5bde33c-cc69-43ad-9b0c-4b04ce7f8a3c",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['SenAlexander', 'SenatorEnzi', 'CoryGardner', 'VP', 'SenatorIsakson', 'DougJones', 'KLoeffler', 'MarthaMcSallyAZ', 'DavidPerdueGA', 'SenPatRoberts', 'SenatorTomUdall', 'SenatorBaldwin', 'SenJohnBarrasso', 'SenatorBennet', 'MarshaBlackburn', 'SenBlumenthal', 'RoyBlunt', 'senbooker', 'JohnBoozman', 'SenatorBraun', 'SenSherrodBrown', 'SenatorBurr', 'SenatorCantwell', 'SenCapito', 'SenatorCardin', 'SenatorCarper', 'SenBobCasey', 'SenBillCassidy', 'SenatorCollins', 'ChrisCoons', 'JohnCornyn', 'SenCortezMasto', 'SenTomCotton', 'SenKevinCramer', 'MikeCrapo', 'SenTedCruz', 'SteveDaines', 'SenDuckworth', 'SenatorDurbin', 'SenJoniErnst', 'SenFettermanPA', 'SenFeinstein', 'SenatorFischer', 'SenGillibrand', 'LindseyGrahamSC', 'ChuckGrassley', 'SenatorHagerty', 'SenatorHassan', 'HawleyMO', 'MartinHeinrich', 'SenatorHick', 'maziehirono', 'SenJohnHoeven', 'SenHydeSmith', 'JimInhofe', 'SenRonJohnson', 'timkaine', 'SenMarkKelly', 'SenJohnKennedy', 'SenAngusKing', 'SenAmyKlobuchar', 'SenatorLankford', 'SenatorLeahy', 'SenMikeLee', 'SenatorLujan', 'SenLummis', 'Sen_JoeManchin', 'SenMarkey', 'SenatorMarshall', 'LeaderMcConnell', 'SenatorMenendez', 'SenJeffMerkley', 'JerryMoran', 'lisamurkowski', 'ChrisMurphyCT', 'PattyMurray', 'SenOssoff', 'SenAlexPadilla', 'senrandpaul', 'SenGaryPeters', 'senrobportman', 'SenJackReed', 'SenatorRisch', 'SenatorRomney', 'SenJackyRosen', 'SenatorRounds', 'senmarcorubio', 'SenSanders', 'sensasse', 'brianschatz', 'SenSchumer', 'SenRickScott', 'SenatorTimScott', 'SenatorShaheen', 'SenShelby', 'SenatorSinema', 'SenTinaSmith', 'SenStabenow', 'SenDanSullivan', 'SenatorTester', 'SenJohnThune', 'SenThomTillis', 'SenToomey', 'SenTuberville', 'ChrisVanHollen', 'MarkWarner', 'SenatorWarnock', 'ewarren', 'SenWhitehouse', 'SenatorWicker', 'RonWyden', 'SenToddYoung']\n",
"['LamarAlexander ', nan, 'corygardner', nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]\n"
]
}
],
"source": [
"# Get accounts & alt-accounts from Senators-Datafile\n",
"accounts = pd.read_csv(\"data/senators-raw.csv\")[\"twitter_handle\"].tolist()\n",
"alt_accounts = pd.read_csv(\"data/senators-raw.csv\")[\"alt_handle\"].tolist()\n",
"print(accounts)\n",
"print(alt_accounts)"
]
},
{
"cell_type": "markdown",
"id": "befc0fad-c803-4145-a041-570d6f894178",
"metadata": {},
"source": [
"## Collect Tweets\n",
"Loops over accounts:\n",
"* Collects Tweets of account. \n",
"* Then extracts columns public_metrics (likes aso) and referenced_tweets (indicates, whether tweet is a reply).\n",
"* Checks if tweet-text contains any of the keywords, if so, inserts the keyword(s) in a new column.\n",
"* Saves tweets of the account in a csv file \"HANDLE.csv\" and \"HANDLE-LONG.csv\" (LONG contains all given information such as annotations, that we might or might not need)\n",
"\n",
"### Problem:\n",
"_I limited the results to 20 tweets per senator._\n",
"Twitter has the following API Limit for the [search_all_tweets](https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-all) method I used: \n",
"* App rate limit (Application-only): 300 requests per 15-minute window shared among all users of your app\n",
"* App rate limit (Application-only): 1 per second shared among all users of your app\n",
"\n",
"With a limit of 300, I request 20 posts per slice, just to get a better understanding of what's happening. After trying different things out, I think that the time-slices won't be needed if we get around the problem I'm having right now:\n",
"as soon, as the rate limit is reached, tweepy stops and waits for the time to run out and start again. BUT it doesn't retry the request but starts with the next request. \n",
"I haven't found anything and my only idea to solve the problem was to generate a list of failed attempts (via try and except) and after getting all tweets letting tweepy work over that list again. \n",
"One more thing I don't understand: when fetching the tweets I already sent to you, I didn't have as many problems as now — the limit was only exceeded after 3-4 senators — even though I used a higher `max_results` and a higher `flatten` value.\n",
"\n",
"I hope that the following output speaks for itself:\n",
"```\n",
"trying to fetch tweets for SenAlexander-slice1\n",
"trying to fetch tweets for SenAlexander-slice2\n",
"trying to fetch tweets for SenAlexander-slice3\n",
"trying to fetch tweets for SenAlexander-slice4\n",
"trying to fetch tweets for SenatorEnzi-slice1\n",
"trying to fetch tweets for SenatorEnzi-slice2\n",
"trying to fetch tweets for SenatorEnzi-slice3\n",
"return empty in SenatorEnzi-slice3 - from 2021-01-01T00:00:01Z to 2021-06-01T00:00:00Z\n",
"trying to fetch tweets for SenatorEnzi-slice4\n",
"\n",
"Rate limit exceeded. Sleeping for 893 seconds.\n",
"```\n",
"\n",
"Tweepy returned no tweets because of the exceeded tweet limit, then the script tried to fetch more tweets and the error message came up.\n",
"Before changing the code below, see the other version i wrote just below the next cell (and ignore the error message below the cell as i just interrupted the execution which lead to the error message)."
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "0f842b8a-846a-4f38-8231-c1e9ccfbddf5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"trying to fetch tweets for SenAlexander-slice1\n",
"trying to fetch tweets for SenAlexander-slice2\n",
"trying to fetch tweets for SenAlexander-slice3\n",
"trying to fetch tweets for SenAlexander-slice4\n",
"trying to fetch tweets for SenatorEnzi-slice1\n",
"trying to fetch tweets for SenatorEnzi-slice2\n",
"trying to fetch tweets for SenatorEnzi-slice3\n",
"return empty in SenatorEnzi-slice3 - from 2021-01-01T00:00:01Z to 2021-06-01T00:00:00Z\n",
"<generator object Paginator.flatten at 0x7f20ebf137b0>\n",
"trying to fetch tweets for SenatorEnzi-slice4\n",
"trying to fetch tweets for CoryGardner-slice1\n",
"trying to fetch tweets for CoryGardner-slice2\n",
"trying to fetch tweets for CoryGardner-slice3\n",
"return empty in CoryGardner-slice3 - from 2021-01-01T00:00:01Z to 2021-06-01T00:00:00Z\n",
"<generator object Paginator.flatten at 0x7f20ebf13740>\n",
"trying to fetch tweets for CoryGardner-slice4\n",
"trying to fetch tweets for VP-slice1\n",
"trying to fetch tweets for VP-slice2\n",
"trying to fetch tweets for VP-slice3\n",
"trying to fetch tweets for VP-slice4\n",
"trying to fetch tweets for SenatorIsakson-slice1\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[28], line 30\u001b[0m\n\u001b[1;32m 22\u001b[0m tweets \u001b[38;5;241m=\u001b[39m tweepy\u001b[38;5;241m.\u001b[39mPaginator(client\u001b[38;5;241m.\u001b[39msearch_all_tweets,\n\u001b[1;32m 23\u001b[0m query\u001b[38;5;241m=\u001b[39mquery,\n\u001b[1;32m 24\u001b[0m tweet_fields\u001b[38;5;241m=\u001b[39mtweet_fields,\n\u001b[1;32m 25\u001b[0m start_time\u001b[38;5;241m=\u001b[39mstart_time,\n\u001b[1;32m 26\u001b[0m end_time\u001b[38;5;241m=\u001b[39mend_time,\n\u001b[1;32m 27\u001b[0m max_results\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m20\u001b[39m)\u001b[38;5;241m.\u001b[39mflatten(\u001b[38;5;241m20\u001b[39m)\n\u001b[1;32m 29\u001b[0m \u001b[38;5;66;03m# for each tweet returned...\u001b[39;00m\n\u001b[0;32m---> 30\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m tweet \u001b[38;5;129;01min\u001b[39;00m tweets:\n\u001b[1;32m 31\u001b[0m \u001b[38;5;66;03m# ... add that tweet to tweetlist\u001b[39;00m\n\u001b[1;32m 32\u001b[0m tweetlist\u001b[38;5;241m.\u001b[39mappend(tweet)\n\u001b[1;32m 34\u001b[0m \u001b[38;5;66;03m# Check if no tweets fetched for the current time slice. If there are no tweets, skip to next time_slices loop iteration\u001b[39;00m\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/pagination.py:67\u001b[0m, in \u001b[0;36mPaginator.flatten\u001b[0;34m(self, limit)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 66\u001b[0m count \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m---> 67\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m response \u001b[38;5;129;01min\u001b[39;00m PaginationIterator(\n\u001b[1;32m 68\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmethod, \u001b[38;5;241m*\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkwargs\n\u001b[1;32m 69\u001b[0m ):\n\u001b[1;32m 70\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(response, Response):\n\u001b[1;32m 71\u001b[0m response_data \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mdata \u001b[38;5;129;01mor\u001b[39;00m []\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/pagination.py:126\u001b[0m, in \u001b[0;36mPaginationIterator.__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpagination_token\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m pagination_token\n\u001b[0;32m--> 126\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(response, Response):\n\u001b[1;32m 129\u001b[0m meta \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mmeta\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/client.py:1163\u001b[0m, in \u001b[0;36mClient.search_all_tweets\u001b[0;34m(self, query, **params)\u001b[0m\n\u001b[1;32m 1071\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"search_all_tweets( \\\u001b[39;00m\n\u001b[1;32m 1072\u001b[0m \u001b[38;5;124;03m query, *, end_time=None, expansions=None, max_results=None, \\\u001b[39;00m\n\u001b[1;32m 1073\u001b[0m \u001b[38;5;124;03m media_fields=None, next_token=None, place_fields=None, \\\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1160\u001b[0m \u001b[38;5;124;03m.. _pagination: https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/paginate\u001b[39;00m\n\u001b[1;32m 1161\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1162\u001b[0m params[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m query\n\u001b[0;32m-> 1163\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1164\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mGET\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/2/tweets/search/all\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1165\u001b[0m \u001b[43m \u001b[49m\u001b[43mendpoint_parameters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1166\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mend_time\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mexpansions\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmax_results\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmedia.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1167\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mnext_token\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mplace.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpoll.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mquery\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1168\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msince_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msort_order\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstart_time\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtweet.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1169\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muntil_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 1170\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mTweet\u001b[49m\n\u001b[1;32m 1171\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/client.py:129\u001b[0m, in \u001b[0;36mBaseClient._make_request\u001b[0;34m(self, method, route, params, endpoint_parameters, json, data_type, user_auth)\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_make_request\u001b[39m(\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28mself\u001b[39m, method, route, params\u001b[38;5;241m=\u001b[39m{}, endpoint_parameters\u001b[38;5;241m=\u001b[39m(), json\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 125\u001b[0m data_type\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, user_auth\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 126\u001b[0m ):\n\u001b[1;32m 127\u001b[0m request_params \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_params(params, endpoint_parameters)\n\u001b[0;32m--> 129\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mroute\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 130\u001b[0m \u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muser_auth\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_auth\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreturn_type \u001b[38;5;129;01mis\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mResponse:\n\u001b[1;32m 133\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/client.py:84\u001b[0m, in \u001b[0;36mBaseClient.request\u001b[0;34m(self, method, route, params, json, user_auth)\u001b[0m\n\u001b[1;32m 75\u001b[0m headers[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAuthorization\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBearer \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbearer_token\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 77\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\n\u001b[1;32m 78\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMaking API request: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmethod\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mhost\u001b[38;5;250m \u001b[39m\u001b[38;5;241m+\u001b[39m\u001b[38;5;250m \u001b[39mroute\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 79\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mParameters: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mparams\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHeaders: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mheaders\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 81\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBody: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mjson\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 82\u001b[0m )\n\u001b[0;32m---> 84\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 85\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhost\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mroute\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 86\u001b[0m \u001b[43m \u001b[49m\u001b[43mauth\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mauth\u001b[49m\n\u001b[1;32m 87\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m response:\n\u001b[1;32m 88\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\n\u001b[1;32m 89\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mReceived API response: \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mreason\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHeaders: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mheaders\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 92\u001b[0m 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mContent: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mcontent\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 93\u001b[0m )\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m400\u001b[39m:\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/requests/sessions.py:589\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 584\u001b[0m send_kwargs \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 585\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimeout\u001b[39m\u001b[38;5;124m\"\u001b[39m: timeout,\n\u001b[1;32m 586\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mallow_redirects\u001b[39m\u001b[38;5;124m\"\u001b[39m: allow_redirects,\n\u001b[1;32m 587\u001b[0m }\n\u001b[1;32m 588\u001b[0m send_kwargs\u001b[38;5;241m.\u001b[39mupdate(settings)\n\u001b[0;32m--> 589\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43msend_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/requests/sessions.py:703\u001b[0m, in \u001b[0;36mSession.send\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 700\u001b[0m start \u001b[38;5;241m=\u001b[39m preferred_clock()\n\u001b[1;32m 702\u001b[0m \u001b[38;5;66;03m# Send the request\u001b[39;00m\n\u001b[0;32m--> 703\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43madapter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 705\u001b[0m \u001b[38;5;66;03m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[1;32m 706\u001b[0m elapsed \u001b[38;5;241m=\u001b[39m preferred_clock() \u001b[38;5;241m-\u001b[39m start\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/requests/adapters.py:486\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 483\u001b[0m timeout \u001b[38;5;241m=\u001b[39m TimeoutSauce(connect\u001b[38;5;241m=\u001b[39mtimeout, read\u001b[38;5;241m=\u001b[39mtimeout)\n\u001b[1;32m 485\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 486\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 487\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 488\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 489\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 490\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 491\u001b[0m \u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 492\u001b[0m \u001b[43m \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 493\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 
494\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 495\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 496\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 497\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 498\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 500\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m 501\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m(err, request\u001b[38;5;241m=\u001b[39mrequest)\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/urllib3/connectionpool.py:790\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 787\u001b[0m response_conn \u001b[38;5;241m=\u001b[39m conn \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m release_conn \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 789\u001b[0m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[0;32m--> 790\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 791\u001b[0m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 792\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 793\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 794\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 795\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 796\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 797\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 798\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 799\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresponse_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 800\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 801\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 802\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 803\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 805\u001b[0m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n\u001b[1;32m 806\u001b[0m clean_exit \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/urllib3/connectionpool.py:536\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[1;32m 534\u001b[0m \u001b[38;5;66;03m# Receive the response from the server\u001b[39;00m\n\u001b[1;32m 535\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 536\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 537\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (BaseSSLError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_raise_timeout(err\u001b[38;5;241m=\u001b[39me, url\u001b[38;5;241m=\u001b[39murl, timeout_value\u001b[38;5;241m=\u001b[39mread_timeout)\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/urllib3/connection.py:454\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 451\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mresponse\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m HTTPResponse\n\u001b[1;32m 453\u001b[0m \u001b[38;5;66;03m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[0;32m--> 454\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 457\u001b[0m assert_header_parsing(httplib_response\u001b[38;5;241m.\u001b[39mmsg)\n",
"File \u001b[0;32m/usr/lib/python3.9/http/client.py:1347\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1345\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1346\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1347\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbegin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1348\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n\u001b[1;32m 1349\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclose()\n",
"File \u001b[0;32m/usr/lib/python3.9/http/client.py:307\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 305\u001b[0m \u001b[38;5;66;03m# read until we get a non-100 response\u001b[39;00m\n\u001b[1;32m 306\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 307\u001b[0m version, status, reason \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 308\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m CONTINUE:\n\u001b[1;32m 309\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n",
"File \u001b[0;32m/usr/lib/python3.9/http/client.py:268\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 267\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_read_status\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 268\u001b[0m line \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreadline\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_MAXLINE\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124miso-8859-1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(line) \u001b[38;5;241m>\u001b[39m _MAXLINE:\n\u001b[1;32m 270\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m LineTooLong(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstatus line\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"File \u001b[0;32m/usr/lib/python3.9/socket.py:704\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 702\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 703\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 704\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 705\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m 706\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
"File \u001b[0;32m/usr/lib/python3.9/ssl.py:1241\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1237\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m flags \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 1238\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1239\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[1;32m 1240\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m)\n\u001b[0;32m-> 1241\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnbytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1242\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1243\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mrecv_into(buffer, nbytes, flags)\n",
"File \u001b[0;32m/usr/lib/python3.9/ssl.py:1099\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 1097\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1098\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m buffer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1099\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sslobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1100\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1101\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sslobj\u001b[38;5;241m.\u001b[39mread(\u001b[38;5;28mlen\u001b[39m)\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"# Iterate over each Twitter account\n",
"for handle in accounts:\n",
" for slice_data in time_slices:\n",
"        # sleep 1 second to stay within the API's 1 request/second rate limit\n",
" time.sleep(1) \n",
" # define slice data variables from time_slices\n",
" start_time = slice_data['start_time']\n",
" end_time = slice_data['end_time']\n",
" suffix = slice_data['suffix']\n",
" \n",
" # define tweepy query with twitter handle of current sen\n",
" query = f'from:{handle} -is:retweet'\n",
" \n",
" # create empty tweetlist that will be filled with tweets of current sen\n",
" tweetlist = []\n",
" \n",
" # statusmsg\n",
" msg = f'trying to fetch tweets for {handle}{suffix}'\n",
" print(msg)\n",
" \n",
" # Fetch tweets using tweepy Twitter API v2 pagination\n",
" tweets = tweepy.Paginator(client.search_all_tweets,\n",
" query=query,\n",
" tweet_fields=tweet_fields,\n",
" start_time=start_time,\n",
" end_time=end_time,\n",
" max_results=20).flatten(20)\n",
" \n",
" # for each tweet returned...\n",
" for tweet in tweets:\n",
" # ... add that tweet to tweetlist\n",
" tweetlist.append(tweet)\n",
" \n",
" # Check if no tweets fetched for the current time slice. If there are no tweets, skip to next time_slices loop iteration\n",
" if len(tweetlist) == 0:\n",
" msg = f'return empty in {handle}{suffix} - from {start_time} to {end_time}'\n",
" print(msg)\n",
" print(tweets)\n",
" continue\n",
" \n",
" # convert to dataframe\n",
" tweet_df = pd.DataFrame(tweetlist)\n",
" \n",
" # add handle column as api only provides user-ids\n",
" tweet_df['handle'] = handle\n",
" \n",
" ## Extract referenced_tweet info from column\n",
" tweet_df['referenced_tweet_type'] = None\n",
" tweet_df['referenced_tweet_id'] = None\n",
" \n",
" # if cond. because in some cases column doesn't exist\n",
" if 'referenced_tweets' in tweet_df.columns:\n",
" for index, row in tweet_df.iterrows():\n",
" referenced_tweets = row['referenced_tweets']\n",
" \n",
" if isinstance(referenced_tweets, list) and len(referenced_tweets) > 0:\n",
" referenced_tweet = referenced_tweets[0]\n",
" referenced_tweet_type = referenced_tweet['type']\n",
" referenced_tweet_id = referenced_tweet['id']\n",
" \n",
" tweet_df.at[index, 'referenced_tweet_type'] = referenced_tweet_type\n",
" tweet_df.at[index, 'referenced_tweet_id'] = referenced_tweet_id\n",
" \n",
" ## Check if tweet-text contains keyword\n",
" # if cond. because in some cases column doesn't exist\n",
" if 'text' in tweet_df.columns:\n",
" tweet_df['contains_keyword'] = (tweet_df['text'].str.findall('|'.join(keywords))\n",
" .str.join(',')\n",
" .replace('', 'none'))\n",
" \n",
" ## Save two versions of the dataset, one with all fields and one without dict fields\n",
" # define filepaths\n",
" csv_path = f'data/tweets/{handle}{suffix}.csv'\n",
" csv_path2 = f'data/tweets/{handle}{suffix}-LONG.csv'\n",
" # save LONG csv\n",
" tweet_df.to_csv(csv_path2)\n",
" # Remove 'context_annotations', 'entities' and 'referenced_tweets' columns for short csv files\n",
" # if cond. because in some cases column doesn't exist\n",
" if all(k in tweet_df for k in ('context_annotations', 'entities', 'referenced_tweets')):\n",
" tweet_df = tweet_df.drop(['context_annotations', 'entities', 'referenced_tweets'], axis=1)\n",
" # save short csv\n",
" tweet_df.to_csv(csv_path)"
]
},
{
"cell_type": "markdown",
"id": "cb779d9a-cecb-475c-9e76-22c9b8c1928d",
"metadata": {},
"source": [
"## Alternative way to fetch tweets via tweepy with retry mechanism"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "c3b4a2ba-46e2-478b-9558-7d6999fdcd69",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"trying to fetch tweets for SenAlexander-slice1\n",
"trying to fetch tweets for SenAlexander-slice2\n",
"trying to fetch tweets for SenAlexander-slice3\n",
"trying to fetch tweets for SenAlexander-slice4\n",
"trying to fetch tweets for SenatorEnzi-slice1\n",
"trying to fetch tweets for SenatorEnzi-slice2\n",
"trying to fetch tweets for SenatorEnzi-slice3\n",
"return empty in SenatorEnzi-slice3 - from 2021-01-01T00:00:01Z to 2021-06-01T00:00:00Z\n",
"trying to fetch tweets for SenatorEnzi-slice4\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Rate limit exceeded. Sleeping for 437 seconds.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"trying to fetch tweets for CoryGardner-slice1\n",
"trying to fetch tweets for CoryGardner-slice2\n",
"trying to fetch tweets for CoryGardner-slice3\n",
"return empty in CoryGardner-slice3 - from 2021-01-01T00:00:01Z to 2021-06-01T00:00:00Z\n",
"trying to fetch tweets for CoryGardner-slice4\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Rate limit exceeded. Sleeping for 897 seconds.\n"
]
},
{
"ename": "AttributeError",
"evalue": "module 'tweepy' has no attribute 'TweepError'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[21], line 33\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;66;03m# for each tweet returned...\u001b[39;00m\n\u001b[0;32m---> 33\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m tweet \u001b[38;5;129;01min\u001b[39;00m tweets:\n\u001b[1;32m 34\u001b[0m \u001b[38;5;66;03m# ... add that tweet to tweetlist\u001b[39;00m\n\u001b[1;32m 35\u001b[0m tweetlist\u001b[38;5;241m.\u001b[39mappend(tweet)\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/pagination.py:67\u001b[0m, in \u001b[0;36mPaginator.flatten\u001b[0;34m(self, limit)\u001b[0m\n\u001b[1;32m 66\u001b[0m count \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m---> 67\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m response \u001b[38;5;129;01min\u001b[39;00m PaginationIterator(\n\u001b[1;32m 68\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmethod, \u001b[38;5;241m*\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkwargs\n\u001b[1;32m 69\u001b[0m ):\n\u001b[1;32m 70\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(response, Response):\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/pagination.py:126\u001b[0m, in \u001b[0;36mPaginationIterator.__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpagination_token\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m pagination_token\n\u001b[0;32m--> 126\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(response, Response):\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/client.py:1163\u001b[0m, in \u001b[0;36mClient.search_all_tweets\u001b[0;34m(self, query, **params)\u001b[0m\n\u001b[1;32m 1162\u001b[0m params[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m query\n\u001b[0;32m-> 1163\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1164\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mGET\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/2/tweets/search/all\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1165\u001b[0m \u001b[43m \u001b[49m\u001b[43mendpoint_parameters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1166\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mend_time\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mexpansions\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmax_results\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmedia.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1167\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mnext_token\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mplace.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpoll.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mquery\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1168\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msince_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msort_order\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstart_time\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtweet.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1169\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muntil_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 1170\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mTweet\u001b[49m\n\u001b[1;32m 1171\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/client.py:129\u001b[0m, in \u001b[0;36mBaseClient._make_request\u001b[0;34m(self, method, route, params, endpoint_parameters, json, data_type, user_auth)\u001b[0m\n\u001b[1;32m 127\u001b[0m request_params \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_params(params, endpoint_parameters)\n\u001b[0;32m--> 129\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mroute\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 130\u001b[0m \u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muser_auth\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_auth\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreturn_type \u001b[38;5;129;01mis\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mResponse:\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/client.py:112\u001b[0m, in \u001b[0;36mBaseClient.request\u001b[0;34m(self, method, route, params, json, user_auth)\u001b[0m\n\u001b[1;32m 108\u001b[0m log\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m 109\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRate limit exceeded. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSleeping for \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msleep_time\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m seconds.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 111\u001b[0m )\n\u001b[0;32m--> 112\u001b[0m \u001b[43mtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[43msleep_time\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrequest(method, route, params, json, user_auth)\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: ",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[21], line 39\u001b[0m\n\u001b[1;32m 35\u001b[0m tweetlist\u001b[38;5;241m.\u001b[39mappend(tweet)\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m \u001b[38;5;66;03m# exit the retry loop if tweets are successfully fetched\u001b[39;00m\n\u001b[0;32m---> 39\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[43mtweepy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mTweepError\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 40\u001b[0m \u001b[38;5;66;03m# handle rate limit exceeded error\u001b[39;00m\n\u001b[1;32m 41\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m e\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m429\u001b[39m:\n\u001b[1;32m 42\u001b[0m \u001b[38;5;66;03m# get the rate limit reset time from the response headers\u001b[39;00m\n\u001b[1;32m 43\u001b[0m reset_time \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mint\u001b[39m(e\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mheaders[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mx-rate-limit-reset\u001b[39m\u001b[38;5;124m'\u001b[39m])\n",
"\u001b[0;31mAttributeError\u001b[0m: module 'tweepy' has no attribute 'TweepError'"
]
}
],
"source": [
"# Iterate over each Twitter account\n",
"for handle in accounts:\n",
" for slice_data in time_slices:\n",
" # define slice data variables from time_slices\n",
" start_time = slice_data['start_time']\n",
" end_time = slice_data['end_time']\n",
" suffix = slice_data['suffix']\n",
" \n",
" # define tweepy query with twitter handle of current sen\n",
" query = f'from:{handle} -is:retweet'\n",
" \n",
" # create empty tweetlist that will be filled with tweets of current sen\n",
" tweetlist = []\n",
" \n",
" # statusmsg\n",
" msg = f'trying to fetch tweets for {handle}{suffix}'\n",
" print(msg)\n",
" \n",
" # Fetch tweets using tweepy Twitter API v2 pagination with retry mechanism\n",
" max_attempts = 3 # maximum number of attempts to fetch tweets for a slice\n",
" attempt = 1\n",
" \n",
" while attempt <= max_attempts:\n",
" try:\n",
" tweets = tweepy.Paginator(client.search_all_tweets,\n",
" query=query,\n",
" tweet_fields=tweet_fields,\n",
" start_time=start_time,\n",
" end_time=end_time,\n",
" max_results=20).flatten(20)\n",
" \n",
" # for each tweet returned...\n",
" for tweet in tweets:\n",
" # ... add that tweet to tweetlist\n",
" tweetlist.append(tweet)\n",
" \n",
" break # exit the retry loop if tweets are successfully fetched\n",
" \n",
"            except tweepy.HTTPException as e:\n",
" # handle rate limit exceeded error\n",
" if e.response.status_code == 429:\n",
" # get the rate limit reset time from the response headers\n",
" reset_time = int(e.response.headers['x-rate-limit-reset'])\n",
" current_time = int(time.time())\n",
" \n",
" # calculate the sleep time until the rate limit resets\n",
" sleep_time = reset_time - current_time + 1 # add an extra second\n",
" \n",
" # sleep until the rate limit resets\n",
" time.sleep(sleep_time)\n",
" \n",
" attempt += 1 # increment the attempt counter\n",
" continue # retry the API call\n",
" \n",
" else:\n",
" # handle other types of Tweepy errors\n",
" print(f'Error occurred: {e}')\n",
" break\n",
" \n",
" # Check if no tweets fetched for the current time slice. If there are no tweets, skip to next time_slices loop iteration\n",
" if len(tweetlist) == 0:\n",
" msg = f'return empty in {handle}{suffix} - from {start_time} to {end_time}'\n",
" print(msg)\n",
" continue\n",
" \n",
" # convert to dataframe\n",
" tweet_df = pd.DataFrame(tweetlist)\n",
" \n",
" # add handle column as api only provides user-ids\n",
" tweet_df['handle'] = handle\n",
" \n",
" ## Extract referenced_tweet info from column\n",
" tweet_df['referenced_tweet_type'] = None\n",
" tweet_df['referenced_tweet_id'] = None\n",
" \n",
" # if cond. because in some cases column doesn't exist\n",
" if 'referenced_tweets' in tweet_df.columns:\n",
" for index, row in tweet_df.iterrows():\n",
" referenced_tweets = row['referenced_tweets']\n",
" \n",
" if isinstance(referenced_tweets, list) and len(referenced_tweets) > 0:\n",
" referenced_tweet = referenced_tweets[0]\n",
" referenced_tweet_type = referenced_tweet['type']\n",
" referenced_tweet_id = referenced_tweet['id']\n",
" \n",
" tweet_df.at[index, 'referenced_tweet_type'] = referenced_tweet_type\n",
" tweet_df.at[index, 'referenced_tweet_id'] = referenced_tweet_id\n",
" \n",
" ## Check if tweet-text contains keyword\n",
" # if cond. because in some cases column doesn't exist\n",
" if 'text' in tweet_df.columns:\n",
" tweet_df['contains_keyword'] = (tweet_df['text'].str.findall('|'.join(keywords))\n",
" .str.join(',')\n",
" .replace('', 'none'))\n",
" \n",
" ## Save two versions of the dataset, one with all fields and one without dict fields\n",
" # define filepaths\n",
" csv_path = f'data/tweets/{handle}{suffix}.csv'\n",
" csv_path2 = f'data/tweets/{handle}{suffix}-LONG.csv'\n",
" # save LONG csv\n",
" tweet_df.to_csv(csv_path2)\n",
" # Remove 'context_annotations', 'entities' and 'referenced_tweets' columns for short csv files\n",
" # if cond. because in some cases column doesn't exist\n",
" if all(k in tweet_df for k in ('context_annotations', 'entities', 'referenced_tweets')):\n",
" tweet_df = tweet_df.drop(['context_annotations', 'entities', 'referenced_tweets'], axis=1)\n",
" # save short csv\n",
" tweet_df.to_csv(csv_path)\n",
" \n",
" # sleep 1 second to not exceed the API rate limit\n",
" time.sleep(1)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5dd5498-1ba4-4f0a-9bb9-ffce4655212d",
"metadata": {},
"outputs": [],
"source": [
"path_to_tweetdfs = wd + td\n",
"os.chdir(path_to_tweetdfs)\n",
"tweetfiles = glob.glob('*.{}'.format(\"csv\"))\n",
"\n",
"print(tweetfiles)\n",
"\n",
"# save merged csv as two files \n",
"df_all_senators = pd.DataFrame()\n",
"df_all_senators_long = pd.DataFrame()\n",
"for file in tweetfiles:\n",
"\tif \"LONG\" in file:\n",
"\t\tdf = pd.read_csv(file)\n",
"\t\tdf_all_senators_long = pd.concat([df, df_all_senators_long])\n",
"\telse:\n",
"\t\tdf = pd.read_csv(file)\n",
"\t\tdf_all_senators = pd.concat([df, df_all_senators])\n",
"csv_path = td + \"ALL-SENATORS.csv\"\n",
"csv_path2 = td + \"ALL-SENATORS-LONG-LONG.csv\"\n",
"df_all_senators.to_csv(csv_path) \n",
"df_all_senators_long.to_csv(csv_path2)\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "320ebbf4-8eaf-4189-836b-5d5aa8a0a263",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"trying to fetch tweets for SenAlexander-slice1\n",
"trying to fetch tweets for SenAlexander-slice2\n",
"trying to fetch tweets for SenAlexander-slice3\n",
"trying to fetch tweets for SenAlexander-slice4\n",
"trying to fetch tweets for SenatorEnzi-slice1\n",
"trying to fetch tweets for SenatorEnzi-slice2\n",
"trying to fetch tweets for SenatorEnzi-slice3\n",
"return empty in SenatorEnzi-slice3 - from 2021-01-01T00:00:01Z to 2021-06-01T00:00:00Z\n",
"trying to fetch tweets for SenatorEnzi-slice4\n",
"trying to fetch tweets for CoryGardner-slice1\n",
"trying to fetch tweets for CoryGardner-slice2\n",
"trying to fetch tweets for CoryGardner-slice3\n",
"return empty in CoryGardner-slice3 - from 2021-01-01T00:00:01Z to 2021-06-01T00:00:00Z\n",
"trying to fetch tweets for CoryGardner-slice4\n",
"trying to fetch tweets for VP-slice1\n",
"trying to fetch tweets for VP-slice2\n",
"trying to fetch tweets for VP-slice3\n",
"trying to fetch tweets for VP-slice4\n",
"trying to fetch tweets for SenatorIsakson-slice1\n",
"trying to fetch tweets for SenatorIsakson-slice2\n",
"trying to fetch tweets for SenatorIsakson-slice3\n",
"trying to fetch tweets for SenatorIsakson-slice4\n",
"trying to fetch tweets for DougJones-slice1\n",
"trying to fetch tweets for DougJones-slice2\n",
"trying to fetch tweets for DougJones-slice3\n",
"trying to fetch tweets for DougJones-slice4\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[24], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m handle \u001b[38;5;129;01min\u001b[39;00m accounts:\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m slice_data \u001b[38;5;129;01min\u001b[39;00m time_slices:\n\u001b[0;32m----> 4\u001b[0m \u001b[43mtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m1.01\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# define slice data variables from time_slices\u001b[39;00m\n\u001b[1;32m 6\u001b[0m start_time \u001b[38;5;241m=\u001b[39m slice_data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstart_time\u001b[39m\u001b[38;5;124m'\u001b[39m]\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"# Iterate over each Twitter account and each time slice, fetching tweets via\n",
"# the Twitter API v2 full-archive search with a simple retry mechanism.\n",
"for handle in accounts:\n",
"    for slice_data in time_slices:\n",
"        # throttle to stay under the 1 request/second full-archive search limit\n",
"        time.sleep(1.01)\n",
"        # define slice data variables from time_slices\n",
"        start_time = slice_data['start_time']\n",
"        end_time = slice_data['end_time']\n",
"        suffix = slice_data['suffix']\n",
"        \n",
"        # define tweepy query with twitter handle of current sen, excluding retweets\n",
"        query = f'from:{handle} -is:retweet'\n",
"        \n",
"        # create empty tweetlist that will be filled with tweets of current sen\n",
"        tweetlist = []\n",
"        \n",
"        # statusmsg\n",
"        msg = f'trying to fetch tweets for {handle}{suffix}'\n",
"        print(msg)\n",
"        \n",
"        # Fetch tweets using tweepy Twitter API v2 pagination with retry mechanism\n",
"        max_attempts = 3  # maximum number of attempts to fetch tweets for a slice\n",
"        attempt = 1\n",
"        \n",
"        while attempt <= max_attempts:\n",
"            try:\n",
"                # NOTE: max_results/flatten cap the fetch at 20 tweets per slice\n",
"                tweets = tweepy.Paginator(client.search_all_tweets,\n",
"                                          query=query,\n",
"                                          tweet_fields=tweet_fields,\n",
"                                          start_time=start_time,\n",
"                                          end_time=end_time,\n",
"                                          max_results=20).flatten(20)\n",
"                \n",
"                # for each tweet returned...\n",
"                for tweet in tweets:\n",
"                    # ... add that tweet to tweetlist\n",
"                    tweetlist.append(tweet)\n",
"                \n",
"                # If no tweets were fetched for the current time slice, skip to\n",
"                # the next slice (break leaves the retry loop; outer for moves on)\n",
"                if len(tweetlist) == 0:\n",
"                    msg = f'return empty in {handle}{suffix} - from {start_time} to {end_time}'\n",
"                    print(msg)\n",
"                    break\n",
"                \n",
"                # convert to dataframe\n",
"                tweet_df = pd.DataFrame(tweetlist)\n",
"                \n",
"                # add handle column as API only provides user-ids\n",
"                tweet_df['handle'] = handle\n",
"                \n",
"                ## Extract referenced_tweet info from column\n",
"                tweet_df['referenced_tweet_type'] = None\n",
"                tweet_df['referenced_tweet_id'] = None\n",
"                \n",
"                # if cond. because in some cases column doesn't exist\n",
"                if 'referenced_tweets' in tweet_df.columns:\n",
"                    for index, row in tweet_df.iterrows():\n",
"                        referenced_tweets = row['referenced_tweets']\n",
"                        \n",
"                        if isinstance(referenced_tweets, list) and len(referenced_tweets) > 0:\n",
"                            # only the first referenced tweet is recorded\n",
"                            referenced_tweet = referenced_tweets[0]\n",
"                            tweet_df.at[index, 'referenced_tweet_type'] = referenced_tweet['type']\n",
"                            tweet_df.at[index, 'referenced_tweet_id'] = referenced_tweet['id']\n",
"                \n",
"                ## Check if tweet-text contains keyword\n",
"                # if cond. because in some cases column doesn't exist\n",
"                if 'text' in tweet_df.columns:\n",
"                    tweet_df['contains_keyword'] = (tweet_df['text'].str.findall('|'.join(keywords))\n",
"                                                    .str.join(',')\n",
"                                                    .replace('', 'none'))\n",
"                \n",
"                ## Save two versions of the dataset, one with all fields and one without dict fields\n",
"                # define filepaths\n",
"                csv_path = f'data/tweets/{handle}{suffix}.csv'\n",
"                csv_path2 = f'data/tweets/{handle}{suffix}-LONG.csv'\n",
"                # save LONG csv\n",
"                tweet_df.to_csv(csv_path2)\n",
"                # FIX: drop whichever of the dict-valued columns actually exist.\n",
"                # The previous all() check silently kept every column unless all\n",
"                # three were present at once.\n",
"                drop_cols = [c for c in ('context_annotations', 'entities', 'referenced_tweets')\n",
"                             if c in tweet_df.columns]\n",
"                tweet_df = tweet_df.drop(columns=drop_cols)\n",
"                # save short csv\n",
"                tweet_df.to_csv(csv_path)\n",
"                \n",
"                # break out of the retry loop since fetching tweets was successful\n",
"                break\n",
"            \n",
"            # FIX: tweepy 4.x (pinned: 4.14.0) removed tweepy.TweepError; use\n",
"            # TooManyRequests for 429s and TweepyException for everything else.\n",
"            except tweepy.TooManyRequests as e:\n",
"                # rate limit exceeded: sleep until the advertised reset time\n",
"                reset_time = int(e.response.headers['x-rate-limit-reset'])\n",
"                # clamp to 0 in case the reset moment already passed; +5 s buffer\n",
"                wait_time = max(reset_time - time.time() + 5, 0)\n",
"                \n",
"                print(f\"Rate limit exceeded. Sleeping for {wait_time} seconds.\")\n",
"                time.sleep(wait_time)\n",
"                \n",
"                attempt += 1  # increment the attempt counter\n",
"            except tweepy.TweepyException as e:\n",
"                print(f\"Error occurred: {e}\")\n",
"                break"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "48688858-104d-4f2f-87b8-ed103f34b4e8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Politics & Society",
"language": "python",
"name": "polsoc"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
},
"toc-autonumbering": true,
"toc-showmarkdowntxt": false
},
"nbformat": 4,
"nbformat_minor": 5
}