Compare commits

...

43 Commits

Author SHA1 Message Date
Michael Beck
89b4755c65 adds link to full package to readme 2023-08-31 01:23:38 +02:00
Michael Beck
01e58b1b99 adds html files to gitignore 2023-08-31 01:21:31 +02:00
Michael Beck
d0fcefedf4 deletes data/OUT/profiles/CovTweets.html 2023-08-31 01:20:39 +02:00
Michael Beck
71cf907249 deletes data/OUT/profiles/AllTweets.html 2023-08-31 01:20:31 +02:00
Michael Beck
a9018fedee REALLY corrects the filetree 2023-08-30 21:54:13 +02:00
Michael Beck
d94a93295f corrects filetree 2023-08-30 21:53:05 +02:00
Michael Beck
80b63b39df adds readme 2023-08-30 21:45:38 +02:00
Michael Beck
d8136909c8 corrects import of own functions that didn't work anymore because of a newer python version. 2023-08-30 21:45:27 +02:00
Michael Beck
1c6d9d5415 cleans and renames files 2023-08-30 21:18:55 +02:00
Michael Beck
4e08cde317 finishes classification scripts 2023-08-16 10:06:16 +02:00
Michael Beck
2535683cdc finishes classification scripts 2023-08-15 14:51:28 +02:00
Michael Beck
8f744a08be adds final counter keywords 2023-08-15 14:30:40 +02:00
Michael Beck
df5fd51a5f repairs stupid 2023-08-15 14:30:13 +02:00
Michael Beck
3d4f559d2d adds model training stats 2023-08-15 14:29:42 +02:00
Michael Beck
2e067b6a64 adds both classification scripts. Corrects inclusion of CleanTweets functions. 2023-08-15 14:23:56 +02:00
Michael Beck
7a16526a97 adds dataset profiles 2023-08-15 14:20:13 +02:00
Michael Beck
b89b5969ec adds type-error controls 2023-08-15 14:19:33 +02:00
Michael Beck
7c6b618272 adds both training scripts and evaluation files of topic classification 2023-08-15 14:19:08 +02:00
Michael Beck
90aa58239c adds generation of model-training dataset 2023-08-14 15:37:30 +02:00
Michael Beck
1beff96ae9 adds model training code 2023-08-14 15:37:05 +02:00
Michael Beck
881d3d6d6d adds tweet-text-cleaning functions 2023-08-14 15:36:46 +02:00
Michael Beck
5a63c478e9 adds dataset profiler 2023-08-08 15:32:12 +02:00
Michael Beck
ed61d52182 adds files to gitignore 2023-08-08 00:07:42 +02:00
Michael Beck
a26d150060 renames pretest classification file 2023-08-08 00:06:18 +02:00
Michael Beck
d791e4a293 adds classification file. adds removal of empty tweets after transformation for classification preparation 2023-08-08 00:04:14 +02:00
Michael Beck
d57b7a31b7 adds more counter keywords 2023-08-08 00:03:30 +02:00
Michael Beck
13d80124d3 adds lines with counterKeywords to remove non-covid tweets 2023-08-07 23:45:11 +02:00
Michael Beck
3de6d8f3ec adds tweetLen column, converts keywords to lowercase and removes certain keywords 2023-08-07 23:07:29 +02:00
Michael Beck
899a99ba72 adds CleanTweets functions, creates Graphs 2023-07-07 18:18:51 +02:00
Michael Beck
817ec48478 corrects a lot of mistakes.
adds keywords
adds analyze.py
adds pretest
adds pretest ids
2023-07-07 00:16:44 +02:00
Michael Beck
c64904a64d adds cleanTweets.py 2023-06-26 23:51:32 +02:00
Michael Beck
82830f13e2 updates "README.md" 2023-06-26 13:12:16 +02:00
Michael Beck
8c8a191952 adds "README.md" 2023-06-26 13:12:04 +02:00
Michael Beck
71e10a62d3 adds senator data scraper 2023-06-23 23:53:31 +02:00
Michael Beck
90d5501ec8 adds comment 2023-06-23 23:53:01 +02:00
Michael Beck
340cca017c corrects comments 2023-06-23 20:59:14 +02:00
Michael Beck
791cebc297 adds log folder 2023-06-23 20:49:35 +02:00
Michael Beck
6241484e83 adds gitkeep 2023-06-23 20:47:32 +02:00
Michael Beck
d73da8db98 Merge remote-tracking branch 'origin/master' 2023-06-23 20:42:58 +02:00
Michael Beck
6220c1841d deletes "collect.ipynb" 2023-06-23 20:41:56 +02:00
Michael Beck
27746cd886 changes folder structure of in- and output files 2023-06-23 20:39:40 +02:00
Michael Beck
02c3d055bd adds comments. changes logfile format to .log 2023-06-23 20:34:46 +02:00
Michael Beck
dc2e17cc2f adds docstrings to functions. adds several comments. 2023-06-23 20:26:16 +02:00
36 changed files with 3046 additions and 1203 deletions

4
.gitignore vendored
View File

@@ -1,7 +1,9 @@
**/log*
**/*.log
**/*lock*
**/*-slice*.csv
**/*.zip
**/*.html
**/*.htm
/ALL-SENATORS-LONG.csv
/ALL-SENATORS.csv
/collect2.py

1
.vscode/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/settings.json

123
ClassificationFake.py Normal file
View File

@@ -0,0 +1,123 @@
import numpy as np
import pandas as pd
from datetime import datetime
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from datasets import load_dataset
from transformers.pipelines.pt_utils import KeyDataset
#%%
# prepare & define paths
# install xformers (pip install xformers) for better performance
###################
# Setup directories
# WD Michael
wd = "/home/michael/Documents/PS/Data/collectTweets/"
# WD Server
# wd = '/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/'
# datafile input directory
di = "data/IN/"
# Tweet-datafile output directory
ud = "data/OUT/"
# Name of file that all senator data will be written to
senCSV = "Tweets-Classified-Topic-Results.csv"
# Name of Classify datafile
senCSVClassifiedPrep = "Tweets-Classified-Fake-Prep.csv"
senCSVClassifiedResult = "Tweets-Classified-Fake-Results.csv"
# don't change this one
senCSVPath = wd + ud + senCSV
senCSVcClassificationPrepPath = wd + ud + senCSVClassifiedPrep
senCSVcClassificationResultPath = wd + ud + senCSVClassifiedResult
import sys
funs = wd+"funs"
sys.path.insert(1, funs)
import CleanTweets
#%%
# get dataframe
dfClassify = pd.read_csv(senCSVPath, dtype=(object))
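# the topic labels written by ClassificationTopic.py are inverted (see the note at the end of that script); swap them back before filtering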
def encode_labels(label):
    if label == 'True':
        return 'False'
    elif label == 'False':
        return 'True'
    return 0
dfClassify['output_label_topicCov'] = dfClassify['output_label_topicCov'].apply(encode_labels)
dfClassify.to_csv("/home/michael/Documents/PS/Data/collectTweets/data/OUT/Tweets-Classified-Topic-Results.csv", encoding='utf-8')
dfClassify = dfClassify[dfClassify['output_label_topicCov']=='True']
# dataframe from csv
dfClassify['fake'] = False
#%%
# https://huggingface.co/bvrau/covid-twitter-bert-v2-struth
# HowTo:
# https://huggingface.co/docs/transformers/main/en/model_doc/bert#transformers.BertForSequenceClassification
# https://stackoverflow.com/questions/75932605/getting-the-input-text-from-transformers-pipeline
pipe = pipeline("text-classification", model="/home/michael/Documents/PS/Data/collectTweets/models/FakeClass/2023-08-15_14-35-43/")
model = AutoModelForSequenceClassification.from_pretrained("/home/michael/Documents/PS/Data/collectTweets/models/FakeClass/2023-08-15_14-35-43/")
tokenizer = AutoTokenizer.from_pretrained("/home/michael/Documents/PS/Data/collectTweets/models/FakeClass/2023-08-15_14-35-43/")
# Source https://www.kaggle.com/code/daotan/tweet-analysis-with-transformers-bert
dfClassify['cleanContent'] = dfClassify['rawContent'].apply(CleanTweets.preprocess_text)
#%%
# remove empty rows
dfClassify.cleanContent.replace('',np.nan,inplace=True)
dfClassify.dropna(subset=['cleanContent'], inplace=True)
#%%
timeStart = datetime.now() # start counting execution time
max_length = 128
dfClassify['input_ids'] = dfClassify['cleanContent'].apply(lambda x: tokenizer(x, max_length=max_length, padding="max_length",)['input_ids'])
#train.rename(columns={'target': 'labels'}, inplace=True)
#train.head()
# %%
dfClassify.to_csv(senCSVcClassificationPrepPath, encoding='utf-8', columns=['id', 'cleanContent'])
#%%
dataset = load_dataset("csv", data_files=senCSVcClassificationPrepPath)
# %%
#from tqdm.auto import tqdm
#for out in tqdm(pipe(KeyDataset(dataset['train'], "cleanContent"))):
# print(out)
#%%
output_labels = []
output_score = []
for out in pipe(KeyDataset(dataset['train'], "cleanContent"), batch_size=8, truncation="only_first"):
    output_labels.append(out['label'])
    output_score.append(out['score'])
# [{'label': 'POSITIVE', 'score': 0.9998743534088135}]
# Exactly the same output as before, but the content are passed
# as batches to the model
# %%
dfClassify['output_label_fake'] = output_labels
dfClassify['output_score_fake'] = output_score
timeEnd = datetime.now()
timeTotal = timeEnd - timeStart
timePerTweet = timeTotal / len(dfClassify)  # average time per classified tweet
print(f"Total classification execution time: {timeTotal}")
print(f"Time per tweet classification: {timePerTweet}")
# %%
dfClassify.to_csv(senCSVcClassificationResultPath, encoding='utf-8')
# %%

123
ClassificationTopic.py Normal file
View File

@@ -0,0 +1,123 @@
import numpy as np
import pandas as pd
from datetime import datetime
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from datasets import load_dataset
from transformers.pipelines.pt_utils import KeyDataset
#%%
# prepare & define paths
# install xformers (pip install xformers) for better performance
###################
# Setup directories
# WD Michael
wd = "/home/michael/Documents/PS/Data/collectTweets/"
# WD Server
# wd = '/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/'
# datafile input directory
di = "data/IN/"
# Tweet-datafile output directory
ud = "data/OUT/"
# Name of file that all senator data will be written to
senCSV = "SenatorsTweets-OnlyCov.csv"
# Name of Classify datafile
senCSVClassifiedPrep = "Tweets-Classified-Topic-Prep.csv"
senCSVClassifiedResult = "Tweets-Classified-Topic-Results.csv"
# don't change this one
senCSVPath = wd + ud + senCSV
senCSVcClassificationPrepPath = wd + ud + senCSVClassifiedPrep
senCSVcClassificationResultPath = wd + ud + senCSVClassifiedResult
import sys
funs = wd+"funs"
sys.path.insert(1, funs)
import CleanTweets
#%%
# get dataframe
dfClassify = pd.read_csv(senCSVPath, dtype=(object))
# dataframe from csv
dfClassify['fake'] = False
#%%
# https://huggingface.co/bvrau/covid-twitter-bert-v2-struth
# HowTo:
# https://huggingface.co/docs/transformers/main/en/model_doc/bert#transformers.BertForSequenceClassification
# https://stackoverflow.com/questions/75932605/getting-the-input-text-from-transformers-pipeline
pipe = pipeline("text-classification", model="/home/michael/Documents/PS/Data/collectTweets/models/CovClass/2023-08-15_05-56-50/")
model = AutoModelForSequenceClassification.from_pretrained("/home/michael/Documents/PS/Data/collectTweets/models/CovClass/2023-08-15_05-56-50/")
tokenizer = AutoTokenizer.from_pretrained("/home/michael/Documents/PS/Data/collectTweets/models/CovClass/2023-08-15_05-56-50/")
# Source https://www.kaggle.com/code/daotan/tweet-analysis-with-transformers-bert
dfClassify['cleanContent'] = dfClassify['rawContent'].apply(CleanTweets.preprocess_text)
#%%
# remove empty rows
dfClassify.cleanContent.replace('',np.nan,inplace=True)
dfClassify.dropna(subset=['cleanContent'], inplace=True)
#%%
timeStart = datetime.now() # start counting execution time
max_length = 128
dfClassify['input_ids'] = dfClassify['cleanContent'].apply(lambda x: tokenizer(x, max_length=max_length, padding="max_length",)['input_ids'])
#train.rename(columns={'target': 'labels'}, inplace=True)
#train.head()
# %%
dfClassify.to_csv(senCSVcClassificationPrepPath, encoding='utf-8', columns=['id', 'cleanContent'])
#%%
dataset = load_dataset("csv", data_files=senCSVcClassificationPrepPath)
# %%
#from tqdm.auto import tqdm
#for out in tqdm(pipe(KeyDataset(dataset['train'], "cleanContent"))):
# print(out)
#%%
output_labels = []
output_score = []
for out in pipe(KeyDataset(dataset['train'], "cleanContent"), batch_size=8, truncation="only_first"):
    output_labels.append(out['label'])
    output_score.append(out['score'])
# [{'label': 'POSITIVE', 'score': 0.9998743534088135}]
# Exactly the same output as before, but the content are passed
# as batches to the model
# %%
dfClassify['output_label_topicCov'] = output_labels
dfClassify['output_score_topicCov'] = output_score
timeEnd = datetime.now()
timeTotal = timeEnd - timeStart
timePerTweet = timeTotal / len(dfClassify)  # average time per classified tweet
print(f"Total classification execution time: {timeTotal}")
print(f"Time per tweet classification: {timePerTweet}")
# %%
dfClassify.to_csv(senCSVcClassificationResultPath, encoding='utf-8')
# %%
## corrections
def encode_labels(label):
    if label == 'real':
        return 'True'
    elif label == 'fake':
        return 'False'
    return 0
dfClassify['output_label_topicCov'] = dfClassify['output_label_topicCov'].apply(encode_labels)
dfClassify.to_csv(senCSVcClassificationResultPath, encoding='utf-8')
# labels are still inverted at this point; corrected in ClassificationFake.py

131
README.md Normal file
View File

@@ -0,0 +1,131 @@
# Requirements
- python 3.10+
- snscrape 0.6.2.20230321+ (see git repo in this folder)
- transformers 4.31.0
- numpy 1.23.5
- pandas 2.0.3
- scikit-learn 1.3.0
- torch 2.0.1
# About
This collection of scripts scrapes tweets posted by US senators between 2020-01-01T00:00:00Z and 2023-01-03T00:00:00Z, scrapes the senators' account data, prepares the tweets for NLP-model training, and trains two models that (1) classify a tweet's topic as covid or non-covid and (2) classify tweets as either "fake news" or "non-fake news".
Training only works with a prepared dataset in which the tweets are pre-classified.
More information can be found in the comments of the individual scripts.
Due to time constraints, most of the code is procedural and ugly, but effective.
# How to
Tested on Ubuntu 22.04.
If needed, the virtual environment can be exported and sent to you.
All files in the folder data/IN have to exist in order to execute the scripts.
Execute in the following order (a minimal driver sketch follows the list):
01 collect.py (see the comments in the script for further info on scraping)
02 collectSenData.py
03 cleanTweets.py
04 preTestClassification.py
05 trainTopic.py
06 trainFake.py
07 ClassificationTopic.py
08 ClassificationFake.py
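The same order as a minimal driver sketch (an illustration only, not part of the repository: it assumes all scripts sit in the repository root and the virtual environment is active):
```
import subprocess
import sys

# Run the pipeline steps in order; stop on the first failing script.
STEPS = [
    "collect.py",
    "collectSenData.py",
    "cleanTweets.py",
    "preTestClassification.py",
    "trainTopic.py",
    "trainFake.py",
    "ClassificationTopic.py",
    "ClassificationFake.py",
]

for script in STEPS:
    print(f"running {script}")
    subprocess.run([sys.executable, script], check=True)
```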
# Files & Folders
Datafiles are not included in the repository but can be found in the full package that can be downloaded from [here](https://ncloud.mischbeck.de/s/T4QcMDSfYSkadYC) (password protected).
```
├── data
│   ├── IN
│   │   ├── counterKeywordsFinal.txt
│   │   ├── counterKeywords.txt
│   │   ├── keywords-raw.txt
│   │   ├── keywords.txt
│   │   ├── own_keywords.txt
│   │   ├── pretest-tweets_fake.txt contains tweet ids for pretest
│   │   ├── pretest-tweets_not_fake.txt contains tweet ids for pretest
│   │   └── senators-raw.csv senator datafile
│   ├── OUT
│   │   ├── ALL-SENATORS-TWEETS.csv
│   │   ├── graphs
│   │   │   ├── Timeline.png
│   │   │   ├── Wordcloud-All.png
│   │   │   └── Wordcloud-Cov.png
│   │   ├── Pretest-Prep.csv
│   │   ├── Pretest-Results.csv
│   │   ├── Pretest-SENATORS-TWEETS.csv
│   │   ├── profiles dataset profiles
│   │   │   ├── AllTweets.html
│   │   │   └── CovTweets.html
│   │   ├── SenatorsTweets-Final.csv
│   │   ├── SenatorsTweets-OnlyCov.csv
│   │   ├── SenatorsTweets-train-CovClassification.csv
│   │   ├── SenatorsTweets-train-CovClassificationTRAIN.csv
│   │   ├── SenatorsTweets-train-CovClassification.tsv
│   │   ├── SenatorsTweets-train-FakeClassification.csv
│   │   ├── SenatorsTweets-train-FakeClassificationTRAIN.csv
│   │   ├── SenatorsTweets-train-FakeClassification.tsv
│   │   ├── SenatorsTweets-Training.csv
│   │   ├── SenatorsTweets-Training_WORKING-COPY.csv
│   │   ├── topClass-PRETEST-Prep.csv
│   │   ├── topClass-PRETEST-Results.csv
│   │   ├── Tweets-All-slices.zip
│   │   ├── Tweets-Classified-Fake-Prep.csv
│   │   ├── Tweets-Classified-Fake-Results.csv
│   │   ├── Tweets-Classified-Prep.csv
│   │   ├── Tweets-Classified-Topic-Prep.csv
│   │   ├── Tweets-Classified-Topic-Results.csv
│   │   └── Tweets-Stub.csv
├── funs
│   ├── CleanTweets.py multiple functions to clean tweet contents for NLP processing
│   ├── ClearDupes.py function for deletion of duplicate keywords
│   ├── __init__.py
│   ├── Scrape.py scraper functions to be used for multiprocessing
│   └── TimeSlice.py time slice script to slice the time span in 24 slices, speeds up scraping through multiprocessing
├── log logs of the scraping process
│   ├── log_2023-06-23_21-06-10_err.log
│   ├── log_2023-06-23_21-06-10.log
│   └── log_2023-06-23_21-06-10_missing.log
├── models
│   ├── CovClass Covid tweet classification model
│   │   └── 2023-08-15_05-56-50
│   │       ├── 2023-08-15_05-56-50.csv training output
│   │       ├── config.json
│   │       ├── pytorch_model.bin
│   │       ├── special_tokens_map.json
│   │       ├── tokenizer_config.json
│   │       ├── tokenizer.json
│   │       └── vocab.txt
│   └── FakeClass Fake tweet classification model
│       └── 2023-08-15_14-35-43
│           ├── 2023-08-15_14-35-43.csv training output
│           ├── config.json
│           ├── pytorch_model.bin
│           ├── special_tokens_map.json
│           ├── tokenizer_config.json
│           ├── tokenizer.json
│           └── vocab.txt
├── snscrape contains snscrape 0.6.2.20230321+ git repo
├── ClassificationFake.py classifies tweets as fake or non-fake, saves:
│ Tweets-Classified-Fake-Prep.csv - prepared classification input
│ Tweets-Classified-Fake-Results.csv - Tweets-Classified-Topic-Results.csv with fake classification results
├── ClassificationTopic.py classifies tweet topic, saves:
│ Tweets-Classified-Topic-Prep.csv - prepared classification input
│ Tweets-Classified-Topic-Results.csv - SenatorsTweets-OnlyCov.csv with topic classification results
├── cleanTweets.py Curates keywordlists
│ Merges senator and tweet datasets
│ Creates multiple datasets:
│ SenatorsTweets-Final.csv - all tweets with keyword columns
│ SenatorsTweets-OnlyCov.csv - only covid tweets, filtered by keywordlist
│ SenatorsTweets-Training.csv - training dataset, containing ~1800 randomly selected tweets from SenatorsTweets-OnlyCov.csv
├── collect.py scrapes tweets, saves to ALL-SENATORS-TWEETS.csv
├── collectSenData.py scrapes senator account data, saves to ALL-SENATORS.csv
├── createGraphs.py creates wordcloud & timeline graphs
├── preTestClassification.py pretest script that uses bvrau/covid-twitter-bert-v2-struth to analyze 100 preclassified tweets
├── profiler.py creates dataset profiles
├── README.md readme
├── trainFake.py training script for the fake tweet classification model
└── trainTopic.py training script for the tweet topic classification model
```

233
cleanTweets.py Normal file
View File

@@ -0,0 +1,233 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 26 20:36:43 2023
@author: michael
"""
import pandas as pd
# import pyreadstat
import numpy as np
import sys
# Seed for training dataset generation
seed = 86431891
###################
# Setup directories
# WD Michael
wd = "/home/michael/Documents/PS/Data/collectTweets/"
# WD Server
# wd = '/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/'
# datafile input directory
di = "data/IN/"
# Tweet-datafile output directory
ud = "data/OUT/"
# Name of file that all senator data will be written to
senCSV = "ALL-SENATORS-TWEETS.csv"
# Name of the senator datafile (input)
senDataset = "senators-raw.csv"
# Name of new datafile generated
senCSVc = "SenatorsTweets-Final"
senCSVcCov = "SenatorsTweets-OnlyCov"
senCSVcTrain = "SenatorsTweets-Training"
# don't change this one
senCSVPath = wd + ud + senCSV
senCSVcPath = wd + ud + senCSVc + ".csv"
senCSVcCovPath = wd + ud + senCSVcCov + ".csv"
senCSVcTrainPath = wd + ud + senCSVcTrain + ".csv"
senSAVcPath = wd + ud + senCSV + ".sav"
senDTAcPath = wd + ud + senCSV + ".dta"
senDatasetPath = wd + di + senDataset
df = pd.read_csv(senCSVPath, dtype=(object))
## Import own functions
funs = wd+"funs"
sys.path.insert(1, funs)
from ClearDupes import deDupe
mixed_columns = df.columns[df.nunique() != len(df)]
print(mixed_columns)
df = df.drop(columns=['user.url', 'cashtags', 'coordinates', 'hashtags', 'Unnamed: 0', 'user.verified', 'lang', 'renderedContent', 'retweetedTweet', 'sourceLabel', 'sourceUrl', 'source'], index=1)
del df[df.columns[0]] # remove first col
df['user.created'] = pd.to_datetime(df['user.created'])
df['date'] = pd.to_datetime(df['date'])
#%%
# sort and generate id
df = df.sort_values(by='date').reset_index() # sort df by date before generating id
df["tid"] = df.index + 1 # create id column
#%%
# move id column to front
cols = list(df.columns.values) # Make a list of all of the columns in the df
cols.pop(cols.index('tid')) # Remove id from list
#cols.pop(cols.index('user')) # Remove id from list
df = df[['tid']+cols] # Create new dataframe with ordered colums
#%%
###################
# Keywords
# read additional keywords from a file and write to list.
keywords = []
# Remove duplicate Keywords and save all non-duplicates to 'data/keywords.txt'
deDupe(f"{di}keywords-raw.txt", f"{di}keywords.txt")
# Read the keywords from a file
with open(f"{di}own_keywords.txt", "r") as file:
lines = file.readlines()
for line in lines:
keyword = line.strip() # Remove the newline character
keywords.append(keyword)
# read the raw keyword list and append its entries as well
with open(f"{di}keywords-raw.txt", "r") as file:
    lines = file.readlines()
    for line in lines:
        keyword = line.strip()  # Remove the newline character
        keywords.append(keyword)
# delete keywords ppe and china that lead to too many false positives
removeWords = {'ppe', 'china'}
keywords = [x.lower() for x in keywords] # converts to lowercase which makes the search case insensitive. convert to set to speed up comparison
keywords = [item for item in keywords if item not in removeWords ] # removes words
with open(f"{di}keywords.txt", "w") as file:
print("read keyword files")
for line in keywords:
file.write(f'{line}\n')
# counter keywords
# Read the keywords from a file
counterKeywords = []
with open(f"{di}counterKeywords.txt", "r") as file:
lines = file.readlines()
for line in lines:
counterKeyword = line.strip() # Remove the newline character
counterKeywords.append(counterKeyword)
counterKeywords = set([x.lower() for x in counterKeywords]) # converts to lowercase which makes the search case insensitive. convert to set to speed up comparison
with open(f"{di}counterKeywordsFinal.txt", "w") as file:
print("read keyword files")
for line in counterKeywords:
file.write(f'{line}\n')
#%%
# overwrite keyword column
df['keywords'] = np.nan
df['keywords'] = (
df['rawContent'].str.lower().str.findall('|'.join(keywords)).str.join(',').replace('', np.nan) # str.lower to make search case-insensitive
)
df['counterKeywords'] = np.nan
df['counterKeywords'] = (
df['rawContent'].str.lower().str.findall('|'.join(counterKeywords)).str.join(',').replace('', np.nan) # str.lower to make search case-insensitive
)
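# note: the joined pattern is a plain regex alternation without word boundaries, so e.g. 'corona' also matches inside 'coronavirus'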
#%%
# create boolean contains_keyword column
df['contains_keyword'] = True
df['contains_counterKeyword'] = True
mask = (df['keywords'].isna())  # select rows without any keyword match
df.loc[mask,'contains_keyword'] = False  # mark those rows as not containing a keyword
mask = (df['counterKeywords'].isna())  # select rows without any counter-keyword match
df.loc[mask,'contains_counterKeyword'] = False  # mark those rows as not containing a counter-keyword
#%%
pd.Series(df["user.id"]).is_unique
#%%
# Merge Datasets
# get senator data
cols = [
"name",
"id",
"state_short",
"party",
"class",
"ideology",
"start_serving",
"end_serving",
"time_in_office",
"not_in_office",
"last_congress",
"vote_share",
"next_closest_share",
"election_year",
"twitter_handle",
"alt_handle",
"date_of_birth",
"female",
"ethnicity",
"edu_level",
"edu_information",
"occup_level"]
dfSenA = pd.read_csv(senDatasetPath, index_col=False, sep = ",", usecols=cols).reset_index()
dfSenB = pd.read_csv(senDatasetPath, index_col=False, sep = ",", usecols=cols).reset_index()
dfSenA['alt'] = False
dfSenB['alt'] = True
dfSenA = dfSenA.rename(columns={'twitter_handle': 'user.username'})
dfSenB = dfSenB.rename(columns={'alt_handle': 'user.username'})
dfSenB = dfSenB.dropna(axis=0, subset=['user.username'])
dfSenA['user.username'] = dfSenA['user.username'].apply(str.lower)
dfSenB['user.username'] = dfSenB['user.username'].apply(str.lower)
df['user.username'] = df['user.username'].apply(str.lower)
dfSenAll = pd.concat([dfSenA, dfSenB]).reset_index()
# %%
# see if all senators are present in file
dfAll = df.merge(dfSenAll, how='left',on='user.username')
#check merge
unique_usernames = dfAll.loc[dfAll['name'].isnull(), 'user.username'].unique()
print(unique_usernames)
# senatorisakson was dropped, is ok
#%%
# create covidtweets csv
dfCov = dfAll[dfAll['contains_counterKeyword']==False]
dfCov = dfCov[dfCov['contains_keyword']==True]
dfCov = dfCov.drop(columns=['contains_counterKeyword', 'counterKeywords'])
#%%
# create column with tweet length
dfCov['tweetLen'] = dfCov['rawContent'].str.len().copy()
# reset df index and write to id column
dfCov.reset_index(drop=True, inplace=True)
#%%
# Export to csv, sav and dta
dfAll.to_csv(senCSVcPath, encoding='utf-8')
dfCov.to_csv(senCSVcCovPath, encoding='utf-8', index_label = 'id')
# pyreadstat.write_sav(df, senSAVcPath) # commented out because file generated is 11 gb
# =============================================================================
# dfAll.rename(columns=lambda x: x.replace('.', '_'), inplace=True)
# dfAllStata = dfAll.rename(columns={'class':'class_'})
# dfAllStata.to_stata(senDTAcPath, version=119, convert_strl=['alt'], convert_dates={'date': 'td', 'user_created': 'td'})
# print(dfAllStata.columns)
# =============================================================================
# df.id.str.len().value_counts()
# =============================================================================
# %%
# Create training dataset: draw 1800 random covid tweets
np.random.seed(seed)
dfTrain = dfCov.loc[np.random.choice(dfCov.index, 1800, replace=False)]
dfTrain = dfTrain[['tid', 'date', 'rawContent']]
dfTrain['topicCovid'] = True
dfTrain['fake'] = False
dfTrain.to_csv(senCSVcTrainPath, encoding='utf-8')

960
collect.ipynb
View File

@@ -1,960 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "15573d92-f6a7-49d4-9c01-fff33d23be8e",
"metadata": {},
"source": [
"# Tweet Collecting\n",
"## Requirements\n",
"- tweepy-4.14.0\n",
"- pandas-2.0\n",
"- numpy-1.24.3\n",
"\n",
"## Preparations & Config\n"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "3290c840-961c-4e2c-a107-4ccd541d151b",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import os\n",
"import tweepy\n",
"import pandas as pd\n",
"import numpy as np\n",
"import glob\n",
"import time\n",
"\n",
"# Define time period of interest\n",
"time_slices = [\n",
" {\n",
" \"start_time\": \"2020-01-01T00:00:00Z\",\n",
" \"end_time\": \"2020-06-01T00:00:00Z\",\n",
" \"suffix\": \"-slice1\"\n",
" },\n",
" {\n",
" \"start_time\": \"2020-06-01T00:00:01Z\",\n",
" \"end_time\": \"2021-01-01T00:00:00Z\",\n",
" \"suffix\": \"-slice2\"\n",
" },\n",
" {\n",
" \"start_time\": \"2021-01-01T00:00:01Z\",\n",
" \"end_time\": \"2021-06-01T00:00:00Z\",\n",
" \"suffix\": \"-slice3\"\n",
" },\n",
" {\n",
" \"start_time\": \"2021-06-01T00:00:01Z\",\n",
" \"end_time\": \"2023-01-03T00:00:00Z\",\n",
" \"suffix\": \"-slice4\"\n",
" }\n",
"]\n",
"\n",
"tweet_fields = [\n",
"\t\"id\",\n",
"\t\"text\",\n",
"\t\"attachments\",\n",
"\t\"author_id\",\n",
"\t\"context_annotations\",\n",
"\t\"conversation_id\",\n",
"\t\"created_at\",\n",
"\t\"entities\",\n",
"\t\"geo\",\n",
"\t\"lang\",\n",
"\t\"possibly_sensitive\",\n",
"\t\"public_metrics\",\n",
"\t\"referenced_tweets\",\n",
"\t\"reply_settings\",\n",
"\t\"source\",\n",
"\t\"withheld\",\n",
"\t]\n",
"\n",
"## Setup directories\n",
"# WD Michael\n",
"# wd = \"/home/michael/Documents/PS/Data/collectTweets/\"\n",
"\n",
"# WD Server\n",
"wd = \"/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection\"\n",
"\n",
"# WD Josie\n",
"# wd = \"/home/michael/Documents/PS/Data/\"\n",
"\n",
"# WD Sam\n",
"# wd = \"/home/michael/Documents/PS/Data/\"\n",
"\n",
"# Tweet-datafile directory\n",
"td = \"data/tweets/\""
]
},
{
"cell_type": "markdown",
"id": "6782290c-7e14-4393-8caa-c78a2b326d85",
"metadata": {},
"source": [
"# Authenticate to Twitter"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "7ac9b603-e638-4ebb-95df-e0f8678f298e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"## Setup Api-connection\n",
"bearer_token = \"AAAAAAAAAAAAAAAAAAAAAMVDlQEAAAAAal9f5uZrM12CVPA4f4jr4mGH5Oc%3DuTg1Vd0YKYMwraA7ibX6LiGyd337OXkm3JwudEX7vatruswmoc\"\n",
"client = tweepy.Client(bearer_token, return_type = dict, wait_on_rate_limit = True)"
]
},
{
"cell_type": "markdown",
"id": "e81c4d49-242c-4b51-8e2a-e2bbfdae6877",
"metadata": {},
"source": [
"## Import Keywords\n",
"Keywords from:\n",
"* Chen, E., Lerman, K., & Ferrara, E. (2020). Tracking Social Media Discourse About the COVID-19 Pandemic: Development of a Public Coronavirus Twitter Data Set. JMIR Public Health and Surveillance, 6(2), e19273. https://doi.org/10.2196/19273\n",
"Line 80 and following:\n",
"* Lamsal, R. (2020). Coronavirus (COVID-19) Tweets Dataset [Data set]. IEEE. https://ieee-dataport.org/open-access/coronavirus-covid-19-tweets-dataset"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1d4af102-30ae-4c73-ae9c-333efb34e3f1",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"['Coronavirus',\n",
" 'Koronavirus',\n",
" 'Corona',\n",
" 'CDC',\n",
" 'Wuhancoronavirus',\n",
" 'Wuhanlockdown',\n",
" 'Ncov',\n",
" 'Wuhan',\n",
" 'N95',\n",
" 'Kungflu',\n",
" 'Epidemic',\n",
" 'outbreak',\n",
" 'Sinophobia',\n",
" 'China',\n",
" 'covid-19',\n",
" 'corona virus',\n",
" 'covid',\n",
" 'covid19',\n",
" 'sars-cov-2',\n",
" 'COVIDー19',\n",
" 'COVD',\n",
" 'pandemic',\n",
" 'coronapocalypse',\n",
" 'canceleverything',\n",
" 'Coronials',\n",
" 'SocialDistancingNow',\n",
" 'Social Distancing',\n",
" 'SocialDistancing',\n",
" 'panicbuy',\n",
" 'panic buy',\n",
" 'panicbuying',\n",
" 'panic buying',\n",
" '14DayQuarantine',\n",
" 'DuringMy14DayQuarantine',\n",
" 'panic shop',\n",
" 'panic shopping',\n",
" 'panicshop',\n",
" 'InMyQuarantineSurvivalKit',\n",
" 'panic-buy',\n",
" 'panic-shop',\n",
" 'coronakindness',\n",
" 'quarantinelife',\n",
" 'chinese virus',\n",
" 'chinesevirus',\n",
" 'stayhomechallenge',\n",
" 'stay home challenge',\n",
" 'sflockdown',\n",
" 'DontBeASpreader',\n",
" 'lockdown',\n",
" 'lock down',\n",
" 'shelteringinplace',\n",
" 'sheltering in place',\n",
" 'staysafestayhome',\n",
" 'stay safe stay home',\n",
" 'trumppandemic',\n",
" 'trump pandemic',\n",
" 'flattenthecurve',\n",
" 'flatten the curve',\n",
" 'china virus',\n",
" 'chinavirus',\n",
" 'quarentinelife',\n",
" 'PPEshortage',\n",
" 'saferathome',\n",
" 'stayathome',\n",
" 'stay at home',\n",
" 'stay home',\n",
" 'stayhome',\n",
" 'GetMePPE',\n",
" 'covidiot',\n",
" 'epitwitter',\n",
" 'pandemie',\n",
" 'wear a mask',\n",
" 'wearamask',\n",
" 'kung flu',\n",
" 'covididiot',\n",
" 'COVID__19',\n",
" 'omicron',\n",
" 'variant',\n",
" 'vaccine',\n",
" 'travel ban',\n",
" 'corona',\n",
" 'corona',\n",
" 'coronavirus',\n",
" 'coronavirus',\n",
" 'covid',\n",
" 'covid',\n",
" 'covid19',\n",
" 'covid19',\n",
" 'covid-19',\n",
" 'covid-19',\n",
" 'sarscov2',\n",
" 'sarscov2',\n",
" 'sars cov2',\n",
" 'sars cov 2',\n",
" 'covid_19',\n",
" 'covid_19',\n",
" 'ncov',\n",
" 'ncov',\n",
" 'ncov2019',\n",
" 'ncov2019',\n",
" '2019-ncov',\n",
" '2019-ncov',\n",
" 'pandemic',\n",
" 'pandemic 2019ncov',\n",
" '2019ncov',\n",
" 'quarantine',\n",
" 'quarantine',\n",
" 'flatten the curve',\n",
" 'flattening the curve',\n",
" 'flatteningthecurve',\n",
" 'flattenthecurve',\n",
" 'hand sanitizer',\n",
" 'handsanitizer',\n",
" 'lockdown',\n",
" 'lockdown',\n",
" 'social distancing',\n",
" 'socialdistancing',\n",
" 'work from home',\n",
" 'workfromhome',\n",
" 'working from home',\n",
" 'workingfromhome',\n",
" 'ppe',\n",
" 'n95',\n",
" 'ppe',\n",
" 'n95',\n",
" 'covidiots',\n",
" 'covidiots',\n",
" 'herd immunity',\n",
" 'herdimmunity',\n",
" 'pneumonia',\n",
" 'pneumonia',\n",
" 'chinese virus',\n",
" 'chinesevirus',\n",
" 'wuhan virus',\n",
" 'wuhanvirus',\n",
" 'kung flu',\n",
" 'kungflu',\n",
" 'wearamask',\n",
" 'wearamask',\n",
" 'wear a mask',\n",
" 'vaccine',\n",
" 'vaccines',\n",
" 'vaccine',\n",
" 'vaccines',\n",
" 'corona vaccine',\n",
" 'corona vaccines',\n",
" 'coronavaccine',\n",
" 'coronavaccines',\n",
" 'face shield',\n",
" 'faceshield',\n",
" 'face shields',\n",
" 'faceshields',\n",
" 'health worker',\n",
" 'healthworker',\n",
" 'health workers',\n",
" 'healthworkers',\n",
" 'stayhomestaysafe',\n",
" 'coronaupdate',\n",
" 'frontlineheroes',\n",
" 'coronawarriors',\n",
" 'homeschool',\n",
" 'homeschooling',\n",
" 'hometasking',\n",
" 'masks4all',\n",
" 'wfh',\n",
" 'wash ur hands',\n",
" 'wash your hands',\n",
" 'washurhands',\n",
" 'washyourhands',\n",
" 'stayathome',\n",
" 'stayhome',\n",
" 'selfisolating',\n",
" 'self isolating']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"keywords = []\n",
"\n",
"# Read the keywords from a file\n",
"with open(\"data/keywords.txt\", \"r\") as file:\n",
" lines = file.readlines()\n",
" for line in lines:\n",
" keyword = line.strip() # Remove the newline character\n",
" keywords.append(keyword)\n",
"\n",
"keywords"
]
},
{
"cell_type": "markdown",
"id": "9f190608-c0a2-4e7e-9560-a03a57aa4132",
"metadata": {},
"source": [
"## Import Accounts"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a5bde33c-cc69-43ad-9b0c-4b04ce7f8a3c",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['SenAlexander', 'SenatorEnzi', 'CoryGardner', 'VP', 'SenatorIsakson', 'DougJones', 'KLoeffler', 'MarthaMcSallyAZ', 'DavidPerdueGA', 'SenPatRoberts', 'SenatorTomUdall', 'SenatorBaldwin', 'SenJohnBarrasso', 'SenatorBennet', 'MarshaBlackburn', 'SenBlumenthal', 'RoyBlunt', 'senbooker', 'JohnBoozman', 'SenatorBraun', 'SenSherrodBrown', 'SenatorBurr', 'SenatorCantwell', 'SenCapito', 'SenatorCardin', 'SenatorCarper', 'SenBobCasey', 'SenBillCassidy', 'SenatorCollins', 'ChrisCoons', 'JohnCornyn', 'SenCortezMasto', 'SenTomCotton', 'SenKevinCramer', 'MikeCrapo', 'SenTedCruz', 'SteveDaines', 'SenDuckworth', 'SenatorDurbin', 'SenJoniErnst', 'SenFettermanPA', 'SenFeinstein', 'SenatorFischer', 'SenGillibrand', 'LindseyGrahamSC', 'ChuckGrassley', 'SenatorHagerty', 'SenatorHassan', 'HawleyMO', 'MartinHeinrich', 'SenatorHick', 'maziehirono', 'SenJohnHoeven', 'SenHydeSmith', 'JimInhofe', 'SenRonJohnson', 'timkaine', 'SenMarkKelly', 'SenJohnKennedy', 'SenAngusKing', 'SenAmyKlobuchar', 'SenatorLankford', 'SenatorLeahy', 'SenMikeLee', 'SenatorLujan', 'SenLummis', 'Sen_JoeManchin', 'SenMarkey', 'SenatorMarshall', 'LeaderMcConnell', 'SenatorMenendez', 'SenJeffMerkley', 'JerryMoran', 'lisamurkowski', 'ChrisMurphyCT', 'PattyMurray', 'SenOssoff', 'SenAlexPadilla', 'senrandpaul', 'SenGaryPeters', 'senrobportman', 'SenJackReed', 'SenatorRisch', 'SenatorRomney', 'SenJackyRosen', 'SenatorRounds', 'senmarcorubio', 'SenSanders', 'sensasse', 'brianschatz', 'SenSchumer', 'SenRickScott', 'SenatorTimScott', 'SenatorShaheen', 'SenShelby', 'SenatorSinema', 'SenTinaSmith', 'SenStabenow', 'SenDanSullivan', 'SenatorTester', 'SenJohnThune', 'SenThomTillis', 'SenToomey', 'SenTuberville', 'ChrisVanHollen', 'MarkWarner', 'SenatorWarnock', 'ewarren', 'SenWhitehouse', 'SenatorWicker', 'RonWyden', 'SenToddYoung']\n",
"['LamarAlexander ', nan, 'corygardner', nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]\n"
]
}
],
"source": [
"# Get accounts & alt-accounts from Senators-Datafile\n",
"accounts = pd.read_csv(\"data/senators-raw.csv\")[\"twitter_handle\"].tolist()\n",
"alt_accounts = pd.read_csv(\"data/senators-raw.csv\")[\"alt_handle\"].tolist()\n",
"print(accounts)\n",
"print(alt_accounts)"
]
},
{
"cell_type": "markdown",
"id": "befc0fad-c803-4145-a041-570d6f894178",
"metadata": {},
"source": [
"## Collect Tweets\n",
"Loops over accounts:\n",
"* Collects Tweets of account. \n",
"* Then extracts columns public_metrics (likes aso) and referenced_tweets (indicates, whether tweet is a reply).\n",
"* Checks if tweet-text contains any of the keywords, if so, inserts the keyword(s) in a new column.\n",
"* Saves tweets of the account in a csv file \"HANDLE.csv\" and \"HANDLE-LONG.csv\" (LONG contains all given information such as annotations, that we might or might not need)\n",
"\n",
"### Problem:\n",
"_I limited the results to 20 tweets per senator._\n",
"Twitter has the following API Limit for the [search_all_tweets](https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-all) method I used: \n",
"* App rate limit (Application-only): 300 requests per 15-minute window shared among all users of your app\n",
"* App rate limit (Application-only): 1 per second shared among all users of your app\n",
"\n",
"With a limit of 300, I request 20 posts per slice, just to get a better understanding of what's happening. After trying different things out, I think that the time-slices won't be needed if we get around the problem I'm having right now:\n",
"as soon, as the rate limit is reached, tweepy stops and waits for the time to run out and start again. BUT it doesn't retry the request but starts with the next request. \n",
"I haven't found anything and my only idea to solve the problem was to generate a list of failed attempts (via try and except) and after getting all tweets letting tweepy work over that list again. \n",
"One more thing I don't understand is that, when fetching the tweets I already sent to you, I didn't have as many problems as now and the limit exceeded after 3-4 senators, even though I used a higher `max_result` and a higher `flatten value`.\n",
"\n",
"I hope that the following output speaks for itself:\n",
"```\n",
"trying to fetch tweets for SenAlexander-slice1\n",
"trying to fetch tweets for SenAlexander-slice2\n",
"trying to fetch tweets for SenAlexander-slice3\n",
"trying to fetch tweets for SenAlexander-slice4\n",
"trying to fetch tweets for SenatorEnzi-slice1\n",
"trying to fetch tweets for SenatorEnzi-slice2\n",
"trying to fetch tweets for SenatorEnzi-slice3\n",
"return empty in SenatorEnzi-slice3 - from 2021-01-01T00:00:01Z to 2021-06-01T00:00:00Z\n",
"trying to fetch tweets for SenatorEnzi-slice4\n",
"\n",
"Rate limit exceeded. Sleeping for 893 seconds.\n",
"```\n",
"\n",
"Tweepy returned no tweets because of the exceeded tweet limit, then the script tried to fetch more tweets and the error message came up.\n",
"Before changing the code below, see the other version i wrote just below the next cell (and ignore the error message below the cell as i just interrupted the execution which lead to the error message)."
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "0f842b8a-846a-4f38-8231-c1e9ccfbddf5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"trying to fetch tweets for SenAlexander-slice1\n",
"trying to fetch tweets for SenAlexander-slice2\n",
"trying to fetch tweets for SenAlexander-slice3\n",
"trying to fetch tweets for SenAlexander-slice4\n",
"trying to fetch tweets for SenatorEnzi-slice1\n",
"trying to fetch tweets for SenatorEnzi-slice2\n",
"trying to fetch tweets for SenatorEnzi-slice3\n",
"return empty in SenatorEnzi-slice3 - from 2021-01-01T00:00:01Z to 2021-06-01T00:00:00Z\n",
"<generator object Paginator.flatten at 0x7f20ebf137b0>\n",
"trying to fetch tweets for SenatorEnzi-slice4\n",
"trying to fetch tweets for CoryGardner-slice1\n",
"trying to fetch tweets for CoryGardner-slice2\n",
"trying to fetch tweets for CoryGardner-slice3\n",
"return empty in CoryGardner-slice3 - from 2021-01-01T00:00:01Z to 2021-06-01T00:00:00Z\n",
"<generator object Paginator.flatten at 0x7f20ebf13740>\n",
"trying to fetch tweets for CoryGardner-slice4\n",
"trying to fetch tweets for VP-slice1\n",
"trying to fetch tweets for VP-slice2\n",
"trying to fetch tweets for VP-slice3\n",
"trying to fetch tweets for VP-slice4\n",
"trying to fetch tweets for SenatorIsakson-slice1\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[28], line 30\u001b[0m\n\u001b[1;32m 22\u001b[0m tweets \u001b[38;5;241m=\u001b[39m tweepy\u001b[38;5;241m.\u001b[39mPaginator(client\u001b[38;5;241m.\u001b[39msearch_all_tweets,\n\u001b[1;32m 23\u001b[0m query\u001b[38;5;241m=\u001b[39mquery,\n\u001b[1;32m 24\u001b[0m tweet_fields\u001b[38;5;241m=\u001b[39mtweet_fields,\n\u001b[1;32m 25\u001b[0m start_time\u001b[38;5;241m=\u001b[39mstart_time,\n\u001b[1;32m 26\u001b[0m end_time\u001b[38;5;241m=\u001b[39mend_time,\n\u001b[1;32m 27\u001b[0m max_results\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m20\u001b[39m)\u001b[38;5;241m.\u001b[39mflatten(\u001b[38;5;241m20\u001b[39m)\n\u001b[1;32m 29\u001b[0m \u001b[38;5;66;03m# for each tweet returned...\u001b[39;00m\n\u001b[0;32m---> 30\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m tweet \u001b[38;5;129;01min\u001b[39;00m tweets:\n\u001b[1;32m 31\u001b[0m \u001b[38;5;66;03m# ... add that tweet to tweetlist\u001b[39;00m\n\u001b[1;32m 32\u001b[0m tweetlist\u001b[38;5;241m.\u001b[39mappend(tweet)\n\u001b[1;32m 34\u001b[0m \u001b[38;5;66;03m# Check if no tweets fetched for the current time slice. If there are no tweets, skip to next time_slices loop iteration\u001b[39;00m\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/pagination.py:67\u001b[0m, in \u001b[0;36mPaginator.flatten\u001b[0;34m(self, limit)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 66\u001b[0m count \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m---> 67\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m response \u001b[38;5;129;01min\u001b[39;00m PaginationIterator(\n\u001b[1;32m 68\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmethod, \u001b[38;5;241m*\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkwargs\n\u001b[1;32m 69\u001b[0m ):\n\u001b[1;32m 70\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(response, Response):\n\u001b[1;32m 71\u001b[0m response_data \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mdata \u001b[38;5;129;01mor\u001b[39;00m []\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/pagination.py:126\u001b[0m, in \u001b[0;36mPaginationIterator.__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpagination_token\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m pagination_token\n\u001b[0;32m--> 126\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(response, Response):\n\u001b[1;32m 129\u001b[0m meta \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mmeta\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/client.py:1163\u001b[0m, in \u001b[0;36mClient.search_all_tweets\u001b[0;34m(self, query, **params)\u001b[0m\n\u001b[1;32m 1071\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"search_all_tweets( \\\u001b[39;00m\n\u001b[1;32m 1072\u001b[0m \u001b[38;5;124;03m query, *, end_time=None, expansions=None, max_results=None, \\\u001b[39;00m\n\u001b[1;32m 1073\u001b[0m \u001b[38;5;124;03m media_fields=None, next_token=None, place_fields=None, \\\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1160\u001b[0m \u001b[38;5;124;03m.. _pagination: https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/paginate\u001b[39;00m\n\u001b[1;32m 1161\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1162\u001b[0m params[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m query\n\u001b[0;32m-> 1163\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1164\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mGET\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/2/tweets/search/all\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1165\u001b[0m \u001b[43m \u001b[49m\u001b[43mendpoint_parameters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1166\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mend_time\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mexpansions\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmax_results\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmedia.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1167\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mnext_token\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mplace.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpoll.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mquery\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1168\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msince_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msort_order\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstart_time\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtweet.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1169\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muntil_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 1170\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mTweet\u001b[49m\n\u001b[1;32m 1171\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/client.py:129\u001b[0m, in \u001b[0;36mBaseClient._make_request\u001b[0;34m(self, method, route, params, endpoint_parameters, json, data_type, user_auth)\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_make_request\u001b[39m(\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28mself\u001b[39m, method, route, params\u001b[38;5;241m=\u001b[39m{}, endpoint_parameters\u001b[38;5;241m=\u001b[39m(), json\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 125\u001b[0m data_type\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, user_auth\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 126\u001b[0m ):\n\u001b[1;32m 127\u001b[0m request_params \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_params(params, endpoint_parameters)\n\u001b[0;32m--> 129\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mroute\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 130\u001b[0m \u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muser_auth\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_auth\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreturn_type \u001b[38;5;129;01mis\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mResponse:\n\u001b[1;32m 133\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/client.py:84\u001b[0m, in \u001b[0;36mBaseClient.request\u001b[0;34m(self, method, route, params, json, user_auth)\u001b[0m\n\u001b[1;32m 75\u001b[0m headers[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAuthorization\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBearer \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbearer_token\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 77\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\n\u001b[1;32m 78\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMaking API request: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmethod\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mhost\u001b[38;5;250m \u001b[39m\u001b[38;5;241m+\u001b[39m\u001b[38;5;250m \u001b[39mroute\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 79\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mParameters: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mparams\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHeaders: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mheaders\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 81\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBody: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mjson\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 82\u001b[0m )\n\u001b[0;32m---> 84\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 85\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhost\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mroute\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 86\u001b[0m \u001b[43m \u001b[49m\u001b[43mauth\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mauth\u001b[49m\n\u001b[1;32m 87\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m response:\n\u001b[1;32m 88\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\n\u001b[1;32m 89\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mReceived API response: \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mreason\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHeaders: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mheaders\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mContent: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mcontent\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 93\u001b[0m )\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m400\u001b[39m:\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/requests/sessions.py:589\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 584\u001b[0m send_kwargs \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 585\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimeout\u001b[39m\u001b[38;5;124m\"\u001b[39m: timeout,\n\u001b[1;32m 586\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mallow_redirects\u001b[39m\u001b[38;5;124m\"\u001b[39m: allow_redirects,\n\u001b[1;32m 587\u001b[0m }\n\u001b[1;32m 588\u001b[0m send_kwargs\u001b[38;5;241m.\u001b[39mupdate(settings)\n\u001b[0;32m--> 589\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43msend_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/requests/sessions.py:703\u001b[0m, in \u001b[0;36mSession.send\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 700\u001b[0m start \u001b[38;5;241m=\u001b[39m preferred_clock()\n\u001b[1;32m 702\u001b[0m \u001b[38;5;66;03m# Send the request\u001b[39;00m\n\u001b[0;32m--> 703\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43madapter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 705\u001b[0m \u001b[38;5;66;03m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[1;32m 706\u001b[0m elapsed \u001b[38;5;241m=\u001b[39m preferred_clock() \u001b[38;5;241m-\u001b[39m start\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/requests/adapters.py:486\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 483\u001b[0m timeout \u001b[38;5;241m=\u001b[39m TimeoutSauce(connect\u001b[38;5;241m=\u001b[39mtimeout, read\u001b[38;5;241m=\u001b[39mtimeout)\n\u001b[1;32m 485\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 486\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 487\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 488\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 489\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 490\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 491\u001b[0m \u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 492\u001b[0m \u001b[43m \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 493\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 494\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 495\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 496\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 497\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 498\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 500\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m 501\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m(err, request\u001b[38;5;241m=\u001b[39mrequest)\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/urllib3/connectionpool.py:790\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 787\u001b[0m response_conn \u001b[38;5;241m=\u001b[39m conn \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m release_conn \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 789\u001b[0m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[0;32m--> 790\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 791\u001b[0m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 792\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 793\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 794\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 795\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 796\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 797\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 798\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 799\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresponse_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 800\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 801\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 802\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 803\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 805\u001b[0m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n\u001b[1;32m 806\u001b[0m clean_exit \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/urllib3/connectionpool.py:536\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[1;32m 534\u001b[0m \u001b[38;5;66;03m# Receive the response from the server\u001b[39;00m\n\u001b[1;32m 535\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 536\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 537\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (BaseSSLError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_raise_timeout(err\u001b[38;5;241m=\u001b[39me, url\u001b[38;5;241m=\u001b[39murl, timeout_value\u001b[38;5;241m=\u001b[39mread_timeout)\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/urllib3/connection.py:454\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 451\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mresponse\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m HTTPResponse\n\u001b[1;32m 453\u001b[0m \u001b[38;5;66;03m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[0;32m--> 454\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 457\u001b[0m assert_header_parsing(httplib_response\u001b[38;5;241m.\u001b[39mmsg)\n",
"File \u001b[0;32m/usr/lib/python3.9/http/client.py:1347\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1345\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1346\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1347\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbegin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1348\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n\u001b[1;32m 1349\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclose()\n",
"File \u001b[0;32m/usr/lib/python3.9/http/client.py:307\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 305\u001b[0m \u001b[38;5;66;03m# read until we get a non-100 response\u001b[39;00m\n\u001b[1;32m 306\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 307\u001b[0m version, status, reason \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 308\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m CONTINUE:\n\u001b[1;32m 309\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n",
"File \u001b[0;32m/usr/lib/python3.9/http/client.py:268\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 267\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_read_status\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 268\u001b[0m line \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreadline\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_MAXLINE\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124miso-8859-1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(line) \u001b[38;5;241m>\u001b[39m _MAXLINE:\n\u001b[1;32m 270\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m LineTooLong(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstatus line\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"File \u001b[0;32m/usr/lib/python3.9/socket.py:704\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 702\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 703\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 704\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 705\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m 706\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
"File \u001b[0;32m/usr/lib/python3.9/ssl.py:1241\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1237\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m flags \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 1238\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1239\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[1;32m 1240\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m)\n\u001b[0;32m-> 1241\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnbytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1242\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1243\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mrecv_into(buffer, nbytes, flags)\n",
"File \u001b[0;32m/usr/lib/python3.9/ssl.py:1099\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 1097\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1098\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m buffer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1099\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sslobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1100\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1101\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sslobj\u001b[38;5;241m.\u001b[39mread(\u001b[38;5;28mlen\u001b[39m)\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"# Iterate over each Twitter account\n",
"for handle in accounts:\n",
" for slice_data in time_slices:\n",
" # sleep 1 second to not get over 1sec api limit\n",
" time.sleep(1) \n",
" # define slice data variables from time_slices\n",
" start_time = slice_data['start_time']\n",
" end_time = slice_data['end_time']\n",
" suffix = slice_data['suffix']\n",
" \n",
" # define tweepy query with twitter handle of current sen\n",
" query = f'from:{handle} -is:retweet'\n",
" \n",
" # create empty tweetlist that will be filled with tweets of current sen\n",
" tweetlist = []\n",
" \n",
" # statusmsg\n",
" msg = f'trying to fetch tweets for {handle}{suffix}'\n",
" print(msg)\n",
" \n",
" # Fetch tweets using tweepy Twitter API v2 pagination\n",
" tweets = tweepy.Paginator(client.search_all_tweets,\n",
" query=query,\n",
" tweet_fields=tweet_fields,\n",
" start_time=start_time,\n",
" end_time=end_time,\n",
" max_results=20).flatten(20)\n",
" \n",
" # for each tweet returned...\n",
" for tweet in tweets:\n",
" # ... add that tweet to tweetlist\n",
" tweetlist.append(tweet)\n",
" \n",
" # Check if no tweets fetched for the current time slice. If there are no tweets, skip to next time_slices loop iteration\n",
" if len(tweetlist) == 0:\n",
" msg = f'return empty in {handle}{suffix} - from {start_time} to {end_time}'\n",
" print(msg)\n",
" print(tweets)\n",
" continue\n",
" \n",
" # convert to dataframe\n",
" tweet_df = pd.DataFrame(tweetlist)\n",
" \n",
" # add handle column as api only provides user-ids\n",
" tweet_df['handle'] = handle\n",
" \n",
" ## Extract referenced_tweet info from column\n",
" tweet_df['referenced_tweet_type'] = None\n",
" tweet_df['referenced_tweet_id'] = None\n",
" \n",
" # if cond. because in some cases column doesn't exist\n",
" if 'referenced_tweets' in tweet_df.columns:\n",
" for index, row in tweet_df.iterrows():\n",
" referenced_tweets = row['referenced_tweets']\n",
" \n",
" if isinstance(referenced_tweets, list) and len(referenced_tweets) > 0:\n",
" referenced_tweet = referenced_tweets[0]\n",
" referenced_tweet_type = referenced_tweet['type']\n",
" referenced_tweet_id = referenced_tweet['id']\n",
" \n",
" tweet_df.at[index, 'referenced_tweet_type'] = referenced_tweet_type\n",
" tweet_df.at[index, 'referenced_tweet_id'] = referenced_tweet_id\n",
" \n",
" ## Check if tweet-text contains keyword\n",
" # if cond. because in some cases column doesn't exist\n",
" if 'text' in tweet_df.columns:\n",
" tweet_df['contains_keyword'] = (tweet_df['text'].str.findall('|'.join(keywords))\n",
" .str.join(',')\n",
" .replace('', 'none'))\n",
" \n",
" ## Save two versions of the dataset, one with all fields and one without dict fields\n",
" # define filepaths\n",
" csv_path = f'data/tweets/{handle}{suffix}.csv'\n",
" csv_path2 = f'data/tweets/{handle}{suffix}-LONG.csv'\n",
" # save LONG csv\n",
" tweet_df.to_csv(csv_path2)\n",
" # Remove 'context_annotations', 'entities' and 'referenced_tweets' columns for short csv files\n",
" # if cond. because in some cases column doesn't exist\n",
" if all(k in tweet_df for k in ('context_annotations', 'entities', 'referenced_tweets')):\n",
" tweet_df = tweet_df.drop(['context_annotations', 'entities', 'referenced_tweets'], axis=1)\n",
" # save short csv\n",
" tweet_df.to_csv(csv_path)"
]
},
{
"cell_type": "markdown",
"id": "cb779d9a-cecb-475c-9e76-22c9b8c1928d",
"metadata": {},
"source": [
"## Alternative way to fetch tweets via tweepy with retry mechanism"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "c3b4a2ba-46e2-478b-9558-7d6999fdcd69",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"trying to fetch tweets for SenAlexander-slice1\n",
"trying to fetch tweets for SenAlexander-slice2\n",
"trying to fetch tweets for SenAlexander-slice3\n",
"trying to fetch tweets for SenAlexander-slice4\n",
"trying to fetch tweets for SenatorEnzi-slice1\n",
"trying to fetch tweets for SenatorEnzi-slice2\n",
"trying to fetch tweets for SenatorEnzi-slice3\n",
"return empty in SenatorEnzi-slice3 - from 2021-01-01T00:00:01Z to 2021-06-01T00:00:00Z\n",
"trying to fetch tweets for SenatorEnzi-slice4\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Rate limit exceeded. Sleeping for 437 seconds.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"trying to fetch tweets for CoryGardner-slice1\n",
"trying to fetch tweets for CoryGardner-slice2\n",
"trying to fetch tweets for CoryGardner-slice3\n",
"return empty in CoryGardner-slice3 - from 2021-01-01T00:00:01Z to 2021-06-01T00:00:00Z\n",
"trying to fetch tweets for CoryGardner-slice4\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Rate limit exceeded. Sleeping for 897 seconds.\n"
]
},
{
"ename": "AttributeError",
"evalue": "module 'tweepy' has no attribute 'TweepError'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[21], line 33\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;66;03m# for each tweet returned...\u001b[39;00m\n\u001b[0;32m---> 33\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m tweet \u001b[38;5;129;01min\u001b[39;00m tweets:\n\u001b[1;32m 34\u001b[0m \u001b[38;5;66;03m# ... add that tweet to tweetlist\u001b[39;00m\n\u001b[1;32m 35\u001b[0m tweetlist\u001b[38;5;241m.\u001b[39mappend(tweet)\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/pagination.py:67\u001b[0m, in \u001b[0;36mPaginator.flatten\u001b[0;34m(self, limit)\u001b[0m\n\u001b[1;32m 66\u001b[0m count \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m---> 67\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m response \u001b[38;5;129;01min\u001b[39;00m PaginationIterator(\n\u001b[1;32m 68\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmethod, \u001b[38;5;241m*\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkwargs\n\u001b[1;32m 69\u001b[0m ):\n\u001b[1;32m 70\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(response, Response):\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/pagination.py:126\u001b[0m, in \u001b[0;36mPaginationIterator.__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpagination_token\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m pagination_token\n\u001b[0;32m--> 126\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(response, Response):\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/client.py:1163\u001b[0m, in \u001b[0;36mClient.search_all_tweets\u001b[0;34m(self, query, **params)\u001b[0m\n\u001b[1;32m 1162\u001b[0m params[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m query\n\u001b[0;32m-> 1163\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1164\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mGET\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/2/tweets/search/all\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1165\u001b[0m \u001b[43m \u001b[49m\u001b[43mendpoint_parameters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1166\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mend_time\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mexpansions\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmax_results\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmedia.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1167\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mnext_token\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mplace.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpoll.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mquery\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1168\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msince_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msort_order\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstart_time\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtweet.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1169\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muntil_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser.fields\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 1170\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mdata_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mTweet\u001b[49m\n\u001b[1;32m 1171\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/client.py:129\u001b[0m, in \u001b[0;36mBaseClient._make_request\u001b[0;34m(self, method, route, params, endpoint_parameters, json, data_type, user_auth)\u001b[0m\n\u001b[1;32m 127\u001b[0m request_params \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_params(params, endpoint_parameters)\n\u001b[0;32m--> 129\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mroute\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 130\u001b[0m \u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muser_auth\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_auth\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreturn_type \u001b[38;5;129;01mis\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mResponse:\n",
"File \u001b[0;32m/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/.venv/lib/python3.9/site-packages/tweepy/client.py:112\u001b[0m, in \u001b[0;36mBaseClient.request\u001b[0;34m(self, method, route, params, json, user_auth)\u001b[0m\n\u001b[1;32m 108\u001b[0m log\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m 109\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRate limit exceeded. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSleeping for \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msleep_time\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m seconds.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 111\u001b[0m )\n\u001b[0;32m--> 112\u001b[0m \u001b[43mtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[43msleep_time\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrequest(method, route, params, json, user_auth)\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: ",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[21], line 39\u001b[0m\n\u001b[1;32m 35\u001b[0m tweetlist\u001b[38;5;241m.\u001b[39mappend(tweet)\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m \u001b[38;5;66;03m# exit the retry loop if tweets are successfully fetched\u001b[39;00m\n\u001b[0;32m---> 39\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[43mtweepy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mTweepError\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 40\u001b[0m \u001b[38;5;66;03m# handle rate limit exceeded error\u001b[39;00m\n\u001b[1;32m 41\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m e\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m429\u001b[39m:\n\u001b[1;32m 42\u001b[0m \u001b[38;5;66;03m# get the rate limit reset time from the response headers\u001b[39;00m\n\u001b[1;32m 43\u001b[0m reset_time \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mint\u001b[39m(e\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mheaders[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mx-rate-limit-reset\u001b[39m\u001b[38;5;124m'\u001b[39m])\n",
"\u001b[0;31mAttributeError\u001b[0m: module 'tweepy' has no attribute 'TweepError'"
]
}
],
"source": [
"# Iterate over each Twitter account\n",
"for handle in accounts:\n",
" for slice_data in time_slices:\n",
" # define slice data variables from time_slices\n",
" start_time = slice_data['start_time']\n",
" end_time = slice_data['end_time']\n",
" suffix = slice_data['suffix']\n",
" \n",
" # define tweepy query with twitter handle of current sen\n",
" query = f'from:{handle} -is:retweet'\n",
" \n",
" # create empty tweetlist that will be filled with tweets of current sen\n",
" tweetlist = []\n",
" \n",
" # statusmsg\n",
" msg = f'trying to fetch tweets for {handle}{suffix}'\n",
" print(msg)\n",
" \n",
" # Fetch tweets using tweepy Twitter API v2 pagination with retry mechanism\n",
" max_attempts = 3 # maximum number of attempts to fetch tweets for a slice\n",
" attempt = 1\n",
" \n",
" while attempt <= max_attempts:\n",
" try:\n",
" tweets = tweepy.Paginator(client.search_all_tweets,\n",
" query=query,\n",
" tweet_fields=tweet_fields,\n",
" start_time=start_time,\n",
" end_time=end_time,\n",
" max_results=20).flatten(20)\n",
" \n",
" # for each tweet returned...\n",
" for tweet in tweets:\n",
" # ... add that tweet to tweetlist\n",
" tweetlist.append(tweet)\n",
" \n",
" break # exit the retry loop if tweets are successfully fetched\n",
" \n",
" except tweepy.TweepError as e:\n",
" # handle rate limit exceeded error\n",
" if e.response.status_code == 429:\n",
" # get the rate limit reset time from the response headers\n",
" reset_time = int(e.response.headers['x-rate-limit-reset'])\n",
" current_time = int(time.time())\n",
" \n",
" # calculate the sleep time until the rate limit resets\n",
" sleep_time = reset_time - current_time + 1 # add an extra second\n",
" \n",
" # sleep until the rate limit resets\n",
" time.sleep(sleep_time)\n",
" \n",
" attempt += 1 # increment the attempt counter\n",
" continue # retry the API call\n",
" \n",
" else:\n",
" # handle other types of Tweepy errors\n",
" print(f'Error occurred: {e}')\n",
" break\n",
" \n",
" # Check if no tweets fetched for the current time slice. If there are no tweets, skip to next time_slices loop iteration\n",
" if len(tweetlist) == 0:\n",
" msg = f'return empty in {handle}{suffix} - from {start_time} to {end_time}'\n",
" print(msg)\n",
" continue\n",
" \n",
" # convert to dataframe\n",
" tweet_df = pd.DataFrame(tweetlist)\n",
" \n",
" # add handle column as api only provides user-ids\n",
" tweet_df['handle'] = handle\n",
" \n",
" ## Extract referenced_tweet info from column\n",
" tweet_df['referenced_tweet_type'] = None\n",
" tweet_df['referenced_tweet_id'] = None\n",
" \n",
" # if cond. because in some cases column doesn't exist\n",
" if 'referenced_tweets' in tweet_df.columns:\n",
" for index, row in tweet_df.iterrows():\n",
" referenced_tweets = row['referenced_tweets']\n",
" \n",
" if isinstance(referenced_tweets, list) and len(referenced_tweets) > 0:\n",
" referenced_tweet = referenced_tweets[0]\n",
" referenced_tweet_type = referenced_tweet['type']\n",
" referenced_tweet_id = referenced_tweet['id']\n",
" \n",
" tweet_df.at[index, 'referenced_tweet_type'] = referenced_tweet_type\n",
" tweet_df.at[index, 'referenced_tweet_id'] = referenced_tweet_id\n",
" \n",
" ## Check if tweet-text contains keyword\n",
" # if cond. because in some cases column doesn't exist\n",
" if 'text' in tweet_df.columns:\n",
" tweet_df['contains_keyword'] = (tweet_df['text'].str.findall('|'.join(keywords))\n",
" .str.join(',')\n",
" .replace('', 'none'))\n",
" \n",
" ## Save two versions of the dataset, one with all fields and one without dict fields\n",
" # define filepaths\n",
" csv_path = f'data/tweets/{handle}{suffix}.csv'\n",
" csv_path2 = f'data/tweets/{handle}{suffix}-LONG.csv'\n",
" # save LONG csv\n",
" tweet_df.to_csv(csv_path2)\n",
" # Remove 'context_annotations', 'entities' and 'referenced_tweets' columns for short csv files\n",
" # if cond. because in some cases column doesn't exist\n",
" if all(k in tweet_df for k in ('context_annotations', 'entities', 'referenced_tweets')):\n",
" tweet_df = tweet_df.drop(['context_annotations', 'entities', 'referenced_tweets'], axis=1)\n",
" # save short csv\n",
" tweet_df.to_csv(csv_path)\n",
" \n",
" # sleep 1 second to not exceed the API rate limit\n",
" time.sleep(1)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5dd5498-1ba4-4f0a-9bb9-ffce4655212d",
"metadata": {},
"outputs": [],
"source": [
"path_to_tweetdfs = wd + td\n",
"os.chdir(path_to_tweetdfs)\n",
"tweetfiles = glob.glob('*.{}'.format(\"csv\"))\n",
"\n",
"print(tweetfiles)\n",
"\n",
"# save merged csv as two files \n",
"df_all_senators = pd.DataFrame()\n",
"df_all_senators_long = pd.DataFrame()\n",
"for file in tweetfiles:\n",
"\tif \"LONG\" in file:\n",
"\t\tdf = pd.read_csv(file)\n",
"\t\tdf_all_senators_long = pd.concat([df, df_all_senators_long])\n",
"\telse:\n",
"\t\tdf = pd.read_csv(file)\n",
"\t\tdf_all_senators = pd.concat([df, df_all_senators])\n",
"csv_path = td + \"ALL-SENATORS.csv\"\n",
"csv_path2 = td + \"ALL-SENATORS-LONG-LONG.csv\"\n",
"df_all_senators.to_csv(csv_path) \n",
"df_all_senators_long.to_csv(csv_path2)\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "320ebbf4-8eaf-4189-836b-5d5aa8a0a263",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"trying to fetch tweets for SenAlexander-slice1\n",
"trying to fetch tweets for SenAlexander-slice2\n",
"trying to fetch tweets for SenAlexander-slice3\n",
"trying to fetch tweets for SenAlexander-slice4\n",
"trying to fetch tweets for SenatorEnzi-slice1\n",
"trying to fetch tweets for SenatorEnzi-slice2\n",
"trying to fetch tweets for SenatorEnzi-slice3\n",
"return empty in SenatorEnzi-slice3 - from 2021-01-01T00:00:01Z to 2021-06-01T00:00:00Z\n",
"trying to fetch tweets for SenatorEnzi-slice4\n",
"trying to fetch tweets for CoryGardner-slice1\n",
"trying to fetch tweets for CoryGardner-slice2\n",
"trying to fetch tweets for CoryGardner-slice3\n",
"return empty in CoryGardner-slice3 - from 2021-01-01T00:00:01Z to 2021-06-01T00:00:00Z\n",
"trying to fetch tweets for CoryGardner-slice4\n",
"trying to fetch tweets for VP-slice1\n",
"trying to fetch tweets for VP-slice2\n",
"trying to fetch tweets for VP-slice3\n",
"trying to fetch tweets for VP-slice4\n",
"trying to fetch tweets for SenatorIsakson-slice1\n",
"trying to fetch tweets for SenatorIsakson-slice2\n",
"trying to fetch tweets for SenatorIsakson-slice3\n",
"trying to fetch tweets for SenatorIsakson-slice4\n",
"trying to fetch tweets for DougJones-slice1\n",
"trying to fetch tweets for DougJones-slice2\n",
"trying to fetch tweets for DougJones-slice3\n",
"trying to fetch tweets for DougJones-slice4\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[24], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m handle \u001b[38;5;129;01min\u001b[39;00m accounts:\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m slice_data \u001b[38;5;129;01min\u001b[39;00m time_slices:\n\u001b[0;32m----> 4\u001b[0m \u001b[43mtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m1.01\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# define slice data variables from time_slices\u001b[39;00m\n\u001b[1;32m 6\u001b[0m start_time \u001b[38;5;241m=\u001b[39m slice_data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstart_time\u001b[39m\u001b[38;5;124m'\u001b[39m]\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"# Iterate over each Twitter account\n",
"for handle in accounts:\n",
" for slice_data in time_slices:\n",
" time.sleep(1.01)\n",
" # define slice data variables from time_slices\n",
" start_time = slice_data['start_time']\n",
" end_time = slice_data['end_time']\n",
" suffix = slice_data['suffix']\n",
" \n",
" # define tweepy query with twitter handle of current sen\n",
" query = f'from:{handle} -is:retweet'\n",
" \n",
" # create empty tweetlist that will be filled with tweets of current sen\n",
" tweetlist = []\n",
" \n",
" # statusmsg\n",
" msg = f'trying to fetch tweets for {handle}{suffix}'\n",
" print(msg)\n",
" \n",
" # Fetch tweets using tweepy Twitter API v2 pagination with retry mechanism\n",
" max_attempts = 3 # maximum number of attempts to fetch tweets for a slice\n",
" attempt = 1\n",
" \n",
" while attempt <= max_attempts:\n",
" try:\n",
" tweets = tweepy.Paginator(client.search_all_tweets,\n",
" query=query,\n",
" tweet_fields=tweet_fields,\n",
" start_time=start_time,\n",
" end_time=end_time,\n",
" max_results=20).flatten(20)\n",
" \n",
" # for each tweet returned...\n",
" for tweet in tweets:\n",
" # ... add that tweet to tweetlist\n",
" tweetlist.append(tweet)\n",
" \n",
" # Check if no tweets fetched for the current time slice. If there are no tweets, skip to next time_slices loop iteration\n",
" if len(tweetlist) == 0:\n",
" msg = f'return empty in {handle}{suffix} - from {start_time} to {end_time}'\n",
" print(msg)\n",
" break\n",
" \n",
" # convert to dataframe\n",
" tweet_df = pd.DataFrame(tweetlist)\n",
" \n",
" # add handle column as API only provides user-ids\n",
" tweet_df['handle'] = handle\n",
" \n",
" ## Extract referenced_tweet info from column\n",
" tweet_df['referenced_tweet_type'] = None\n",
" tweet_df['referenced_tweet_id'] = None\n",
" \n",
" # if cond. because in some cases column doesn't exist\n",
" if 'referenced_tweets' in tweet_df.columns:\n",
" for index, row in tweet_df.iterrows():\n",
" referenced_tweets = row['referenced_tweets']\n",
" \n",
" if isinstance(referenced_tweets, list) and len(referenced_tweets) > 0:\n",
" referenced_tweet = referenced_tweets[0]\n",
" referenced_tweet_type = referenced_tweet['type']\n",
" referenced_tweet_id = referenced_tweet['id']\n",
" \n",
" tweet_df.at[index, 'referenced_tweet_type'] = referenced_tweet_type\n",
" tweet_df.at[index, 'referenced_tweet_id'] = referenced_tweet_id\n",
" \n",
" ## Check if tweet-text contains keyword\n",
" # if cond. because in some cases column doesn't exist\n",
" if 'text' in tweet_df.columns:\n",
" tweet_df['contains_keyword'] = (tweet_df['text'].str.findall('|'.join(keywords))\n",
" .str.join(',')\n",
" .replace('', 'none'))\n",
" \n",
" ## Save two versions of the dataset, one with all fields and one without dict fields\n",
" # define filepaths\n",
" csv_path = f'data/tweets/{handle}{suffix}.csv'\n",
" csv_path2 = f'data/tweets/{handle}{suffix}-LONG.csv'\n",
" # save LONG csv\n",
" tweet_df.to_csv(csv_path2)\n",
" # Remove 'context_annotations', 'entities', and 'referenced_tweets' columns for short csv files\n",
" # if cond. because in some cases column doesn't exist\n",
" if all(k in tweet_df for k in ('context_annotations', 'entities', 'referenced_tweets')):\n",
" tweet_df = tweet_df.drop(['context_annotations', 'entities', 'referenced_tweets'], axis=1)\n",
" # save short csv\n",
" tweet_df.to_csv(csv_path)\n",
" \n",
" # break out of the retry loop since fetching tweets was successful\n",
" break\n",
" \n",
" except tweepy.TweepError as e:\n",
" if e.response.status_code == 429: # rate limit exceeded\n",
" reset_time = int(e.response.headers['x-rate-limit-reset'])\n",
" wait_time = reset_time - time.time() + 5 # add additional 5 seconds as buffer\n",
" \n",
" print(f\"Rate limit exceeded. Sleeping for {wait_time} seconds.\")\n",
" time.sleep(wait_time)\n",
" \n",
" attempt += 1 # increment the attempt counter\n",
" else:\n",
" print(f\"Error occurred: {e}\")\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "48688858-104d-4f2f-87b8-ed103f34b4e8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Politics & Society",
"language": "python",
"name": "polsoc"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
},
"toc-autonumbering": true,
"toc-showmarkdowntxt": false
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -4,9 +4,12 @@ Created on Thu Jun 8 01:08:21 2023
@author: Michael
Following files are necessary:
config.py
Used to configure everything that's needed for this script.
collect.py scrapes tweets from senators of the us that were in office between
2020 and the beginning of 2023.
# https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html
# Following files are necessary:
funs/TimeSlice.py
Function get_Tslices slices the defined timespan in config.py into N
slices. Is necessary due to possible blocking of requests by twitter.
@ -15,26 +18,34 @@ Following files are necessary:
Function deDupe reads each line of inFile and removes duplicate lines.
A file outFile is saved without the duplicate lines. Generates
"keywords.txt".
data/keywords-raw.txt
funs/Scrape.py
scrapes using snscrape.modules.twitter. See docstring.
data/IN/keywords-raw.txt
Contains all keywords that are used to detect whether a tweet contains
information about Covid19.
data/senators-raw.csv
data/IN/senators-raw.csv
Contains the senator dataset converted to csv. Is used to get the
account-names of all senators twitter accounts.
Requirements:
# Requirements:
- snscrape 0.6.2.20230321+
- pandas 2.0+
The script will first import needed libraries.
# IMPORTANT:
This script uses snscrape Version 0.6.2.20230321.dev50+g0d824ab which is
included in 'snscrape/' as a git repository for better reproducibility. Earlier
versions of snscrape will most likely fail to scrape all tweets because of
certain rate limits or other errors that may occur.
config.py will check whether snscrape is already installed. If not, it will try
to install the included version automatically.
Install snscrape from the local git repo to make sure that it fits the used version.
If snscrape shall be installed from the local repo, uncomment the following lines:
How to use:
- To run the script, first adjust the config.py file.
import subprocess
os.chdir('snscrape/')
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-e', '.'])
os.chdir(wd)
# How to use:
- To run the script, first adjust the options found in the following lines.
- config.py will check whether snscrape is already installed. If not, it will try
to install the included version automatically.
- run the script
@ -55,26 +66,34 @@ which is the final output.
import os
import pandas as pd
import glob
import time
import sys
from datetime import datetime
import concurrent.futures
## Setup directories
###################
# Setup directories
# WD Michael
wd = "/home/michael/Documents/PS/Data/collectTweets/"
# WD Server
# wd = '/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/'
# datafile input directory
di = "data/IN/"
# Tweet-datafile output directory
td = "data/tweets/"
td = "data/OUT/"
# Name of file that all tweets will be written to
file_alltweets = "ALL-SENATORS-TWEETS.csv"
# don't change this one
path_to_tweetdfs = wd + td
## Define Timespan
# Name of logfile
logfile = f"{wd}log/log_"
###################
# Define Timespan & time-format
# Format: %Y-%m-%dT%H:%M:%SZ (https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes)
ts_beg = "2020-01-01T00:00:00Z" # start of scraping
ts_end = "2023-01-03T00:00:00Z" # end of straping
@ -86,19 +105,9 @@ fTimeFormat = "%Y-%m-%d_%H-%M-%S"
# Maximum tweets to be scraped by snscrape. Can be left untouched.
maxTweets = 5000
# Name of logfile
logfile = wd+"log/log_"
## Install snscrape from local git repo to make shure that it fits the used version.
# If snscrape is already installed, uncomment the following lines:
"""
import subprocess
os.chdir('snscrape/')
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-e', '.'])
os.chdir(wd)
"""
# Columns for tweet dataframe
# Columns for tweet dataframe. Parameters for snscrape.modules.twitter.Tweet:
# https://thetechrobo.ca/snscrape-docs/_autosummary/snscrape.modules.twitter.Tweet.html
# sub-parameters can be accessed with dot notation, e.g. the user id is available as user.id
tweetDFColumns = [
"id",
"user.id",
@ -135,18 +144,28 @@ tweetDFColumns = [
"source",
]
## Import other files
from funs.TimeSlice import *
from funs.ClearDupes import deDupe
from funs.Scrape import scrapeTweets
#############################################################################
################## do NOT change anything below this line ###################
#############################################################################
# create logfile & log all outputs
logfilen = logfile + datetime.now().strftime(fTimeFormat) + ".txt"
logfileErrors = logfile + datetime.now().strftime(fTimeFormat) + "_err" + ".txt"
## Import own functions
funs = wd+"funs"
sys.path.insert(1, funs)
from TimeSlice import get_Tslices
from ClearDupes import deDupe
from Scrape import scrapeTweets
###################
# Create logfile & log all outputs
# there are three logfile types to be found in /log.
# should be self explanatory.
logfilen = logfile + datetime.now().strftime(fTimeFormat) + ".log"
logfileErrors = logfile + datetime.now().strftime(fTimeFormat) + "_err" + ".log"
sys.stderr = open(logfileErrors, "w")
sys.stdout = open(logfilen, "w")
## Create List of time-period-slices
###################
# Create List of time-period-slices
time_slices = get_Tslices(ts_beg, ts_end, no_slices)
# Print slices
print("Time-period-slices:")
@ -154,22 +173,25 @@ for slice in time_slices:
print(slice["suffix"] + ": " + slice["beg_time"] + " - " + slice["end_time"])
print("---")
## Keywords
###################
# Keywords
# read keywords from a file and write to list.
keywords = []
# Remove duplicate Keywords and save all non-duplicates to 'data/keywords.txt'
deDupe("data/keywords-raw.txt", "data/keywords.txt")
deDupe(f"{di}keywords-raw.txt", f"{di}keywords.txt")
# Read the keywords from a file
with open("data/keywords.txt", "r") as file:
with open(f"{di}keywords.txt", "r") as file:
lines = file.readlines()
for line in lines:
keyword = line.strip() # Remove the newline character
keywords.append(keyword)
print("---")
## Senator Accounts
###################
# Senator Accounts
# Get accounts & alt-accounts from Senators-Datafile
accounts = pd.read_csv("data/senators-raw.csv")["twitter_handle"].tolist()
alt_accounts = pd.read_csv("data/senators-raw.csv")["alt_handle"].tolist()
accounts = pd.read_csv(f"{di}senators-raw.csv")["twitter_handle"].tolist()
alt_accounts = pd.read_csv(f"{di}senators-raw.csv")["alt_handle"].tolist()
alt_accounts = [x for x in alt_accounts if str(x) != 'nan'] # remove empty alt_accounts fields
accounts.extend(alt_accounts)
@ -181,52 +203,61 @@ for i, acc in enumerate(accounts): # print 5 accounts per line
print("\n")
print(f"\n{i} accounts in total.\n---")
## Scraping
###################
# Scraping
# report time:
timeStartScrape = datetime.now()
print("Starting scraping at:")
print(timeStartScrape.strftime(fTimeFormat))
print("---")
# Iterate over each Twitter account using multiprocessing
# with concurrent.futures.ProcessPoolExecutor() as executor:
# # List to store the scraping tasks
# tasks = []
# for handle in accounts:
# # Iterate over each time slice
# for slice_data in time_slices:
# # ... Code to prepare the slice_data ...
# # Schedule the scraping task
# task = executor.submit(
# scrapeTweets, handle, slice_data, keywords, td, tweetDFColumns
# )
# # Store the handle and slice_data as attributes of the task
# # Wait for all tasks to complete
# concurrent.futures.wait(tasks)
with concurrent.futures.ProcessPoolExecutor() as executor:
# List to store the scraping tasks
tasks = []
for handle in accounts:
# Iterate over each time slice
for slice_data in time_slices:
# ... Code to prepare the slice_data ...
# Schedule the scraping task
task = executor.submit(
scrapeTweets, handle, keywords, td, tweetDFColumns, slice_data['beg_time'], slice_data['end_time'], slice_data['suffix']
)
# Store the handle and slice_data as attributes of the task
# Wait for all tasks to complete
concurrent.futures.wait(tasks)
# report time:
timeEndScrape = datetime.now()
print("---")
print("End of scraping at:")
print(timeEndScrape.strftime(fTimeFormat))
## Merge CSV-Files to file_alltweets.
# fastest way is to save the slices seperately and then add every file to the output instead of using pandas or anything else.
os.chdir(path_to_tweetdfs)
# At first check, whether all slices are present.
tweetfiles = glob.glob("*.csv") # get list of all csv files in folder - before: "*.{}".format("csv")
###################
# Merge CSV-Files to file_alltweets.
# fastest way is to save the slices separately and then add every file to the
# output instead of using pandas or anything else.
os.chdir(path_to_tweetdfs) # change dir to use glob to get list of csv-files in dir
## At first check, whether all slices are present.
tweetfiles = glob.glob("*.csv") # get list of all csv files in folder - before: "*.{}".format("csv") ???
# Create list of all files that should be in the folder:
AllFilesList = []
for handle in accounts:
for tslice in time_slices:
suffix = tslice['suffix']
AllFilesList.append(f"Tweets-{handle}{suffix}.csv")
with open(f"{logfile}"+timeStartScrape.strftime(fTimeFormat)+"_missing.txt", "w") as fout:
AllFilesList.append(f"Tweets-{handle}{suffix}.csv")
# report missing files to "log_*_missing.txt"
with open(f"{logfile}"+timeStartScrape.strftime(fTimeFormat)+"_missing.log", "w") as fout:
for file in AllFilesList:
if file not in tweetfiles:
fout.write(f'Missing: {file}.\n') # if file is not in tweetfiles, print error message.
else:
fout.write('all slices scraped.')
fout.write(f'{file:<30}:all slices scraped.\n')
# check if file_alltweets (previously scraped tweets that have been merged into one file) exists, if it exists, remove from list to not include it in the following merge
## Merge .csv files.
# check if file_alltweets (previously scraped tweets that have been merged
# into one file) exists in tweetfiles list, if it exists, remove from list
# to not include it in the following merge
if file_alltweets in tweetfiles:
tweetfiles.remove(file_alltweets)
# Go through all csv files and merge them into file_alltweets
@ -240,21 +271,26 @@ if tweetfiles:
with open(file, "rb") as f:
next(f) # skip the header
fout.write(f.read())
os.chdir(wd)
os.chdir(wd) # go back to wd
###################
# finish logging
# Report timing info.
timeEndMerge = datetime.now()
print("---")
print("End of scraping at:")
print(timeEndMerge.strftime(fTimeFormat))
print("---")
tThours, tTminutes, tTseconds = convertTime(timeEndMerge - timeStartScrape)
tShours, tSminutes, tSseconds = convertTime(timeEndScrape - timeStartScrape)
tMhours, tMminutes, tMseconds = convertTime(timeEndMerge - timeEndScrape)
# calculate times:
tThours, tTminutes, tTseconds = convertTime(timeEndMerge - timeStartScrape) # total execution time
tShours, tSminutes, tSseconds = convertTime(timeEndScrape - timeStartScrape) # scraping time
tMhours, tMminutes, tMseconds = convertTime(timeEndMerge - timeEndScrape) # merge time
print(
f"Total execution time: {tThours} hours, {tTminutes} minutes and {tTseconds} seconds"
)
print(f"Scraping time: {tShours} hours, {tSminutes} minutes and {tSseconds} seconds")
print(f"Time merging: {tMhours} hours, {tMminutes} minutes and {tMseconds} seconds")
# close connection to logfiles.
sys.stdout.close()
sys.stderr.close()

166
collectSenData.py Normal file
View File

@ -0,0 +1,166 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 23 21:49:11 2023
@author: Michael
collectSenData.py scrapes the accounts of senators for the following data: the
number of followers, the number of users the twitter account is following,
and how long the twitter account has existed.
# Requirements:
- snscrape 0.6.2.20230321+
- pandas 2.0+
# IMPORTANT:
This script uses snscrape Version 0.6.2.20230321.dev50+g0d824ab which is
included in 'snscrape/' as a git repository for better reproducibility. Earlier
versions of snscrape will most likely fail to scrape all tweets because of
certain rate limits or other errors that may occur.
Install snscrape from the local git repo to make sure that it fits the used version.
If snscrape shall be installed from the local repo, uncomment the following lines:
import subprocess
os.chdir('snscrape/')
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-e', '.'])
os.chdir(wd)
# How to use:
- To run the script, first adjust the directories and options in the lines below.
- Run the script; console output and errors are written to the logfiles in 'log/'.
"""
import os
import pandas as pd
import glob
import time
import sys
from datetime import datetime
import concurrent.futures
###################
# Setup directories
# WD Michael
wd = "/home/michael/Documents/PS/Data/collectTweets/"
# WD Server
# wd = '/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/'
# datafile input directory
di = "data/IN/"
# Tweet-datafile output directory
ud = "data/OUT/"
# Name of file that all senator data will be written to
senCSV = "ALL-SENATORS.csv"
# don't change this one
senCSVPath = wd + ud + senCSV
# Name of logfile
logfile = wd+"log/UserLog_"
###################
# Define Timespan & time-format
# Format: %Y-%m-%dT%H:%M:%SZ (https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes)
ts_beg = "2020-01-01T00:00:00Z" # start of scraping
ts_end = "2023-01-03T00:00:00Z" # end of straping
no_slices = 24 # Number of slices / time periods.
# file time format
fTimeFormat = "%Y-%m-%d_%H-%M-%S"
# Maximum tweets to be scraped by snscrape. Can be left untouched.
maxTweets = 5000
# Columns for tweet dataframe. Parameters for snscrape.modules.twitter.Tweet:
# https://thetechrobo.ca/snscrape-docs/_autosummary/snscrape.modules.twitter.Tweet.html
# sub-parameters can be accessed with dot notation, e.g. the user id is available as user.id
userDFColumns = [
"id",
"username",
"followersCount",
"friendsCount",
"verified",
"created"
]
#############################################################################
################## do NOT change anything below this line ###################
#############################################################################
from funs.Scrape import scrapeUsers, getHandles, printHandles
from funs.TimeSlice import convertTime
###################
# Create logfile & log all outputs
# there are three logfile types to be found in /log.
# should be self explanatory.
logfilen = logfile + datetime.now().strftime(fTimeFormat) + ".log"
logfileErrors = logfile + datetime.now().strftime(fTimeFormat) + "_err" + ".log"
sys.stderr = open(logfileErrors, "w")
sys.stdout = open(logfilen, "w")
###################
# Senator Accounts
# Get accounts & alt-accounts from Senators-Datafile
accounts = getHandles(di)
# Print accounts to be scraped
print(printHandles(accounts))
###################
# Scraping
# report time:
timeStartScrape = datetime.now()
print("Starting scraping at:")
print(timeStartScrape.strftime(fTimeFormat))
print("---")
listUsers = []
# Iterate over each Twitter account using multiprocessing
with concurrent.futures.ProcessPoolExecutor() as executor:
# List to store the scraping tasks
tasks = []
for handle in accounts:
# Schedule the scraping task
task = executor.submit(
scrapeUsers, handle, userDFColumns
)
tasks.append(task)
# Wait for all tasks to complete and retrieve results
for task in concurrent.futures.as_completed(tasks):
result = task.result()
listUsers.append(result)
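# note: results are collected in completion order, which may differ from the order of 'accounts'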
dfUsers = pd.DataFrame(listUsers, columns=userDFColumns)
dfUsers.to_csv(senCSVPath, encoding='utf-8')
# report time:
timeEndScrape = datetime.now()
print("---")
print("End of scraping at:")
print(timeEndScrape.strftime(fTimeFormat))
# Report timing info.
timeEndMerge = datetime.now()
print("---")
print("End of scraping at:")
print(timeEndMerge.strftime(fTimeFormat))
print("---")
# calculate times:
tThours, tTminutes, tTseconds = convertTime(timeEndMerge - timeStartScrape) # total execution time
tShours, tSminutes, tSseconds = convertTime(timeEndScrape - timeStartScrape) # scraping time
tMhours, tMminutes, tMseconds = convertTime(timeEndMerge - timeEndScrape) # merge time
print(
f"Total execution time: {tThours} hours, {tTminutes} minutes and {tTseconds} seconds"
)
print(f"Scraping time: {tShours} hours, {tSminutes} minutes and {tSseconds} seconds")
print(f"Time merging: {tMhours} hours, {tMminutes} minutes and {tMseconds} seconds")
print(listUsers)
# close connection to logfiles.
sys.stdout.close()
sys.stderr.close()

144
createGraphs.py Normal file
View File

@ -0,0 +1,144 @@
#%%
#!/usr/bin/env python3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from funs.CleanTweets import remove_URL, remove_emoji, remove_html, remove_punct
import string
#%%
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 26 20:36:43 2023
@author: michael
"""
import pandas as pd
# import pyreadstat
# import numpy as np
###################
# Setup directories
# WD Michael
wd = "/home/michael/Documents/PS/Data/collectTweets/"
# WD Server
# wd = '/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/'
# datafile input directory
di = "data/IN/"
# Tweet-datafile output directory
ud = "data/OUT/"
# Name of file that all senator data will be written to
senCSV = "SenatorsTweets-OnlyCov.csv" # SenatorsTweets-Final.csv SenatorsTweets-OnlyCov.csv
# Name of file that all senator data will be written to
senDataset = "senators-raw.csv"
# Name of new datafile generated
senCSVc = "SenatorsTweets-Final.csv"
senCSVcCov = "SenatorsTweets-OnlyCov.csv"
# Outfiles
wcAllTweetsF = "graphs/Wordcloud-All.png"
wcCovTweetsF = "graphs/Wordcloud-Cov.png"
TwCovTimeline = "graphs/Timeline.png"
# don't change this one
senCSVcPath = wd + ud + senCSVc
senCSVcCovPath = wd + ud + senCSVcCov
wcAllTweetsFPath = wd + ud + wcAllTweetsF
wcCovTweetsFPath = wd + ud + wcCovTweetsF
TwCovTimelinePath = wd + ud + TwCovTimeline
#%%
df = pd.read_csv(senCSVcPath, dtype=(object))
dfCov = pd.read_csv(senCSVcCovPath, dtype=(object))
#%%
df['cleanContent'] = df['rawContent'].apply(remove_URL)
df['cleanContent'] = df['cleanContent'].apply(remove_emoji)
df['cleanContent'] = df['cleanContent'].apply(remove_html)
df['cleanContent'] = df['cleanContent'].apply(remove_punct)
# create string with all cleaned tweets as text
str_alltweets = df['cleanContent'].astype(str).str.cat(sep=' ').casefold()
#%%
dfCov['cleanContent'] = dfCov['rawContent'].apply(remove_URL)
dfCov['cleanContent'] = dfCov['cleanContent'].apply(remove_emoji)
dfCov['cleanContent'] = dfCov['cleanContent'].apply(remove_html)
dfCov['cleanContent'] = dfCov['cleanContent'].apply(remove_punct)
# create string with all cleaned tweets as text
str_covtweets = dfCov['cleanContent'].astype(str).str.cat(sep=' ').casefold()
#%%
# replace single U and S characters
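# (single 'u' and 's' tokens are presumably remnants of abbreviations like 'U.S.' after punctuation removal)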
str_covtweets = str_covtweets.replace(' u ', ' ')
str_covtweets = str_covtweets.replace(' s ', ' ')
str_alltweets = str_alltweets.replace(' u ', ' ')
str_alltweets = str_alltweets.replace(' s ', ' ')
# %%
# create wordcloud alltweets
wcA = WordCloud(background_color="white", width=1000, height=1000, repeat=True)
wcA.generate(str_alltweets)
#%%
# draw
plt.figure( figsize=(20,20))
plt.axis("off")
plt.imshow(wcA, interpolation="bilinear")
fig1 = plt.gcf()
plt.show()
fig1.savefig(wcAllTweetsFPath)
# %%
# create wordcloud covtweets
wcC = WordCloud(background_color="white", width=1000, height=1000, repeat=True)
wcC.generate(str_covtweets)
#%%
# draw
plt.figure(figsize=(20, 20))
plt.axis("off")
plt.imshow(wcC, interpolation="bilinear")
fig2 = plt.gcf()
plt.show()
fig2.savefig(wcCovTweetsFPath)
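# The two wordcloud cells above repeat the same generate/draw/save steps; a small
# helper (a sketch, not part of the committed script) could factor them out:
def makeWordcloudSketch(text, outpath, size=1000, figsize=20):
    """Generate a wordcloud from text, display it and save it to outpath."""
    wc = WordCloud(background_color="white", width=size, height=size, repeat=True)
    wc.generate(text)
    plt.figure(figsize=(figsize, figsize))
    plt.axis("off")
    plt.imshow(wc, interpolation="bilinear")
    fig = plt.gcf()
    plt.show()
    fig.savefig(outpath)
    return wc

# Usage: makeWordcloudSketch(str_alltweets, wcAllTweetsFPath)
#        makeWordcloudSketch(str_covtweets, wcCovTweetsFPath)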
# %%
# with open('test.txt', 'w') as f:
# f.write(str_covtweets)
# %%
dfT = pd.DataFrame()
dfT['date'] = df['date'].copy()
dfT['count'] = 1
dfCovT = pd.DataFrame()
dfCovT['date'] = dfCov['date'].copy()
dfCovT['count'] = 1
#%%
dfT['date'] = pd.to_datetime(dfT['date']).dt.strftime('%Y-%m-%d')
dfCovT['date'] = pd.to_datetime(dfCovT['date']).dt.strftime('%Y-%m-%d')
#%%
dfT = dfT.groupby('date').count().reset_index()
dfCovT = dfCovT.groupby('date').count().reset_index()
#%%
import matplotlib.dates as mdates
# n of tweets overall
my_dpi = 300
plt.style.use('seaborn-darkgrid')  # 'seaborn-v0_8-darkgrid' on matplotlib >= 3.6
# one figure, sized in pixels via dpi; this is the figure that is drawn and saved below
fig, ax = plt.subplots(figsize=(1000/my_dpi, 1500/my_dpi), dpi=my_dpi)
ax.plot(dfCovT['date'], dfCovT['count'], marker='', color='tab:blue', linewidth=1, alpha=0.4)
ax.plot(dfT['date'], dfT['count'], marker='', color='tab:blue', linewidth=1, alpha=1)
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
ax.xaxis.set_minor_locator(mdates.MonthLocator())
fig.autofmt_xdate()
fig.savefig(TwCovTimelinePath)
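# Note: strftime() above converts the dates back into strings, so matplotlib treats
# the x-axis as categorical and the mdates locators may not take effect. A sketch
# (assumption, not the committed code) that keeps real datetimes on the axis:
dailyAll = pd.to_datetime(df['date']).dt.floor('D').value_counts().sort_index()
dailyCov = pd.to_datetime(dfCov['date']).dt.floor('D').value_counts().sort_index()
# dailyAll.index is a DatetimeIndex, so MonthLocator spaces the ticks by real months:
# ax.plot(dailyCov.index, dailyCov.values, ...); ax.plot(dailyAll.index, dailyAll.values, ...)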
# %%

0
data/IN/.gitkeep Normal file
View File

View File

@ -0,0 +1,23 @@
opioid
gun violence
gun-violence
CHD
Coronary heart disease
addiction
tobacco
vaping
e-cigarette
shooting
indigenous women
overdose
meth
cocaine
separated children
separating children
separating families
Muslim travel ban
flu-season
flu season
Soleimani
Muslim Ban
USMCA trade deal

View File

@ -0,0 +1,23 @@
meth
gun violence
flu season
vaping
chd
addiction
indigenous women
separating children
tobacco
e-cigarette
muslim ban
soleimani
cocaine
separating families
muslim travel ban
usmca trade deal
shooting
overdose
separated children
coronary heart disease
gun-violence
opioid
flu-season

View File

@ -1,41 +1,60 @@
Coronavirus
Koronavirus
Corona
CDC
Wuhancoronavirus
Wuhanlockdown
Ncov
Wuhan
N95
Kungflu
Epidemic
plandemic
scamdemic
wuhan flu
wuhanflu
corona
coronavirusoutbreak
pandemic
epidemic
vax
antivax
antivaxxers
wearamask
masksoff
cdc
ncov
sars-cov-2
socialdistancing
wear a mask
lockdown
covd
coronavirus
koronavirus
corona
cdc
wuhancoronavirus
wuhanlockdown
ncov
wuhan
n95
kungflu
epidemic
outbreak
Sinophobia
China
sinophobia
covid-19
corona virus
covid
covid19
sars-cov-2
COVIDー19
COVD
covidー19
covd
pandemic
coronapocalypse
canceleverything
Coronials
SocialDistancingNow
Social Distancing
SocialDistancing
coronials
socialdistancingnow
social distancing
socialdistancing
panicbuy
panic buy
panicbuying
panic buying
14DayQuarantine
DuringMy14DayQuarantine
14dayquarantine
duringmy14dayquarantine
panic shop
panic shopping
panicshop
InMyQuarantineSurvivalKit
inmyquarantinesurvivalkit
panic-buy
panic-shop
coronakindness
@ -45,27 +64,27 @@ chinesevirus
stayhomechallenge
stay home challenge
sflockdown
DontBeASpreader
dontbeaspreader
lockdown
lock down
shelteringinplace
sheltering in place
staysafestayhome
staysafestayhome
stay safe stay home
trumppandemic
trump pandemic
flattenthecurve
flattenthecurve
flatten the curve
china virus
chinavirus
quarentinelife
PPEshortage
ppeshortage
saferathome
stayathome
stay at home
stay home
stayhome
GetMePPE
getmeppe
covidiot
epitwitter
pandemie
@ -73,43 +92,72 @@ wear a mask
wearamask
kung flu
covididiot
COVID__19
covid__19
omicron
variant
variant
vaccine
travel ban
corona
corona
coronavirus
coronavirus
covid
covid
covid19
covid19
covid-19
covid-19
sarscov2
sarscov2
sars cov2
sars cov 2
covid_19
covid_19
ncov
ncov
ncov2019
ncov2019
2019-ncov
2019-ncov
pandemic
pandemic 2019ncov
2019ncov
quarantine
quarantine
flatten the curve
flattening the curve
flatteningthecurve
flattenthecurve
hand sanitizer
handsanitizer
lockdown
lockdown
social distancing
socialdistancing
work from home
workfromhome
working from home
workingfromhome
ppe
n95
n95
covidiots
covidiots
herd immunity
herdimmunity
pneumonia
pneumonia
chinese virus
chinesevirus
wuhan virus
wuhanvirus
kung flu
kungflu
wearamask
wearamask
wear a mask
vaccine
vaccines
vaccine
vaccines
corona vaccine
corona vaccines
@ -136,5 +184,7 @@ wash ur hands
wash your hands
washurhands
washyourhands
stayathome
stayhome
selfisolating
self isolating
self isolating

20
data/IN/own_keywords.txt Normal file
View File

@ -0,0 +1,20 @@
plandemic
scamdemic
wuhan flu
wuhanflu
corona
coronavirusoutbreak
pandemic
epidemic
vax
antivax
antivaxxers
wearamask
masksoff
cdc
ncov
sars-cov-2
socialdistancing
wear a mask
lockdown
covd

View File

@ -0,0 +1,50 @@
1486474031419297799
1504880316506263552
1264663210197745665
1479500294887256069
1320058585590734852
1539003407096336388
1481704942574395392
1572014646374154240
1524764580806811649
1592940763515858944
1554529221594292224
1479488991347023876
1481715928492609541
1476722414100914179
1478478958740086790
1459285859358982148
1475620600228028432
1479459200229117955
1448386057339297797
1468993886316077063
1448369102318362625
1444354461799956482
1431340411193331715
1583474056011010048
1450479481278406658
1396992539010469894
1396992534623174658
1417920232333656076
1439553348122861568
1598398871990079489
1502768541979881479
1337604370981134336
1417797808707473410
1601693432292192256
1598145048989704192
1599906362380591110
1325851780496961538
1468908159330885632
1468332389923311616
1339703372505624577
1468633243654451200
1488290848907444240
1491146722625880064
1481766558313730053
1503078235373985795
1485398845718773762
1371501907483754497
1494398809245376513
1436328255959801865
1482862501461209089

View File

@ -0,0 +1,50 @@
1258402212327436288
1489758168750174209
1303698927766646785
1257681474670809090
1340109389672411136
1303698924444803072
1303698926902665218
1337595387796983809
1344441446515019777
1385680800218324992
1590129838261956608
1303698928609697796
1348715183502454793
1340418291274289153
1421228572732280835
1456349962942533637
1603457599877308416
1278354646885687296
1340418294579421188
1365866032792039425
1472722005657112578
1381021635772350464
1337598897217220609
1354797645261398016
1266806429282963456
1429847265242460161
1234272677633953792
1301581247932772352
1424832183148204043
1339255967809212416
1284831896988454912
1463528081214394377
1453679912938885122
1583474059148337152
1519791965113622528
1470775155110682628
1464615554103357450
1337595385565638657
1436055743418019840
1572208051830104069
1433765113891328002
1482774656075534336
1310288545886736384
1353845938566156289
1396992537202659329
1455712525362810883
1340384267327647747
1338588364459618305
1376696928692412419
1340386565399429123

View File

@ -1,112 +1,111 @@
name,id,state,state_short,party,class,ideology,start_serving,end_serving,time_in_office,not_in_office,last_congress,vote_share,next_closest_share,election_year,twitter_url,twitter_handle,alt_account,alt_handle,date_of_birth,female, ethnicity,edu_level,edu_information,occup_level,website_url,bioguide_link,Comments_1,Comments_2
"Alexander, Andrew L., Jr.",1,Tennessee,TN,0,2,0.681815808318192,01/07/2003,01/03/2021,18.0027397260274,1,116,61.9,31.8,2014,https://twitter.com/SenAlexander,SenAlexander,https://twitter.com/LamarAlexander ,LamarAlexander ,07/03/1940,0,White,8,J.D.; New York Univeristy; 1965,2,N/A,https://bioguide.congress.gov/search/bio/A000360,,
"Enzi, Mike",2,Wyoming,WY,0,2,0.719285383539398,01/03/1997,01/03/2021,24,1,116,72.3,17.6,2014,https://twitter.com/senatorenzi?lang=zh-Hant ,SenatorEnzi,N/A,N/A,02/01/1944,0,White,7,M.B.A.; Retail Marketing; Denver University; 1968,4,N/A,https://bioguide.congress.gov/search/bio/E000285,,
name,id,state,state_short,party,class,ideology,start_serving,end_serving,time_in_office,not_in_office,last_congress,vote_share,next_closest_share,election_year,twitter_url,twitter_handle,alt_account,alt_handle,date_of_birth,female,ethnicity,edu_level,edu_information,occup_level,website_url,bioguide_link,Comments_1,Comments_2
"Alexander, Andrew L., Jr.",1,Tennessee,TN,0,2,0.681815808318192,01/07/2003,01/03/2021,18.0027397260274,1,116,61.9,31.8,2014,https://twitter.com/SenAlexander,SenAlexander,https://twitter.com/LamarAlexander,LamarAlexander,07/03/1940,0,White,8,J.D.; New York Univeristy; 1965,2,N/A,https://bioguide.congress.gov/search/bio/A000360,,
"Enzi, Mike",2,Wyoming,WY,0,2,0.719285383539398,01/03/1997,01/03/2021,24,1,116,72.3,17.6,2014,https://twitter.com/senatorenzi,senatorenzi,N/A,N/A,02/01/1944,0,White,7,M.B.A.; Retail Marketing; Denver University; 1968,4,N/A,https://bioguide.congress.gov/search/bio/E000285,,
"Gardner, Cory",3,Colorado,CO,0,2,0.719285383539398,01/06/2015,01/03/2021,5.9972602739726,1,116,48.5,46,2014,https://twitter.com/CoryGardner,CoryGardner,https://twitter.com/corygardner,corygardner,08/22/1974,0,White,8,"J.D.; University of Colorado, Boulder; 2001",2,N/A,https://bioguide.congress.gov/search/bio/G000562,,
"Harris, Kamala",4,California ,CA,1,3,0.0213759569468058,01/03/2017,01/18/2021,4.04383561643836,1,116,62.4,37.6,2016,https://twitter.com/VP,VP,https://twitter.com/KamalaHarris,KamalaHarris,10/20/1964,1,African-American; Asian-American,8,J.D.; University of California; 1989,2,N/A,https://bioguide.congress.gov/search/bio/H001075,(became VP on jan 20 2021),
"Isakson, John",5,Georgia,GA,0,3,*,01/03/2005,12/31/2019,14,1,116,55,40.8,2016,https://twitter.com/SenatorIsakson ,SenatorIsakson,N/A,N/A,12/28/1944,0,White,6,"University of Georgia, Athens; 1966",1,N/A,https://bioguide.congress.gov/search/bio/I000055,(died in 2019),
"Jones, Gordon Douglas",6,Alabama,AL,1,2,0.632885678298333,01/03/2018,01/03/2021,3.0027397260274,1,116,49.9,48.4,2017,https://twitter.com/DougJones,DougJones,N/A,N/A,05/04/1954,0,White,8,"J.D.; Samford University, Cumberland School of Law; 1979",2,N/A,https://bioguide.congress.gov/search/bio/J000300/,special election to replace Jeff Sessions,
"Loeffler, Kelly",7,Georgia,GA,0,2,0.904293903291947,01/06/2020,01/20/2021,1.04109589041096,1,116,N/A,N/A,*,https://twitter.com/KLoeffler,KLoeffler,https://twitter.com/senatorloeffler ,senatorloeffler ,11/27/1970,1,White,7,M.B.A.; Internationla Finance and Marketing; DePaul University Chicago; 1999,1,N/A,https://bioguide.congress.gov/search/bio/L000594,Appointed in 2019 after the resignation of Johnny Isakson but lost the 2020 election,
"McSally, Martha",8,Arizona,AZ,0,2,*,01/03/2015,01/03/2019,1,1,116,N/A,N/A,*,https://twitter.com/MarthaMcSallyAZ,MarthaMcSallyAZ,https://twitter.com/marthamcsally,marthamcsally,03/22/1966,1,White,7,M.P.P.; John F. Kennedy School of Government,3,N/A,https://bioguide.congress.gov/search/bio/M001197,(left office Dec 2 2020),appointed in 2018 after death of John McCain but lot 2020 election
"Perdue, David",9,Georgia,GA,0,2,0.914979462126755,01/06/2015,01/03/2021,5.9972602739726,1,116,53,45.1,2014,https://twitter.com/DavidPerdueGA,DavidPerdueGA,https://twitter.com/sendavidperdue,sendavidperdue,12/10/1949,0,White,7,M.S.; Georgia Institute of Technology; 1976,1,N/A,https://bioguide.congress.gov/search/bio/P000612,,
"Roberts, Charles Patrick",10,Kansas,KS,0,2,0.822995787870405,01/07/1997,01/03/2021,24.0054794520548,1,116,53.3,42.5,2014,https://twitter.com/SenPatRoberts,SenPatRoberts,https://twitter.com/PatRoberts,PatRoberts,04/20/1936,0,White,6,"B.A.; Kansas State university, Manhattan; 1958",7,N/A,https://bioguide.congress.gov/search/bio/R000307,,
"Udall, Tom",11,New Mexico,NM,1,2,0.259828450248573,01/06/2009,01/03/2021,12,1,116,55.4,44.6,2014,https://twitter.com/SenatorTomUdall,SenatorTomUdall,https://twitter.com/tomudall,tomudall,05/18/1948,0,White,8,"J.D.; University of New Mexico School of Law, Albuquerque, N.M.; 1977",2,N/A,https://bioguide.congress.gov/search/bio/U000039,,
"Baldwin, Tammy",12,Wisconsin,WI,1,1,0.176999238019796,01/03/2013,12/31/2022,9.9972602739726,0,117,55.4,44.6,2018,https://twitter.com/SenatorBaldwin,SenatorBaldwin,https://twitter.com/tammybaldwin,tammybaldwin,02/11/1962,1,White,8,"J.D.; University of Wisconsin, Madison; 1989",2,https://www.baldwin.senate.gov/,https://bioguide.congress.gov/search/bio/B001230,,
"Barrasso, John",13,Wyoming,WY,0,1,0.817902617377421,06/22/2007,12/31/2022,15.5369863013699,0,117,67.1,30.1,2018,https://twitter.com/SenJohnBarrasso,SenJohnBarrasso,https://twitter.com/barrassoforwyo,barrassoforwyo,07/21/1952,0,White,7,M.D.; Georgetown University School of Medicine; 1978,6,https://www.barrasso.senate.gov/,https://bioguide.congress.gov/search/bio/B001261,,
"Bennet, Michael F.",14,Colorado,CO,1,3,0.248044568735702,01/21/2009,12/31/2022,13.9506849315069,0,117,49.1,45.4,2016,https://twitter.com/SenatorBennet,SenatorBennet,https://twitter.com/michaelbennet,michaelbennet,11/28/1964,0,White,8,J.D.; Yale Law School; 1993,2,https://www.bennet.senate.gov/,https://bioguide.congress.gov/search/bio/B001267,,
"Blackburn, Marsha",15,Tennessee,TN,0,1,0.93228239890635,01/03/2019,12/31/2022,3.99452054794521,0,117,54.7,43.9,2018,https://twitter.com/MarshaBlackburn,MarshaBlackburn,N/A,N/A,06/06/1952,1,White,6,"B.S.; Home Economics; Mississippi State University, Starkville; 1973",1,https://www.blackburn.senate.gov/,https://bioguide.congress.gov/search/bio/B001243,,
"Blumenthal, Richard",16,Connecticut,CT,1,3,0.0310655954121906,01/03/2010,12/31/2022,13,0,117,62.9,34.9,2016,https://twitter.com/SenBlumenthal,SenBlumenthal,N/A,N/A,02/13/1946,0,White,8,J.D.; Yale University; 1973,2,https://www.blumenthal.senate.gov/,https://bioguide.congress.gov/search/bio/B001277,,
"Blunt, Roy",17,Missouri,MO,0,3,0.584409139223541,01/03/2011,12/31/2022,12,1,117,49.4,46.2,2016,https://twitter.com/RoyBlunt,RoyBlunt,N/A,N/A,01/10/1950,0,White,7,"M.A.; Missouri State University ,Springfield; 1972",5,N/A,https://bioguide.congress.gov/search/bio/B000575,,
"Booker, Cory A.",18,New Jersey,NJ,1,2,0.0455802980872292,10/31/2013,12/31/2022,12,0,117,57.2,40.9,2020,https://twitter.com/senbooker,senbooker,https://twitter.com/CoryBooker,CoryBooker,04/27/1969,0,African-American; Asian-American,8,J.D.; Yale Law School; 1997,2,https://www.booker.senate.gov/,https://bioguide.congress.gov/search/bio/B001288,,
"Boozman, John",19,Arkansas,AR,0,3,0.768699282926499,01/05/2011,12/31/2022,11.9945205479452,0,117,59.8,36.2,2016,https://twitter.com/JohnBoozman,JohnBoozman,N/A,N/A,12/10/1950,0,White,6,Southern College of Optometry; 1977,6,https://www.boozman.senate.gov/,https://bioguide.congress.gov/search/bio/B001236,,
"Braun, Michael",20,Indiana,IN,0,1,0.98106874319906,01/03/2019,12/31/2022,3.99452054794521,0,117,50.9,45,2018,https://twitter.com/SenatorBraun,SenatorBraun,N/A,N/A,03/24/1954,0,White,7,M.B.A.; Harvard Business School; 1978,1,https://www.braun.senate.gov/,https://bioguide.congress.gov/search/bio/B001310,,
"Brown, Sherrod",21,Ohio,OH,1,1,0.0923940264109351,01/04/2007,12/31/2022,16,0,117,53.4,46.6,2018,https://twitter.com/SenSherrodBrown,SenSherrodBrown,https://twitter.com/SherrodBrown,SherrodBrown,11/09/1952,0,White,7,M.a.; Education; Ohio State University; 1981,5,https://www.brown.senate.gov/,https://bioguide.congress.gov/search/bio/B000944,,
"Burr, Richard",22,North Carolina,NC,0,3,0.605472891780936,01/03/2001,12/31/2022,22.0054794520548,1,117,51.1,45.3,2016,https://twitter.com/SenatorBurr,SenatorBurr,N/A,N/A,11/30/1955,0,White,6,B.A.; Communications; Wake Forest University; 1978,1,N/A,https://bioguide.congress.gov/search/bio/B001135,,
"Cantwell, Maria",23,Washington,WA,1,1,0.216591445478212,01/03/2001,12/31/2022,22.0054794520548,0,117,58.4,41.6,2018,https://twitter.com/SenatorCantwell,SenatorCantwell,N/A,N/A,10/13/1958,1,White,6,B.A.; Public Administration; Miami University of Ohio; 1980,1,https://www.cantwell.senate.gov/,https://bioguide.congress.gov/search/bio/C000127,,
"Capito, Shelley Moore",24,West Virginia,WV,0,2,0.61478303011512,01/06/2015,12/31/2022,7.98904109589041,0,117,70.3,27,2020,https://twitter.com/SenCapito,SenCapito,N/A,N/A,11/26/1953,1,White,7,M. Ed.; University of Virginia; 1976,5,https://www.capito.senate.gov/,https://bioguide.congress.gov/search/bio/C001047,,
"Cardin, Benjamin L.",25,Maryland,MD,1,1,0.1994990268606,01/04/2007,12/31/2022,16,0,117,64.9,30.3,2018,https://twitter.com/SenatorCardin,SenatorCardin,N/A,N/A,10/05/1943,0,White,8,J.D.; University of Maryland; 1967,2,https://www.cardin.senate.gov/,https://bioguide.congress.gov/search/bio/C000141,,
"Carper, Thomas R.",26,Delaware,DE,1,1,0.309479384969288,01/03/2001,12/31/2022,22.0054794520548,0,117,60,37.8,2018,https://twitter.com/SenatorCarper,SenatorCarper,N/A,N/A,01/23/1947,0,White,7,M.B.A.; University of Delaware; 1975,3,https://www.carper.senate.gov/,https://bioguide.congress.gov/search/bio/C000174,,
"Casey, Robert P., Jr.",27,Pennsylvania,PA,1,1,0.171897216341815,01/04/2007,12/31/2022,16,0,117,55.7,42.6,2018,https://twitter.com/SenBobCasey,SenBobCasey,https://twitter.com/Bob_Casey,Bob_Casey,04/13/1960,0,White,8,J.D.; Catholic University of America; 1988,2,https://www.casey.senate.gov/,https://bioguide.congress.gov/search/bio/C001070,,
"Cassidy, Bill",28,Louisiana,LA,0,2,0.682348710788942,01/06/2015,12/31/2022,7.98904109589041,0,117,59.3,19,2020,https://twitter.com/SenBillCassidy,SenBillCassidy,https://twitter.com/BillCassidy,BillCassidy,09/28/1957,0,White,7,M.D.; Louisiana State University; 1979,6,https://www.cassidy.senate.gov/,https://bioguide.congress.gov/search/bio/C001075,,
"Collins, Susan M.",29,Maine,ME,0,2,0.448622425849401,01/07/1997,12/31/2022,25.9972602739726,0,117,51,42.4,2020,https://twitter.com/SenatorCollins,SenatorCollins,N/A,N/A,12/07/1952,1,White,6,Bachelor in Government; St. Lawrence University; 1975,0,https://www.collins.senate.gov/,https://bioguide.congress.gov/search/bio/C001035,,
"Coons, Christopher A.",30,Delaware,DE,1,2,0.338422715351401,11/15/2010,12/31/2022,12.1342465753425,0,117,59.4,37.9,2020,https://twitter.com/ChrisCoons,ChrisCoons,N/A,N/A,09/09/1963,0,White,8,J.D.; Yale Law School; 1992,2,https://www.coons.senate.gov/,https://bioguide.congress.gov/search/bio/C001088,,
"Cornyn, John",31,Texas,TX,0,2,0.772226738391321,11/30/2002,12/31/2022,20.0986301369863,0,117,53.5,43.9,2020,https://twitter.com/JohnCornyn,JohnCornyn,N/A,N/A,02/02/1952,0,White,8,J.D.; St. Mary<72>s School of Law; 1977,2,https://www.cornyn.senate.gov/,https://bioguide.congress.gov/search/bio/C001056,,
"Cortez Masto, Catherine",32,Nevada,NV,1,3,0.236574567369409,01/03/2017,12/31/2022,5.99452054794521,0,117,47.1,44.7,2016,https://twitter.com/SenCortezMasto,SenCortezMasto,https://twitter.com/CortezMasto,CortezMasto,03/29/1964,1,Hispanic; White,8,J.D.; Gonzaga University School of Law; 1990,2,https://www.cortezmasto.senate.gov/,https://bioguide.congress.gov/search/bio/C001113,,
"Cotton, Tom",33,Arkansas,AR,0,2,0.876390364042756,01/06/2015,12/31/2022,7.98904109589041,0,117,66.5,33.5,2020,https://twitter.com/SenTomCotton,SenTomCotton,https://twitter.com/TomCottonAR,TomCottonAR,05/13/1977,0,White,8,J.D.; Harvard University; 2002,2,https://www.cotton.senate.gov/,https://bioguide.congress.gov/search/bio/C001095,,
"Cramer, Kevin",34,North Dakota,ND,0,1,0.910896298032277,01/03/2019,12/31/2022,3.99452054794521,0,117,55.5,44.5,2018,https://twitter.com/SenKevinCramer,SenKevinCramer,https://twitter.com/kevincramer,kevincramer,01/21/1961,0,White,7,M.A.; Management; University o fMary; 2003,0,https://www.cramer.senate.gov/,https://bioguide.congress.gov/search/bio/C001096,,
"Crapo, Michael",35,Idaho,ID,0,3,0.823331951918519,01/06/1999,12/31/2022,24,0,117,66.1,27.8,2016,https://twitter.com/MikeCrapo,MikeCrapo,N/A,N/A,05/20/1951,0,White,8,J.D.; Harvard University; 1977,2,https://www.crapo.senate.gov/,https://bioguide.congress.gov/search/bio/C000880,,
"Cruz, Ted",36,Texas,TX,0,1,0.944056385174951,01/03/2013,12/31/2022,9.9972602739726,0,117,50.9,48.3,2018,https://twitter.com/SenTedCruz,SenTedCruz,https://twitter.com/tedcruz,tedcruz,12/22/1970,0,Hispanic; White,8,J.D.; Harvard University; 1995,2,https://www.cruz.senate.gov/,https://bioguide.congress.gov/search/bio/C001098,,
"Daines, Steve",37,Montana,MT,0,2,0.859322244752884,01/06/2015,12/31/2022,7.98904109589041,0,117,55,45,2020,https://twitter.com/SteveDaines,SteveDaines,N/A,N/A,08/20/1962,0,White,6,B.S.; Chemical Engineering; Montana State University; 1984,1,https://www.daines.senate.gov/,https://bioguide.congress.gov/search/bio/D000618,,
"Duckworth, Tammy",38,Illinois,IL,1,3,0.0944404184553066,01/03/2017,12/31/2022,5.99452054794521,0,117,54.4,40.2,2016,https://twitter.com/SenDuckworth,SenDuckworth,https://twitter.com/tammyduckworth,tammyduckworth,03/12/1968,1,Asian; White,8,PhD in human services; Capella University School of Public Service Leadership; 2015,3,https://www.duckworth.senate.gov/,https://bioguide.congress.gov/search/bio/D000622,,
"Durbin, Richard J.",39,Illinois,IL,1,2,0.0855733771029607,01/07/1997,12/31/2022,25.9972602739726,0,117,54.9,38.9,2020,https://twitter.com/SenatorDurbin,SenatorDurbin,https://twitter.com/DickDurbin,DickDurbin,11/21/1944,0,White,8,J.D.; Georgetown University; 1969,2,https://www.durbin.senate.gov/,https://bioguide.congress.gov/search/bio/D000563,,
"Ernst, Joni",40,Iowa,IA,0,2,0.826265400967212,01/06/2015,12/31/2022,7.98904109589041,0,117,51.8,45.2,2020,https://twitter.com/SenJoniErnst,SenJoniErnst,https://twitter.com/joniernst,joniernst,07/01/1970,1,White,7,M.P.A.; Columbus State University; 1995,3,https://www.ernst.senate.gov/,https://bioguide.congress.gov/search/bio/E000295,,
"Feinstein, Dianne",41,California,CA,1,1,0.150865658191444,11/10/1992,12/31/2022,30.158904109589,0,117,54.2,45.8,2018,https://twitter.com/SenFeinstein,SenFeinstein,https://twitter.com/DianneFeinstein,DianneFeinstein,06/22/1933,1,White,6,B.A.; History; Stanford University; 1955,0,https://www.feinstein.senate.gov/public/,https://bioguide.congress.gov/search/bio/F000062,,
"Fischer, Debra",42,Nebraska,NE,0,1,0.688576408222131,01/03/2013,12/31/2022,9.9972602739726,0,117,57.7,38.6,2018,https://twitter.com/SenatorFischer,SenatorFischer,N/A,N/A,03/01/1951,1,White,6,B.S.; Education; University of Nebraska; 1988,0,https://www.fischer.senate.gov/,https://bioguide.congress.gov/search/bio/F000463,,
"Gillibrand, Kirsten E.",43,New York,NY,1,1,0.12072202063417,01/27/2009,12/31/2022,13.9342465753425,0,117,67,33,2018,https://twitter.com/SenGillibrand,SenGillibrand,https://twitter.com/gillibrandny,gillibrandny,12/09/1966,1,White,8,J.D.; University of California; 1991,2,https://www.gillibrand.senate.gov/,https://bioguide.congress.gov/search/bio/G000555,,
"Graham, Lindsey",44,South Carolina,SC,0,2,0.619070797359753,01/07/2003,12/31/2022,19.9945205479452,0,117,54.5,44.2,2020,https://twitter.com/LindseyGrahamSC,LindseyGrahamSC,https://twitter.com/grahamblog,grahamblog,07/09/1955,0,White,8,J.D.; University of South Carolina; 1981,2,https://www.lgraham.senate.gov/,https://bioguide.congress.gov/search/bio/G000359 ,,
"Grassley, Chuck",45,Iowa,IA,0,3,0.670073592619545,01/05/1981,12/31/2022,42.013698630137,0,117,60.2,35.7,2016,https://twitter.com/ChuckGrassley,ChuckGrassley,N/A,N/A,09/17/1933,0,White,7,M.A.; Political Science; University of Northern Iowa; 1956,0,https://www.grassley.senate.gov/,https://bioguide.congress.gov/search/bio/G000386,,
"Hagerty, Bill",46,Tennessee,TN,0,2,0.857410027434407,01/03/2021,12/31/2022,1.99178082191781,0,117,62.2,35.2,2020,https://twitter.com/SenatorHagerty,SenatorHagerty,https://twitter.com/billhagertytn,billhagertytn,08/14/1959,0,White,8,J.D.; Vanderbilt Law School; 1984,0,https://www.hagerty.senate.gov/,https://bioguide.congress.gov/search/bio/H000601,,
"Hassan, Margaret Wood",47,New Hampshire,NH,1,3,0.43611907238278,01/03/2017,12/31/2022,5.99452054794521,0,117,48,47.9,2016,https://twitter.com/SenatorHassan,SenatorHassan,https://twitter.com/Maggie_Hassan,Maggie_Hassan,02/27/1958,1,White,8,J.D.; Northeastern University School of law; 1985,11,https://www.hassan.senate.gov/,https://bioguide.congress.gov/search/bio/H001076,,
"Hawley, Josh",48,Missouri,MO,0,1,0.864366195602263,01/03/2019,12/31/2022,3.99452054794521,0,117,51.4,45.6,2018,https://twitter.com/HawleyMO,HawleyMO,N/A,N/A,12/31/1979,0,White,8,J.D.; Yale Law School; 2006,2,https://www.hawley.senate.gov/,https://bioguide.congress.gov/search/bio/H001089,,
"Heinrich, Martin",49,New Mexico,NM,1,1,0.2007037353465,01/03/2013,12/31/2022,9.9972602739726,0,117,54.1,30.5,2018,https://twitter.com/MartinHeinrich,MartinHeinrich,N/A,N/A,10/17/1971,0,White,6,B.S.; Mechanical Engineering; University of Missouri; 1995,12,https://www.heinrich.senate.gov/,https://bioguide.congress.gov/search/bio/H001046,,
"Hickenlooper, John W.",50,Colorado,CO,1,2,0.335030323955882,01/03/2021,12/31/2022,1.99178082191781,0,117,53.5,44.2,2020,https://twitter.com/SenatorHick,SenatorHick,https://twitter.com/hickenlooper,hickenlooper,02/07/1952,0,White,7,M.A.; Geology; Wesleyan University; 1980,0,https://www.hickenlooper.senate.gov/,https://bioguide.congress.gov/search/bio/H000273,,
"Hirono, Mazie K.",51,Hawaii,HI,1,1,0.0715447123166643,01/03/2013,12/31/2022,9.9972602739726,0,117,71.2,28.8,2018,https://twitter.com/maziehirono,maziehirono,https://twitter.com/mazieforhawaii,mazieforhawaii,11/03/1947,1,Asian,8,J.D.; Georgetown University; 1978,0,https://www.hirono.senate.gov/,https://bioguide.congress.gov/search/bio/H001042,,
"Hoeven, John",52,North Dakota,ND,0,3,0.815683863264003,01/05/2011,12/31/2022,11.9945205479452,0,117,78.6,17,2016,https://twitter.com/SenJohnHoeven,SenJohnHoeven,N/A,N/A,03/13/1957,0,White,7,M.B.A.; Northwestern University; 1981,12,https://www.hoeven.senate.gov/,https://bioguide.congress.gov/search/bio/H001061,,
"Hyde-Smith, Cindy",53,Mississippi,MS,0,2,0.868059764299163,04/09/2018,12/31/2022,4.73150684931507,0,117,54.1,44.1,2020,https://twitter.com/SenHydeSmith,SenHydeSmith,https://twitter.com/cindyhydesmith,cindyhydesmith,05/10/1959,1,White,6,"B.A.; Criminal justice, political science; University of Southern Mississippi; 1981",0,https://www.hydesmith.senate.gov/,https://bioguide.congress.gov/search/bio/H001079 ,,
"Inhofe, James",54,Oklahoma,OK,0,2,0.880238318204784,11/17/1994,12/31/2022,28.1397260273973,1,117,62.9,32.8,2020,https://twitter.com/JimInhofe,JimInhofe,N/A,N/A,11/17/1934,0,White,6,B.A.; Economics; University of Tulsa; 1973,0,N/A,https://bioguide.congress.gov/search/bio/I000024 ,,
"Johnson, Ron",55,Wisconsin,WI,0,3,0.743401705863958,01/05/2011,12/31/2022,11.9945205479452,0,117,50.2,46.8,2016,https://twitter.com/SenRonJohnson,SenRonJohnson,https://twitter.com/ronjohnsonwi,ronjohnsonwi,04/08/1955,0,White,6,B.S.; Business and Accounting; University of Minnesota; 1977,4,https://www.ronjohnson.senate.gov/,https://bioguide.congress.gov/search/bio/J000293,,
"Kaine, Tim",56,Virginia,VA,1,1,0.203600708089391,01/03/2013,12/31/2022,9.9972602739726,0,117,57.1,41.1,2018,https://twitter.com/timkaine,timkaine,N/A,N/A,02/26/1958,0,White,8,J.D.; Harvard University; 1983,11,https://www.kaine.senate.gov/,https://bioguide.congress.gov/search/bio/K000384,,
"Kelly, Mark",57,Arizona,AZ,1,3,0.399793347847799,12/02/2020,12/31/2022,2.07945205479452,0,117,51.2,48.8,2020,https://twitter.com/SenMarkKelly,SenMarkKelly,https://twitter.com/CaptMarkKelly,CaptMarkKelly,02/21/1964,0,White,7,M.S.; Aeronautical Engineering; U.S. Naval Postgraduate School,3,https://www.kelly.senate.gov/,https://bioguide.congress.gov/search/bio/K000377,,
"Kennedy, John Neely",58,Louisiana,LA,0,3,0.785684351248518,01/03/2017,12/31/2022,5.99452054794521,0,117,60.7,39.3,2016,https://twitter.com/SenJohnKennedy,SenJohnKennedy,https://twitter.com/JohnKennedyLA,JohnKennedyLA,11/21/1951,0,White,8,J.D.; University of Virginia School of LAw; 1977,11,https://www.kennedy.senate.gov/,https://bioguide.congress.gov/search/bio/K000393,,
"King, Angus S., Jr.",59,Maine,ME,2,1,0.346033257048853,01/03/2013,12/31/2022,9.9972602739726,0,117,54.3,35.2,2018,https://twitter.com/SenAngusKing,SenAngusKing,N/A,N/A,03/31/1944,0,White,8,J.D.; University of Virginia; 1969,2,https://www.king.senate.gov/,https://bioguide.congress.gov/search/bio/K000383 ,,
"Klobuchar, Amy",60,Minnesota,MN,1,1,0.130504324943533,01/04/2007,12/31/2022,16,0,117,60.3,36.2,2018,https://twitter.com/SenAmyKlobuchar,SenAmyKlobuchar,https://twitter.com/amyklobuchar,amyklobuchar,05/25/1960,1,White,8,"J.D.; University of Chicago, 1985",2,https://www.klobuchar.senate.gov/,https://bioguide.congress.gov/search/bio/K000367 ,,
"Lankford, James",61,Oklahoma,OK,0,3,0.89992933687588,01/03/2015,12/31/2022,7.9972602739726,0,117,67.7,24.6,2016,https://twitter.com/SenatorLankford,SenatorLankford,https://twitter.com/jameslankford,jameslankford,03/04/1968,0,White,7,M.Div.; Southwestern Theological Baptist Seminary; 1994,5,https://www.lankford.senate.gov/,https://bioguide.congress.gov/search/bio/L000575,,
"Leahy, Patrick",62,Vermont,VT,1,3,0.144121081911654,01/14/1975,12/31/2022,47.9945205479452,1,117,61.3,33,2016,https://twitter.com/SenatorLeahy,SenatorLeahy,N/A,N/A,03/31/1940,0,White,8,J.D.; Georgetown University; 1964,2,N/A,https://bioguide.congress.gov/search/bio/L000174,,
"Lee, Mike",63,Utah,UT,0,3,0.753748787807473,01/05/2011,12/31/2022,11.9945205479452,0,117,68,27.4,2016,https://twitter.com/SenMikeLee,SenMikeLee,https://twitter.com/BasedMikeLee,BasedMikeLee,06/04/1971,0,White,8,J.D.; Brigham Young university; 1997,2,https://www.lee.senate.gov/,https://bioguide.congress.gov/search/bio/L000577,,
"Luj<EFBFBD>n, Ben Ray",64,New Mexico,NM,1,2,0.174860888138848,01/03/2021,12/31/2022,1.99178082191781,0,117,51.7,45.6,2020,https://twitter.com/SenatorLujan,SenatorLujan,https://twitter.com/benraylujan,benraylujan,06/07/1972,0,Hispanic,6,B.B.A.; New Mexico Highlands University; 2007,0,https://www.lujan.senate.gov/,https://bioguide.congress.gov/search/bio/L000570 ,,
"Lummis, Cynthia M.",65,Wyoming,WY,0,2,0.893292958108508,01/03/2021,12/31/2022,1.99178082191781,0,117,73.1,26.9,2020,https://twitter.com/SenLummis,SenLummis,https://twitter.com/CynthiaMLummis,CynthiaMLummis,09/10/1954,1,White,8,"J.D.; University of Wyoming College of Law, Laramie, Wyo.; 1985",11,https://www.lummis.senate.gov/,https://bioguide.congress.gov/search/bio/L000571 ,,
"Manchin, Joe, III",66,West Virginia,WV,1,1,0.446686774398077,11/15/2010,12/31/2022,12.1342465753425,0,117,49.6,46.3,2018,https://twitter.com/Sen_JoeManchin,Sen_JoeManchin,https://twitter.com/JoeManchinWV,JoeManchinWV,08/24/1947,0,White,6,B.A.; Business Administration; West Virginia University; 1970,12,https://www.manchin.senate.gov/,https://bioguide.congress.gov/search/bio/M001183 ,,
"Markey, Edward J.",67,Massachusetts,MA,1,2,0.0139659683705929,07/16/2013,12/31/2022,9.46575342465753,0,117,66.2,33,2020,https://twitter.com/SenMarkey,SenMarkey,https://twitter.com/edmarkey,edmarkey,07/11/1946,0,White,8,J.D.; Boston College Law School; 1972,11,https://www.markey.senate.gov/,https://bioguide.congress.gov/search/bio/M000133,,
"Marshall, Roger",68,Kansas,KS,0,2,0.882124792228652,01/03/2021,12/31/2022,1.99178082191781,0,117,53.2,41.8,2020,https://twitter.com/SenatorMarshall,SenatorMarshall,https://twitter.com/RogerMarshallMD,RogerMarshallMD,08/09/1960,0,White,7,M.D.; University of Kansas School of Medicine; 1987,6,https://www.marshall.senate.gov/,https://bioguide.congress.gov/search/bio/M001198,,
"McConnell, Mitch",69,Kentucky,KY,0,2,0.599687533584357,01/03/1985,12/31/2022,38.0164383561644,0,117,57.8,38.2,2020,https://twitter.com/LeaderMcConnell,LeaderMcConnell,N/A,N/A,02/20/1942,0,White,8,J.D.; Kentucky Law School; 1967,11,https://www.mcconnell.senate.gov/,https://bioguide.congress.gov/search/bio/M000355,,
"Menendez, Robert",70,New Jersey,NJ,1,1,0.191515157461704,01/18/2006,12/31/2022,16.9616438356164,0,117,54,42.8,2018,https://twitter.com/SenatorMenendez,SenatorMenendez,N/A,N/A,01/01/1954,0,Hispanic,8,J.D.; Rutgers university of Law; 1979,11,https://www.menendez.senate.gov/,https://bioguide.congress.gov/search/bio/M000639,,
"Merkley, Jeff",71,Oregon,OR,1,2,0.0355414098997263,01/06/2009,12/31/2022,13.9917808219178,0,117,56.9,39.3,2020,https://twitter.com/SenJeffMerkley,SenJeffMerkley,https://twitter.com/jeffmerkley,jeffmerkley,10/24/1956,0,White,7,M.P.A.; Princeton University; 1982,0,https://www.merkley.senate.gov/,https://bioguide.congress.gov/search/bio/M001176,,
"Moran, Jerry",72,Kansas,KS,0,3,0.716270292467902,01/05/2011,12/31/2022,11.9945205479452,0,117,62.4,32.1,2016,https://twitter.com/JerryMoran,JerryMoran,N/A,N/A,05/29/1954,0,White,8,J.D.; Kansas University School of Law; 1981,11,https://www.moran.senate.gov/public/,https://bioguide.congress.gov/search/bio/M000934 ,,
"Murkowski, Lisa",73,Alaska,AK,0,3,0.473296745648617,12/20/2002,12/31/2022,20.0438356164384,0,117,44.3,29.5,2016,https://twitter.com/lisamurkowski,lisamurkowski,https://twitter.com/lisaforsenate,lisaforsenate,05/22/1957,1,White,8,J.D.; Willamette College of Law; 1985,2,https://www.murkowski.senate.gov/,https://bioguide.congress.gov/search/bio/M001153,,
"Murphy, Christopher",74,Connecticut,CT,1,1,0.152635018959264,01/03/2013,12/31/2022,9.9972602739726,0,117,59.5,39.4,2018,https://twitter.com/ChrisMurphyCT,ChrisMurphyCT,N/A,N/A,08/03/1973,0,White,8,J.D.; University of Connecticut; 2002,11,https://www.murphy.senate.gov/,https://bioguide.congress.gov/search/bio/M001169,,
"Murray, Patty",75,Washington,WA,1,3,0.142703588817088,01/05/1993,12/31/2022,30.0054794520548,0,117,59.1,40.9,2016,https://twitter.com/PattyMurray,PattyMurray,https://twitter.com/murraycampaign,murraycampaign,10/11/1950,1,White,6,B.A.; Physical Education; Washington State University; 1972,5,https://www.murray.senate.gov/,https://bioguide.congress.gov/search/bio/M001111,,
"Ossoff, Jon",76,Georgia,GA,1,2,0.303405364928085,01/20/2021,12/31/2022,1.94520547945205,0,117,50.6,49.4,2020,https://twitter.com/SenOssoff,SenOssoff,https://twitter.com/ossoff,ossoff,02/16/1987,0,White,7,M.S.; International Politicla Economy; London School of Economics; 2013,7,https://www.ossoff.senate.gov/,https://bioguide.congress.gov/search/bio/O000174,,
"Padilla, Alex",77,California,CA,1,3,0.0200324383981554,01/20/2021,12/31/2022,1.94520547945205,0,117,N/A,N/A,*,https://twitter.com/SenAlexPadilla,SenAlexPadilla,https://twitter.com/AlexPadilla4CA,AlexPadilla4CA,03/22/1973,0,Hispanic,6,B.S.; Mechanical Engineering; MIT; 1994,9,https://www.padilla.senate.gov/,https://bioguide.congress.gov/search/bio/P000145,appointed in 2020 to replace Kamala Harris ,
"Paul, Rand",78,Kentucky,KY,0,3,0.684883322748808,01/05/2011,12/31/2022,11.9945205479452,0,117,57.3,42.7,2016,https://twitter.com/senrandpaul,senrandpaul,https://twitter.com/RandPaul,RandPaul,01/07/1963,0,White,7,M.D.; Duke University; 1988,6,https://www.paul.senate.gov/,https://bioguide.congress.gov/search/bio/P000603,,
"Peters, Gary C.",79,Michigan,MI,1,2,0.355796587683312,01/06/2015,12/31/2022,7.98904109589041,0,117,49.9,48.2,2020,https://twitter.com/SenGaryPeters,SenGaryPeters,https://twitter.com/garypeters,garypeters,12/01/1958,0,White,8,J.D.; Wayne State University; 1989,2,https://www.peters.senate.gov/,https://bioguide.congress.gov/search/bio/P000595,,
"Portman, Robert",80,Ohio,OH,0,3,0.548120690430407,01/05/2011,12/31/2022,11.9945205479452,1,117,58.3,36.9,2016,https://twitter.com/senrobportman,senrobportman,N/A,N/A,12/19/1955,0,White,8,J.D.; University of Michigan; 1985,2,N/A,https://bioguide.congress.gov/search/bio/P000449,,
"Reed, John F.",81,Rhode Island,RI,1,2,0.145861826443275,01/07/1997,12/31/2022,25.9972602739726,0,117,66.6,33.4,2020,https://twitter.com/SenJackReed,SenJackReed,N/A,N/A,11/12/1949,0,White,8,J.D.; Harvard University; 1982,2,https://www.reed.senate.gov/,https://bioguide.congress.gov/search/bio/R000122,,
"Risch, James E.",82,Idaho,ID,0,2,0.82910906209038,01/06/2009,12/31/2022,13.9917808219178,0,117,62.6,33.2,2020,https://twitter.com/SenatorRisch,SenatorRisch,N/A,N/A,05/03/1943,0,White,8,J.D.; University of Idaho; 1968,2,https://www.risch.senate.gov/,https://bioguide.congress.gov/search/bio/R000584,,
"Romney, Mitt",83,Utah,UT,0,1,0.596688837978771,01/03/2019,12/31/2022,3.99452054794521,0,117,62.6,30.9,2018,https://twitter.com/SenatorRomney,SenatorRomney,https://twitter.com/mittromney,mittromney,03/12/1947,0,White,7,M.B.A.; Harvard Business School; 1975,1,https://www.romney.senate.gov/,https://bioguide.congress.gov/search/bio/R000615,,
"Rosen, Jacky",84,Nevada,NV,1,1,0.308548351377894,01/03/2019,12/31/2022,3.99452054794521,0,117,50.4,45.4,2018,https://twitter.com/SenJackyRosen,SenJackyRosen,https://twitter.com/RosenforNevada,RosenforNevada,08/02/1957,1,White,6,B.A.; Psychology; University of Minnesota; 1979,1,https://www.rosen.senate.gov/,https://bioguide.congress.gov/search/bio/R000608,,
"Rounds, Mike",85,South Dakota,SD,0,2,0.784008560585577,01/06/2015,12/31/2022,7.98904109589041,0,117,65.7,34.3,2020,https://twitter.com/SenatorRounds,SenatorRounds,N/A,N/A,10/24/1954,0,White,6,B.S.; Political Science; South Dakota State University; 1977,1,https://www.rounds.senate.gov/,https://bioguide.congress.gov/search/bio/R000605,,
"Rubio, Marco",86,Florida,FL,0,3,0.831181764071725,01/05/2011,12/31/2022,11.9945205479452,0,117,52,44.3,2016,https://twitter.com/senmarcorubio,senmarcorubio,https://twitter.com/marcorubio,marcorubio,05/28/1971,0,Hispanic,8,J.D.; University of Miami; 1996,2,https://www.rubio.senate.gov/,https://bioguide.congress.gov/search/bio/R000595,,
"Sanders, Bernard",87,Vermont,VT,2,1,0,01/04/2007,12/31/2022,16,0,117,67.4,27.5,2018,https://twitter.com/SenSanders,SenSanders,https://twitter.com/BernieSanders,BernieSanders,09/08/1941,0,White,6,B.A.; Political Science; University of Chicago; 1964,0,https://www.sanders.senate.gov/,https://bioguide.congress.gov/search/bio/S000033,,
"Sasse, Benjamin",88,Nebraska,NE,0,2,0.684229649213868,01/06/2015,12/31/2022,7.98904109589041,1,117,62.7,24.4,2020,https://twitter.com/sensasse,sensasse,https://twitter.com/BenSasse,BenSasse,02/22/1972,0,White,8,PhD in History; Yale University; 2004,5,N/A,https://bioguide.congress.gov/search/bio/S001197,,
"Schatz, Brian",89,Hawaii ,HI,1,3,0.213250458593456,12/27/2012,12/31/2022,10.0164383561644,0,117,73.6,22.2,2016,https://twitter.com/brianschatz,brianschatz,https://twitter.com/SenBrianSchatz,SenBrianSchatz,10/20/1972,0,White,6,B.A.; Philosophy; Pomona College; 1994,5,https://www.schatz.senate.gov/,https://bioguide.congress.gov/search/bio/S001194,,
"Schumer, Charles E.",90,New York,NY,1,3,0.239789022209428,01/06/1999,12/31/2022,24,0,117,70.4,27.4,2016,https://twitter.com/SenSchumer,SenSchumer,https://twitter.com/chuckschumer,chuckschumer,11/23/1950,0,White,8,J.D.; Harvard University; 1974,2,https://www.schumer.senate.gov/,https://bioguide.congress.gov/search/bio/S000148 ,,
"Scott, Rick",91,Florida,FL,0,1,1,01/08/2019,12/31/2022,3.98082191780822,0,117,50.1,49.9,2018,https://twitter.com/SenRickScott,SenRickScott,https://twitter.com/scottforflorida,scottforflorida,12/01/1952,0,White,8,J.D.; Southern Methodist University; 1978,2,https://www.rickscott.senate.gov/,https://bioguide.congress.gov/search/bio/S001217,,
"Scott, Tim",92,South Carolina,SC,0,3,0.781356077518849,01/03/2013,12/31/2022,9.9972602739726,0,117,60.6,37,2016,https://twitter.com/SenatorTimScott,SenatorTimScott,https://twitter.com/votetimscott,votetimscott,09/19/1965,0,African-American,6,B.S.; Political Science; Charleston Southern University; 1988 ,1,https://www.scott.senate.gov/,https://bioguide.congress.gov/search/bio/S001184,,
"Shaheen, Jeanne",93,New Hampshire,NH,1,2,0.2925665319541,01/06/2009,12/31/2022,13.9917808219178,0,117,56.6,41,2020,https://twitter.com/SenatorShaheen,SenatorShaheen,https://twitter.com/JeanneShaheen,JeanneShaheen,01/28/1947,1,White,7,M.S.S.; University of Mississippi; 1973,5,https://www.shaheen.senate.gov/,https://bioguide.congress.gov/search/bio/S001181,,
"Shelby, Richard",94,Alabama,AL,0,3,0.577739000839365,01/06/1987,12/31/2022,36.0082191780822,1,117,64.2,35.8,2016,https://twitter.com/SenShelby,SenShelby,N/A,N/A,05/06/1934,0,White,6,LL.B.; University of Alabama; 1963,2,N/A,https://bioguide.congress.gov/search/bio/S000320,,
"Sinema, Kyrsten",95,Arizona,AZ,2,1,0.500967034663567,01/03/2019,12/31/2022,3.99452054794521,0,117,50,47.6,2018,https://twitter.com/SenatorSinema,SenatorSinema,https://twitter.com/kyrstensinema,kyrstensinema,07/12/1976,1,White,8,PhD in Justice Studies; Arizona State University; 2012,2,https://www.sinema.senate.gov/,https://bioguide.congress.gov/search/bio/S001191,,
"Smith, Tina",96,Minnesota,MN,1,2,0.0756533259297989,01/03/2018,12/31/2022,4.99452054794521,0,117,48.8,43.5,2020,https://twitter.com/SenTinaSmith,SenTinaSmith,https://twitter.com/TinaSmithMN,TinaSmithMN,03/04/1958,1,White,7,M.B.A. Dartmouth College; 1984,1,https://www.smith.senate.gov/,https://bioguide.congress.gov/search/bio/S001203,,
"Stabenow, Debbie",97,Michigan,MI,1,1,0.221949395648287,01/03/2001,12/31/2022,22.0054794520548,0,117,52.3,45.8,2018,https://twitter.com/SenStabenow,SenStabenow,https://twitter.com/stabenow,stabenow,04/29/1950,1,White,7,M.S.W.; Michigan State University; 1975,5,https://www.stabenow.senate.gov/,https://bioguide.congress.gov/search/bio/S000770,,
"Sullivan, Dan",98,Alaska,AK,0,2,0.652100683642255,01/06/2015,12/31/2022,7.98904109589041,0,117,53.9,41.2,2020,https://twitter.com/SenDanSullivan,SenDanSullivan,N/A,N/A,11/13/1964,0,White,8,J.D.; Georgetown University; 1993,2,https://www.sullivan.senate.gov/,https://bioguide.congress.gov/search/bio/S001198,,
"Tester, Jon",99,Montana,MT,1,1,0.377646486433112,01/04/2007,12/31/2022,16,0,117,50.3,46.8,2018,https://twitter.com/SenatorTester,SenatorTester,https://twitter.com/jontester,jontester,08/21/1956,0,White,6,B.A.; Music; University of Providence; 1978,10,https://www.tester.senate.gov/,https://bioguide.congress.gov/search/bio/T000464 ,,
"Thune, John",100,South Dakota,SD,0,3,0.795060855902239,01/04/2005,12/31/2022,18,0,117,71.8,28.2,2016,https://twitter.com/SenJohnThune,SenJohnThune,https://twitter.com/johnthune,johnthune,01/07/1961,0,White,7,M.B.A.; University of South Dakota; 1984,1,https://www.thune.senate.gov/,https://bioguide.congress.gov/search/bio/T000250 ,,
"Tillis, Thom",101,North Carolina,NC,0,2,0.819146177750934,01/06/2015,12/31/2022,7.98904109589041,0,117,48.7,46.9,2020,https://twitter.com/SenThomTillis,SenThomTillis,https://twitter.com/ThomTillis,ThomTillis,08/30/1960,0,White,6,B.S.; Technology Management; University of Maryland; 1996,1,https://www.tillis.senate.gov/,https://bioguide.congress.gov/search/bio/T000476 ,,
"Toomey, Patrick",102,Pennsylvania,PA,0,3,0.607637714921737,01/05/2011,12/31/2022,11.9945205479452,1,117,48.9,47.2,2016,https://twitter.com/SenToomey,SenToomey,https://twitter.com/pattoomey,pattoomey,11/17/1961,0,White,6,A.B.; Government; Harvard College; 1984,1,N/A,https://bioguide.congress.gov/search/bio/T000461 ,,
"Tuberville, Tommy",103,Alabama,AL,0,2,0.808701355452043,01/03/2021,12/31/2022,1.99178082191781,0,117,60.1,39.7,2020,https://twitter.com/SenTuberville,SenTuberville,https://twitter.com/TTuberville,TTuberville,09/18/1954,0,White,6,"B.S., physical education, Southern Arkansas University, 1976",5,https://www.tuberville.senate.gov/,https://bioguide.congress.gov/search/bio/T000278 ,,
"Van Hollen, Chris",104,Maryland,MD,1,3,0.117646768842011,01/03/2017,12/31/2022,5.99452054794521,0,117,60.4,36.4,2016,https://twitter.com/ChrisVanHollen,ChrisVanHollen,N/A,N/A,01/10/1959,0,White,8,J.D.; Georgetown university; 1990,2,https://www.vanhollen.senate.gov/,https://bioguide.congress.gov/search/bio/V000128,,
"Warner, Mark R.",105,Virginia,VA,1,2,0.33022168507113,01/06/2009,12/31/2022,13.9917808219178,0,117,56,44,2020,https://twitter.com/SenatorWarner,SenatorWarner,https://twitter.com/MarkWarner,MarkWarner,12/15/1954,0,White,8,J.D.; Harvard Law School; 1980,1,https://www.warner.senate.gov/,https://bioguide.congress.gov/search/bio/W000805 ,,
"Warnock, Raphael G.",106,Georgia,GA,1,3,0.464158242867696,01/20/2021,12/31/2022,1.94520547945205,0,117,51,49,2020,https://twitter.com/SenatorWarnock,SenatorWarnock,https://twitter.com/ReverendWarnock,ReverendWarnock,07/23/1969,0,African-American,8,PhD in Philosophy; Union Theological Seminary; ,8,https://www.warnock.senate.gov/,https://bioguide.congress.gov/search/bio/W000790,,
"Warren, Elizabeth",107,Massachusetts,MA,1,1,0.0583875007437665,01/03/2013,12/31/2022,9.9972602739726,0,117,60.4,36.2,2018,https://twitter.com/SenWarren,SenWarren,https://twitter.com/ewarren,ewarren,06/22/1949,1,White,8,J.D.; Rutgers University; 1976,2,https://www.warren.senate.gov/,https://bioguide.congress.gov/search/bio/W000817 ,,
"Whitehouse, Sheldon",108,Rhode Island,RI,1,1,0.124737669119195,01/04/2007,12/31/2022,16,0,117,61.6,38.4,2018,https://twitter.com/SenWhitehouse,SenWhitehouse,N/A,N/A,10/20/1955,0,White,8,J.D.; University of Virginia; 1982,2,https://www.whitehouse.senate.gov/,https://bioguide.congress.gov/search/bio/W000802,,
"Wicker, Roger F.",109,Mississippi,MS,0,1,0.763788502839721,12/31/2007,12/31/2022,15.0109589041096,0,117,58.5,39.5,2018,https://twitter.com/SenatorWicker,SenatorWicker,https://twitter.com/RogerWicker,RogerWicker,07/05/1951,0,White,8,J.D.; University of Mississippi; 1975,2,https://www.wicker.senate.gov/,https://bioguide.congress.gov/search/bio/W000437,,
"Wyden, Ron",110,Oregon,OR,1,3,0.0591413132623803,02/05/1996,12/31/2022,26.9205479452055,0,117,56.7,33.6,2016,https://twitter.com/RonWyden,RonWyden,N/A,N/A,05/03/1949,0,White,8,J.D.; University of Oregon; 1974,2,https://www.wyden.senate.gov/,https://bioguide.congress.gov/search/bio/W000779,,
"Young, Todd",111,Indiana,IN,0,3,0.677696674158218,01/05/2011,12/31/2022,11.9945205479452,1,117,52.1,42.4,2016,https://twitter.com/SenToddYoung,SenToddYoung,https://twitter.com/ToddYoungIN,ToddYoungIN,08/24/1972,0,White,8,J.D.; Robert H. McKinney; 2006,2,https://www.young.senate.gov/,https://bioguide.congress.gov/search/bio/Y000064,,
"Jones, Gordon Douglas",5,Alabama,AL,1,2,0.632885678298333,01/03/2018,01/03/2021,3.0027397260274,1,116,49.9,48.4,2017,https://twitter.com/DougJones,DougJones,N/A,N/A,05/04/1954,0,White,8,"J.D.; Samford University, Cumberland School of Law; 1979",2,N/A,https://bioguide.congress.gov/search/bio/J000300/,special election to replace Jeff Sessions,
"Loeffler, Kelly",6,Georgia,GA,0,2,0.904293903291947,01/06/2020,01/20/2021,1.04109589041096,1,116,N/A,N/A,*,https://twitter.com/KLoeffler,KLoeffler,https://twitter.com/senatorloeffler,senatorloeffler,11/27/1970,1,White,7,M.B.A.; Internationla Finance and Marketing; DePaul University Chicago; 1999,1,N/A,https://bioguide.congress.gov/search/bio/L000594,Appointed in 2019 after the resignation of Johnny Isakson but lost the 2020 election,
"McSally, Martha",7,Arizona,AZ,0,2,*,01/03/2015,01/03/2019,1,1,116,N/A,N/A,*,https://twitter.com/MarthaMcSallyAZ,MarthaMcSallyAZ,https://twitter.com/marthamcsally,marthamcsally,03/22/1966,1,White,7,M.P.P.; John F. Kennedy School of Government,3,N/A,https://bioguide.congress.gov/search/bio/M001197,(left office Dec 2 2020),appointed in 2018 after death of John McCain but lot 2020 election
"Perdue, David",8,Georgia,GA,0,2,0.914979462126755,01/06/2015,01/03/2021,5.9972602739726,1,116,53,45.1,2014,https://twitter.com/DavidPerdueGA,DavidPerdueGA,https://twitter.com/sendavidperdue,sendavidperdue,12/10/1949,0,White,7,M.S.; Georgia Institute of Technology; 1976,1,N/A,https://bioguide.congress.gov/search/bio/P000612,,
"Roberts, Charles Patrick",9,Kansas,KS,0,2,0.822995787870405,01/07/1997,01/03/2021,24.0054794520548,1,116,53.3,42.5,2014,https://twitter.com/SenPatRoberts,SenPatRoberts,https://twitter.com/PatRoberts,PatRoberts,04/20/1936,0,White,6,"B.A.; Kansas State university, Manhattan; 1958",7,N/A,https://bioguide.congress.gov/search/bio/R000307,,
"Udall, Tom",10,New Mexico,NM,1,2,0.259828450248573,01/06/2009,01/03/2021,12,1,116,55.4,44.6,2014,https://twitter.com/SenatorTomUdall,SenatorTomUdall,https://twitter.com/tomudall,tomudall,05/18/1948,0,White,8,"J.D.; University of New Mexico School of Law, Albuquerque, N.M.; 1977",2,N/A,https://bioguide.congress.gov/search/bio/U000039,,
"Baldwin, Tammy",11,Wisconsin,WI,1,1,0.176999238019796,01/03/2013,12/31/2022,9.9972602739726,0,117,55.4,44.6,2018,https://twitter.com/SenatorBaldwin,SenatorBaldwin,https://twitter.com/tammybaldwin,tammybaldwin,02/11/1962,1,White,8,"J.D.; University of Wisconsin, Madison; 1989",2,https://www.baldwin.senate.gov/,https://bioguide.congress.gov/search/bio/B001230,,
"Barrasso, John",12,Wyoming,WY,0,1,0.817902617377421,06/22/2007,12/31/2022,15.5369863013699,0,117,67.1,30.1,2018,https://twitter.com/SenJohnBarrasso,SenJohnBarrasso,https://twitter.com/barrassoforwyo,barrassoforwyo,07/21/1952,0,White,7,M.D.; Georgetown University School of Medicine; 1978,6,https://www.barrasso.senate.gov/,https://bioguide.congress.gov/search/bio/B001261,,
"Bennet, Michael F.",13,Colorado,CO,1,3,0.248044568735702,01/21/2009,12/31/2022,13.9506849315069,0,117,49.1,45.4,2016,https://twitter.com/SenatorBennet,SenatorBennet,https://twitter.com/michaelbennet,michaelbennet,11/28/1964,0,White,8,J.D.; Yale Law School; 1993,2,https://www.bennet.senate.gov/,https://bioguide.congress.gov/search/bio/B001267,,
"Blackburn, Marsha",14,Tennessee,TN,0,1,0.93228239890635,01/03/2019,12/31/2022,3.99452054794521,0,117,54.7,43.9,2018,https://twitter.com/MarshaBlackburn,MarshaBlackburn,N/A,N/A,06/06/1952,1,White,6,"B.S.; Home Economics; Mississippi State University, Starkville; 1973",1,https://www.blackburn.senate.gov/,https://bioguide.congress.gov/search/bio/B001243,,
"Blumenthal, Richard",15,Connecticut,CT,1,3,0.0310655954121906,01/03/2010,12/31/2022,13,0,117,62.9,34.9,2016,https://twitter.com/SenBlumenthal,SenBlumenthal,N/A,N/A,02/13/1946,0,White,8,J.D.; Yale University; 1973,2,https://www.blumenthal.senate.gov/,https://bioguide.congress.gov/search/bio/B001277,,
"Blunt, Roy",16,Missouri,MO,0,3,0.584409139223541,01/03/2011,12/31/2022,12,1,117,49.4,46.2,2016,https://twitter.com/RoyBlunt,RoyBlunt,N/A,N/A,01/10/1950,0,White,7,"M.A.; Missouri State University ,Springfield; 1972",5,N/A,https://bioguide.congress.gov/search/bio/B000575,,
"Booker, Cory A.",17,New Jersey,NJ,1,2,0.0455802980872292,10/31/2013,12/31/2022,12,0,117,57.2,40.9,2020,https://twitter.com/senbooker,senbooker,https://twitter.com/CoryBooker,CoryBooker,04/27/1969,0,African-American; Asian-American,8,J.D.; Yale Law School; 1997,2,https://www.booker.senate.gov/,https://bioguide.congress.gov/search/bio/B001288,,
"Boozman, John",18,Arkansas,AR,0,3,0.768699282926499,01/05/2011,12/31/2022,11.9945205479452,0,117,59.8,36.2,2016,https://twitter.com/JohnBoozman,JohnBoozman,N/A,N/A,12/10/1950,0,White,6,Southern College of Optometry; 1977,6,https://www.boozman.senate.gov/,https://bioguide.congress.gov/search/bio/B001236,,
"Braun, Michael",19,Indiana,IN,0,1,0.98106874319906,01/03/2019,12/31/2022,3.99452054794521,0,117,50.9,45,2018,https://twitter.com/SenatorBraun,SenatorBraun,N/A,N/A,03/24/1954,0,White,7,M.B.A.; Harvard Business School; 1978,1,https://www.braun.senate.gov/,https://bioguide.congress.gov/search/bio/B001310,,
"Brown, Sherrod",20,Ohio,OH,1,1,0.0923940264109351,01/04/2007,12/31/2022,16,0,117,53.4,46.6,2018,https://twitter.com/SenSherrodBrown,SenSherrodBrown,https://twitter.com/SherrodBrown,SherrodBrown,11/09/1952,0,White,7,M.a.; Education; Ohio State University; 1981,5,https://www.brown.senate.gov/,https://bioguide.congress.gov/search/bio/B000944,,
"Burr, Richard",21,North Carolina,NC,0,3,0.605472891780936,01/03/2001,12/31/2022,22.0054794520548,1,117,51.1,45.3,2016,https://twitter.com/SenatorBurr,SenatorBurr,N/A,N/A,11/30/1955,0,White,6,B.A.; Communications; Wake Forest University; 1978,1,N/A,https://bioguide.congress.gov/search/bio/B001135,,
"Cantwell, Maria",22,Washington,WA,1,1,0.216591445478212,01/03/2001,12/31/2022,22.0054794520548,0,117,58.4,41.6,2018,https://twitter.com/SenatorCantwell,SenatorCantwell,N/A,N/A,10/13/1958,1,White,6,B.A.; Public Administration; Miami University of Ohio; 1980,1,https://www.cantwell.senate.gov/,https://bioguide.congress.gov/search/bio/C000127,,
"Capito, Shelley Moore",23,West Virginia,WV,0,2,0.61478303011512,01/06/2015,12/31/2022,7.98904109589041,0,117,70.3,27,2020,https://twitter.com/SenCapito,SenCapito,N/A,N/A,11/26/1953,1,White,7,M. Ed.; University of Virginia; 1976,5,https://www.capito.senate.gov/,https://bioguide.congress.gov/search/bio/C001047,,
"Cardin, Benjamin L.",24,Maryland,MD,1,1,0.1994990268606,01/04/2007,12/31/2022,16,0,117,64.9,30.3,2018,https://twitter.com/SenatorCardin,SenatorCardin,N/A,N/A,10/05/1943,0,White,8,J.D.; University of Maryland; 1967,2,https://www.cardin.senate.gov/,https://bioguide.congress.gov/search/bio/C000141,,
"Carper, Thomas R.",25,Delaware,DE,1,1,0.309479384969288,01/03/2001,12/31/2022,22.0054794520548,0,117,60,37.8,2018,https://twitter.com/SenatorCarper,SenatorCarper,N/A,N/A,01/23/1947,0,White,7,M.B.A.; University of Delaware; 1975,3,https://www.carper.senate.gov/,https://bioguide.congress.gov/search/bio/C000174,,
"Casey, Robert P., Jr.",26,Pennsylvania,PA,1,1,0.171897216341815,01/04/2007,12/31/2022,16,0,117,55.7,42.6,2018,https://twitter.com/SenBobCasey,SenBobCasey,https://twitter.com/Bob_Casey,Bob_Casey,04/13/1960,0,White,8,J.D.; Catholic University of America; 1988,2,https://www.casey.senate.gov/,https://bioguide.congress.gov/search/bio/C001070,,
"Cassidy, Bill",27,Louisiana,LA,0,2,0.682348710788942,01/06/2015,12/31/2022,7.98904109589041,0,117,59.3,19,2020,https://twitter.com/SenBillCassidy,SenBillCassidy,https://twitter.com/BillCassidy,BillCassidy,09/28/1957,0,White,7,M.D.; Louisiana State University; 1979,6,https://www.cassidy.senate.gov/,https://bioguide.congress.gov/search/bio/C001075,,
"Collins, Susan M.",28,Maine,ME,0,2,0.448622425849401,01/07/1997,12/31/2022,25.9972602739726,0,117,51,42.4,2020,https://twitter.com/SenatorCollins,SenatorCollins,N/A,N/A,12/07/1952,1,White,6,Bachelor in Government; St. Lawrence University; 1975,0,https://www.collins.senate.gov/,https://bioguide.congress.gov/search/bio/C001035,,
"Coons, Christopher A.",29,Delaware,DE,1,2,0.338422715351401,11/15/2010,12/31/2022,12.1342465753425,0,117,59.4,37.9,2020,https://twitter.com/ChrisCoons,ChrisCoons,N/A,N/A,09/09/1963,0,White,8,J.D.; Yale Law School; 1992,2,https://www.coons.senate.gov/,https://bioguide.congress.gov/search/bio/C001088,,
"Cornyn, John",30,Texas,TX,0,2,0.772226738391321,11/30/2002,12/31/2022,20.0986301369863,0,117,53.5,43.9,2020,https://twitter.com/JohnCornyn,JohnCornyn,N/A,N/A,02/02/1952,0,White,8,J.D.; St. Marys School of Law; 1977,2,https://www.cornyn.senate.gov/,https://bioguide.congress.gov/search/bio/C001056,,
"Cortez Masto, Catherine",31,Nevada,NV,1,3,0.236574567369409,01/03/2017,12/31/2022,5.99452054794521,0,117,47.1,44.7,2016,https://twitter.com/SenCortezMasto,SenCortezMasto,https://twitter.com/CortezMasto,CortezMasto,03/29/1964,1,Hispanic; White,8,J.D.; Gonzaga University School of Law; 1990,2,https://www.cortezmasto.senate.gov/,https://bioguide.congress.gov/search/bio/C001113,,
"Cotton, Tom",32,Arkansas,AR,0,2,0.876390364042756,01/06/2015,12/31/2022,7.98904109589041,0,117,66.5,33.5,2020,https://twitter.com/SenTomCotton,SenTomCotton,https://twitter.com/TomCottonAR,TomCottonAR,05/13/1977,0,White,8,J.D.; Harvard University; 2002,2,https://www.cotton.senate.gov/,https://bioguide.congress.gov/search/bio/C001095,,
"Cramer, Kevin",33,North Dakota,ND,0,1,0.910896298032277,01/03/2019,12/31/2022,3.99452054794521,0,117,55.5,44.5,2018,https://twitter.com/SenKevinCramer,SenKevinCramer,https://twitter.com/kevincramer,kevincramer,01/21/1961,0,White,7,M.A.; Management; University o fMary; 2003,0,https://www.cramer.senate.gov/,https://bioguide.congress.gov/search/bio/C001096,,
"Crapo, Michael",34,Idaho,ID,0,3,0.823331951918519,01/06/1999,12/31/2022,24,0,117,66.1,27.8,2016,https://twitter.com/MikeCrapo,MikeCrapo,N/A,N/A,05/20/1951,0,White,8,J.D.; Harvard University; 1977,2,https://www.crapo.senate.gov/,https://bioguide.congress.gov/search/bio/C000880,,
"Cruz, Ted",35,Texas,TX,0,1,0.944056385174951,01/03/2013,12/31/2022,9.9972602739726,0,117,50.9,48.3,2018,https://twitter.com/SenTedCruz,SenTedCruz,https://twitter.com/tedcruz,tedcruz,12/22/1970,0,Hispanic; White,8,J.D.; Harvard University; 1995,2,https://www.cruz.senate.gov/,https://bioguide.congress.gov/search/bio/C001098,,
"Daines, Steve",36,Montana,MT,0,2,0.859322244752884,01/06/2015,12/31/2022,7.98904109589041,0,117,55,45,2020,https://twitter.com/SteveDaines,SteveDaines,N/A,N/A,08/20/1962,0,White,6,B.S.; Chemical Engineering; Montana State University; 1984,1,https://www.daines.senate.gov/,https://bioguide.congress.gov/search/bio/D000618,,
"Duckworth, Tammy",37,Illinois,IL,1,3,0.0944404184553066,01/03/2017,12/31/2022,5.99452054794521,0,117,54.4,40.2,2016,https://twitter.com/SenDuckworth,SenDuckworth,https://twitter.com/tammyduckworth,tammyduckworth,03/12/1968,1,Asian; White,8,PhD in human services; Capella University School of Public Service Leadership; 2015,3,https://www.duckworth.senate.gov/,https://bioguide.congress.gov/search/bio/D000622,,
"Durbin, Richard J.",38,Illinois,IL,1,2,0.0855733771029607,01/07/1997,12/31/2022,25.9972602739726,0,117,54.9,38.9,2020,https://twitter.com/SenatorDurbin,SenatorDurbin,https://twitter.com/DickDurbin,DickDurbin,11/21/1944,0,White,8,J.D.; Georgetown University; 1969,2,https://www.durbin.senate.gov/,https://bioguide.congress.gov/search/bio/D000563,,
"Ernst, Joni",39,Iowa,IA,0,2,0.826265400967212,01/06/2015,12/31/2022,7.98904109589041,0,117,51.8,45.2,2020,https://twitter.com/SenJoniErnst,SenJoniErnst,https://twitter.com/joniernst,joniernst,07/01/1970,1,White,7,M.P.A.; Columbus State University; 1995,3,https://www.ernst.senate.gov/,https://bioguide.congress.gov/search/bio/E000295,,
"Feinstein, Dianne",40,California,CA,1,1,0.150865658191444,11/10/1992,12/31/2022,30.158904109589,0,117,54.2,45.8,2018,https://twitter.com/SenFeinstein,SenFeinstein,https://twitter.com/DianneFeinstein,DianneFeinstein,06/22/1933,1,White,6,B.A.; History; Stanford University; 1955,0,https://www.feinstein.senate.gov/public/,https://bioguide.congress.gov/search/bio/F000062,,
"Fischer, Debra",41,Nebraska,NE,0,1,0.688576408222131,01/03/2013,12/31/2022,9.9972602739726,0,117,57.7,38.6,2018,https://twitter.com/SenatorFischer,SenatorFischer,N/A,N/A,03/01/1951,1,White,6,B.S.; Education; University of Nebraska; 1988,0,https://www.fischer.senate.gov/,https://bioguide.congress.gov/search/bio/F000463,,
"Gillibrand, Kirsten E.",42,New York,NY,1,1,0.12072202063417,01/27/2009,12/31/2022,13.9342465753425,0,117,67,33,2018,https://twitter.com/SenGillibrand,SenGillibrand,https://twitter.com/gillibrandny,gillibrandny,12/09/1966,1,White,8,J.D.; University of California; 1991,2,https://www.gillibrand.senate.gov/,https://bioguide.congress.gov/search/bio/G000555,,
"Graham, Lindsey",43,South Carolina,SC,0,2,0.619070797359753,01/07/2003,12/31/2022,19.9945205479452,0,117,54.5,44.2,2020,https://twitter.com/LindseyGrahamSC,LindseyGrahamSC,https://twitter.com/grahamblog,grahamblog,07/09/1955,0,White,8,J.D.; University of South Carolina; 1981,2,https://www.lgraham.senate.gov/,https://bioguide.congress.gov/search/bio/G000359 ,,
"Grassley, Chuck",44,Iowa,IA,0,3,0.670073592619545,01/05/1981,12/31/2022,42.013698630137,0,117,60.2,35.7,2016,https://twitter.com/ChuckGrassley,ChuckGrassley,N/A,N/A,09/17/1933,0,White,7,M.A.; Political Science; University of Northern Iowa; 1956,0,https://www.grassley.senate.gov/,https://bioguide.congress.gov/search/bio/G000386,,
"Hagerty, Bill",45,Tennessee,TN,0,2,0.857410027434407,01/03/2021,12/31/2022,1.99178082191781,0,117,62.2,35.2,2020,https://twitter.com/SenatorHagerty,SenatorHagerty,https://twitter.com/billhagertytn,billhagertytn,08/14/1959,0,White,8,J.D.; Vanderbilt Law School; 1984,0,https://www.hagerty.senate.gov/,https://bioguide.congress.gov/search/bio/H000601,,
"Hassan, Margaret Wood",46,New Hampshire,NH,1,3,0.43611907238278,01/03/2017,12/31/2022,5.99452054794521,0,117,48,47.9,2016,https://twitter.com/SenatorHassan,SenatorHassan,https://twitter.com/Maggie_Hassan,Maggie_Hassan,02/27/1958,1,White,8,J.D.; Northeastern University School of law; 1985,11,https://www.hassan.senate.gov/,https://bioguide.congress.gov/search/bio/H001076,,
"Hawley, Josh",47,Missouri,MO,0,1,0.864366195602263,01/03/2019,12/31/2022,3.99452054794521,0,117,51.4,45.6,2018,https://twitter.com/HawleyMO,HawleyMO,N/A,N/A,12/31/1979,0,White,8,J.D.; Yale Law School; 2006,2,https://www.hawley.senate.gov/,https://bioguide.congress.gov/search/bio/H001089,,
"Heinrich, Martin",48,New Mexico,NM,1,1,0.2007037353465,01/03/2013,12/31/2022,9.9972602739726,0,117,54.1,30.5,2018,https://twitter.com/MartinHeinrich,MartinHeinrich,https://twitter.com/senatorheinrich,senatorheinrich,10/17/1971,0,White,6,B.S.; Mechanical Engineering; University of Missouri; 1995,12,https://www.heinrich.senate.gov/,https://bioguide.congress.gov/search/bio/H001046,,
"Hickenlooper, John W.",49,Colorado,CO,1,2,0.335030323955882,01/03/2021,12/31/2022,1.99178082191781,0,117,53.5,44.2,2020,https://twitter.com/SenatorHick,SenatorHick,https://twitter.com/hickenlooper,hickenlooper,02/07/1952,0,White,7,M.A.; Geology; Wesleyan University; 1980,0,https://www.hickenlooper.senate.gov/,https://bioguide.congress.gov/search/bio/H000273,,
"Hirono, Mazie K.",50,Hawaii,HI,1,1,0.0715447123166643,01/03/2013,12/31/2022,9.9972602739726,0,117,71.2,28.8,2018,https://twitter.com/maziehirono,maziehirono,https://twitter.com/mazieforhawaii,mazieforhawaii,11/03/1947,1,Asian,8,J.D.; Georgetown University; 1978,0,https://www.hirono.senate.gov/,https://bioguide.congress.gov/search/bio/H001042,,
"Hoeven, John",51,North Dakota,ND,0,3,0.815683863264003,01/05/2011,12/31/2022,11.9945205479452,0,117,78.6,17,2016,https://twitter.com/SenJohnHoeven,SenJohnHoeven,N/A,N/A,03/13/1957,0,White,7,M.B.A.; Northwestern University; 1981,12,https://www.hoeven.senate.gov/,https://bioguide.congress.gov/search/bio/H001061,,
"Hyde-Smith, Cindy",52,Mississippi,MS,0,2,0.868059764299163,04/09/2018,12/31/2022,4.73150684931507,0,117,54.1,44.1,2020,https://twitter.com/SenHydeSmith,SenHydeSmith,https://twitter.com/cindyhydesmith,cindyhydesmith,05/10/1959,1,White,6,"B.A.; Criminal justice, political science; University of Southern Mississippi; 1981",0,https://www.hydesmith.senate.gov/,https://bioguide.congress.gov/search/bio/H001079 ,,
"Inhofe, James",53,Oklahoma,OK,0,2,0.880238318204784,11/17/1994,12/31/2022,28.1397260273973,1,117,62.9,32.8,2020,https://twitter.com/JimInhofe,JimInhofe,N/A,N/A,11/17/1934,0,White,6,B.A.; Economics; University of Tulsa; 1973,0,N/A,https://bioguide.congress.gov/search/bio/I000024 ,,
"Johnson, Ron",54,Wisconsin,WI,0,3,0.743401705863958,01/05/2011,12/31/2022,11.9945205479452,0,117,50.2,46.8,2016,https://twitter.com/SenRonJohnson,SenRonJohnson,https://twitter.com/ronjohnsonwi,ronjohnsonwi,04/08/1955,0,White,6,B.S.; Business and Accounting; University of Minnesota; 1977,4,https://www.ronjohnson.senate.gov/,https://bioguide.congress.gov/search/bio/J000293,,
"Kaine, Tim",55,Virginia,VA,1,1,0.203600708089391,01/03/2013,12/31/2022,9.9972602739726,0,117,57.1,41.1,2018,https://twitter.com/timkaine,timkaine,N/A,N/A,02/26/1958,0,White,8,J.D.; Harvard University; 1983,11,https://www.kaine.senate.gov/,https://bioguide.congress.gov/search/bio/K000384,,
"Kelly, Mark",56,Arizona,AZ,1,3,0.399793347847799,12/02/2020,12/31/2022,2.07945205479452,0,117,51.2,48.8,2020,https://twitter.com/SenMarkKelly,SenMarkKelly,https://twitter.com/CaptMarkKelly,CaptMarkKelly,02/21/1964,0,White,7,M.S.; Aeronautical Engineering; U.S. Naval Postgraduate School,3,https://www.kelly.senate.gov/,https://bioguide.congress.gov/search/bio/K000377,,
"Kennedy, John Neely",57,Louisiana,LA,0,3,0.785684351248518,01/03/2017,12/31/2022,5.99452054794521,0,117,60.7,39.3,2016,https://twitter.com/SenJohnKennedy,SenJohnKennedy,https://twitter.com/JohnKennedyLA,JohnKennedyLA,11/21/1951,0,White,8,J.D.; University of Virginia School of LAw; 1977,11,https://www.kennedy.senate.gov/,https://bioguide.congress.gov/search/bio/K000393,,
"King, Angus S., Jr.",58,Maine,ME,2,1,0.346033257048853,01/03/2013,12/31/2022,9.9972602739726,0,117,54.3,35.2,2018,https://twitter.com/SenAngusKing,SenAngusKing,N/A,N/A,03/31/1944,0,White,8,J.D.; University of Virginia; 1969,2,https://www.king.senate.gov/,https://bioguide.congress.gov/search/bio/K000383 ,,
"Klobuchar, Amy",59,Minnesota,MN,1,1,0.130504324943533,01/04/2007,12/31/2022,16,0,117,60.3,36.2,2018,https://twitter.com/SenAmyKlobuchar,SenAmyKlobuchar,https://twitter.com/amyklobuchar,amyklobuchar,05/25/1960,1,White,8,"J.D.; University of Chicago, 1985",2,https://www.klobuchar.senate.gov/,https://bioguide.congress.gov/search/bio/K000367 ,,
"Lankford, James",60,Oklahoma,OK,0,3,0.89992933687588,01/03/2015,12/31/2022,7.9972602739726,0,117,67.7,24.6,2016,https://twitter.com/SenatorLankford,SenatorLankford,https://twitter.com/jameslankford,jameslankford,03/04/1968,0,White,7,M.Div.; Southwestern Theological Baptist Seminary; 1994,5,https://www.lankford.senate.gov/,https://bioguide.congress.gov/search/bio/L000575,,
"Leahy, Patrick",61,Vermont,VT,1,3,0.144121081911654,01/14/1975,12/31/2022,47.9945205479452,1,117,61.3,33,2016,https://twitter.com/SenatorLeahy,SenatorLeahy,N/A,N/A,03/31/1940,0,White,8,J.D.; Georgetown University; 1964,2,N/A,https://bioguide.congress.gov/search/bio/L000174,,
"Lee, Mike",62,Utah,UT,0,3,0.753748787807473,01/05/2011,12/31/2022,11.9945205479452,0,117,68,27.4,2016,https://twitter.com/SenMikeLee,SenMikeLee,https://twitter.com/BasedMikeLee,BasedMikeLee,06/04/1971,0,White,8,J.D.; Brigham Young university; 1997,2,https://www.lee.senate.gov/,https://bioguide.congress.gov/search/bio/L000577,,
"Luján, Ben Ray",63,New Mexico,NM,1,2,0.174860888138848,01/03/2021,12/31/2022,1.99178082191781,0,117,51.7,45.6,2020,https://twitter.com/SenatorLujan,SenatorLujan,https://twitter.com/benraylujan,benraylujan,06/07/1972,0,Hispanic,6,B.B.A.; New Mexico Highlands University; 2007,0,https://www.lujan.senate.gov/,https://bioguide.congress.gov/search/bio/L000570 ,,
"Lummis, Cynthia M.",64,Wyoming,WY,0,2,0.893292958108508,01/03/2021,12/31/2022,1.99178082191781,0,117,73.1,26.9,2020,https://twitter.com/SenLummis,SenLummis,https://twitter.com/CynthiaMLummis,CynthiaMLummis,09/10/1954,1,White,8,"J.D.; University of Wyoming College of Law, Laramie, Wyo.; 1985",11,https://www.lummis.senate.gov/,https://bioguide.congress.gov/search/bio/L000571 ,,
"Manchin, Joe, III",65,West Virginia,WV,1,1,0.446686774398077,11/15/2010,12/31/2022,12.1342465753425,0,117,49.6,46.3,2018,https://twitter.com/Sen_JoeManchin,Sen_JoeManchin,https://twitter.com/JoeManchinWV,JoeManchinWV,08/24/1947,0,White,6,B.A.; Business Administration; West Virginia University; 1970,12,https://www.manchin.senate.gov/,https://bioguide.congress.gov/search/bio/M001183 ,,
"Markey, Edward J.",66,Massachusetts,MA,1,2,0.0139659683705929,07/16/2013,12/31/2022,9.46575342465753,0,117,66.2,33,2020,https://twitter.com/SenMarkey,SenMarkey,https://twitter.com/edmarkey,edmarkey,07/11/1946,0,White,8,J.D.; Boston College Law School; 1972,11,https://www.markey.senate.gov/,https://bioguide.congress.gov/search/bio/M000133,,
"Marshall, Roger",67,Kansas,KS,0,2,0.882124792228652,01/03/2021,12/31/2022,1.99178082191781,0,117,53.2,41.8,2020,https://twitter.com/SenatorMarshall,SenatorMarshall,https://twitter.com/RogerMarshallMD,RogerMarshallMD,08/09/1960,0,White,7,M.D.; University of Kansas School of Medicine; 1987,6,https://www.marshall.senate.gov/,https://bioguide.congress.gov/search/bio/M001198,,
"McConnell, Mitch",68,Kentucky,KY,0,2,0.599687533584357,01/03/1985,12/31/2022,38.0164383561644,0,117,57.8,38.2,2020,https://twitter.com/LeaderMcConnell,LeaderMcConnell,N/A,N/A,02/20/1942,0,White,8,J.D.; Kentucky Law School; 1967,11,https://www.mcconnell.senate.gov/,https://bioguide.congress.gov/search/bio/M000355,,
"Menendez, Robert",69,New Jersey,NJ,1,1,0.191515157461704,01/18/2006,12/31/2022,16.9616438356164,0,117,54,42.8,2018,https://twitter.com/SenatorMenendez,SenatorMenendez,N/A,N/A,01/01/1954,0,Hispanic,8,J.D.; Rutgers university of Law; 1979,11,https://www.menendez.senate.gov/,https://bioguide.congress.gov/search/bio/M000639,,
"Merkley, Jeff",70,Oregon,OR,1,2,0.0355414098997263,01/06/2009,12/31/2022,13.9917808219178,0,117,56.9,39.3,2020,https://twitter.com/SenJeffMerkley,SenJeffMerkley,https://twitter.com/jeffmerkley,jeffmerkley,10/24/1956,0,White,7,M.P.A.; Princeton University; 1982,0,https://www.merkley.senate.gov/,https://bioguide.congress.gov/search/bio/M001176,,
"Moran, Jerry",71,Kansas,KS,0,3,0.716270292467902,01/05/2011,12/31/2022,11.9945205479452,0,117,62.4,32.1,2016,https://twitter.com/JerryMoran,JerryMoran,N/A,N/A,05/29/1954,0,White,8,J.D.; Kansas University School of Law; 1981,11,https://www.moran.senate.gov/public/,https://bioguide.congress.gov/search/bio/M000934 ,,
"Murkowski, Lisa",72,Alaska,AK,0,3,0.473296745648617,12/20/2002,12/31/2022,20.0438356164384,0,117,44.3,29.5,2016,https://twitter.com/lisamurkowski,lisamurkowski,https://twitter.com/lisaforsenate,lisaforsenate,05/22/1957,1,White,8,J.D.; Willamette College of Law; 1985,2,https://www.murkowski.senate.gov/,https://bioguide.congress.gov/search/bio/M001153,,
"Murphy, Christopher",73,Connecticut,CT,1,1,0.152635018959264,01/03/2013,12/31/2022,9.9972602739726,0,117,59.5,39.4,2018,https://twitter.com/ChrisMurphyCT,ChrisMurphyCT,N/A,N/A,08/03/1973,0,White,8,J.D.; University of Connecticut; 2002,11,https://www.murphy.senate.gov/,https://bioguide.congress.gov/search/bio/M001169,,
"Murray, Patty",74,Washington,WA,1,3,0.142703588817088,01/05/1993,12/31/2022,30.0054794520548,0,117,59.1,40.9,2016,https://twitter.com/PattyMurray,PattyMurray,https://twitter.com/murraycampaign,murraycampaign,10/11/1950,1,White,6,B.A.; Physical Education; Washington State University; 1972,5,https://www.murray.senate.gov/,https://bioguide.congress.gov/search/bio/M001111,,
"Ossoff, Jon",75,Georgia,GA,1,2,0.303405364928085,01/20/2021,12/31/2022,1.94520547945205,0,117,50.6,49.4,2020,https://twitter.com/SenOssoff,SenOssoff,https://twitter.com/ossoff,ossoff,02/16/1987,0,White,7,M.S.; International Politicla Economy; London School of Economics; 2013,7,https://www.ossoff.senate.gov/,https://bioguide.congress.gov/search/bio/O000174,,
"Padilla, Alex",76,California,CA,1,3,0.0200324383981554,01/20/2021,12/31/2022,1.94520547945205,0,117,N/A,N/A,*,https://twitter.com/SenAlexPadilla,SenAlexPadilla,https://twitter.com/AlexPadilla4CA,AlexPadilla4CA,03/22/1973,0,Hispanic,6,B.S.; Mechanical Engineering; MIT; 1994,9,https://www.padilla.senate.gov/,https://bioguide.congress.gov/search/bio/P000145,appointed in 2020 to replace Kamala Harris ,
"Paul, Rand",77,Kentucky,KY,0,3,0.684883322748808,01/05/2011,12/31/2022,11.9945205479452,0,117,57.3,42.7,2016,https://twitter.com/senrandpaul,senrandpaul,https://twitter.com/RandPaul,RandPaul,01/07/1963,0,White,7,M.D.; Duke University; 1988,6,https://www.paul.senate.gov/,https://bioguide.congress.gov/search/bio/P000603,,
"Peters, Gary C.",78,Michigan,MI,1,2,0.355796587683312,01/06/2015,12/31/2022,7.98904109589041,0,117,49.9,48.2,2020,https://twitter.com/SenGaryPeters,SenGaryPeters,https://twitter.com/garypeters,garypeters,12/01/1958,0,White,8,J.D.; Wayne State University; 1989,2,https://www.peters.senate.gov/,https://bioguide.congress.gov/search/bio/P000595,,
"Portman, Robert",79,Ohio,OH,0,3,0.548120690430407,01/05/2011,12/31/2022,11.9945205479452,1,117,58.3,36.9,2016,https://twitter.com/senrobportman,senrobportman,N/A,N/A,12/19/1955,0,White,8,J.D.; University of Michigan; 1985,2,N/A,https://bioguide.congress.gov/search/bio/P000449,,
"Reed, John F.",80,Rhode Island,RI,1,2,0.145861826443275,01/07/1997,12/31/2022,25.9972602739726,0,117,66.6,33.4,2020,https://twitter.com/SenJackReed,SenJackReed,N/A,N/A,11/12/1949,0,White,8,J.D.; Harvard University; 1982,2,https://www.reed.senate.gov/,https://bioguide.congress.gov/search/bio/R000122,,
"Risch, James E.",81,Idaho,ID,0,2,0.82910906209038,01/06/2009,12/31/2022,13.9917808219178,0,117,62.6,33.2,2020,https://twitter.com/SenatorRisch,SenatorRisch,N/A,N/A,05/03/1943,0,White,8,J.D.; University of Idaho; 1968,2,https://www.risch.senate.gov/,https://bioguide.congress.gov/search/bio/R000584,,
"Romney, Mitt",82,Utah,UT,0,1,0.596688837978771,01/03/2019,12/31/2022,3.99452054794521,0,117,62.6,30.9,2018,https://twitter.com/SenatorRomney,SenatorRomney,https://twitter.com/mittromney,mittromney,03/12/1947,0,White,7,M.B.A.; Harvard Business School; 1975,1,https://www.romney.senate.gov/,https://bioguide.congress.gov/search/bio/R000615,,
"Rosen, Jacky",83,Nevada,NV,1,1,0.308548351377894,01/03/2019,12/31/2022,3.99452054794521,0,117,50.4,45.4,2018,https://twitter.com/SenJackyRosen,SenJackyRosen,https://twitter.com/RosenforNevada,RosenforNevada,08/02/1957,1,White,6,B.A.; Psychology; University of Minnesota; 1979,1,https://www.rosen.senate.gov/,https://bioguide.congress.gov/search/bio/R000608,,
"Rounds, Mike",84,South Dakota,SD,0,2,0.784008560585577,01/06/2015,12/31/2022,7.98904109589041,0,117,65.7,34.3,2020,https://twitter.com/SenatorRounds,SenatorRounds,N/A,N/A,10/24/1954,0,White,6,B.S.; Political Science; South Dakota State University; 1977,1,https://www.rounds.senate.gov/,https://bioguide.congress.gov/search/bio/R000605,,
"Rubio, Marco",85,Florida,FL,0,3,0.831181764071725,01/05/2011,12/31/2022,11.9945205479452,0,117,52,44.3,2016,https://twitter.com/senmarcorubio,senmarcorubio,https://twitter.com/marcorubio,marcorubio,05/28/1971,0,Hispanic,8,J.D.; University of Miami; 1996,2,https://www.rubio.senate.gov/,https://bioguide.congress.gov/search/bio/R000595,,
"Sanders, Bernard",86,Vermont,VT,2,1,0,01/04/2007,12/31/2022,16,0,117,67.4,27.5,2018,https://twitter.com/SenSanders,SenSanders,https://twitter.com/BernieSanders,BernieSanders,09/08/1941,0,White,6,B.A.; Political Science; University of Chicago; 1964,0,https://www.sanders.senate.gov/,https://bioguide.congress.gov/search/bio/S000033,,
"Sasse, Benjamin",87,Nebraska,NE,0,2,0.684229649213868,01/06/2015,12/31/2022,7.98904109589041,1,117,62.7,24.4,2020,https://twitter.com/sensasse,sensasse,https://twitter.com/BenSasse,BenSasse,02/22/1972,0,White,8,PhD in History; Yale University; 2004,5,N/A,https://bioguide.congress.gov/search/bio/S001197,,
"Schatz, Brian",88,Hawaii ,HI,1,3,0.213250458593456,12/27/2012,12/31/2022,10.0164383561644,0,117,73.6,22.2,2016,https://twitter.com/brianschatz,brianschatz,https://twitter.com/SenBrianSchatz,SenBrianSchatz,10/20/1972,0,White,6,B.A.; Philosophy; Pomona College; 1994,5,https://www.schatz.senate.gov/,https://bioguide.congress.gov/search/bio/S001194,,
"Schumer, Charles E.",89,New York,NY,1,3,0.239789022209428,01/06/1999,12/31/2022,24,0,117,70.4,27.4,2016,https://twitter.com/SenSchumer,SenSchumer,https://twitter.com/chuckschumer,chuckschumer,11/23/1950,0,White,8,J.D.; Harvard University; 1974,2,https://www.schumer.senate.gov/,https://bioguide.congress.gov/search/bio/S000148 ,,
"Scott, Rick",90,Florida,FL,0,1,1,01/08/2019,12/31/2022,3.98082191780822,0,117,50.1,49.9,2018,https://twitter.com/SenRickScott,SenRickScott,https://twitter.com/scottforflorida,scottforflorida,12/01/1952,0,White,8,J.D.; Southern Methodist University; 1978,2,https://www.rickscott.senate.gov/,https://bioguide.congress.gov/search/bio/S001217,,
"Scott, Tim",91,South Carolina,SC,0,3,0.781356077518849,01/03/2013,12/31/2022,9.9972602739726,0,117,60.6,37,2016,https://twitter.com/SenatorTimScott,SenatorTimScott,https://twitter.com/votetimscott,votetimscott,09/19/1965,0,African-American,6,B.S.; Political Science; Charleston Southern University; 1988 ,1,https://www.scott.senate.gov/,https://bioguide.congress.gov/search/bio/S001184,,
"Shaheen, Jeanne",92,New Hampshire,NH,1,2,0.2925665319541,01/06/2009,12/31/2022,13.9917808219178,0,117,56.6,41,2020,https://twitter.com/SenatorShaheen,SenatorShaheen,https://twitter.com/JeanneShaheen,JeanneShaheen,01/28/1947,1,White,7,M.S.S.; University of Mississippi; 1973,5,https://www.shaheen.senate.gov/,https://bioguide.congress.gov/search/bio/S001181,,
"Shelby, Richard",93,Alabama,AL,0,3,0.577739000839365,01/06/1987,12/31/2022,36.0082191780822,1,117,64.2,35.8,2016,https://twitter.com/SenShelby,SenShelby,N/A,N/A,05/06/1934,0,White,6,LL.B.; University of Alabama; 1963,2,N/A,https://bioguide.congress.gov/search/bio/S000320,,
"Sinema, Kyrsten",94,Arizona,AZ,2,1,0.500967034663567,01/03/2019,12/31/2022,3.99452054794521,0,117,50,47.6,2018,https://twitter.com/SenatorSinema,SenatorSinema,https://twitter.com/kyrstensinema,kyrstensinema,07/12/1976,1,White,8,PhD in Justice Studies; Arizona State University; 2012,2,https://www.sinema.senate.gov/,https://bioguide.congress.gov/search/bio/S001191,,
"Smith, Tina",95,Minnesota,MN,1,2,0.0756533259297989,01/03/2018,12/31/2022,4.99452054794521,0,117,48.8,43.5,2020,https://twitter.com/SenTinaSmith,SenTinaSmith,https://twitter.com/TinaSmithMN,TinaSmithMN,03/04/1958,1,White,7,M.B.A. Dartmouth College; 1984,1,https://www.smith.senate.gov/,https://bioguide.congress.gov/search/bio/S001203,,
"Stabenow, Debbie",96,Michigan,MI,1,1,0.221949395648287,01/03/2001,12/31/2022,22.0054794520548,0,117,52.3,45.8,2018,https://twitter.com/SenStabenow,SenStabenow,https://twitter.com/stabenow,stabenow,04/29/1950,1,White,7,M.S.W.; Michigan State University; 1975,5,https://www.stabenow.senate.gov/,https://bioguide.congress.gov/search/bio/S000770,,
"Sullivan, Dan",97,Alaska,AK,0,2,0.652100683642255,01/06/2015,12/31/2022,7.98904109589041,0,117,53.9,41.2,2020,https://twitter.com/SenDanSullivan,SenDanSullivan,N/A,N/A,11/13/1964,0,White,8,J.D.; Georgetown University; 1993,2,https://www.sullivan.senate.gov/,https://bioguide.congress.gov/search/bio/S001198,,
"Tester, Jon",98,Montana,MT,1,1,0.377646486433112,01/04/2007,12/31/2022,16,0,117,50.3,46.8,2018,https://twitter.com/SenatorTester,SenatorTester,https://twitter.com/jontester,jontester,08/21/1956,0,White,6,B.A.; Music; University of Providence; 1978,10,https://www.tester.senate.gov/,https://bioguide.congress.gov/search/bio/T000464 ,,
"Thune, John",99,South Dakota,SD,0,3,0.795060855902239,01/04/2005,12/31/2022,18,0,117,71.8,28.2,2016,https://twitter.com/SenJohnThune,SenJohnThune,https://twitter.com/johnthune,johnthune,01/07/1961,0,White,7,M.B.A.; University of South Dakota; 1984,1,https://www.thune.senate.gov/,https://bioguide.congress.gov/search/bio/T000250 ,,
"Tillis, Thom",100,North Carolina,NC,0,2,0.819146177750934,01/06/2015,12/31/2022,7.98904109589041,0,117,48.7,46.9,2020,https://twitter.com/SenThomTillis,SenThomTillis,https://twitter.com/ThomTillis,ThomTillis,08/30/1960,0,White,6,B.S.; Technology Management; University of Maryland; 1996,1,https://www.tillis.senate.gov/,https://bioguide.congress.gov/search/bio/T000476 ,,
"Toomey, Patrick",101,Pennsylvania,PA,0,3,0.607637714921737,01/05/2011,12/31/2022,11.9945205479452,1,117,48.9,47.2,2016,https://twitter.com/SenToomey,SenToomey,https://twitter.com/pattoomey,pattoomey,11/17/1961,0,White,6,A.B.; Government; Harvard College; 1984,1,N/A,https://bioguide.congress.gov/search/bio/T000461 ,,
"Tuberville, Tommy",102,Alabama,AL,0,2,0.808701355452043,01/03/2021,12/31/2022,1.99178082191781,0,117,60.1,39.7,2020,https://twitter.com/SenTuberville,SenTuberville,https://twitter.com/TTuberville,TTuberville,09/18/1954,0,White,6,"B.S., physical education, Southern Arkansas University, 1976",5,https://www.tuberville.senate.gov/,https://bioguide.congress.gov/search/bio/T000278 ,,
"Van Hollen, Chris",103,Maryland,MD,1,3,0.117646768842011,01/03/2017,12/31/2022,5.99452054794521,0,117,60.4,36.4,2016,https://twitter.com/ChrisVanHollen,ChrisVanHollen,N/A,N/A,01/10/1959,0,White,8,J.D.; Georgetown university; 1990,2,https://www.vanhollen.senate.gov/,https://bioguide.congress.gov/search/bio/V000128,,
"Warner, Mark R.",104,Virginia,VA,1,2,0.33022168507113,01/06/2009,12/31/2022,13.9917808219178,0,117,56,44,2020,https://twitter.com/SenatorWarner,SenatorWarner,https://twitter.com/MarkWarner,MarkWarner,12/15/1954,0,White,8,J.D.; Harvard Law School; 1980,1,https://www.warner.senate.gov/,https://bioguide.congress.gov/search/bio/W000805 ,,
"Warnock, Raphael G.",105,Georgia,GA,1,3,0.464158242867696,01/20/2021,12/31/2022,1.94520547945205,0,117,51,49,2020,https://twitter.com/SenatorWarnock,SenatorWarnock,https://twitter.com/ReverendWarnock,ReverendWarnock,07/23/1969,0,African-American,8,PhD in Philosophy; Union Theological Seminary; ,8,https://www.warnock.senate.gov/,https://bioguide.congress.gov/search/bio/W000790,,
"Warren, Elizabeth",106,Massachusetts,MA,1,1,0.0583875007437665,01/03/2013,12/31/2022,9.9972602739726,0,117,60.4,36.2,2018,https://twitter.com/SenWarren,SenWarren,https://twitter.com/ewarren,ewarren,06/22/1949,1,White,8,J.D.; Rutgers University; 1976,2,https://www.warren.senate.gov/,https://bioguide.congress.gov/search/bio/W000817 ,,
"Whitehouse, Sheldon",107,Rhode Island,RI,1,1,0.124737669119195,01/04/2007,12/31/2022,16,0,117,61.6,38.4,2018,https://twitter.com/SenWhitehouse,SenWhitehouse,N/A,N/A,10/20/1955,0,White,8,J.D.; University of Virginia; 1982,2,https://www.whitehouse.senate.gov/,https://bioguide.congress.gov/search/bio/W000802,,
"Wicker, Roger F.",108,Mississippi,MS,0,1,0.763788502839721,12/31/2007,12/31/2022,15.0109589041096,0,117,58.5,39.5,2018,https://twitter.com/SenatorWicker,SenatorWicker,https://twitter.com/RogerWicker,RogerWicker,07/05/1951,0,White,8,J.D.; University of Mississippi; 1975,2,https://www.wicker.senate.gov/,https://bioguide.congress.gov/search/bio/W000437,,
"Wyden, Ron",109,Oregon,OR,1,3,0.0591413132623803,02/05/1996,12/31/2022,26.9205479452055,0,117,56.7,33.6,2016,https://twitter.com/RonWyden,RonWyden,N/A,N/A,05/03/1949,0,White,8,J.D.; University of Oregon; 1974,2,https://www.wyden.senate.gov/,https://bioguide.congress.gov/search/bio/W000779,,
"Young, Todd",110,Indiana,IN,0,3,0.677696674158218,01/05/2011,12/31/2022,11.9945205479452,1,117,52.1,42.4,2016,https://twitter.com/SenToddYoung,SenToddYoung,https://twitter.com/ToddYoungIN,ToddYoungIN,08/24/1972,0,White,8,J.D.; Robert H. McKinney; 2006,2,https://www.young.senate.gov/,https://bioguide.congress.gov/search/bio/Y000064,,
name,id,state,state_short,party,class,ideology,start_serving,end_serving,time_in_office,not_in_office,last_congress,vote_share,next_closest_share,election_year,twitter_url,twitter_handle,alt_account,alt_handle,date_of_birth,female,ethnicity,edu_level,edu_information,occup_level,website_url,bioguide_link,Comments_1,Comments_2
"Alexander, Andrew L., Jr.",1,Tennessee,TN,0,2,0.681815808318192,01/07/2003,01/03/2021,18.0027397260274,1,116,61.9,31.8,2014,https://twitter.com/SenAlexander,SenAlexander,https://twitter.com/LamarAlexander,LamarAlexander,07/03/1940,0,White,8,J.D.; New York University; 1965,2,N/A,https://bioguide.congress.gov/search/bio/A000360,,
"Enzi, Mike",2,Wyoming,WY,0,2,0.719285383539398,01/03/1997,01/03/2021,24,1,116,72.3,17.6,2014,https://twitter.com/senatorenzi,SenatorEnzi,N/A,N/A,02/01/1944,0,White,7,M.B.A.; Retail Marketing; Denver University; 1968,4,N/A,https://bioguide.congress.gov/search/bio/E000285,,
"Gardner, Cory",3,Colorado,CO,0,2,0.719285383539398,01/06/2015,01/03/2021,5.9972602739726,1,116,48.5,46,2014,https://twitter.com/CoryGardner,CoryGardner,https://twitter.com/corygardner,corygardner,08/22/1974,0,White,8,"J.D.; University of Colorado, Boulder; 2001",2,N/A,https://bioguide.congress.gov/search/bio/G000562,,
"Harris, Kamala",4,California,CA,1,3,0.0213759569468058,01/03/2017,01/18/2021,4.04383561643836,1,116,62.4,37.6,2016,https://twitter.com/VP,VP,https://twitter.com/KamalaHarris,KamalaHarris,10/20/1964,1,African-American; Asian-American,8,J.D.; University of California; 1989,2,N/A,https://bioguide.congress.gov/search/bio/H001075,(became VP on Jan 20 2021),
"Isakson, John",5,Georgia,GA,0,3,*,01/03/2005,12/31/2019,14,1,116,55,40.8,2016,https://twitter.com/SenatorIsakson,SenatorIsakson,N/A,N/A,12/28/1944,0,White,6,"University of Georgia, Athens; 1966",1,N/A,https://bioguide.congress.gov/search/bio/I000055,(died in 2019),
"Jones, Gordon Douglas",6,Alabama,AL,1,2,0.632885678298333,01/03/2018,01/03/2021,3.0027397260274,1,116,49.9,48.4,2017,https://twitter.com/DougJones,DougJones,N/A,N/A,05/04/1954,0,White,8,"J.D.; Samford University, Cumberland School of Law; 1979",2,N/A,https://bioguide.congress.gov/search/bio/J000300/,special election to replace Jeff Sessions,
"Loeffler, Kelly",7,Georgia,GA,0,2,0.904293903291947,01/06/2020,01/20/2021,1.04109589041096,1,116,N/A,N/A,*,https://twitter.com/KLoeffler,KLoeffler,https://twitter.com/senatorloeffler,senatorloeffler,11/27/1970,1,White,7,M.B.A.; International Finance and Marketing; DePaul University Chicago; 1999,1,N/A,https://bioguide.congress.gov/search/bio/L000594,Appointed in 2019 after the resignation of Johnny Isakson but lost the 2020 election,
"McSally, Martha",8,Arizona,AZ,0,2,*,01/03/2015,01/03/2019,1,1,116,N/A,N/A,*,https://twitter.com/MarthaMcSallyAZ,MarthaMcSallyAZ,https://twitter.com/marthamcsally,marthamcsally,03/22/1966,1,White,7,M.P.P.; John F. Kennedy School of Government,3,N/A,https://bioguide.congress.gov/search/bio/M001197,(left office Dec 2 2020),appointed in 2018 after death of John McCain but lost the 2020 election
"Perdue, David",9,Georgia,GA,0,2,0.914979462126755,01/06/2015,01/03/2021,5.9972602739726,1,116,53,45.1,2014,https://twitter.com/DavidPerdueGA,DavidPerdueGA,https://twitter.com/sendavidperdue,sendavidperdue,12/10/1949,0,White,7,M.S.; Georgia Institute of Technology; 1976,1,N/A,https://bioguide.congress.gov/search/bio/P000612,,
"Roberts, Charles Patrick",10,Kansas,KS,0,2,0.822995787870405,01/07/1997,01/03/2021,24.0054794520548,1,116,53.3,42.5,2014,https://twitter.com/SenPatRoberts,SenPatRoberts,https://twitter.com/PatRoberts,PatRoberts,04/20/1936,0,White,6,"B.A.; Kansas State University, Manhattan; 1958",7,N/A,https://bioguide.congress.gov/search/bio/R000307,,
"Udall, Tom",11,New Mexico,NM,1,2,0.259828450248573,01/06/2009,01/03/2021,12,1,116,55.4,44.6,2014,https://twitter.com/SenatorTomUdall,SenatorTomUdall,https://twitter.com/tomudall,tomudall,05/18/1948,0,White,8,"J.D.; University of New Mexico School of Law, Albuquerque, N.M.; 1977",2,N/A,https://bioguide.congress.gov/search/bio/U000039,,
60 King, Angus S., Jr. Klobuchar, Amy 59 Maine Minnesota ME MN 2 1 1 0.346033257048853 0.130504324943533 01/03/2013 01/04/2007 12/31/2022 9.9972602739726 16 0 117 54.3 60.3 35.2 36.2 2018 https://twitter.com/SenAngusKing https://twitter.com/SenAmyKlobuchar SenAngusKing SenAmyKlobuchar N/A https://twitter.com/amyklobuchar N/A amyklobuchar 03/31/1944 05/25/1960 0 1 White 8 J.D.; University of Virginia; 1969 J.D.; University of Chicago, 1985 2 https://www.king.senate.gov/ https://www.klobuchar.senate.gov/ https://bioguide.congress.gov/search/bio/K000383 https://bioguide.congress.gov/search/bio/K000367
61 Klobuchar, Amy Lankford, James 60 Minnesota Oklahoma MN OK 1 0 1 3 0.130504324943533 0.89992933687588 01/04/2007 01/03/2015 12/31/2022 16 7.9972602739726 0 117 60.3 67.7 36.2 24.6 2018 2016 https://twitter.com/SenAmyKlobuchar https://twitter.com/SenatorLankford SenAmyKlobuchar SenatorLankford https://twitter.com/amyklobuchar https://twitter.com/jameslankford amyklobuchar jameslankford 05/25/1960 03/04/1968 1 0 White 8 7 J.D.; University of Chicago, 1985 M.Div.; Southwestern Theological Baptist Seminary; 1994 2 5 https://www.klobuchar.senate.gov/ https://www.lankford.senate.gov/ https://bioguide.congress.gov/search/bio/K000367 https://bioguide.congress.gov/search/bio/L000575
62 Lankford, James Leahy, Patrick 61 Oklahoma Vermont OK VT 0 1 3 0.89992933687588 0.144121081911654 01/03/2015 01/14/1975 12/31/2022 7.9972602739726 47.9945205479452 0 1 117 67.7 61.3 24.6 33 2016 https://twitter.com/SenatorLankford https://twitter.com/SenatorLeahy SenatorLankford SenatorLeahy https://twitter.com/jameslankford N/A jameslankford N/A 03/04/1968 03/31/1940 0 White 7 8 M.Div.; Southwestern Theological Baptist Seminary; 1994 J.D.; Georgetown University; 1964 5 2 https://www.lankford.senate.gov/ N/A https://bioguide.congress.gov/search/bio/L000575 https://bioguide.congress.gov/search/bio/L000174
63 Leahy, Patrick Lee, Mike 62 Vermont Utah VT UT 1 0 3 0.144121081911654 0.753748787807473 01/14/1975 01/05/2011 12/31/2022 47.9945205479452 11.9945205479452 1 0 117 61.3 68 33 27.4 2016 https://twitter.com/SenatorLeahy https://twitter.com/SenMikeLee SenatorLeahy SenMikeLee N/A https://twitter.com/BasedMikeLee N/A BasedMikeLee 03/31/1940 06/04/1971 0 White 8 J.D.; Georgetown University; 1964 J.D.; Brigham Young university; 1997 2 N/A https://www.lee.senate.gov/ https://bioguide.congress.gov/search/bio/L000174 https://bioguide.congress.gov/search/bio/L000577
64 Lee, Mike Luján, Ben Ray 63 Utah New Mexico UT NM 0 1 3 2 0.753748787807473 0.174860888138848 01/05/2011 01/03/2021 12/31/2022 11.9945205479452 1.99178082191781 0 117 68 51.7 27.4 45.6 2016 2020 https://twitter.com/SenMikeLee https://twitter.com/SenatorLujan SenMikeLee SenatorLujan https://twitter.com/BasedMikeLee https://twitter.com/benraylujan BasedMikeLee benraylujan 06/04/1971 06/07/1972 0 White Hispanic 8 6 J.D.; Brigham Young university; 1997 B.B.A.; New Mexico Highlands University; 2007 2 0 https://www.lee.senate.gov/ https://www.lujan.senate.gov/ https://bioguide.congress.gov/search/bio/L000577 https://bioguide.congress.gov/search/bio/L000570
65 Luj�n, Ben Ray Lummis, Cynthia M. 64 New Mexico Wyoming NM WY 1 0 2 0.174860888138848 0.893292958108508 01/03/2021 12/31/2022 1.99178082191781 0 117 51.7 73.1 45.6 26.9 2020 https://twitter.com/SenatorLujan https://twitter.com/SenLummis SenatorLujan SenLummis https://twitter.com/benraylujan https://twitter.com/CynthiaMLummis benraylujan CynthiaMLummis 06/07/1972 09/10/1954 0 1 Hispanic White 6 8 B.B.A.; New Mexico Highlands University; 2007 J.D.; University of Wyoming College of Law, Laramie, Wyo.; 1985 0 11 https://www.lujan.senate.gov/ https://www.lummis.senate.gov/ https://bioguide.congress.gov/search/bio/L000570 https://bioguide.congress.gov/search/bio/L000571
66 Lummis, Cynthia M. Manchin, Joe, III 65 Wyoming West Virginia WY WV 0 1 2 1 0.893292958108508 0.446686774398077 01/03/2021 11/15/2010 12/31/2022 1.99178082191781 12.1342465753425 0 117 73.1 49.6 26.9 46.3 2020 2018 https://twitter.com/SenLummis https://twitter.com/Sen_JoeManchin SenLummis Sen_JoeManchin https://twitter.com/CynthiaMLummis https://twitter.com/JoeManchinWV CynthiaMLummis JoeManchinWV 09/10/1954 08/24/1947 1 0 White 8 6 J.D.; University of Wyoming College of Law, Laramie, Wyo.; 1985 B.A.; Business Administration; West Virginia University; 1970 11 12 https://www.lummis.senate.gov/ https://www.manchin.senate.gov/ https://bioguide.congress.gov/search/bio/L000571 https://bioguide.congress.gov/search/bio/M001183
67 Manchin, Joe, III Markey, Edward J. 66 West Virginia Massachusetts WV MA 1 1 2 0.446686774398077 0.0139659683705929 11/15/2010 07/16/2013 12/31/2022 12.1342465753425 9.46575342465753 0 117 49.6 66.2 46.3 33 2018 2020 https://twitter.com/Sen_JoeManchin https://twitter.com/SenMarkey Sen_JoeManchin SenMarkey https://twitter.com/JoeManchinWV https://twitter.com/edmarkey JoeManchinWV edmarkey 08/24/1947 07/11/1946 0 White 6 8 B.A.; Business Administration; West Virginia University; 1970 J.D.; Boston College Law School; 1972 12 11 https://www.manchin.senate.gov/ https://www.markey.senate.gov/ https://bioguide.congress.gov/search/bio/M001183 https://bioguide.congress.gov/search/bio/M000133
68 Markey, Edward J. Marshall, Roger 67 Massachusetts Kansas MA KS 1 0 2 0.0139659683705929 0.882124792228652 07/16/2013 01/03/2021 12/31/2022 9.46575342465753 1.99178082191781 0 117 66.2 53.2 33 41.8 2020 https://twitter.com/SenMarkey https://twitter.com/SenatorMarshall SenMarkey SenatorMarshall https://twitter.com/edmarkey https://twitter.com/RogerMarshallMD edmarkey RogerMarshallMD 07/11/1946 08/09/1960 0 White 8 7 J.D.; Boston College Law School; 1972 M.D.; University of Kansas School of Medicine; 1987 11 6 https://www.markey.senate.gov/ https://www.marshall.senate.gov/ https://bioguide.congress.gov/search/bio/M000133 https://bioguide.congress.gov/search/bio/M001198
69 Marshall, Roger McConnell, Mitch 68 Kansas Kentucky KS KY 0 2 0.882124792228652 0.599687533584357 01/03/2021 01/03/1985 12/31/2022 1.99178082191781 38.0164383561644 0 117 53.2 57.8 41.8 38.2 2020 https://twitter.com/SenatorMarshall https://twitter.com/LeaderMcConnell SenatorMarshall LeaderMcConnell https://twitter.com/RogerMarshallMD N/A RogerMarshallMD N/A 08/09/1960 02/20/1942 0 White 7 8 M.D.; University of Kansas School of Medicine; 1987 J.D.; Kentucky Law School; 1967 6 11 https://www.marshall.senate.gov/ https://www.mcconnell.senate.gov/ https://bioguide.congress.gov/search/bio/M001198 https://bioguide.congress.gov/search/bio/M000355
70 McConnell, Mitch Menendez, Robert 69 Kentucky New Jersey KY NJ 0 1 2 1 0.599687533584357 0.191515157461704 01/03/1985 01/18/2006 12/31/2022 38.0164383561644 16.9616438356164 0 117 57.8 54 38.2 42.8 2020 2018 https://twitter.com/LeaderMcConnell https://twitter.com/SenatorMenendez LeaderMcConnell SenatorMenendez N/A N/A 02/20/1942 01/01/1954 0 White Hispanic 8 J.D.; Kentucky Law School; 1967 J.D.; Rutgers university of Law; 1979 11 https://www.mcconnell.senate.gov/ https://www.menendez.senate.gov/ https://bioguide.congress.gov/search/bio/M000355 https://bioguide.congress.gov/search/bio/M000639
71 Menendez, Robert Merkley, Jeff 70 New Jersey Oregon NJ OR 1 1 2 0.191515157461704 0.0355414098997263 01/18/2006 01/06/2009 12/31/2022 16.9616438356164 13.9917808219178 0 117 54 56.9 42.8 39.3 2018 2020 https://twitter.com/SenatorMenendez https://twitter.com/SenJeffMerkley SenatorMenendez SenJeffMerkley N/A https://twitter.com/jeffmerkley N/A jeffmerkley 01/01/1954 10/24/1956 0 Hispanic White 8 7 J.D.; Rutgers university of Law; 1979 M.P.A.; Princeton University; 1982 11 0 https://www.menendez.senate.gov/ https://www.merkley.senate.gov/ https://bioguide.congress.gov/search/bio/M000639 https://bioguide.congress.gov/search/bio/M001176
72 Merkley, Jeff Moran, Jerry 71 Oregon Kansas OR KS 1 0 2 3 0.0355414098997263 0.716270292467902 01/06/2009 01/05/2011 12/31/2022 13.9917808219178 11.9945205479452 0 117 56.9 62.4 39.3 32.1 2020 2016 https://twitter.com/SenJeffMerkley https://twitter.com/JerryMoran SenJeffMerkley JerryMoran https://twitter.com/jeffmerkley N/A jeffmerkley N/A 10/24/1956 05/29/1954 0 White 7 8 M.P.A.; Princeton University; 1982 J.D.; Kansas University School of Law; 1981 0 11 https://www.merkley.senate.gov/ https://www.moran.senate.gov/public/ https://bioguide.congress.gov/search/bio/M001176 https://bioguide.congress.gov/search/bio/M000934
73 Moran, Jerry Murkowski, Lisa 72 Kansas Alaska KS AK 0 3 0.716270292467902 0.473296745648617 01/05/2011 12/20/2002 12/31/2022 11.9945205479452 20.0438356164384 0 117 62.4 44.3 32.1 29.5 2016 https://twitter.com/JerryMoran https://twitter.com/lisamurkowski JerryMoran lisamurkowski N/A https://twitter.com/lisaforsenate N/A lisaforsenate 05/29/1954 05/22/1957 0 1 White 8 J.D.; Kansas University School of Law; 1981 J.D.; Willamette College of Law; 1985 11 2 https://www.moran.senate.gov/public/ https://www.murkowski.senate.gov/ https://bioguide.congress.gov/search/bio/M000934 https://bioguide.congress.gov/search/bio/M001153
74 Murkowski, Lisa Murphy, Christopher 73 Alaska Connecticut AK CT 0 1 3 1 0.473296745648617 0.152635018959264 12/20/2002 01/03/2013 12/31/2022 20.0438356164384 9.9972602739726 0 117 44.3 59.5 29.5 39.4 2016 2018 https://twitter.com/lisamurkowski https://twitter.com/ChrisMurphyCT lisamurkowski ChrisMurphyCT https://twitter.com/lisaforsenate N/A lisaforsenate N/A 05/22/1957 08/03/1973 1 0 White 8 J.D.; Willamette College of Law; 1985 J.D.; University of Connecticut; 2002 2 11 https://www.murkowski.senate.gov/ https://www.murphy.senate.gov/ https://bioguide.congress.gov/search/bio/M001153 https://bioguide.congress.gov/search/bio/M001169
75 Murphy, Christopher Murray, Patty 74 Connecticut Washington CT WA 1 1 3 0.152635018959264 0.142703588817088 01/03/2013 01/05/1993 12/31/2022 9.9972602739726 30.0054794520548 0 117 59.5 59.1 39.4 40.9 2018 2016 https://twitter.com/ChrisMurphyCT https://twitter.com/PattyMurray ChrisMurphyCT PattyMurray N/A https://twitter.com/murraycampaign N/A murraycampaign 08/03/1973 10/11/1950 0 1 White 8 6 J.D.; University of Connecticut; 2002 B.A.; Physical Education; Washington State University; 1972 11 5 https://www.murphy.senate.gov/ https://www.murray.senate.gov/ https://bioguide.congress.gov/search/bio/M001169 https://bioguide.congress.gov/search/bio/M001111
76 Murray, Patty Ossoff, Jon 75 Washington Georgia WA GA 1 3 2 0.142703588817088 0.303405364928085 01/05/1993 01/20/2021 12/31/2022 30.0054794520548 1.94520547945205 0 117 59.1 50.6 40.9 49.4 2016 2020 https://twitter.com/PattyMurray https://twitter.com/SenOssoff PattyMurray SenOssoff https://twitter.com/murraycampaign https://twitter.com/ossoff murraycampaign ossoff 10/11/1950 02/16/1987 1 0 White 6 7 B.A.; Physical Education; Washington State University; 1972 M.S.; International Politicla Economy; London School of Economics; 2013 5 7 https://www.murray.senate.gov/ https://www.ossoff.senate.gov/ https://bioguide.congress.gov/search/bio/M001111 https://bioguide.congress.gov/search/bio/O000174
77 Ossoff, Jon Padilla, Alex 76 Georgia California GA CA 1 2 3 0.303405364928085 0.0200324383981554 01/20/2021 12/31/2022 1.94520547945205 0 117 50.6 N/A 49.4 N/A 2020 * https://twitter.com/SenOssoff https://twitter.com/SenAlexPadilla SenOssoff SenAlexPadilla https://twitter.com/ossoff https://twitter.com/AlexPadilla4CA ossoff AlexPadilla4CA 02/16/1987 03/22/1973 0 White Hispanic 7 6 M.S.; International Politicla Economy; London School of Economics; 2013 B.S.; Mechanical Engineering; MIT; 1994 7 9 https://www.ossoff.senate.gov/ https://www.padilla.senate.gov/ https://bioguide.congress.gov/search/bio/O000174 https://bioguide.congress.gov/search/bio/P000145 appointed in 2020 to replace Kamala Harris
78 Padilla, Alex Paul, Rand 77 California Kentucky CA KY 1 0 3 0.0200324383981554 0.684883322748808 01/20/2021 01/05/2011 12/31/2022 1.94520547945205 11.9945205479452 0 117 N/A 57.3 N/A 42.7 * 2016 https://twitter.com/SenAlexPadilla https://twitter.com/senrandpaul SenAlexPadilla senrandpaul https://twitter.com/AlexPadilla4CA https://twitter.com/RandPaul AlexPadilla4CA RandPaul 03/22/1973 01/07/1963 0 Hispanic White 6 7 B.S.; Mechanical Engineering; MIT; 1994 M.D.; Duke University; 1988 9 6 https://www.padilla.senate.gov/ https://www.paul.senate.gov/ https://bioguide.congress.gov/search/bio/P000145 https://bioguide.congress.gov/search/bio/P000603 appointed in 2020 to replace Kamala Harris
79 Paul, Rand Peters, Gary C. 78 Kentucky Michigan KY MI 0 1 3 2 0.684883322748808 0.355796587683312 01/05/2011 01/06/2015 12/31/2022 11.9945205479452 7.98904109589041 0 117 57.3 49.9 42.7 48.2 2016 2020 https://twitter.com/senrandpaul https://twitter.com/SenGaryPeters senrandpaul SenGaryPeters https://twitter.com/RandPaul https://twitter.com/garypeters RandPaul garypeters 01/07/1963 12/01/1958 0 White 7 8 M.D.; Duke University; 1988 J.D.; Wayne State University; 1989 6 2 https://www.paul.senate.gov/ https://www.peters.senate.gov/ https://bioguide.congress.gov/search/bio/P000603 https://bioguide.congress.gov/search/bio/P000595
80 Peters, Gary C. Portman, Robert 79 Michigan Ohio MI OH 1 0 2 3 0.355796587683312 0.548120690430407 01/06/2015 01/05/2011 12/31/2022 7.98904109589041 11.9945205479452 0 1 117 49.9 58.3 48.2 36.9 2020 2016 https://twitter.com/SenGaryPeters https://twitter.com/senrobportman SenGaryPeters senrobportman https://twitter.com/garypeters N/A garypeters N/A 12/01/1958 12/19/1955 0 White 8 J.D.; Wayne State University; 1989 J.D.; University of Michigan; 1985 2 https://www.peters.senate.gov/ N/A https://bioguide.congress.gov/search/bio/P000595 https://bioguide.congress.gov/search/bio/P000449
81 Portman, Robert Reed, John F. 80 Ohio Rhode Island OH RI 0 1 3 2 0.548120690430407 0.145861826443275 01/05/2011 01/07/1997 12/31/2022 11.9945205479452 25.9972602739726 1 0 117 58.3 66.6 36.9 33.4 2016 2020 https://twitter.com/senrobportman https://twitter.com/SenJackReed senrobportman SenJackReed N/A N/A 12/19/1955 11/12/1949 0 White 8 J.D.; University of Michigan; 1985 J.D.; Harvard University; 1982 2 N/A https://www.reed.senate.gov/ https://bioguide.congress.gov/search/bio/P000449 https://bioguide.congress.gov/search/bio/R000122
82 Reed, John F. Risch, James E. 81 Rhode Island Idaho RI ID 1 0 2 0.145861826443275 0.82910906209038 01/07/1997 01/06/2009 12/31/2022 25.9972602739726 13.9917808219178 0 117 66.6 62.6 33.4 33.2 2020 https://twitter.com/SenJackReed https://twitter.com/SenatorRisch SenJackReed SenatorRisch N/A N/A 11/12/1949 05/03/1943 0 White 8 J.D.; Harvard University; 1982 J.D.; University of Idaho; 1968 2 https://www.reed.senate.gov/ https://www.risch.senate.gov/ https://bioguide.congress.gov/search/bio/R000122 https://bioguide.congress.gov/search/bio/R000584
83 Risch, James E. Romney, Mitt 82 Idaho Utah ID UT 0 2 1 0.82910906209038 0.596688837978771 01/06/2009 01/03/2019 12/31/2022 13.9917808219178 3.99452054794521 0 117 62.6 33.2 30.9 2020 2018 https://twitter.com/SenatorRisch https://twitter.com/SenatorRomney SenatorRisch SenatorRomney N/A https://twitter.com/mittromney N/A mittromney 05/03/1943 03/12/1947 0 White 8 7 J.D.; University of Idaho; 1968 M.B.A.; Harvard Business School; 1975 2 1 https://www.risch.senate.gov/ https://www.romney.senate.gov/ https://bioguide.congress.gov/search/bio/R000584 https://bioguide.congress.gov/search/bio/R000615
84 Romney, Mitt Rosen, Jacky 83 Utah Nevada UT NV 0 1 1 0.596688837978771 0.308548351377894 01/03/2019 12/31/2022 3.99452054794521 0 117 62.6 50.4 30.9 45.4 2018 https://twitter.com/SenatorRomney https://twitter.com/SenJackyRosen SenatorRomney SenJackyRosen https://twitter.com/mittromney https://twitter.com/RosenforNevada mittromney RosenforNevada 03/12/1947 08/02/1957 0 1 White 7 6 M.B.A.; Harvard Business School; 1975 B.A.; Psychology; University of Minnesota; 1979 1 https://www.romney.senate.gov/ https://www.rosen.senate.gov/ https://bioguide.congress.gov/search/bio/R000615 https://bioguide.congress.gov/search/bio/R000608
85 Rosen, Jacky Rounds, Mike 84 Nevada South Dakota NV SD 1 0 1 2 0.308548351377894 0.784008560585577 01/03/2019 01/06/2015 12/31/2022 3.99452054794521 7.98904109589041 0 117 50.4 65.7 45.4 34.3 2018 2020 https://twitter.com/SenJackyRosen https://twitter.com/SenatorRounds SenJackyRosen SenatorRounds https://twitter.com/RosenforNevada N/A RosenforNevada N/A 08/02/1957 10/24/1954 1 0 White 6 B.A.; Psychology; University of Minnesota; 1979 B.S.; Political Science; South Dakota State University; 1977 1 https://www.rosen.senate.gov/ https://www.rounds.senate.gov/ https://bioguide.congress.gov/search/bio/R000608 https://bioguide.congress.gov/search/bio/R000605
86 Rounds, Mike Rubio, Marco 85 South Dakota Florida SD FL 0 2 3 0.784008560585577 0.831181764071725 01/06/2015 01/05/2011 12/31/2022 7.98904109589041 11.9945205479452 0 117 65.7 52 34.3 44.3 2020 2016 https://twitter.com/SenatorRounds https://twitter.com/senmarcorubio SenatorRounds senmarcorubio N/A https://twitter.com/marcorubio N/A marcorubio 10/24/1954 05/28/1971 0 White Hispanic 6 8 B.S.; Political Science; South Dakota State University; 1977 J.D.; University of Miami; 1996 1 2 https://www.rounds.senate.gov/ https://www.rubio.senate.gov/ https://bioguide.congress.gov/search/bio/R000605 https://bioguide.congress.gov/search/bio/R000595
87 Rubio, Marco Sanders, Bernard 86 Florida Vermont FL VT 0 2 3 1 0.831181764071725 0 01/05/2011 01/04/2007 12/31/2022 11.9945205479452 16 0 117 52 67.4 44.3 27.5 2016 2018 https://twitter.com/senmarcorubio https://twitter.com/SenSanders senmarcorubio SenSanders https://twitter.com/marcorubio https://twitter.com/BernieSanders marcorubio BernieSanders 05/28/1971 09/08/1941 0 Hispanic White 8 6 J.D.; University of Miami; 1996 B.A.; Political Science; University of Chicago; 1964 2 0 https://www.rubio.senate.gov/ https://www.sanders.senate.gov/ https://bioguide.congress.gov/search/bio/R000595 https://bioguide.congress.gov/search/bio/S000033
88 Sanders, Bernard Sasse, Benjamin 87 Vermont Nebraska VT NE 2 0 1 2 0 0.684229649213868 01/04/2007 01/06/2015 12/31/2022 16 7.98904109589041 0 1 117 67.4 62.7 27.5 24.4 2018 2020 https://twitter.com/SenSanders https://twitter.com/sensasse SenSanders sensasse https://twitter.com/BernieSanders https://twitter.com/BenSasse BernieSanders BenSasse 09/08/1941 02/22/1972 0 White 6 8 B.A.; Political Science; University of Chicago; 1964 PhD in History; Yale University; 2004 0 5 https://www.sanders.senate.gov/ N/A https://bioguide.congress.gov/search/bio/S000033 https://bioguide.congress.gov/search/bio/S001197
89 Sasse, Benjamin Schatz, Brian 88 Nebraska Hawaii NE HI 0 1 2 3 0.684229649213868 0.213250458593456 01/06/2015 12/27/2012 12/31/2022 7.98904109589041 10.0164383561644 1 0 117 62.7 73.6 24.4 22.2 2020 2016 https://twitter.com/sensasse https://twitter.com/brianschatz sensasse brianschatz https://twitter.com/BenSasse https://twitter.com/SenBrianSchatz BenSasse SenBrianSchatz 02/22/1972 10/20/1972 0 White 8 6 PhD in History; Yale University; 2004 B.A.; Philosophy; Pomona College; 1994 5 N/A https://www.schatz.senate.gov/ https://bioguide.congress.gov/search/bio/S001197 https://bioguide.congress.gov/search/bio/S001194
90 Schatz, Brian Schumer, Charles E. 89 Hawaii New York HI NY 1 3 0.213250458593456 0.239789022209428 12/27/2012 01/06/1999 12/31/2022 10.0164383561644 24 0 117 73.6 70.4 22.2 27.4 2016 https://twitter.com/brianschatz https://twitter.com/SenSchumer brianschatz SenSchumer https://twitter.com/SenBrianSchatz https://twitter.com/chuckschumer SenBrianSchatz chuckschumer 10/20/1972 11/23/1950 0 White 6 8 B.A.; Philosophy; Pomona College; 1994 J.D.; Harvard University; 1974 5 2 https://www.schatz.senate.gov/ https://www.schumer.senate.gov/ https://bioguide.congress.gov/search/bio/S001194 https://bioguide.congress.gov/search/bio/S000148
91 Schumer, Charles E. Scott, Rick 90 New York Florida NY FL 1 0 3 1 0.239789022209428 1 01/06/1999 01/08/2019 12/31/2022 24 3.98082191780822 0 117 70.4 50.1 27.4 49.9 2016 2018 https://twitter.com/SenSchumer https://twitter.com/SenRickScott SenSchumer SenRickScott https://twitter.com/chuckschumer https://twitter.com/scottforflorida chuckschumer scottforflorida 11/23/1950 12/01/1952 0 White 8 J.D.; Harvard University; 1974 J.D.; Southern Methodist University; 1978 2 https://www.schumer.senate.gov/ https://www.rickscott.senate.gov/ https://bioguide.congress.gov/search/bio/S000148 https://bioguide.congress.gov/search/bio/S001217
92 Scott, Rick Scott, Tim 91 Florida South Carolina FL SC 0 1 3 1 0.781356077518849 01/08/2019 01/03/2013 12/31/2022 3.98082191780822 9.9972602739726 0 117 50.1 60.6 49.9 37 2018 2016 https://twitter.com/SenRickScott https://twitter.com/SenatorTimScott SenRickScott SenatorTimScott https://twitter.com/scottforflorida https://twitter.com/votetimscott scottforflorida votetimscott 12/01/1952 09/19/1965 0 White African-American 8 6 J.D.; Southern Methodist University; 1978 B.S.; Political Science; Charleston Southern University; 1988 2 1 https://www.rickscott.senate.gov/ https://www.scott.senate.gov/ https://bioguide.congress.gov/search/bio/S001217 https://bioguide.congress.gov/search/bio/S001184
93 Scott, Tim Shaheen, Jeanne 92 South Carolina New Hampshire SC NH 0 1 3 2 0.781356077518849 0.2925665319541 01/03/2013 01/06/2009 12/31/2022 9.9972602739726 13.9917808219178 0 117 60.6 56.6 37 41 2016 2020 https://twitter.com/SenatorTimScott https://twitter.com/SenatorShaheen SenatorTimScott SenatorShaheen https://twitter.com/votetimscott https://twitter.com/JeanneShaheen votetimscott JeanneShaheen 09/19/1965 01/28/1947 0 1 African-American White 6 7 B.S.; Political Science; Charleston Southern University; 1988 M.S.S.; University of Mississippi; 1973 1 5 https://www.scott.senate.gov/ https://www.shaheen.senate.gov/ https://bioguide.congress.gov/search/bio/S001184 https://bioguide.congress.gov/search/bio/S001181
94 Shaheen, Jeanne Shelby, Richard 93 New Hampshire Alabama NH AL 1 0 2 3 0.2925665319541 0.577739000839365 01/06/2009 01/06/1987 12/31/2022 13.9917808219178 36.0082191780822 0 1 117 56.6 64.2 41 35.8 2020 2016 https://twitter.com/SenatorShaheen https://twitter.com/SenShelby SenatorShaheen SenShelby https://twitter.com/JeanneShaheen N/A JeanneShaheen N/A 01/28/1947 05/06/1934 1 0 White 7 6 M.S.S.; University of Mississippi; 1973 LL.B.; University of Alabama; 1963 5 2 https://www.shaheen.senate.gov/ N/A https://bioguide.congress.gov/search/bio/S001181 https://bioguide.congress.gov/search/bio/S000320
95 Shelby, Richard Sinema, Kyrsten 94 Alabama Arizona AL AZ 0 2 3 1 0.577739000839365 0.500967034663567 01/06/1987 01/03/2019 12/31/2022 36.0082191780822 3.99452054794521 1 0 117 64.2 50 35.8 47.6 2016 2018 https://twitter.com/SenShelby https://twitter.com/SenatorSinema SenShelby SenatorSinema N/A https://twitter.com/kyrstensinema N/A kyrstensinema 05/06/1934 07/12/1976 0 1 White 6 8 LL.B.; University of Alabama; 1963 PhD in Justice Studies; Arizona State University; 2012 2 N/A https://www.sinema.senate.gov/ https://bioguide.congress.gov/search/bio/S000320 https://bioguide.congress.gov/search/bio/S001191
96 Sinema, Kyrsten Smith, Tina 95 Arizona Minnesota AZ MN 2 1 1 2 0.500967034663567 0.0756533259297989 01/03/2019 01/03/2018 12/31/2022 3.99452054794521 4.99452054794521 0 117 50 48.8 47.6 43.5 2018 2020 https://twitter.com/SenatorSinema https://twitter.com/SenTinaSmith SenatorSinema SenTinaSmith https://twitter.com/kyrstensinema https://twitter.com/TinaSmithMN kyrstensinema TinaSmithMN 07/12/1976 03/04/1958 1 White 8 7 PhD in Justice Studies; Arizona State University; 2012 M.B.A. Dartmouth College; 1984 2 1 https://www.sinema.senate.gov/ https://www.smith.senate.gov/ https://bioguide.congress.gov/search/bio/S001191 https://bioguide.congress.gov/search/bio/S001203
97 Smith, Tina Stabenow, Debbie 96 Minnesota Michigan MN MI 1 2 1 0.0756533259297989 0.221949395648287 01/03/2018 01/03/2001 12/31/2022 4.99452054794521 22.0054794520548 0 117 48.8 52.3 43.5 45.8 2020 2018 https://twitter.com/SenTinaSmith https://twitter.com/SenStabenow SenTinaSmith SenStabenow https://twitter.com/TinaSmithMN https://twitter.com/stabenow TinaSmithMN stabenow 03/04/1958 04/29/1950 1 White 7 M.B.A. Dartmouth College; 1984 M.S.W.; Michigan State University; 1975 1 5 https://www.smith.senate.gov/ https://www.stabenow.senate.gov/ https://bioguide.congress.gov/search/bio/S001203 https://bioguide.congress.gov/search/bio/S000770
98 Stabenow, Debbie Sullivan, Dan 97 Michigan Alaska MI AK 1 0 1 2 0.221949395648287 0.652100683642255 01/03/2001 01/06/2015 12/31/2022 22.0054794520548 7.98904109589041 0 117 52.3 53.9 45.8 41.2 2018 2020 https://twitter.com/SenStabenow https://twitter.com/SenDanSullivan SenStabenow SenDanSullivan https://twitter.com/stabenow N/A stabenow N/A 04/29/1950 11/13/1964 1 0 White 7 8 M.S.W.; Michigan State University; 1975 J.D.; Georgetown University; 1993 5 2 https://www.stabenow.senate.gov/ https://www.sullivan.senate.gov/ https://bioguide.congress.gov/search/bio/S000770 https://bioguide.congress.gov/search/bio/S001198
99 Sullivan, Dan Tester, Jon 98 Alaska Montana AK MT 0 1 2 1 0.652100683642255 0.377646486433112 01/06/2015 01/04/2007 12/31/2022 7.98904109589041 16 0 117 53.9 50.3 41.2 46.8 2020 2018 https://twitter.com/SenDanSullivan https://twitter.com/SenatorTester SenDanSullivan SenatorTester N/A https://twitter.com/jontester N/A jontester 11/13/1964 08/21/1956 0 White 8 6 J.D.; Georgetown University; 1993 B.A.; Music; University of Providence; 1978 2 10 https://www.sullivan.senate.gov/ https://www.tester.senate.gov/ https://bioguide.congress.gov/search/bio/S001198 https://bioguide.congress.gov/search/bio/T000464
100 Tester, Jon Thune, John 99 Montana South Dakota MT SD 1 0 1 3 0.377646486433112 0.795060855902239 01/04/2007 01/04/2005 12/31/2022 16 18 0 117 50.3 71.8 46.8 28.2 2018 2016 https://twitter.com/SenatorTester https://twitter.com/SenJohnThune SenatorTester SenJohnThune https://twitter.com/jontester https://twitter.com/johnthune jontester johnthune 08/21/1956 01/07/1961 0 White 6 7 B.A.; Music; University of Providence; 1978 M.B.A.; University of South Dakota; 1984 10 1 https://www.tester.senate.gov/ https://www.thune.senate.gov/ https://bioguide.congress.gov/search/bio/T000464 https://bioguide.congress.gov/search/bio/T000250
101 Thune, John Tillis, Thom 100 South Dakota North Carolina SD NC 0 3 2 0.795060855902239 0.819146177750934 01/04/2005 01/06/2015 12/31/2022 18 7.98904109589041 0 117 71.8 48.7 28.2 46.9 2016 2020 https://twitter.com/SenJohnThune https://twitter.com/SenThomTillis SenJohnThune SenThomTillis https://twitter.com/johnthune https://twitter.com/ThomTillis johnthune ThomTillis 01/07/1961 08/30/1960 0 White 7 6 M.B.A.; University of South Dakota; 1984 B.S.; Technology Management; University of Maryland; 1996 1 https://www.thune.senate.gov/ https://www.tillis.senate.gov/ https://bioguide.congress.gov/search/bio/T000250 https://bioguide.congress.gov/search/bio/T000476
102 Tillis, Thom Toomey, Patrick 101 North Carolina Pennsylvania NC PA 0 2 3 0.819146177750934 0.607637714921737 01/06/2015 01/05/2011 12/31/2022 7.98904109589041 11.9945205479452 0 1 117 48.7 48.9 46.9 47.2 2020 2016 https://twitter.com/SenThomTillis https://twitter.com/SenToomey SenThomTillis SenToomey https://twitter.com/ThomTillis https://twitter.com/pattoomey ThomTillis pattoomey 08/30/1960 11/17/1961 0 White 6 B.S.; Technology Management; University of Maryland; 1996 A.B.; Government; Harvard College; 1984 1 https://www.tillis.senate.gov/ N/A https://bioguide.congress.gov/search/bio/T000476 https://bioguide.congress.gov/search/bio/T000461
103 Toomey, Patrick Tuberville, Tommy 102 Pennsylvania Alabama PA AL 0 3 2 0.607637714921737 0.808701355452043 01/05/2011 01/03/2021 12/31/2022 11.9945205479452 1.99178082191781 1 0 117 48.9 60.1 47.2 39.7 2016 2020 https://twitter.com/SenToomey https://twitter.com/SenTuberville SenToomey SenTuberville https://twitter.com/pattoomey https://twitter.com/TTuberville pattoomey TTuberville 11/17/1961 09/18/1954 0 White 6 A.B.; Government; Harvard College; 1984 B.S., physical education, Southern Arkansas University, 1976 1 5 N/A https://www.tuberville.senate.gov/ https://bioguide.congress.gov/search/bio/T000461 https://bioguide.congress.gov/search/bio/T000278
104 Tuberville, Tommy Van Hollen, Chris 103 Alabama Maryland AL MD 0 1 2 3 0.808701355452043 0.117646768842011 01/03/2021 01/03/2017 12/31/2022 1.99178082191781 5.99452054794521 0 117 60.1 60.4 39.7 36.4 2020 2016 https://twitter.com/SenTuberville https://twitter.com/ChrisVanHollen SenTuberville ChrisVanHollen https://twitter.com/TTuberville N/A TTuberville N/A 09/18/1954 01/10/1959 0 White 6 8 B.S., physical education, Southern Arkansas University, 1976 J.D.; Georgetown university; 1990 5 2 https://www.tuberville.senate.gov/ https://www.vanhollen.senate.gov/ https://bioguide.congress.gov/search/bio/T000278 https://bioguide.congress.gov/search/bio/V000128
105 Van Hollen, Chris Warner, Mark R. 104 Maryland Virginia MD VA 1 3 2 0.117646768842011 0.33022168507113 01/03/2017 01/06/2009 12/31/2022 5.99452054794521 13.9917808219178 0 117 60.4 56 36.4 44 2016 2020 https://twitter.com/ChrisVanHollen https://twitter.com/SenatorWarner ChrisVanHollen SenatorWarner N/A https://twitter.com/MarkWarner N/A MarkWarner 01/10/1959 12/15/1954 0 White 8 J.D.; Georgetown university; 1990 J.D.; Harvard Law School; 1980 2 1 https://www.vanhollen.senate.gov/ https://www.warner.senate.gov/ https://bioguide.congress.gov/search/bio/V000128 https://bioguide.congress.gov/search/bio/W000805
106 Warner, Mark R. Warnock, Raphael G. 105 Virginia Georgia VA GA 1 2 3 0.33022168507113 0.464158242867696 01/06/2009 01/20/2021 12/31/2022 13.9917808219178 1.94520547945205 0 117 56 51 44 49 2020 https://twitter.com/SenatorWarner https://twitter.com/SenatorWarnock SenatorWarner SenatorWarnock https://twitter.com/MarkWarner https://twitter.com/ReverendWarnock MarkWarner ReverendWarnock 12/15/1954 07/23/1969 0 White African-American 8 J.D.; Harvard Law School; 1980 PhD in Philosophy; Union Theological Seminary; 1 8 https://www.warner.senate.gov/ https://www.warnock.senate.gov/ https://bioguide.congress.gov/search/bio/W000805 https://bioguide.congress.gov/search/bio/W000790
107 Warnock, Raphael G. Warren, Elizabeth 106 Georgia Massachusetts GA MA 1 3 1 0.464158242867696 0.0583875007437665 01/20/2021 01/03/2013 12/31/2022 1.94520547945205 9.9972602739726 0 117 51 60.4 49 36.2 2020 2018 https://twitter.com/SenatorWarnock https://twitter.com/SenWarren SenatorWarnock SenWarren https://twitter.com/ReverendWarnock https://twitter.com/ewarren ReverendWarnock ewarren 07/23/1969 06/22/1949 0 1 African-American White 8 PhD in Philosophy; Union Theological Seminary; J.D.; Rutgers University; 1976 8 2 https://www.warnock.senate.gov/ https://www.warren.senate.gov/ https://bioguide.congress.gov/search/bio/W000790 https://bioguide.congress.gov/search/bio/W000817
108 Warren, Elizabeth Whitehouse, Sheldon 107 Massachusetts Rhode Island MA RI 1 1 0.0583875007437665 0.124737669119195 01/03/2013 01/04/2007 12/31/2022 9.9972602739726 16 0 117 60.4 61.6 36.2 38.4 2018 https://twitter.com/SenWarren https://twitter.com/SenWhitehouse SenWarren SenWhitehouse https://twitter.com/ewarren N/A ewarren N/A 06/22/1949 10/20/1955 1 0 White 8 J.D.; Rutgers University; 1976 J.D.; University of Virginia; 1982 2 https://www.warren.senate.gov/ https://www.whitehouse.senate.gov/ https://bioguide.congress.gov/search/bio/W000817 https://bioguide.congress.gov/search/bio/W000802
109 Whitehouse, Sheldon Wicker, Roger F. 108 Rhode Island Mississippi RI MS 1 0 1 0.124737669119195 0.763788502839721 01/04/2007 12/31/2007 12/31/2022 16 15.0109589041096 0 117 61.6 58.5 38.4 39.5 2018 https://twitter.com/SenWhitehouse https://twitter.com/SenatorWicker SenWhitehouse SenatorWicker N/A https://twitter.com/RogerWicker N/A RogerWicker 10/20/1955 07/05/1951 0 White 8 J.D.; University of Virginia; 1982 J.D.; University of Mississippi; 1975 2 https://www.whitehouse.senate.gov/ https://www.wicker.senate.gov/ https://bioguide.congress.gov/search/bio/W000802 https://bioguide.congress.gov/search/bio/W000437
110 Wicker, Roger F. Wyden, Ron 109 Mississippi Oregon MS OR 0 1 1 3 0.763788502839721 0.0591413132623803 12/31/2007 02/05/1996 12/31/2022 15.0109589041096 26.9205479452055 0 117 58.5 56.7 39.5 33.6 2018 2016 https://twitter.com/SenatorWicker https://twitter.com/RonWyden SenatorWicker RonWyden https://twitter.com/RogerWicker N/A RogerWicker N/A 07/05/1951 05/03/1949 0 White 8 J.D.; University of Mississippi; 1975 J.D.; University of Oregon; 1974 2 https://www.wicker.senate.gov/ https://www.wyden.senate.gov/ https://bioguide.congress.gov/search/bio/W000437 https://bioguide.congress.gov/search/bio/W000779
111 Wyden, Ron Young, Todd 110 Oregon Indiana OR IN 1 0 3 0.0591413132623803 0.677696674158218 02/05/1996 01/05/2011 12/31/2022 26.9205479452055 11.9945205479452 0 1 117 56.7 52.1 33.6 42.4 2016 https://twitter.com/RonWyden https://twitter.com/SenToddYoung RonWyden SenToddYoung N/A https://twitter.com/ToddYoungIN N/A ToddYoungIN 05/03/1949 08/24/1972 0 White 8 J.D.; University of Oregon; 1974 J.D.; Robert H. McKinney; 2006 2 https://www.wyden.senate.gov/ https://www.young.senate.gov/ https://bioguide.congress.gov/search/bio/W000779 https://bioguide.congress.gov/search/bio/Y000064
Young, Todd 111 Indiana IN 0 3 0.677696674158218 01/05/2011 12/31/2022 11.9945205479452 1 117 52.1 42.4 2016 https://twitter.com/SenToddYoung SenToddYoung https://twitter.com/ToddYoungIN ToddYoungIN 08/24/1972 0 White 8 J.D.; Robert H. McKinney; 2006 2 https://www.young.senate.gov/ https://bioguide.congress.gov/search/bio/Y000064

8
data/OUT/.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
/ALL-SENATORS-TWEETS.csv
/Pretest-Prep.csv
/Pretest-Results.csv
/Pretest-SENATORS-TWEETS.csv
/SenatorsTweets-Final.csv
/SenatorsTweets-OnlyCov.csv
/Tweets-Classified-Prep.csv
/Tweets-Stub.csv

0
data/OUT/.gitkeep Normal file
View File

3
data/OUT/graphs/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
/Timeline.png
/Wordcloud-All.png
/Wordcloud-Cov.png

View File

@ -1,24 +0,0 @@
/ALL-SENATORS-LONG-LONG.csv
/ALL-SENATORS.csv
/CoryGardner-LONG.csv
/CoryGardner.csv
/DavidPerdueGA-LONG.csv
/DavidPerdueGA.csv
/DougJones-LONG.csv
/DougJones.csv
/KLoeffler-LONG.csv
/KLoeffler.csv
/MarthaMcSallyAZ-LONG.csv
/MarthaMcSallyAZ.csv
/SenAlexander-LONG.csv
/SenAlexander.csv
/SenPatRoberts-LONG.csv
/SenPatRoberts.csv
/SenatorEnzi-LONG.csv
/SenatorEnzi.csv
/SenatorIsakson-LONG.csv
/SenatorIsakson.csv
/SenatorTomUdall-LONG.csv
/SenatorTomUdall.csv
/VP-LONG.csv
/VP.csv

89
funs/CleanTweets.py Normal file
View File

@ -0,0 +1,89 @@
import re
import string


def preprocess_roberta(text): # https://huggingface.co/cardiffnlp/twitter-roberta-base-sep2022
    preprocessed_text = []
    for t in text.split():
        if len(t) > 1:
            t = '@user' if t[0] == '@' and t.count('@') == 1 else t
            t = 'http' if t.startswith('http') else t
        preprocessed_text.append(t)
    return ' '.join(preprocessed_text)


def remove_URL(text):
    try:
        url = re.compile(r'https?://\S+|www\.\S+')
    except: print(text)
    return url.sub(r'', text)


def remove_emoji(text):
    emoji_pattern = re.compile(
        '['
        u'\U0001F600-\U0001F64F'  # emoticons
        u'\U0001F300-\U0001F5FF'  # symbols & pictographs
        u'\U0001F680-\U0001F6FF'  # transport & map symbols
        u'\U0001F1E0-\U0001F1FF'  # flags (iOS)
        u'\U00002702-\U000027B0'
        u'\U000024C2-\U0001F251'
        ']+',
        flags=re.UNICODE)
    return emoji_pattern.sub(r'', text)


def remove_html(text):
    html = re.compile(r'<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});')
    return re.sub(html, '', text)


def remove_punct(text):
    table = str.maketrans('', '', string.punctuation)
    return text.translate(table)


def remove_nonascii(text):
    return re.sub(r'[^\x00-\x7F]+', '', text)


def remove_spec(text):
    text = re.sub(r'&amp;?', r'and', text)
    text = re.sub(r'&lt;', r'<', text)
    return re.sub(r'&gt;', r'>', text)


def remove_spaces(text): # also new line chars and to lower case
    text = re.sub(r'&lt;', r'<', text)
    text = " ".join(text.splitlines()) # remove newline characters
    text = text.lower()
    text = text.strip()
    return re.sub(r'\s{2,}', ' ', text)


def remove_retw(text):
    text = re.sub(r'(RT|rt)[ ]*@[ ]*[\S]+', '', text)
    return re.sub(r'@[\S]+', '', text)


def preprocess_text(text):
    text = remove_URL(text)
    text = remove_emoji(text)
    text = remove_html(text)
    text = remove_punct(text)
    text = remove_nonascii(text)
    text = remove_spec(text)
    text = remove_spaces(text)
    text = remove_retw(text)
    return text


def preprocess_text_series(series):
    series = series.apply(remove_URL)
    series = series.apply(remove_emoji)
    series = series.apply(remove_html)
    series = series.apply(remove_punct)
    series = series.apply(remove_nonascii)
    series = series.apply(remove_spec)
    series = series.apply(remove_spaces)
    series = series.apply(remove_retw)
    return series


# Check all functions:
input_text = """
Check out this amazing website: https://www.example.com! 😃
<html>This is an HTML tag.</html>
RT @user123: Just received a package from @companyXYZ. It's awesome! 📦
This is a test text with lots of punctuations!!! Can't wait to see more...
"""
processed_text = preprocess_text(input_text)
# print(processed_text)
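preprocess_text_series mirrors preprocess_text for whole pandas columns; below is a minimal usage sketch, not part of the committed file. The DataFrame and column names are placeholders, and preprocess_text_series is assumed to be in scope.

# Sketch only: apply the series-level cleaner to a placeholder DataFrame column.
import pandas as pd

df = pd.DataFrame({"rawContent": [
    "RT @user123: Check https://www.example.com 😃 &amp; more!!!",
    "Second tweet <b>with HTML</b> and a @mention",
]})
df["cleanContent"] = preprocess_text_series(df["rawContent"])
print(df["cleanContent"].tolist())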

View File

@ -3,13 +3,22 @@ import time
import pandas as pd
import snscrape.modules.twitter as sntwitter
def scrapeTweets(handle, slice_data, keywords, td, tweetDFColumns, maxTweets = 5000):
def scrapeTweets(handle, keywords, td, tweetDFColumns, ts_beg, ts_end, suffix, maxTweets = 5000):
    """Scrapes tweets from a specific account in a specific time span using snscrape.modules.twitter.

    Args:
        handle (str): twitter handle of account to be scraped
        keywords (list): list of strings containing the keywords that the tweets shall be searched for
        td (str): tweet file output path
        tweetDFColumns (list): Columns for tweet dataframe. Parameters for snscrape.modules.twitter.Tweet
        ts_beg (str): scrape from ... YYYY-MM-DDTHH:MM:SSZ from datetime: %Y-%m-%dT%H:%M:%SZ (https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes)
        ts_end (str): scrape until ... YYYY-MM-DDTHH:MM:SSZ from datetime: %Y-%m-%dT%H:%M:%SZ (https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes)
        suffix (str): suffix that shall be added to filename after the handle. Example: "-slice1" of handle "handle" will produce the file "Tweets-handle-slice1.csv"
        maxTweets (int, optional): Maximum number of tweets to be scraped. Defaults to 5000.
    """
    i = 0
    currentTime = datetime.now()
    ts_beg = slice_data['beg_time']
    ts_end = slice_data['end_time']
    suffix = slice_data['suffix']
    tweetDataFilePath = td + f"Tweets-{handle}{suffix}.csv"
    # create empty tweetlist that will be filled with tweets of current sen
@ -54,4 +63,55 @@ def scrapeTweets(handle, slice_data, keywords, td, tweetDFColumns, maxTweets = 5
    # save short csv
    tweet_df.to_csv(csv_path, encoding='utf-8')
    # sleep 0.5 seconds to not get blocked because of excessive requests
    time.sleep(0.5)
    time.sleep(0.5)

def getHandles(di):
    """grabs accounts from senators-raw.csv

    Args:
        di (str): path to senators-raw.csv

    Returns:
        list: list containing str of senator account handles
    """
    accounts = pd.read_csv(f"{di}senators-raw.csv")["twitter_handle"].tolist()
    alt_accounts = pd.read_csv(f"{di}senators-raw.csv")["alt_handle"].tolist()
    alt_accounts = [x for x in alt_accounts if str(x) != 'nan'] # remove empty alt_accounts fields
    accounts.extend(alt_accounts)
    return accounts

def printHandles(accounts):
    """returns string with all accounts in a readable way.

    Args:
        accounts (list): list of str with handles

    Returns:
        str: containing text that can be written to txtfile
    """
    txt = ["Accounts to be scraped:\n"]
    for i, acc in enumerate(accounts): # print 5 accounts per line
        txt.append(f"{acc:^17}") # twitter handle max length = 15 chars
        if i % 5 == 4:
            txt.append(" \n")
    txt.append(f"\n{i} accounts in total.")
    return ''.join(txt)

def scrapeUsers(handle, userDFColumns, maxTweets=1):
    currentTime = datetime.now()
    userList = []
    print(f'{currentTime:<30} Fetching: {handle:>15}')
    query = f'from:{handle}'
    for i, tweet in enumerate(sntwitter.TwitterSearchScraper(query).get_items()):
        if i > maxTweets:
            break
        # Get user data and append to singleUserList
        userList = []
        for col in userDFColumns:
            singleUser = eval(f'tweet.user.{col}')
            userList.append(singleUser)
    # Create dataframe using userList and userDFColumns
    #df = pd.DataFrame(userList, columns=userDFColumns)
    return userList
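For orientation, a minimal driver sketch of how the reworked signature might be called after this change; it is not part of the commit. The directory paths, keyword list and column list are placeholders, and scrapeTweets, getHandles and printHandles are assumed to have been imported from this module (its file name is not shown in the diff).

# Sketch only: all values below are placeholders, not values from the repository.
di = "data/IN/"    # directory containing senators-raw.csv
td = "data/OUT/"   # output directory for the per-handle tweet CSV files
keywords = ["covid", "vaccine"]                # placeholder keyword list
tweetDFColumns = ["id", "date", "rawContent"]  # placeholder subset of snscrape Tweet attributes

handles = getHandles(di)
print(printHandles(handles))

for handle in handles:
    # new signature: explicit start/end timestamps plus a filename suffix
    scrapeTweets(handle, keywords, td, tweetDFColumns,
                 ts_beg="2020-01-01T00:00:00Z",
                 ts_end="2020-06-30T00:00:00Z",
                 suffix="-slice1",
                 maxTweets=5000)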

View File

@ -8,6 +8,16 @@ Created on Wed Jun 21 13:58:42 2023
# create slices
def get_Tslices(ts_beg, ts_end, no_slices):
    """Splits the time-period between two points in time into #no_slices and returns start and end time of each slice period.

    Args:
        ts_beg (str): start of the overall period to be sliced, format %Y-%m-%dT%H:%M:%SZ.
        ts_end (str): end of the overall period to be sliced, format %Y-%m-%dT%H:%M:%SZ.
        no_slices (int): number of slices. 24 e.g. will produce 24 start and end dates each.

    Returns:
        list[dict[str:datetime|str]]: One dict per slice containing 'beg_time', 'end_time' and 'suffix' (e.g. -slice1)
    """
    from datetime import datetime
    from datetime import timedelta
    ts_beg = datetime.strptime(ts_beg, '%Y-%m-%dT%H:%M:%SZ')
@ -25,6 +35,16 @@ def get_Tslices(ts_beg, ts_end, no_slices):
# For log time conversions (seconds to days, hours, minutes)
def convertTime(duration):
    """Converts a duration (timedelta) to hours, minutes and seconds.

    Args:
        duration (timedelta): time difference to be converted

    Returns:
        int: hours
        int: minutes
        int: seconds
    """
    days, seconds = duration.days, duration.seconds
    hours = days * 24 + seconds // 3600
    minutes = (seconds % 3600) // 60
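The two helpers are typically used together with the scraper above; the sketch below illustrates the data flow and is not part of the commit. It assumes get_Tslices and convertTime are imported from this module, and the slice count and timestamps are examples only.

# Sketch only: slice the collection period, then time a run with convertTime.
from datetime import datetime

slices = get_Tslices("2020-01-01T00:00:00Z", "2022-12-31T00:00:00Z", 24)
print(slices[0]["beg_time"], slices[0]["end_time"], slices[0]["suffix"])  # first slice, suffix e.g. "-slice1"

start = datetime.now()
# ... scraping work would happen here ...
hours, minutes, seconds = convertTime(datetime.now() - start)
print(f"elapsed: {hours}h {minutes}m {seconds}s")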

0
log/.gitkeep Normal file
View File

View File

@ -0,0 +1,7 @@
epoch,Training Loss,Valid. Loss,Valid. Accur.,Training Time,Validation Time
1,0.39025546515679493,0.40877932761593355,0.9103260869565217,0:10:21,0:00:40
2,0.3057803610952067,0.3502063500978377,0.9103260869565217,0:10:53,0:00:43
3,0.17910970049364833,0.27903796154904464,0.9375,0:10:30,0:00:38
4,0.09279396105943587,0.41342766528301267,0.904891304347826,0:11:03,0:00:43
5,0.06132459050129317,0.4468563502887264,0.9239130434782609,0:12:07,0:00:44
6,0.04195396880810895,0.4350045176675928,0.9266304347826086,0:11:21,0:00:40

View File

@ -0,0 +1,7 @@
epoch,Training Loss,Valid. Loss,Valid. Accur.,Training Time,Validation Time
1,0.6699380816093513,0.6216431430407933,0.6964285714285714,0:01:03,0:00:02
2,0.6649796058024678,0.621175297669002,0.6964285714285714,0:01:03,0:00:01
3,0.642247314964022,0.6377243144171578,0.6964285714285714,0:01:05,0:00:02
4,0.6300328698541436,0.6038827853543418,0.6964285714285714,0:01:04,0:00:02
5,0.544977219509227,0.6619421115943364,0.625,0:01:02,0:00:02
6,0.3951783587357828,0.48477122613361906,0.7857142857142857,0:01:05,0:00:01

View File

@ -0,0 +1,7 @@
epoch,Training Loss,Valid. Loss,Valid. Accur.,Training Time,Validation Time
1,0.5610552686641376,0.4569096086310089,0.9116022099447514,0:37:20,0:00:31
2,0.43647773836513126,0.5441495520680196,0.9005524861878453,0:36:14,0:00:30
3,0.288773139899344,0.43471020716692715,0.9392265193370166,0:36:10,0:00:29
4,0.19330878817686287,0.4555162174395349,0.9281767955801105,0:36:17,0:00:30
5,0.09109889855869348,0.5060150003684702,0.9281767955801105,0:36:13,0:00:30
6,0.05734757932275739,0.6043995772428771,0.9226519337016574,0:36:11,0:00:31

View File

@ -0,0 +1,7 @@
epoch,Training Loss,Valid. Loss,Valid. Accur.,Training Time,Validation Time
1,0.21681843259712502,0.0005426188472483773,1.0,0:01:13,0:00:02
2,0.00016121647037353423,0.0002873415878639207,1.0,0:01:12,0:00:02
3,6.752021149355535e-05,0.00024319994372490328,1.0,0:01:12,0:00:02
4,4.7950222591787355e-05,0.00022139604243420763,1.0,0:01:13,0:00:02
5,3.99839740138679e-05,0.00021302999493855168,1.0,0:01:11,0:00:02
6,3.5356899656214995e-05,0.00020912183117616223,1.0,0:01:13,0:00:02

135
preTestClassification.py Normal file
View File

@ -0,0 +1,135 @@
import pandas as pd
from datetime import datetime
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from datasets import load_dataset
from transformers.pipelines.pt_utils import KeyDataset
#%%
# prepare
# install xformers (pip install xformers) for better performance
###################
# Setup directories
# WD Michael
wd = "/home/michael/Documents/PS/Data/collectTweets/"
# WD Server
# wd = '/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/'
# datafile input directory
di = "data/IN/"
# Tweet-datafile output directory
ud = "data/OUT/"
# Name of file that all senator data will be written to
senCSV = "ALL-SENATORS-TWEETS.csv"
# Name of new datafile generated
senCSVc = "Tweets-Stub.csv"
# Name of pretest files
preTestIDsFake = "pretest-tweets_fake.txt"
preTestIDsNot = "pretest-tweets_not_fake.txt"
# Name of pretest datafile
senCSVPretest = "Pretest.csv"
senCSVPretestPrep = "Pretest-Prep.csv"
senCSVPretestResult = "Pretest-Results.csv"
# don't change this one
senCSVPath = wd + ud + senCSV
senCSVcPath = wd + ud + senCSVc
senCSVcPretestPath = wd + ud + senCSVPretest
senCSVcPretestPrepPath = wd + ud + senCSVPretestPrep
senCSVcPretestResultPath = wd + ud + senCSVPretestResult
preTestIDsFakePath = wd + di + preTestIDsFake
preTestIDsNotPath = wd + di + preTestIDsNot
import sys
funs = wd+"funs"
sys.path.insert(1, funs)
import CleanTweets
# List of IDs to select
# Read the IDs from a file
preTestIDsFakeL = []
preTestIDsNotL = []
with open(preTestIDsFakePath, "r") as file:
    lines = file.readlines()
    for line in lines:
        tid = line.strip() # Remove the newline character
        preTestIDsFakeL.append(tid)
with open(preTestIDsNotPath, "r") as file:
    lines = file.readlines()
    for line in lines:
        tid = line.strip() # Remove the newline character
        preTestIDsNotL.append(tid)
# Select rows based on the IDs
df = pd.read_csv(senCSVPath, dtype=(object))
#%%
# Create pretest dataframe
dfPreTest = df[df['id'].isin(preTestIDsFakeL)].copy()
dfPreTest['fake'] = True
dfPreTest = pd.concat([dfPreTest, df[df['id'].isin(preTestIDsNotL)]], ignore_index=True)
dfPreTest['fake'] = dfPreTest['fake'].fillna(False)
#%%
# https://huggingface.co/bvrau/covid-twitter-bert-v2-struth
# HowTo:
# https://huggingface.co/docs/transformers/main/en/model_doc/bert#transformers.BertForSequenceClassification
# https://stackoverflow.com/questions/75932605/getting-the-input-text-from-transformers-pipeline
pipe = pipeline("text-classification", model="bvrau/covid-twitter-bert-v2-struth")
model = AutoModelForSequenceClassification.from_pretrained("bvrau/covid-twitter-bert-v2-struth")
tokenizer = AutoTokenizer.from_pretrained("bvrau/covid-twitter-bert-v2-struth")
# Source https://www.kaggle.com/code/daotan/tweet-analysis-with-transformers-bert
dfPreTest['cleanContent'] = dfPreTest['rawContent'].apply(CleanTweets.preprocess_text)
#%%
timeStart = datetime.now() # start counting execution time
max_length = 128
dfPreTest['input_ids'] = dfPreTest['cleanContent'].apply(lambda x: tokenizer(x, max_length=max_length, padding="max_length",)['input_ids'])
#train.rename(columns={'target': 'labels'}, inplace=True)
#train.head()
# %%
dfPreTest.to_csv(senCSVcPretestPrepPath, encoding='utf-8', columns=['id', 'cleanContent'])
#%%
dataset = load_dataset("csv", data_files=senCSVcPretestPrepPath)
# %%
results = pipe(KeyDataset(dataset, "text"))
# %%
#from tqdm.auto import tqdm
#for out in tqdm(pipe(KeyDataset(dataset['train'], "cleanContent"))):
# print(out)
#%%
output_labels = []
output_score = []
for out in pipe(KeyDataset(dataset['train'], "cleanContent"), batch_size=8, truncation="only_first"):
    output_labels.append(out['label'])
    output_score.append(out['score'])
# [{'label': 'POSITIVE', 'score': 0.9998743534088135}]
# Exactly the same output as before, but the content are passed
# as batches to the model
# %%
dfPreTest['output_label'] = output_labels
dfPreTest['output_score'] = output_score
timeEnd = datetime.now()
timeTotal = timeEnd - timeStart
timePerTweet = timeTotal / 96
print(f"Total classification execution time: {timeTotal} seconds")
print(f"Time per tweet classification: {timePerTweet}")
print(f"Estimated time for full classification of tweets: {timePerTweet*50183}")
# %%
dfPreTest.to_csv(senCSVcPretestResultPath, encoding='utf-8')
# %%
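Since the pretest rows carry a hand-coded fake flag, one natural follow-up is an agreement check against the pipeline output. The sketch below is an assumption-laden illustration, not part of the script: in particular, the label strings emitted by the struth model are assumed, not taken from the repository.

# Sketch only: rough agreement check between model output and the hand-coded 'fake' flag.
# The label-to-boolean mapping below is an assumption about the model's label names.
label_to_bool = {"fake": True, "real": False}
predicted = dfPreTest['output_label'].str.lower().map(label_to_bool)
agreement = (predicted == dfPreTest['fake']).mean()
print(f"Agreement with hand coding: {agreement:.2%}")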

55
profiler.py Normal file
View File

@ -0,0 +1,55 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 8 14:49:02 2023
@author: michael
"""
import pandas as pd
import pandas_profiling as pp
import numpy
###################
# Setup directories
# WD Michael
wd = "/home/michael/Documents/PS/Data/collectTweets/"
# WD Server
# wd = '/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/'
# datafile input directory
di = "data/IN/"
# Tweet-datafile output directory
ud = "data/OUT/"
# Name of file that all senator data will be written to
senCSV = "ALL-SENATORS-TWEETS.csv"
# Name of file that all senator data will be written to
senDataset = "senators-raw.csv"
# Name of new datafile generated
senCSVc = "SenatorsTweets-Final"
senCSVcCov = "SenatorsTweets-OnlyCov"
# don't change this one
senCSVPath = wd + ud + senCSV
senCSVcPath = wd + ud + senCSVc + ".csv"
senCSVcCovPath = wd + ud + senCSVcCov + ".csv"
senSAVcPath = wd + ud + senCSV + ".sav"
senDTAcPath = wd + ud + senCSV + ".dta"
senDatasetPath = wd + di + senDataset
# forming dataframe and printing
df = pd.read_csv(senCSVPath, dtype=(object))
# forming ProfileReport and save
# as output.html file
profileAll = pp.ProfileReport(df, minimal=True)
profileAll.to_file("data/OUT/profiles/AllTweets.html")
df = pd.read_csv(senCSVcCovPath, dtype=(object))
profileAll = pp.ProfileReport(df, minimal=True)
profileAll.to_file("data/OUT/profiles/CovTweets.html")

35
repairmystupidity.py Normal file
View File

@ -0,0 +1,35 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 14 20:47:22 2023
@author: michael
"""
import pandas as pd
wd = "/home/michael/Documents/PS/Data/collectTweets/"
# datafile input directory
di = "data/IN/"
# Tweet-datafile output directory
ud = "data/OUT/"
falsch = wd + ud + "SenatorsTweets-Training_WORKING-COPY-correct.csv"
richtig = wd + ud + "SenatorsTweets-Training.csv"
correct = wd + ud + "SenatorsTweets-Training_WORKING-COPY-correct2.csv"
# Name of new datafile generated
senCSVprep = "SenatorsTweets-Training_WORKING-COPY-prepared"
# don't change this one
falsch = pd.read_csv(falsch, dtype=(object), sep=";")
richtig = pd.read_csv(richtig, dtype=(object))
df = pd.merge(falsch,richtig[['tid','rawContent', 'date']],on='tid', how='left')
df.drop(columns=['rawContent_x', 'date_x'], inplace=True)
df.rename(columns={'tid_y':'tid', 'rawContent_y':'rawContent', 'date_y':'date'}, inplace=True)
df = df[['tid','date','topicCovid','fake','rawContent','Unnamed: 6']]
df.rename(columns={'Unnamed: 6':'comment'}, inplace=True)
df.to_csv(correct, encoding='utf-8', sep=";")

613
trainFake.py Normal file
View File

@ -0,0 +1,613 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Aug 12 12:25:18 2023
@author: michael
"""
#from datasets import load_dataset
#from transformers import Trainer
#from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
import torch
import numpy as np
from sklearn.model_selection import train_test_split # pip install scikit-learn
import pandas as pd
## Uses snippets from this guide:
# https://mccormickml.com/2019/07/22/BERT-fine-tuning/
###################
# Setup directories
# WD Michael
wd = "/home/michael/Documents/PS/Data/collectTweets/"
# WD Server
# wd = '/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/'
import sys
funs = wd+"funs"
sys.path.insert(1, funs)
import CleanTweets
# datafile input directory
di = "data/IN/"
# Tweet-datafile output directory
ud = "data/OUT/"
# Training CSV dataset
twtCSV = "SenatorsTweets-Training_WORKING-COPY-correct2"
twtCSVtrainCovClass = "SenatorsTweets-train-CovClassification"
twtCSVtrainFakeClass = "SenatorsTweets-train-FakeClassification"
statsTrainingTopicClass = "statsTopicClassification-"
# don't change this one
twtCSVPath = wd + ud + twtCSV + ".csv"
twtCSVtrainCovClassPath = wd + ud + twtCSVtrainCovClass + ".csv"
twtCSVtrainFakeClassPath = wd + ud + twtCSVtrainFakeClass + ".csv"
statsTrainingTopicClassPath = wd + ud + statsTrainingTopicClass
twtCSVtrainCovClassPathTrain = wd + ud + twtCSVtrainCovClass + "TRAIN.csv"
twtCSVtrainFakeClassPathTrain = wd + ud + twtCSVtrainFakeClass + "TRAIN.csv"
twtTSVtrainCovClassPathTrain = wd + ud + "cov-train.tsv"
twtTSVtrainFakeClassPathTrain = wd + ud + "fake-train.tsv"
twtTSVtrainCovClassPathEval = wd + ud + "cov-eval.tsv"
twtTSVtrainFakeClassPathEval = wd + ud + "fake-eval.tsv"
seed = 12355
# Model paths
modCovClassPath = wd + "models/CovClass/"
modFakeClassPath = wd + "models/FakeClass/"
# Candidate models that were tried (rough accuracy noted where tested):
#model_name = 'digitalepidemiologylab/covid-twitter-bert-v2' # accuracy 69
#model_name = 'justinqbui/bertweet-covid19-base-uncased-pretraining-covid-vaccine-tweets' #48
#model_name = "cardiffnlp/tweet-topic-latest-multi"
#model_name = "cardiffnlp/roberta-base-tweet-topic-single-all"
# Final choice for both classifiers:
model_name = "bvrau/covid-twitter-bert-v2-struth"
model_fake_name = 'bvrau/covid-twitter-bert-v2-struth'
# More models for fake detection:
# https://huggingface.co/justinqbui/bertweet-covid-vaccine-tweets-finetuned
tokenizer = AutoTokenizer.from_pretrained(model_name)
max_length = 64 # max token sentence length
#%%
# Create training and testing dataset
dfTest = pd.read_csv(twtCSVPath, dtype=(object), delimiter=";")
#dfTest = dfTest[:-900] # remove last 900 rows
#dfTest = dfTest.iloc[:,:-3] # remove last 3 columns
dfTest['text'] = dfTest['rawContent'].apply(CleanTweets.preprocess_roberta)
dfTest.drop(columns=['rawContent'], inplace=True)
# Only keep tweets that are longer than 3 words
dfTest['tweet_proc_length'] = [len(text.split(' ')) for text in dfTest['text']]
dfTest['tweet_proc_length'].value_counts()
dfTest = dfTest[dfTest['tweet_proc_length']>3]
dfTest = dfTest.drop_duplicates(subset=['text'])
dfTest = dfTest.drop(columns=['date', 'Unnamed: 0'])
# Create datasets for each classification
dfCovClass = dfTest
dfFakeClass = dfTest
dfCovClass = dfCovClass.drop(columns=['fake']) # fake column not needed in the covid-topic classification data
dfFakeClass = dfFakeClass[dfFakeClass['topicCovid']=='True'].drop(columns=['topicCovid']) # topicCovid column not needed in the fake-news classification data
#type_map = {'Covid tweet': 'covid tweets', 'Noncovid tweet': 'noncovid tweet'}
dfCovClass.rename(index = str, columns={'topicCovid': 'labels', 'tid': 'id'}, inplace = True)
dfCovClass.labels = dfCovClass.labels.replace({"True": 'Covid', "False": 'NonCovid'})
#type_map = {'fake news tweet': 'fake news tweet', 'non-fake-news-tweet': 'non-fake-news-tweet'}
dfFakeClass.rename(index = str, columns={'fake': 'labels', 'tid': 'id'}, inplace = True)
#%%
# Tokenize tweets
dfCovClass = dfCovClass[dfCovClass['labels'].notna()]
dfFakeClass['labels'].replace({'Check': '','check': '', 'FALSE':''}, inplace=True)
dfFakeClass = dfFakeClass[dfFakeClass['labels'].notna()]
dfCovClass['input_ids'] = dfCovClass['text'].apply(lambda x: tokenizer(x, max_length=max_length, padding="max_length",)['input_ids'])
dfFakeClass['input_ids'] = dfFakeClass['text'].apply(lambda x: tokenizer(x, max_length=max_length, padding="max_length",)['input_ids'])
def encode_labels(label):
if label == 'Covid':
return 1
elif label == 'NonCovid':
return 0
elif label == 'False':
return 1
elif label == 'True':
return 0
return 0
dfCovClass['labels_encoded'] = dfCovClass['labels'].apply(encode_labels)
dfFakeClass['labels_encoded'] = dfFakeClass['labels'].apply(encode_labels)
dfFakeClass = dfFakeClass[dfFakeClass['labels']!=""]
#dfFakeClass = dfFakeClass[(dfFakeClass['labels']=="Fake") | (dfFakeClass['labels']=="True")]
# get n of classes
print("# of Non-Covid tweets (coded 0):")
print(dfCovClass.groupby('labels_encoded', group_keys=False)['id'].nunique())
# 62 non-covid tweets, disproportionate sample for training has to be 124 tweets
print("# of Fake-news tweets (coded 1):")
print(dfFakeClass.groupby('labels_encoded', group_keys=False)['id'].nunique())
# create disproportionate sample - 50/50 of both
#dfCovClass.groupby('labels_encoded', group_keys=False)['id'].nunique()
#dfCovClass = dfCovClass.groupby('labels_encoded', group_keys=False).apply(lambda x: x.sample(164, random_state=seed))
# After a lot of tests, it seems that a sample in which non-fake-news tweets are overrepresented leads to better results.
# Because of this, and because of performance limitations and time constraints, group 1 (covid topic) will be overrepresented (twice as many), which still doesn't reflect the real proportions of roughly 10:1.
'''dfCovClassa = dfCovClass.groupby('labels_encoded', group_keys=False).get_group(1).sample(frac=1, replace=True).reset_index()
dfCovClassb = dfCovClass.groupby('labels_encoded', group_keys=False).get_group(0).sample(frac=1, replace=True).reset_index()
dfCovClassab= pd.concat([dfCovClassa,dfCovClassb])
dfCovClassab.reset_index(inplace=True)
dfCovClass_train, dfCovClass_test = train_test_split(dfCovClassab, test_size=0.1, random_state=seed, stratify=dfCovClassab['labels_encoded'])
'''
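# Hedged sketch (illustrative only, not used below): an explicit 50/50 training
# sample for the fake-news classifier could be built by oversampling the
# smaller class of dfFakeClass with replacement; `dfBalanced` is an assumed
# name and is not referenced by the rest of the script.
n_max = dfFakeClass['labels_encoded'].value_counts().max()
dfBalanced = (dfFakeClass.groupby('labels_encoded', group_keys=False)
              .apply(lambda g: g.sample(n_max, replace=True, random_state=seed)))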
# create training and validation samples
dfFakeClass_train, dfFakeClass_test = train_test_split(dfFakeClass, test_size=0.1, random_state=seed, stratify=dfFakeClass['labels_encoded'])
# reset index and drop unnecessary columns
dfFakeClass_train.reset_index(drop=True, inplace=True)
dfFakeClass_train.drop(inplace=True, columns=['tweet_proc_length'])
dfFakeClass_train.groupby('labels_encoded', group_keys=False)['id'].nunique()
dfFakeClass_test.reset_index(drop=True, inplace=True)
dfFakeClass_test.drop(inplace=True, columns=['tweet_proc_length'])
dfFakeClass_test.groupby('labels_encoded', group_keys=False)['id'].nunique()
# save dfs as csvs and tsvs, for training and validation
# covid classification datafiles
# rows 0-41 = noncovid, 42-81 covid, therefore:
#dfCovClass = dfCovClass.drop(columns=['tweet_proc_length'])
#dfCovClass.reset_index(inplace=True, drop=True)
#dfCovClass.loc[np.r_[0:31, 42:71], :].reset_index(drop=True).to_csv(twtCSVtrainCovClassPathTrain, encoding='utf-8', sep=";")
#dfCovClass.loc[np.r_[0:31, 42:72], :].reset_index(drop=True).to_csv(twtTSVtrainCovClassPathTrain, encoding='utf-8', sep="\t")
#dfCovClass.loc[np.r_[31:41, 72:81], :].reset_index(drop=True).to_csv(twtCSVtrainCovClassPath, encoding='utf-8', sep=";")
#dfCovClass.loc[np.r_[31:41, 72:81], :].reset_index(drop=True).to_csv(twtTSVtrainCovClassPathEval, encoding='utf-8', sep="\t")
# fake news classification datafiles
#dfFakeClass = dfFakeClass.drop(columns=['tweet_proc_length'])
#dfFakeClass[200:1000].reset_index(drop=True).to_csv(twtCSVtrainFakeClassPathTrain, encoding='utf-8', sep=";")
#dfFakeClass[200:1000].reset_index(drop=True).to_csv(twtTSVtrainFakeClassPathTrain, encoding='utf-8', sep="\t")
#dfFakeClass[0:199].reset_index(drop=True).to_csv(twtCSVtrainFakeClassPath, encoding='utf-8', sep=";")
#dfFakeClass[0:199].reset_index(drop=True).to_csv(twtTSVtrainFakeClassPathEval, encoding='utf-8', sep="\t")
#%%
# Prepare trainer
#from transformers import TrainingArguments
#training_args = TrainingArguments(
# report_to = 'wandb',
# output_dir=wd+'results', # output directory/
# overwrite_output_dir = True,
# num_train_epochs=6, # total number of training epochs
# per_device_train_batch_size=8, # batch size per device during training
# per_device_eval_batch_size=16, # batch size for evaluation
# learning_rate=2e-5,
# warmup_steps=1000, # number of warmup steps for learning rate scheduler
# weight_decay=0.01, # strength of weight decay
# logging_dir='./logs3', # directory for storing logs
# logging_steps=1000,
# evaluation_strategy="epoch",
# save_strategy="epoch",
# load_best_model_at_end=True
#)
tokenizer = AutoTokenizer.from_pretrained(model_name)
from transformers import BertForSequenceClassification, AdamW#, BertConfig
#from torch.utils.data import TensorDataset, random_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
"""
train_dataset = load_dataset('csv', data_files={'train': twtCSVtrainCovClassPathTrain}, encoding = "utf-8")
train_dataset = train_dataset['train']
eval_dataset = load_dataset('csv', data_files={'test': twtCSVtrainCovClassPath}, encoding = "utf-8")
eval_dataset = eval_dataset['test']
"""
batch_size = 1
from torch.utils.data import Dataset
class PandasDataset(Dataset):
def __init__(self, dataframe, tokenizer, max_length):
self.dataframe = dataframe
self.tokenizer = tokenizer
self.max_length = max_length
def __len__(self):
return len(self.dataframe)
def __getitem__(self, index):
row = self.dataframe.iloc[index]
text = row['text']
labels = row['labels_encoded']
encoded = self.tokenizer(text, max_length=self.max_length, padding="max_length", truncation=True)
input_ids = torch.tensor(encoded['input_ids'])
attention_mask = torch.tensor(encoded['attention_mask'])
return {
'input_ids': input_ids,
'attention_mask': attention_mask,
'labels': torch.tensor(labels) # Assuming labels are already encoded
}
train_dataset = PandasDataset(dfFakeClass_train, tokenizer, max_length)
train_dataloader = DataLoader(
train_dataset,
sampler=RandomSampler(train_dataset),
batch_size=batch_size
)
eval_dataset = PandasDataset(dfFakeClass_test, tokenizer, max_length)
validation_dataloader = DataLoader(
eval_dataset,
sampler=SequentialSampler(eval_dataset),
batch_size=batch_size
)
for idx, batch in enumerate(train_dataloader):
print('Batch index: ', idx)
print('Batch size: ', batch['input_ids'].size()) # Access 'input_ids' field
print('Batch label: ', batch['labels']) # Access 'labels' field
break
model = BertForSequenceClassification.from_pretrained(
model_name,
num_labels = 2, # The number of output labels -- 2 for binary classification.
# You can increase this for multi-class tasks.
output_attentions = False, # Whether the model returns attention weights.
output_hidden_states = False, # Whether the model returns all hidden states.
)
#trainer = Trainer(
# model=model, # the instantiated 🤗 Transformers model to be trained
# args=training_args, # training arguments, defined above
# train_dataset=train_dataset, # training dataset
# eval_dataset=eval_dataset # evaluation dataset
#)
# Note: AdamW is a class from the huggingface library (as opposed to pytorch).
# I believe the 'W' stands for 'Weight Decay fix'.
optimizer = AdamW(model.parameters(),
lr = 2e-5, # args.learning_rate - default is 5e-5, our notebook had 2e-5
eps = 1e-8 # args.adam_epsilon - default is 1e-8.
)
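# Hedged alternative (depends on the installed transformers version): newer
# releases deprecate transformers.AdamW in favour of torch's own implementation,
# which would be a drop-in replacement here:
# from torch.optim import AdamW
# optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)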
from transformers import get_linear_schedule_with_warmup
# Number of training epochs. The BERT authors recommend between 2 and 4.
# We chose to run for 6
epochs = 6
# Total number of training steps is [number of batches] x [number of epochs].
# (Note that this is not the same as the number of training samples).
total_steps = len(train_dataloader) * epochs
# Create the learning rate scheduler.
scheduler = get_linear_schedule_with_warmup(optimizer,
num_warmup_steps = 0, # Default value in run_glue.py
num_training_steps = total_steps)
# Function to calculate the accuracy of our predictions vs labels
def flat_accuracy(preds, labels):
pred_flat = np.argmax(preds, axis=1).flatten()
labels_flat = labels.flatten()
return np.sum(pred_flat == labels_flat) / len(labels_flat)
import time
import datetime
def format_time(elapsed):
'''
Takes a time in seconds and returns a string hh:mm:ss
'''
# Round to the nearest second.
elapsed_rounded = int(round((elapsed)))
# Format as hh:mm:ss
return str(datetime.timedelta(seconds=elapsed_rounded))
import random
# This training code is based on the `run_glue.py` script here:
# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128
# Set the seed value all over the place to make this reproducible.
seed_val = 12355
# If there's a GPU available...
if torch.cuda.is_available():
# Tell PyTorch to use the GPU.
device = torch.device("cuda")
print('There are %d GPU(s) available.' % torch.cuda.device_count())
print('We will use the GPU:', torch.cuda.get_device_name(0))
#model.cuda()
# If not...
else:
print('No GPU available, using the CPU instead.')
device = torch.device("cpu")
device = torch.device("cpu")  # force CPU for this run; the GPU path above is left disabled (model.cuda() is commented out)
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)
#%%
# Start training
# We'll store a number of quantities such as training and validation loss,
# validation accuracy, and timings.
training_stats = []
# Measure the total training time for the whole run.
total_t0 = time.time()
# For each epoch...
for epoch_i in range(0, epochs):
# ========================================
# Training
# ========================================
# Perform one full pass over the training set.
print("")
print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
print('{:>5,} batches per epoch will be processed.'.format(len(train_dataloader)))
print('Training...')
# Measure how long the training epoch takes.
t0 = time.time()
model.to(device)
# Reset the total loss for this epoch.
total_train_loss = 0
# Put the model into training mode. Don't be misled--the call to
# `train` just changes the *mode*, it doesn't *perform* the training.
# `dropout` and `batchnorm` layers behave differently during training
# vs. test (source: https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch)
model.train()
# For each batch of training data...
for step, batch in enumerate(train_dataloader):
# Progress update every 10 batches.
if step % 10 == 0 and not step == 0:
# Calculate elapsed time in minutes.
elapsed = format_time(time.time() - t0)
# Report progress.
print(' Batch {:>5,} of {:>5,}. Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))
# Unpack this training batch from our dataloader.
#
# As we unpack the batch, we'll also copy each tensor to the GPU using the
# `to` method.
#
# `batch` contains three pytorch tensors:
# [0]: input ids
# [1]: attention masks
# [2]: labels
print("Batch keys:", batch.keys())
b_input_ids = batch['input_ids'].to(device)
b_input_mask = batch['attention_mask'].to(device)
b_labels = batch['labels'].to(device)
# Always clear any previously calculated gradients before performing a
# backward pass. PyTorch doesn't do this automatically because
# accumulating the gradients is "convenient while training RNNs".
# (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)
model.zero_grad()
# Perform a forward pass (evaluate the model on this training batch).
# The documentation for this `model` function is here:
# https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
# It returns different numbers of values depending on what arguments
# are given and what flags are set. For our usage here, it returns
# the loss (because we provided labels) and the "logits"--the model
# outputs prior to activation.
output = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
loss = output[0]
logits = output[1]
# Accumulate the training loss over all of the batches so that we can
# calculate the average loss at the end. `loss` is a Tensor containing a
# single value; the `.item()` function just returns the Python value
# from the tensor.
total_train_loss += loss.item()
# Perform a backward pass to calculate the gradients.
loss.backward()
# Clip the norm of the gradients to 1.0.
# This is to help prevent the "exploding gradients" problem.
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
# Update parameters and take a step using the computed gradient.
# The optimizer dictates the "update rule"--how the parameters are
# modified based on their gradients, the learning rate, etc.
optimizer.step()
# Update the learning rate.
scheduler.step()
# Calculate the average loss over all of the batches.
avg_train_loss = total_train_loss / len(train_dataloader)
# Measure how long this epoch took.
training_time = format_time(time.time() - t0)
print("")
print(" Average training loss: {0:.2f}".format(avg_train_loss))
print(" Training epcoh took: {:}".format(training_time))
# ========================================
# Validation
# ========================================
# After the completion of each training epoch, measure our performance on
# our validation set.
print("")
print("Running Validation...")
t0 = time.time()
# Put the model in evaluation mode--the dropout layers behave differently
# during evaluation.
model.eval()
# Tracking variables
total_eval_accuracy = 0
total_eval_loss = 0
nb_eval_steps = 0
# Evaluate data for one epoch
for batch in validation_dataloader:
# Unpack this training batch from our dataloader.
#
# As we unpack the batch, we'll also copy each tensor to the GPU using
# the `to` method.
#
# `batch` contains three pytorch tensors:
# [0]: input ids
# [1]: attention masks
# [2]: labels
b_input_ids = batch['input_ids'].to(device)
b_input_mask = batch['attention_mask'].to(device)
b_labels = batch['labels'].to(device)
# Tell pytorch not to bother with constructing the compute graph during
# the forward pass, since this is only needed for backprop (training).
with torch.no_grad():
# Forward pass, calculate logit predictions.
# token_type_ids is the same as the "segment ids", which
# differentiates sentence 1 and 2 in 2-sentence tasks.
# The documentation for this `model` function is here:
# https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
# Get the "logits" output by the model. The "logits" are the output
# values prior to applying an activation function like the softmax.
output = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
loss = output[0]
logits = output[1]
# Accumulate the validation loss.
total_eval_loss += loss.item()
# Move logits and labels to CPU
logits = logits.detach().cpu().numpy()
label_ids = b_labels.to('cpu').numpy()
# Calculate the accuracy for this batch of test sentences, and
# accumulate it over all batches.
total_eval_accuracy += flat_accuracy(logits, label_ids)
# Report the final accuracy for this validation run.
avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
print(" Accuracy: {0:.2f}".format(avg_val_accuracy))
# Calculate the average loss over all of the batches.
avg_val_loss = total_eval_loss / len(validation_dataloader)
# Measure how long the validation run took.
validation_time = format_time(time.time() - t0)
print(" Validation Loss: {0:.2f}".format(avg_val_loss))
print(" Validation took: {:}".format(validation_time))
# Record all statistics from this epoch.
training_stats.append(
{
'epoch': epoch_i + 1,
'Training Loss': avg_train_loss,
'Valid. Loss': avg_val_loss,
'Valid. Accur.': avg_val_accuracy,
'Training Time': training_time,
'Validation Time': validation_time
}
)
print("")
print("Training complete!")
print("Total training took {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))
params = list(model.named_parameters())
print('The BERT model has {:} different named parameters.\n'.format(len(params)))
print('==== Embedding Layer ====\n')
for p in params[0:5]:
print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))
print('\n==== First Transformer ====\n')
for p in params[5:21]:
print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))
print('\n==== Output Layer ====\n')
for p in params[-4:]:
print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))
import os
# Saving best-practices: if you use default names for the model, you can reload it using from_pretrained()
from datetime import datetime as dt
fTimeFormat = "%Y-%m-%d_%H-%M-%S"
now = dt.now().strftime(fTimeFormat)
output_dir = modFakeClassPath + now + "/"
# Create output directory if needed
if not os.path.exists(output_dir):
os.makedirs(output_dir)
print("Saving model to %s" % output_dir)
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
# They can then be reloaded using `from_pretrained()`
model_to_save = model.module if hasattr(model, 'module') else model # Take care of distributed/parallel training
model_to_save.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
# Good practice: save your training arguments together with the trained model
# torch.save(args, os.path.join(output_dir, 'training_args.bin'))
import pandas as pd
# Display floats with two decimal places.
pd.set_option('display.precision', 2)
# Create a DataFrame from our training statistics.
df_stats = pd.DataFrame(data=training_stats)
# Use the 'epoch' as the row index.
df_stats = df_stats.set_index('epoch')
# A hack to force the column headers to wrap.
#df = df.style.set_table_styles([dict(selector="th",props=[('max-width', '70px')])])
# Display the table.
df_stats
df_stats.to_csv(output_dir + now + ".csv")
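# Hedged reload sketch (assumption, not in the original script): the model and
# tokenizer saved above can later be restored from `output_dir` for inference:
# from transformers import BertForSequenceClassification, AutoTokenizer
# reloaded_model = BertForSequenceClassification.from_pretrained(output_dir)
# reloaded_tokenizer = AutoTokenizer.from_pretrained(output_dir)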

607
trainTopic.py Normal file

@@ -0,0 +1,607 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Aug 12 12:25:18 2023
@author: michael
"""
#from datasets import load_dataset
#from transformers import Trainer
#from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
import torch
import numpy as np
from sklearn.model_selection import train_test_split # pip install scikit-learn
import pandas as pd
## Uses snippets from this guide:
# https://mccormickml.com/2019/07/22/BERT-fine-tuning/
###################
# Setup directories
# WD Michael
wd = "/home/michael/Documents/PS/Data/collectTweets/"
# WD Server
# wd = '/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/'
import sys
funs = wd+"funs"
sys.path.insert(1, funs)
import CleanTweets
# datafile input directory
di = "data/IN/"
# Tweet-datafile output directory
ud = "data/OUT/"
# Training CSV dataset
twtCSV = "SenatorsTweets-Training_WORKING-COPY-correct2"
twtCSVtrainCovClass = "SenatorsTweets-train-CovClassification"
twtCSVtrainFakeClass = "SenatorsTweets-train-FakeClassification"
statsTrainingTopicClass = "statsTopicClassification-"
# don't change this one
twtCSVPath = wd + ud + twtCSV + ".csv"
twtCSVtrainCovClassPath = wd + ud + twtCSVtrainCovClass + ".csv"
twtCSVtrainFakeClassPath = wd + ud + twtCSVtrainFakeClass + ".csv"
statsTrainingTopicClassPath = wd + ud + statsTrainingTopicClass
twtCSVtrainCovClassPathTrain = wd + ud + twtCSVtrainCovClass + "TRAIN.csv"
twtCSVtrainFakeClassPathTrain = wd + ud + twtCSVtrainFakeClass + "TRAIN.csv"
twtTSVtrainCovClassPathTrain = wd + ud + "cov-train.tsv"
twtTSVtrainFakeClassPathTrain = wd + ud + "fake-train.tsv"
twtTSVtrainCovClassPathEval = wd + ud + "cov-eval.tsv"
twtTSVtrainFakeClassPathEval = wd + ud + "fake-eval.tsv"
seed = 12355
# Model paths
modCovClassPath = wd + "models/CovClass/"
modFakeClassPath = wd + "models/FakeClass/"
model_name = "bvrau/covid-twitter-bert-v2-struth"
model_fake_name = 'bvrau/covid-twitter-bert-v2-struth'
# More models for fake detection:
# https://huggingface.co/justinqbui/bertweet-covid-vaccine-tweets-finetuned
tokenizer = AutoTokenizer.from_pretrained(model_name)
max_length = 64 # max token sentence length
#%%
# Create training and testing dataset
dfTest = pd.read_csv(twtCSVPath, dtype=(object), delimiter=";")
#dfTest = dfTest[:-900] # remove last 900 rows
#dfTest = dfTest.iloc[:,:-3] # remove last 3 columns
dfTest['text'] = dfTest['rawContent'].apply(CleanTweets.preprocess_roberta)
dfTest.drop(columns=['rawContent'], inplace=True)
# Only keep tweets that are longer than 3 words
dfTest['tweet_proc_length'] = [len(text.split(' ')) for text in dfTest['text']]
dfTest['tweet_proc_length'].value_counts()
dfTest = dfTest[dfTest['tweet_proc_length']>3]
dfTest = dfTest.drop_duplicates(subset=['text'])
dfTest = dfTest.drop(columns=['date', 'Unnamed: 0'])
# Create datasets for each classification
dfCovClass = dfTest
dfFakeClass = dfTest
dfCovClass = dfCovClass.drop(columns=['fake']) # fake column not needed in the covid-topic classification data
dfFakeClass = dfFakeClass[dfFakeClass['topicCovid']=='True'].drop(columns=['topicCovid']) # topicCovid column not needed in the fake-news classification data
#type_map = {'Covid tweet': 'covid tweets', 'Noncovid tweet': 'noncovid tweet'}
dfCovClass.rename(index = str, columns={'topicCovid': 'labels', 'tid': 'id'}, inplace = True)
dfCovClass.labels = dfCovClass.labels.replace({"True": 'Covid', "False": 'NonCovid'})
#type_map = {'fake news tweet': 'fake news tweet', 'non-fake-news-tweet': 'non-fake-news-tweet'}
dfFakeClass.rename(index = str, columns={'fake': 'labels', 'tid': 'id'}, inplace = True)
dfFakeClass.labels = dfFakeClass.labels.replace({"True": 'Fake', "False": 'True'})
#%%
# Tokenize tweets
dfCovClass = dfCovClass[dfCovClass['labels'].notna()]
dfFakeClass = dfFakeClass[dfFakeClass['labels'].notna()]
dfCovClass['input_ids'] = dfCovClass['text'].apply(lambda x: tokenizer(x, max_length=max_length, padding="max_length",)['input_ids'])
dfFakeClass['input_ids'] = dfFakeClass['text'].apply(lambda x: tokenizer(x, max_length=max_length, padding="max_length",)['input_ids'])
def encode_labels(label):
if label == 'Covid':
return 1
elif label == 'NonCovid':
return 0
elif label == 'Fake':
return 1
elif label == 'True':
return 0
return 0
dfCovClass['labels_encoded'] = dfCovClass['labels'].apply(encode_labels)
dfFakeClass['labels_encoded'] = dfFakeClass['labels'].apply(encode_labels)
# get n of classes
print("# of Non-Covid tweets (coded 0):")
print(dfCovClass.groupby('labels_encoded', group_keys=False)['id'].nunique())
# 62 non-covid tweets, disproportionate sample for training has to be 124 tweets
print("# of Fake-news tweets (coded 1):")
print(dfFakeClass.groupby('labels_encoded', group_keys=False)['id'].nunique())
# create disproportionate sample - 50/50 of both
#dfCovClass.groupby('labels_encoded', group_keys=False)['id'].nunique()
#dfCovClass = dfCovClass.groupby('labels_encoded', group_keys=False).apply(lambda x: x.sample(164, random_state=seed))
# After a lot of tests, it seems that a sample in which non-fake-news tweets are overrepresented leads to better results.
# Because of this, and because of performance limitations and time constraints, group 1 (covid topic) will be overrepresented (twice as many), which still doesn't reflect the real proportions of roughly 10:1.
'''dfCovClassa = dfCovClass.groupby('labels_encoded', group_keys=False).get_group(1).sample(frac=1, replace=True).reset_index()
dfCovClassb = dfCovClass.groupby('labels_encoded', group_keys=False).get_group(0).sample(frac=1, replace=True).reset_index()
dfCovClassab= pd.concat([dfCovClassa,dfCovClassb])
dfCovClassab.reset_index(inplace=True)
dfCovClass_train, dfCovClass_test = train_test_split(dfCovClassab, test_size=0.1, random_state=seed, stratify=dfCovClassab['labels_encoded'])
'''
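# Hedged sketch (illustrative only, not used below): the 2:1 overrepresentation
# of covid-topic tweets described above could be made explicit by resampling
# each class, drawing the covid group at twice the size of the non-covid group;
# `dfTopicBalanced` is an assumed name and is not referenced below.
n_noncov = (dfCovClass['labels_encoded'] == 0).sum()
dfTopicBalanced = pd.concat([
    dfCovClass[dfCovClass['labels_encoded'] == 0].sample(n_noncov, replace=True, random_state=seed),
    dfCovClass[dfCovClass['labels_encoded'] == 1].sample(2 * n_noncov, replace=True, random_state=seed),
]).reset_index(drop=True)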
# create training and validation samples
dfCovClass_train, dfCovClass_test = train_test_split(dfCovClass, test_size=0.1, random_state=seed, stratify=dfCovClass['labels_encoded'])
# reset index and drop unnecessary columns
dfCovClass_train.reset_index(drop=True, inplace=True)
dfCovClass_train.drop(inplace=True, columns=['tweet_proc_length'])
dfCovClass_train.groupby('labels_encoded', group_keys=False)['id'].nunique()
dfCovClass_test.reset_index(drop=True, inplace=True)
dfCovClass_test.drop(inplace=True, columns=['tweet_proc_length'])
dfCovClass_test.groupby('labels_encoded', group_keys=False)['id'].nunique()
# save dfs as csvs and tsvs, for training and validation
# covid classification datafiles
# rows 0-41 = noncovid, 42-81 covid, therefore:
#dfCovClass = dfCovClass.drop(columns=['tweet_proc_length'])
#dfCovClass.reset_index(inplace=True, drop=True)
#dfCovClass.loc[np.r_[0:31, 42:71], :].reset_index(drop=True).to_csv(twtCSVtrainCovClassPathTrain, encoding='utf-8', sep=";")
#dfCovClass.loc[np.r_[0:31, 42:72], :].reset_index(drop=True).to_csv(twtTSVtrainCovClassPathTrain, encoding='utf-8', sep="\t")
#dfCovClass.loc[np.r_[31:41, 72:81], :].reset_index(drop=True).to_csv(twtCSVtrainCovClassPath, encoding='utf-8', sep=";")
#dfCovClass.loc[np.r_[31:41, 72:81], :].reset_index(drop=True).to_csv(twtTSVtrainCovClassPathEval, encoding='utf-8', sep="\t")
# fake news classification datafiles
#dfFakeClass = dfFakeClass.drop(columns=['tweet_proc_length'])
#dfFakeClass[200:1000].reset_index(drop=True).to_csv(twtCSVtrainFakeClassPathTrain, encoding='utf-8', sep=";")
#dfFakeClass[200:1000].reset_index(drop=True).to_csv(twtTSVtrainFakeClassPathTrain, encoding='utf-8', sep="\t")
#dfFakeClass[0:199].reset_index(drop=True).to_csv(twtCSVtrainFakeClassPath, encoding='utf-8', sep=";")
#dfFakeClass[0:199].reset_index(drop=True).to_csv(twtTSVtrainFakeClassPathEval, encoding='utf-8', sep="\t")
#%%
# Prepare trainer
#from transformers import TrainingArguments
#training_args = TrainingArguments(
# report_to = 'wandb',
# output_dir=wd+'results', # output directory/
# overwrite_output_dir = True,
# num_train_epochs=6, # total number of training epochs
# per_device_train_batch_size=8, # batch size per device during training
# per_device_eval_batch_size=16, # batch size for evaluation
# learning_rate=2e-5,
# warmup_steps=1000, # number of warmup steps for learning rate scheduler
# weight_decay=0.01, # strength of weight decay
# logging_dir='./logs3', # directory for storing logs
# logging_steps=1000,
# evaluation_strategy="epoch",
# save_strategy="epoch",
# load_best_model_at_end=True
#)
tokenizer = AutoTokenizer.from_pretrained(model_name)
from transformers import BertForSequenceClassification, AdamW#, BertConfig
#from torch.utils.data import TensorDataset, random_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
"""
train_dataset = load_dataset('csv', data_files={'train': twtCSVtrainCovClassPathTrain}, encoding = "utf-8")
train_dataset = train_dataset['train']
eval_dataset = load_dataset('csv', data_files={'test': twtCSVtrainCovClassPath}, encoding = "utf-8")
eval_dataset = eval_dataset['test']
"""
batch_size = 1
from torch.utils.data import Dataset
class PandasDataset(Dataset):
def __init__(self, dataframe, tokenizer, max_length):
self.dataframe = dataframe
self.tokenizer = tokenizer
self.max_length = max_length
def __len__(self):
return len(self.dataframe)
def __getitem__(self, index):
row = self.dataframe.iloc[index]
text = row['text']
labels = row['labels_encoded']
encoded = self.tokenizer(text, max_length=self.max_length, padding="max_length", truncation=True)
input_ids = torch.tensor(encoded['input_ids'])
attention_mask = torch.tensor(encoded['attention_mask'])
return {
'input_ids': input_ids,
'attention_mask': attention_mask,
'labels': torch.tensor(labels) # Assuming labels are already encoded
}
train_dataset = PandasDataset(dfCovClass_train, tokenizer, max_length)
train_dataloader = DataLoader(
train_dataset,
sampler=RandomSampler(train_dataset),
batch_size=batch_size
)
eval_dataset = PandasDataset(dfCovClass_test, tokenizer, max_length)
validation_dataloader = DataLoader(
eval_dataset,
sampler=SequentialSampler(eval_dataset),
batch_size=batch_size
)
for idx, batch in enumerate(train_dataloader):
print('Batch index: ', idx)
print('Batch size: ', batch['input_ids'].size()) # Access 'input_ids' field
print('Batch label: ', batch['labels']) # Access 'labels' field
break
model = BertForSequenceClassification.from_pretrained(
model_name,
num_labels = 2, # The number of output labels -- 2 for binary classification.
# You can increase this for multi-class tasks.
output_attentions = False, # Whether the model returns attention weights.
output_hidden_states = False, # Whether the model returns all hidden states.
)
#trainer = Trainer(
# model=model, # the instantiated 🤗 Transformers model to be trained
# args=training_args, # training arguments, defined above
# train_dataset=train_dataset, # training dataset
# eval_dataset=eval_dataset # evaluation dataset
#)
# Note: AdamW is a class from the huggingface library (as opposed to pytorch).
# I believe the 'W' stands for 'Weight Decay fix'.
optimizer = AdamW(model.parameters(),
lr = 2e-5, # args.learning_rate - default is 5e-5, our notebook had 2e-5
eps = 1e-8 # args.adam_epsilon - default is 1e-8.
)
from transformers import get_linear_schedule_with_warmup
# Number of training epochs. The BERT authors recommend between 2 and 4.
# We chose to run for 6
epochs = 6
# Total number of training steps is [number of batches] x [number of epochs].
# (Note that this is not the same as the number of training samples).
total_steps = len(train_dataloader) * epochs
# Create the learning rate scheduler.
scheduler = get_linear_schedule_with_warmup(optimizer,
num_warmup_steps = 0, # Default value in run_glue.py
num_training_steps = total_steps)
# Function to calculate the accuracy of our predictions vs labels
def flat_accuracy(preds, labels):
pred_flat = np.argmax(preds, axis=1).flatten()
labels_flat = labels.flatten()
return np.sum(pred_flat == labels_flat) / len(labels_flat)
import time
import datetime
def format_time(elapsed):
'''
Takes a time in seconds and returns a string hh:mm:ss
'''
# Round to the nearest second.
elapsed_rounded = int(round((elapsed)))
# Format as hh:mm:ss
return str(datetime.timedelta(seconds=elapsed_rounded))
import random
# This training code is based on the `run_glue.py` script here:
# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128
# Set the seed value all over the place to make this reproducible.
seed_val = 12355
# If there's a GPU available...
if torch.cuda.is_available():
# Tell PyTorch to use the GPU.
device = torch.device("cuda")
print('There are %d GPU(s) available.' % torch.cuda.device_count())
print('We will use the GPU:', torch.cuda.get_device_name(0))
#model.cuda()
# If not...
else:
print('No GPU available, using the CPU instead.')
device = torch.device("cpu")
device = torch.device("cpu")  # force CPU for this run; the GPU path above is left disabled (model.cuda() is commented out)
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)
#%%
# Start training
# We'll store a number of quantities such as training and validation loss,
# validation accuracy, and timings.
training_stats = []
# Measure the total training time for the whole run.
total_t0 = time.time()
# For each epoch...
for epoch_i in range(0, epochs):
# ========================================
# Training
# ========================================
# Perform one full pass over the training set.
print("")
print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
print('{:>5,} batches per epoch will be processed.'.format(len(train_dataloader)))
print('Training...')
# Measure how long the training epoch takes.
t0 = time.time()
model.to(device)
# Reset the total loss for this epoch.
total_train_loss = 0
# Put the model into training mode. Don't be misled--the call to
# `train` just changes the *mode*, it doesn't *perform* the training.
# `dropout` and `batchnorm` layers behave differently during training
# vs. test (source: https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch)
model.train()
# For each batch of training data...
for step, batch in enumerate(train_dataloader):
# Progress update every 10 batches.
if step % 10 == 0 and not step == 0:
# Calculate elapsed time in minutes.
elapsed = format_time(time.time() - t0)
# Report progress.
print(' Batch {:>5,} of {:>5,}. Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))
# Unpack this training batch from our dataloader.
#
# As we unpack the batch, we'll also copy each tensor to the GPU using the
# `to` method.
#
# `batch` contains three pytorch tensors:
# [0]: input ids
# [1]: attention masks
# [2]: labels
print("Batch keys:", batch.keys())
b_input_ids = batch['input_ids'].to(device)
b_input_mask = batch['attention_mask'].to(device)
b_labels = batch['labels'].to(device)
# Always clear any previously calculated gradients before performing a
# backward pass. PyTorch doesn't do this automatically because
# accumulating the gradients is "convenient while training RNNs".
# (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)
model.zero_grad()
# Perform a forward pass (evaluate the model on this training batch).
# The documentation for this `model` function is here:
# https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
# It returns different numbers of values depending on what arguments
# are given and what flags are set. For our usage here, it returns
# the loss (because we provided labels) and the "logits"--the model
# outputs prior to activation.
output = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
loss = output[0]
logits = output[1]
# Accumulate the training loss over all of the batches so that we can
# calculate the average loss at the end. `loss` is a Tensor containing a
# single value; the `.item()` function just returns the Python value
# from the tensor.
total_train_loss += loss.item()
# Perform a backward pass to calculate the gradients.
loss.backward()
# Clip the norm of the gradients to 1.0.
# This is to help prevent the "exploding gradients" problem.
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
# Update parameters and take a step using the computed gradient.
# The optimizer dictates the "update rule"--how the parameters are
# modified based on their gradients, the learning rate, etc.
optimizer.step()
# Update the learning rate.
scheduler.step()
# Calculate the average loss over all of the batches.
avg_train_loss = total_train_loss / len(train_dataloader)
# Measure how long this epoch took.
training_time = format_time(time.time() - t0)
print("")
print(" Average training loss: {0:.2f}".format(avg_train_loss))
print(" Training epcoh took: {:}".format(training_time))
# ========================================
# Validation
# ========================================
# After the completion of each training epoch, measure our performance on
# our validation set.
print("")
print("Running Validation...")
t0 = time.time()
# Put the model in evaluation mode--the dropout layers behave differently
# during evaluation.
model.eval()
# Tracking variables
total_eval_accuracy = 0
total_eval_loss = 0
nb_eval_steps = 0
# Evaluate data for one epoch
for batch in validation_dataloader:
# Unpack this training batch from our dataloader.
#
# As we unpack the batch, we'll also copy each tensor to the GPU using
# the `to` method.
#
# `batch` contains three pytorch tensors:
# [0]: input ids
# [1]: attention masks
# [2]: labels
b_input_ids = batch['input_ids'].to(device)
b_input_mask = batch['attention_mask'].to(device)
b_labels = batch['labels'].to(device)
# Tell pytorch not to bother with constructing the compute graph during
# the forward pass, since this is only needed for backprop (training).
with torch.no_grad():
# Forward pass, calculate logit predictions.
# token_type_ids is the same as the "segment ids", which
# differentiates sentence 1 and 2 in 2-sentence tasks.
# The documentation for this `model` function is here:
# https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
# Get the "logits" output by the model. The "logits" are the output
# values prior to applying an activation function like the softmax.
output = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
loss = output[0]
logits = output[1]
# Accumulate the validation loss.
total_eval_loss += loss.item()
# Move logits and labels to CPU
logits = logits.detach().cpu().numpy()
label_ids = b_labels.to('cpu').numpy()
# Calculate the accuracy for this batch of test sentences, and
# accumulate it over all batches.
total_eval_accuracy += flat_accuracy(logits, label_ids)
# Report the final accuracy for this validation run.
avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
print(" Accuracy: {0:.2f}".format(avg_val_accuracy))
# Calculate the average loss over all of the batches.
avg_val_loss = total_eval_loss / len(validation_dataloader)
# Measure how long the validation run took.
validation_time = format_time(time.time() - t0)
print(" Validation Loss: {0:.2f}".format(avg_val_loss))
print(" Validation took: {:}".format(validation_time))
# Record all statistics from this epoch.
training_stats.append(
{
'epoch': epoch_i + 1,
'Training Loss': avg_train_loss,
'Valid. Loss': avg_val_loss,
'Valid. Accur.': avg_val_accuracy,
'Training Time': training_time,
'Validation Time': validation_time
}
)
print("")
print("Training complete!")
print("Total training took {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))
params = list(model.named_parameters())
print('The BERT model has {:} different named parameters.\n'.format(len(params)))
print('==== Embedding Layer ====\n')
for p in params[0:5]:
print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))
print('\n==== First Transformer ====\n')
for p in params[5:21]:
print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))
print('\n==== Output Layer ====\n')
for p in params[-4:]:
print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))
import os
# Saving best-practices: if you use default names for the model, you can reload it using from_pretrained()
from datetime import datetime as dt
fTimeFormat = "%Y-%m-%d_%H-%M-%S"
now = dt.now().strftime(fTimeFormat)
output_dir = modCovClassPath + now + "/"
# Create output directory if needed
if not os.path.exists(output_dir):
os.makedirs(output_dir)
print("Saving model to %s" % output_dir)
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
# They can then be reloaded using `from_pretrained()`
model_to_save = model.module if hasattr(model, 'module') else model # Take care of distributed/parallel training
model_to_save.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
# Good practice: save your training arguments together with the trained model
# torch.save(args, os.path.join(output_dir, 'training_args.bin'))
import pandas as pd
# Display floats with two decimal places.
pd.set_option('display.precision', 2)
# Create a DataFrame from our training statistics.
df_stats = pd.DataFrame(data=training_stats)
# Use the 'epoch' as the row index.
df_stats = df_stats.set_index('epoch')
# A hack to force the column headers to wrap.
#df = df.style.set_table_styles([dict(selector="th",props=[('max-width', '70px')])])
# Display the table.
df_stats
df_stats.to_csv(output_dir + now + ".csv")
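# Hedged follow-up sketch (assumption, not in the original script): the saved
# statistics CSV can be plotted to inspect the loss curves across epochs;
# matplotlib is assumed to be installed.
# import matplotlib.pyplot as plt
# df_plot = pd.read_csv(output_dir + now + ".csv").set_index('epoch')
# df_plot[['Training Loss', 'Valid. Loss']].plot(marker='o')
# plt.title('Training vs. validation loss')
# plt.show()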