adds senator data scraper
This commit is contained in:
@ -63,4 +63,55 @@ def scrapeTweets(handle, keywords, td, tweetDFColumns, ts_beg, ts_end, suffix,
|
||||
# save short csv
|
||||
tweet_df.to_csv(csv_path, encoding='utf-8')
|
||||
# sleep 1 second to not get blocked because of excessive requests
|
||||
time.sleep(0.5)
|
||||
time.sleep(0.5)
|
||||
|
||||
def getHandles(di):
    """Collect the senators' Twitter handles from senators-raw.csv.

    Args:
        di (str): path prefix of the directory holding senators-raw.csv.
            The file name is appended verbatim, so the prefix must end
            with a path separator.

    Returns:
        list: every value of the "twitter_handle" column, followed by the
            non-empty values of the "alt_handle" column.
    """
    # Parse the file once and reuse the frame for both columns.
    senators = pd.read_csv(f"{di}senators-raw.csv")
    accounts = senators["twitter_handle"].tolist()
    # Empty alt_handle fields are read as NaN; drop them before merging.
    accounts.extend(senators["alt_handle"].dropna().tolist())
    return accounts
|
||||
|
||||
def printHandles(accounts):
    """Return a string listing all accounts in a readable way.

    Handles are centred in 17-character columns, five per line, followed
    by a summary line with the total number of accounts.

    Args:
        accounts (list): list of str with handles

    Returns:
        str: containing text that can be written to txtfile
    """
    txt = ["Accounts to be scraped:\n"]
    for i, acc in enumerate(accounts):  # print 5 accounts per line
        txt.append(f"{acc:^17}")  # twitter handle max length = 15 chars
        if i % 5 == 4:
            txt.append(" \n")
    # Use len() rather than the last loop index: the index is one short of
    # the real count and is undefined when the list is empty.
    txt.append(f"\n{len(accounts)} accounts in total.")
    return ''.join(txt)
|
||||
|
||||
def scrapeUsers(handle, userDFColumns, maxTweets=1):
    """Read profile attributes of an account from its tweets.

    Searches for tweets sent by ``handle`` and reads the requested
    attributes from the ``user`` object attached to each fetched tweet.

    Args:
        handle (str): account handle, used in the query ``from:<handle>``
        userDFColumns (list): attribute names to read from ``tweet.user``;
            dotted names are followed attribute by attribute
        maxTweets (int): maximum number of tweets to fetch

    Returns:
        list: values of userDFColumns, in order, taken from the last
            fetched tweet; empty if no tweet was fetched
    """
    from itertools import islice

    currentTime = datetime.now()
    userList = []
    # "!s" is required: datetime.__format__ treats a format spec as a
    # strftime pattern, so "<30" without it prints the literal text "<30"
    # instead of a left-aligned timestamp.
    print(f'{currentTime!s:<30} Fetching: {handle:>15}')
    query = f'from:{handle}'

    tweets = sntwitter.TwitterSearchScraper(query).get_items()
    # islice stops after exactly maxTweets items, without pulling an extra
    # tweet from the scraper just to discover the limit was reached.
    for tweet in islice(tweets, max(maxTweets, 0)):
        # Collect the user data of this tweet; later tweets overwrite
        # earlier ones, so the caller receives a single flat record.
        userList = []
        for col in userDFColumns:
            # Plain attribute lookup instead of eval(): column names are
            # data and must not be executed as code.
            singleUser = tweet.user
            for part in col.split('.'):
                singleUser = getattr(singleUser, part)
            userList.append(singleUser)

    return userList
|
Reference in New Issue
Block a user