Adds docstrings to functions and adds several comments.

This commit is contained in:
Michael Beck
2023-06-23 20:26:16 +02:00
parent e8ba02ca0f
commit dc2e17cc2f
3 changed files with 100 additions and 44 deletions

View File

@@ -3,13 +3,22 @@ import time
import pandas as pd
import snscrape.modules.twitter as sntwitter
def scrapeTweets(handle, slice_data, keywords, td, tweetDFColumns, maxTweets = 5000):
def scrapeTweets(handle, keywords, td, tweetDFColumns, ts_beg, ts_end, suffix, maxTweets = 5000):
"""Scrapes tweets from a specific account in a specific time span using snscrape.modules.twitter.
Args:
handle (str): twitter handle of account to be scraped
keywords (list): list of strings containing the keywords that the tweets shall be searched for
td (str): tweet file output path
tweetDFColumns (list): Columns for tweet dataframe. Parameters for snscrape.modules.twitter.Tweet
ts_beg (str): scrape from ... YYYY-MM-DDTHH:MM:SSZ from datetime: %Y-%m-%dT%H:%M:%SZ (https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes)
ts_end (str): scrape until ... YYYY-MM-DDTHH:MM:SSZ from datetime: %Y-%m-%dT%H:%M:%SZ (https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes)
suffix (str): suffix that shall be added to filename after the handle. Example: "-slice1" of handle "handle" will produce the file "Tweets-handle-slice1.csv"
maxTweets (int, optional): Maximum number of tweets to be scraped. Defaults to 5000.
"""
i = 0
currentTime = datetime.now()
ts_beg = slice_data['beg_time']
ts_end = slice_data['end_time']
suffix = slice_data['suffix']
tweetDataFilePath = td + f"Tweets-{handle}{suffix}.csv"
# create empty tweetlist that will be filled with tweets of current sen