adds docstrings to functions. adds several comments.
This commit is contained in:
@ -3,13 +3,22 @@ import time
|
||||
import pandas as pd
|
||||
import snscrape.modules.twitter as sntwitter
|
||||
|
||||
def scrapeTweets(handle, slice_data, keywords, td, tweetDFColumns, maxTweets = 5000):
|
||||
def scrapeTweets(handle, keywords, td, tweetDFColumns, ts_beg, ts_end, suffix, maxTweets = 5000):
|
||||
"""Scrapes tweets from a specific account in a specific time span using snscrape.modules.twitter.
|
||||
|
||||
Args:
|
||||
handle (str): twitter handle of account to be scraped
|
||||
keywords (list): list of strings containing the keywords that the tweets shall be searched for
|
||||
td (str): tweet file output path
|
||||
tweetDFColumns (list): Columns for tweet dataframe. Parameters for snscrape.modules.twitter.Tweet
|
||||
ts_beg (str): scrape from ... YYYY-MM-DDTHH:MM:SSZ from datetime: %Y-%m-%dT%H:%M:%SZ (https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes)
|
||||
ts_end (str): scrape until ... YYYY-MM-DDTHH:MM:SSZ from datetime: %Y-%m-%dT%H:%M:%SZ (https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes)
|
||||
suffix (str): suffix that shall be added to filename after the handle. Example: "-slice1" of handle "handle" will produce the file "Tweets-handle-slice1.csv"
|
||||
maxTweets (int, optional): Maximum number of tweets to be scraped. Defaults to 5000.
|
||||
"""
|
||||
i = 0
|
||||
|
||||
currentTime = datetime.now()
|
||||
ts_beg = slice_data['beg_time']
|
||||
ts_end = slice_data['end_time']
|
||||
suffix = slice_data['suffix']
|
||||
tweetDataFilePath = td + f"Tweets-{handle}{suffix}.csv"
|
||||
|
||||
# create empty tweetlist that will be filled with tweets of current sen
|
||||
|
@ -8,6 +8,16 @@ Created on Wed Jun 21 13:58:42 2023
|
||||
|
||||
# create slices
|
||||
def get_Tslices(ts_beg, ts_end, no_slices):
|
||||
"""Splits the time-period between two points in time into #no_slices and returns start and end time of each slice period.
|
||||
|
||||
Args:
|
||||
ts_beg (str): Start of overall period to be sliced, formatted as %Y-%m-%dT%H:%M:%SZ (parsed with datetime.strptime).
|
||||
ts_end (datetime): Datetime end of overall period to be sliced.
|
||||
no_slices (int): number of slices. 24 e.g. will produce 24 start and end dates each.
|
||||
|
||||
Returns:
|
||||
list[dict[str:datetime|str]]: One dict for each slice, containing 'beg_time', 'end_time' and 'suffix' (e.g. -slice1)
|
||||
"""
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
ts_beg = datetime.strptime(ts_beg, '%Y-%m-%dT%H:%M:%SZ')
|
||||
@ -25,6 +35,16 @@ def get_Tslices(ts_beg, ts_end, no_slices):
|
||||
|
||||
# For log time conversions (seconds to days, hours, minutes)
|
||||
def convertTime(duration):
|
||||
"""Converts seconds to hours, minutes and seconds.
|
||||
|
||||
Args:
|
||||
duration (datetime.timedelta): time span to convert; its .days and .seconds attributes are read
|
||||
|
||||
Returns:
|
||||
int: hours
|
||||
int: minutes
|
||||
int: seconds
|
||||
"""
|
||||
days, seconds = duration.days, duration.seconds
|
||||
hours = days * 24 + seconds // 3600
|
||||
minutes = (seconds % 3600) // 60
|
||||
|
Reference in New Issue
Block a user