adds docstrings to functions. adds several comments.

This commit is contained in:
Michael Beck
2023-06-23 20:26:16 +02:00
parent e8ba02ca0f
commit dc2e17cc2f
3 changed files with 100 additions and 44 deletions

View File

@ -3,13 +3,22 @@ import time
import pandas as pd
import snscrape.modules.twitter as sntwitter
def scrapeTweets(handle, slice_data, keywords, td, tweetDFColumns, maxTweets = 5000):
def scrapeTweets(handle, keywords, td, tweetDFColumns, ts_beg, ts_end, suffix, maxTweets = 5000):
"""Scrapes tweets from a specific account in a specific time span using snscrape.modules.twitter.
Args:
handle (str): twitter handle of account to be scraped
keywords (list): list of strings containing the keywords that the tweets shall be searched for
td (str): tweet file output path
tweetDFColumns (list): Columns for tweet dataframe. Parameters for snscrape.modules.twitter.Tweet
ts_beg (str): scrape from ... YYYY-MM-DDTHH:MM:SSZ from datetime: %Y-%m-%dT%H:%M:%SZ (https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes)
ts_end (str): scrape until ... YYYY-MM-DDTHH:MM:SSZ from datetime: %Y-%m-%dT%H:%M:%SZ (https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes)
suffix (str): suffix that shall be added to filename after the handle. Example: "-slice1" of handle "handle" will produce the file "Tweets-handle-slice1.csv"
maxTweets (int, optional): Maximum number of tweets to be scraped. Defaults to 5000.
"""
i = 0
currentTime = datetime.now()
ts_beg = slice_data['beg_time']
ts_end = slice_data['end_time']
suffix = slice_data['suffix']
tweetDataFilePath = td + f"Tweets-{handle}{suffix}.csv"
# create empty tweetlist that will be filled with tweets of current sen

View File

@ -8,6 +8,16 @@ Created on Wed Jun 21 13:58:42 2023
# create slices
def get_Tslices(ts_beg, ts_end, no_slices):
"""Splits the time-period between two points in time into #no_slices and returns start and end time of each slice period.
Args:
ts_beg (str): Start of the overall period to be sliced, as a string in the format %Y-%m-%dT%H:%M:%SZ (parsed with datetime.strptime).
ts_end (str): End of the overall period to be sliced, in the same format as ts_beg.
no_slices (int): number of slices. 24 e.g. will produce 24 start and end dates each.
Returns:
list[dict[str:datetime|str]]: One dict for each slice, containing 'beg_time', 'end_time' and 'suffix' (e.g. -slice1)
"""
from datetime import datetime
from datetime import timedelta
ts_beg = datetime.strptime(ts_beg, '%Y-%m-%dT%H:%M:%SZ')
@ -25,6 +35,16 @@ def get_Tslices(ts_beg, ts_end, no_slices):
# For log time conversions (seconds to days, hours, minutes)
def convertTime(duration):
"""Converts a time span (timedelta) to hours, minutes and seconds.
Args:
duration (datetime.timedelta): time span to convert; its days and seconds attributes are read
Returns:
int: hours
int: minutes
int: seconds
"""
days, seconds = duration.days, duration.seconds
hours = days * 24 + seconds // 3600
minutes = (seconds % 3600) // 60