#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
Created on Wed Jun 21 13:58:42 2023

Configuration for the senators tweet-collection pipeline: working
directories, output file names, the scraping timespan, and scraper limits.
All values are plain module-level constants imported by the collection
scripts.

@author: michael
'''

## Setup directories

# WD Michael (local working directory; must end with '/')
wd = '/home/michael/Documents/PS/Data/collectTweets/'

# WD Server — uncomment to run on the server instead
# wd = '/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/'

# Tweet-datafile output directory (relative to wd)
td = 'data/tweets/'

# Name of file that all tweets will be written to
file_alltweets = 'ALL-SENATORS-TWEETS.csv'

# Full path to the per-senator tweet dataframes (wd already ends with '/')
path_to_tweetdfs = wd + td

## Define Timespan
# Format: %Y-%m-%dT%H:%M:%SZ (https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes)
ts_beg = '2020-01-01T00:00:00Z'  # start of scraping
ts_end = '2023-01-03T00:00:00Z'  # end of scraping
no_slices = 24  # Number of slices / time periods.

# Maximum tweets to be scraped by snscrape. Can be left untouched.
maxTweets = 5000

# Name of logfile (prefix; presumably a timestamp/suffix is appended by the
# caller — TODO confirm against the script that opens it)
logfile = 'log/log_'

## Install snscrape from local git repo to make sure that it fits the used version.
# If snscrape is already installed, uncomment the following lines:
# NOTE(review): the snippet below references `os` and `sys` without importing
# them — add `import os, sys` before enabling it. Also confirm the intent of
# the comment above: it reads as if it should say "if snscrape is NOT yet
# installed, uncomment".
'''
import subprocess
os.chdir('snscrape/')
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-e', '.'])
os.chdir(wd)
'''