CollectUSSenatorTweets/profiler.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug  8 14:49:02 2023

@author: michael
"""

import os

import numpy
import pandas as pd
import pandas_profiling as pp

###################
# Directory setup
# Base working directory (Michael's machine); must end with a slash because
# the paths below are built by plain string concatenation.
wd = "/home/michael/Documents/PS/Data/collectTweets/"
# Base working directory on the server (uncomment to use instead)
# wd = '/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/'

# Input data directory, relative to wd
di = "data/IN/"

# Output data directory, relative to wd
ud = "data/OUT/"

# File name of the CSV containing all senator tweets (lives in the output dir)
senCSV = "ALL-SENATORS-TWEETS.csv"

# File name of the raw senator dataset (lives in the input dir)
senDataset = "senators-raw.csv"

# Base names (without extension) of the derived tweet datafiles
senCSVc = "SenatorsTweets-Final"
senCSVcCov = "SenatorsTweets-OnlyCov"

# Derived full paths - do not edit these, change the names above instead
senCSVPath = f"{wd}{ud}{senCSV}"
senCSVcPath = f"{wd}{ud}{senCSVc}.csv"
senCSVcCovPath = f"{wd}{ud}{senCSVcCov}.csv"
# NOTE(review): the two paths below are built from senCSV (which already ends
# in ".csv"), so they end in ".csv.sav" / ".csv.dta" - confirm whether senCSVc
# was intended.
senSAVcPath = f"{wd}{ud}{senCSV}.sav"
senDTAcPath = f"{wd}{ud}{senCSV}.dta"
senDatasetPath = f"{wd}{di}{senDataset}"

# Build a profiling report for each tweet CSV and save it as an HTML file.
# The reports are written below wd + ud (the same base the input CSVs are read
# from) so the output location does not depend on the directory the script is
# started from.
profileDir = wd + ud + "profiles/"
# Make sure the report directory exists before anything is written into it.
os.makedirs(profileDir, exist_ok=True)

# (input CSV path, report file name) pairs, processed in this order
for csvPath, reportName in (
    (senCSVPath, "AllTweets.html"),
    (senCSVcCovPath, "CovTweets.html"),
):
    # Load every column as object so pandas performs no dtype inference
    df = pd.read_csv(csvPath, dtype=object)

    # minimal=True requests pandas_profiling's reduced report configuration
    profileAll = pp.ProfileReport(df, minimal=True)
    profileAll.to_file(profileDir + reportName)