adds dataset profiler
This commit is contained in:
parent
ed61d52182
commit
5a63c478e9
55
profiler.py
Normal file
55
profiler.py
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Created on Tue Aug 8 14:49:02 2023
|
||||||
|
|
||||||
|
@author: michael
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import pandas_profiling as pp
|
||||||
|
import numpy
|
||||||
|
|
||||||
|
###################
|
||||||
|
# Setup directories
|
||||||
|
# WD Michael
|
||||||
|
wd = "/home/michael/Documents/PS/Data/collectTweets/"
|
||||||
|
# WD Server
|
||||||
|
# wd = '/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/'
|
||||||
|
|
||||||
|
# datafile input directory
|
||||||
|
di = "data/IN/"
|
||||||
|
|
||||||
|
# Tweet-datafile output directory
|
||||||
|
ud = "data/OUT/"
|
||||||
|
|
||||||
|
# Name of file that all senator data will be written to
|
||||||
|
senCSV = "ALL-SENATORS-TWEETS.csv"
|
||||||
|
|
||||||
|
# Name of the raw senator dataset file located in the input directory
|
||||||
|
senDataset = "senators-raw.csv"
|
||||||
|
|
||||||
|
# Name of new datafile generated
|
||||||
|
senCSVc = "SenatorsTweets-Final"
|
||||||
|
senCSVcCov = "SenatorsTweets-OnlyCov"
|
||||||
|
|
||||||
|
# don't change this one
|
||||||
|
senCSVPath = wd + ud + senCSV
|
||||||
|
senCSVcPath = wd + ud + senCSVc + ".csv"
|
||||||
|
senCSVcCovPath = wd + ud + senCSVcCov + ".csv"
|
||||||
|
# .sav path for the cleaned dataset. Built from senCSVc (no extension), because
# senCSV already ends in ".csv" and would produce "ALL-SENATORS-TWEETS.csv.sav".
senSAVcPath = wd + ud + senCSVc + ".sav"
|
||||||
|
# .dta path for the cleaned dataset. Built from senCSVc (no extension), because
# senCSV already ends in ".csv" and would produce "ALL-SENATORS-TWEETS.csv.dta".
senDTAcPath = wd + ud + senCSVc + ".dta"
|
||||||
|
senDatasetPath = wd + di + senDataset
|
||||||
|
|
||||||
|
# Load the full tweet CSV into a dataframe; every column is read as object dtype
|
||||||
|
df = pd.read_csv(senCSVPath, dtype=(object))
|
||||||
|
|
||||||
|
# forming ProfileReport and save
|
||||||
|
# as an HTML file (AllTweets.html)
|
||||||
|
profileAll = pp.ProfileReport(df, minimal=True)
|
||||||
|
# Write the report under the configured output directory (wd + ud) so the
# result does not depend on the directory the script is started from.
profileAll.to_file(wd + ud + "profiles/AllTweets.html")
|
||||||
|
|
||||||
|
df = pd.read_csv(senCSVcCovPath, dtype=(object))
|
||||||
|
|
||||||
|
profileAll = pp.ProfileReport(df, minimal=True)
|
||||||
|
# Write the report under the configured output directory (wd + ud) so the
# result does not depend on the directory the script is started from.
profileAll.to_file(wd + ud + "profiles/CovTweets.html")
|
Loading…
x
Reference in New Issue
Block a user