55 lines
		
	
	
		
			1.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			55 lines
		
	
	
		
			1.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug  8 14:49:02 2023

@author: michael

Build minimal pandas-profiling HTML reports for two tweet CSV files.

The script reads the CSV named by ``senCSV`` and the CSV named by
``senCSVcCov`` (both located in the output directory ``ud`` below the
working directory ``wd``) with every column loaded as ``object``, and
writes one HTML report per file into ``<wd>/<ud>/profiles/``.
"""

import os

import pandas as pd
# NOTE(review): upstream has renamed this package to ``ydata-profiling``;
# the import is left untouched so the script keeps its current dependency.
import pandas_profiling as pp
import numpy  # not referenced in this script; kept from the original

###################
# Setup directories
# WD Michael
wd = "/home/michael/Documents/PS/Data/collectTweets/"
# WD Server
# wd = '/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/'

# datafile input directory (relative to wd)
di = "data/IN/"

# Tweet-datafile output directory (relative to wd)
ud = "data/OUT/"

# Name of file that all senator data will be written to
senCSV = "ALL-SENATORS-TWEETS.csv"

# Name of the senator dataset file located in the input directory
senDataset = "senators-raw.csv"

# Names (without extension) of the new datafiles generated
senCSVc = "SenatorsTweets-Final"
senCSVcCov = "SenatorsTweets-OnlyCov"

# don't change this one
senCSVPath = wd + ud + senCSV
senCSVcPath = wd + ud + senCSVc + ".csv"
senCSVcCovPath = wd + ud + senCSVcCov + ".csv"
# NOTE(review): the next two paths are built from senCSV, which already ends
# in ".csv", so they end in ".csv.sav" / ".csv.dta". Possibly senCSVc was
# intended; neither name is used in this script, so the values are unchanged.
senSAVcPath = wd + ud + senCSV + ".sav"
senDTAcPath = wd + ud + senCSV + ".dta"
senDatasetPath = wd + di + senDataset

# Directory that receives the HTML reports. It is anchored to wd, like the
# input paths, so the script no longer depends on the current working
# directory; when run from wd this is the same location as before.
profileDir = wd + ud + "profiles/"
os.makedirs(profileDir, exist_ok=True)

# For each input CSV: form a dataframe (all columns as object), build a
# minimal ProfileReport and save it as an HTML file.
for csvPath, reportName in (
    (senCSVPath, "AllTweets.html"),
    (senCSVcCovPath, "CovTweets.html"),
):
    df = pd.read_csv(csvPath, dtype=object)
    profileAll = pp.ProfileReport(df, minimal=True)
    profileAll.to_file(profileDir + reportName)
