adds dataset profiler
This commit is contained in:
		
							
								
								
									
										55
									
								
								profiler.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								profiler.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | ||||
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Build HTML profile reports for the senator tweet CSV files.

Reads the CSV at ``senCSVPath`` and the CSV at ``senCSVcCovPath`` and writes
one minimal pandas-profiling report per file (``AllTweets.html`` and
``CovTweets.html``) into the ``profiles/`` folder of the output directory.

Created on Tue Aug  8 14:49:02 2023

@author: michael
"""

import os

import pandas as pd
import pandas_profiling as pp
import numpy

###################
# Setup directories
# WD Michael
wd = "/home/michael/Documents/PS/Data/collectTweets/"
# WD Server
# wd = '/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/'

# datafile input directory
di = "data/IN/"

# Tweet-datafile output directory
ud = "data/OUT/"

# Name of the CSV file holding all senator data (lives in the output
# directory; profiled below as "AllTweets")
senCSV = "ALL-SENATORS-TWEETS.csv"

# Name of the senator dataset file located in the input directory
senDataset = "senators-raw.csv"

# Base names (without extension) of the generated datafiles
senCSVc = "SenatorsTweets-Final"
senCSVcCov = "SenatorsTweets-OnlyCov"

# don't change this one
senCSVPath = wd + ud + senCSV
senCSVcPath = wd + ud + senCSVc + ".csv"
senCSVcCovPath = wd + ud + senCSVcCov + ".csv"
# NOTE(review): the next two are built from senCSV, which already ends in
# ".csv", so they end in ".csv.sav" / ".csv.dta". senCSVc may have been
# intended - confirm. Neither path is used in this script.
senSAVcPath = wd + ud + senCSV + ".sav"
senDTAcPath = wd + ud + senCSV + ".dta"
senDatasetPath = wd + di + senDataset

# Reports are written below the same working directory the CSVs are read
# from, so the result does not depend on where the script is started.
profileDir = wd + ud + "profiles/"
# make sure the report directory exists before anything is written to it
os.makedirs(profileDir, exist_ok=True)

# (input CSV, output report) pairs, processed in this order
profileJobs = (
    (senCSVPath, profileDir + "AllTweets.html"),
    (senCSVcCovPath, profileDir + "CovTweets.html"),
)

for csvPath, reportPath in profileJobs:
    # load every column as object dtype (no numeric/date type inference)
    df = pd.read_csv(csvPath, dtype=object)

    # build the report in minimal mode and save it as an HTML file
    profileAll = pp.ProfileReport(df, minimal=True)
    profileAll.to_file(reportPath)
		Reference in New Issue
	
	Block a user
	 Michael Beck
					Michael Beck