adds comments. changes logfile format to .log
This commit is contained in:
		
							
								
								
									
										10
									
								
								collect.py
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								collect.py
									
									
									
									
									
								
							| @@ -7,8 +7,6 @@ Created on Thu Jun  8 01:08:21 2023 | ||||
| # https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html | ||||
|  | ||||
| Following files are necessary: | ||||
|     config.py | ||||
|         Used to configure everything that's needed for this script. | ||||
|     funs/TimeSlice.py | ||||
|         Function get_Tslices slices the timespan defined in config.py into N  | ||||
|         slices. This is necessary because Twitter may block requests.  | ||||
| @@ -17,6 +15,8 @@ Following files are necessary: | ||||
|         Function deDupe reads each line of inFile and removes duplicate lines. | ||||
|         A file outFile is saved without the duplicate lines. Generates  | ||||
|         "keywords.txt". | ||||
|     funs/Scrape.py | ||||
|         scrapes using snscrape.modules.twitter. See docstring. | ||||
|     data/keywords-raw.txt | ||||
|         Contains all keywords that are used to detect whether a tweet contains | ||||
|         information about Covid19. | ||||
| @@ -152,8 +152,8 @@ from funs.Scrape import scrapeTweets | ||||
| # Create logfile & log all outputs | ||||
| #   There are three logfile types to be found in /log. | ||||
| #   They should be self-explanatory. | ||||
| logfilen = logfile + datetime.now().strftime(fTimeFormat) + ".txt" | ||||
| logfileErrors = logfile + datetime.now().strftime(fTimeFormat) + "_err" + ".txt" | ||||
| logfilen = logfile + datetime.now().strftime(fTimeFormat) + ".log" | ||||
| logfileErrors = logfile + datetime.now().strftime(fTimeFormat) + "_err" + ".log" | ||||
| sys.stderr = open(logfileErrors, "w") | ||||
| sys.stdout = open(logfilen, "w") | ||||
|  | ||||
| @@ -240,7 +240,7 @@ for handle in accounts: | ||||
|         suffix = tslice['suffix'] | ||||
|         AllFilesList.append(f"Tweets-{handle}{suffix}.csv")  | ||||
| # report missing files to "log_*_missing.log" | ||||
| with open(f"{logfile}"+timeStartScrape.strftime(fTimeFormat)+"_missing.txt", "w") as fout: | ||||
| with open(f"{logfile}"+timeStartScrape.strftime(fTimeFormat)+"_missing.log", "w") as fout: | ||||
|     for file in AllFilesList: | ||||
|         if file not in tweetfiles: | ||||
|             fout.write(f'Missing: {file}.\n') # if file is not in tweetfiles, print error message. | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Michael Beck
					Michael Beck