Fixes the imports of the project's own helper functions (in `funs/`), which stopped working under a newer Python version.

2023-08-30 21:45:27 +02:00
parent 1c6d9d5415
commit d8136909c8
3 changed files with 19 additions and 17 deletions
--- a/cleanTweets.py
+++ b/cleanTweets.py
@@ -9,7 +9,8 @@ Created on Mon Jun 26 20:36:43 2023
 import pandas as pd
 # import pyreadstat
 import numpy as np
-from funs.ClearDupes import deDupe
+import sys
 # Seed for training dataset generation
 seed = 86431891
@@ -49,6 +50,11 @@ senDatasetPath = wd + di + senDataset
 df = pd.read_csv(senCSVPath, dtype=(object))
 ## Import own functions
 funs = wd+"funs"
 sys.path.insert(1, funs)
 from ClearDupes import deDupe
 mixed_columns = df.columns[df.nunique() != len(df)]
 print(mixed_columns)
--- a/collect.py
+++ b/collect.py
@@ -66,7 +66,6 @@ which is the final output.
 import os
 import pandas as pd
 import glob
 import time
 import sys
 from datetime import datetime
 import concurrent.futures
@@ -149,10 +148,12 @@ tweetDFColumns = [
 ################## do NOT change anything below this line ###################
 #############################################################################
-## Import functions
+## Import own functions
-from funs.TimeSlice import *
+funs = wd+"funs"
-from funs.ClearDupes import deDupe
+sys.path.insert(1, funs)
-from funs.Scrape import scrapeTweets
+from TimeSlice import get_Tslices
 from ClearDupes import deDupe
 from Scrape import scrapeTweets
 ################### 
 # Create logfile & log all outputs
--- a/preTestClassification.py
+++ b/preTestClassification.py
@@ -1,13 +1,8 @@
 import re
 import string
 import numpy as np
 import pandas as pd
 from datetime import datetime
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
 from datasets import load_dataset
 from transformers.pipelines.pt_utils import KeyDataset
 from funs.CleanTweets import remove_URL, remove_emoji, remove_html, remove_punct
 #%%
 # prepare
@@ -40,7 +35,6 @@ senCSVPretest = "Pretest.csv"
 senCSVPretestPrep = "Pretest-Prep.csv"
 senCSVPretestResult = "Pretest-Results.csv"
 # don't change this one
 senCSVPath = wd + ud + senCSV
 senCSVcPath = wd + ud + senCSVc
@@ -50,6 +44,11 @@ senCSVcPretestResultPath = wd + ud + senCSVPretestResult
 preTestIDsFakePath = wd + di + preTestIDsFake
 preTestIDsNotPath = wd + di + preTestIDsNot
 import sys
 funs = wd+"funs"
 sys.path.insert(1, funs)
 import CleanTweets
 # List of IDs to select
 # Read the IDs from a file
 preTestIDsFakeL = []
@@ -85,11 +84,7 @@ tokenizer = AutoTokenizer.from_pretrained("bvrau/covid-twitter-bert-v2-struth")
 # Source https://www.kaggle.com/code/daotan/tweet-analysis-with-transformers-bert
-dfPreTest['cleanContent'] = dfPreTest['rawContent'].apply(remove_URL)
+dfPreTest['cleanContent'] = dfPreTest['rawContent'].apply(CleanTweets.preprocess_text)
 dfPreTest['cleanContent'] = dfPreTest['cleanContent'].apply(remove_emoji)
 dfPreTest['cleanContent'] = dfPreTest['cleanContent'].apply(remove_html)
 dfPreTest['cleanContent'] = dfPreTest['cleanContent'].apply(remove_punct)
 dfPreTest['cleanContent'] = dfPreTest['cleanContent'].apply(lambda x: x.lower())
 #%%
 timeStart = datetime.now() # start counting execution time