Corrects the imports of the project's own functions, which stopped working under a newer Python version.

This commit is contained in:
Michael Beck 2023-08-30 21:45:27 +02:00
parent 1c6d9d5415
commit d8136909c8
3 changed files with 19 additions and 17 deletions

View File

@ -9,7 +9,8 @@ Created on Mon Jun 26 20:36:43 2023
import pandas as pd import pandas as pd
# import pyreadstat # import pyreadstat
import numpy as np import numpy as np
from funs.ClearDupes import deDupe import sys
# Seet for training dataset generation # Seet for training dataset generation
seed = 86431891 seed = 86431891
@ -49,6 +50,11 @@ senDatasetPath = wd + di + senDataset
df = pd.read_csv(senCSVPath, dtype=(object)) df = pd.read_csv(senCSVPath, dtype=(object))
## Import own functions
funs = wd+"funs"
sys.path.insert(1, funs)
from ClearDupes import deDupe
mixed_columns = df.columns[df.nunique() != len(df)] mixed_columns = df.columns[df.nunique() != len(df)]
print(mixed_columns) print(mixed_columns)

View File

@ -66,7 +66,6 @@ which is the final output.
import os import os
import pandas as pd import pandas as pd
import glob import glob
import time
import sys import sys
from datetime import datetime from datetime import datetime
import concurrent.futures import concurrent.futures
@ -149,10 +148,12 @@ tweetDFColumns = [
################## do NOT change anything below this line ################### ################## do NOT change anything below this line ###################
############################################################################# #############################################################################
## Import functions ## Import own functions
from funs.TimeSlice import * funs = wd+"funs"
from funs.ClearDupes import deDupe sys.path.insert(1, funs)
from funs.Scrape import scrapeTweets from TimeSlice import get_Tslices
from ClearDupes import deDupe
from Scrape import scrapeTweets
################### ###################
# Create logfile & log all outputs # Create logfile & log all outputs

View File

@ -1,13 +1,8 @@
import re
import string
import numpy as np
import pandas as pd import pandas as pd
from datetime import datetime from datetime import datetime
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from datasets import load_dataset from datasets import load_dataset
from transformers.pipelines.pt_utils import KeyDataset from transformers.pipelines.pt_utils import KeyDataset
from funs.CleanTweets import remove_URL, remove_emoji, remove_html, remove_punct
#%% #%%
# prepare # prepare
@ -40,7 +35,6 @@ senCSVPretest = "Pretest.csv"
senCSVPretestPrep = "Pretest-Prep.csv" senCSVPretestPrep = "Pretest-Prep.csv"
senCSVPretestResult = "Pretest-Results.csv" senCSVPretestResult = "Pretest-Results.csv"
# don't change this one # don't change this one
senCSVPath = wd + ud + senCSV senCSVPath = wd + ud + senCSV
senCSVcPath = wd + ud + senCSVc senCSVcPath = wd + ud + senCSVc
@ -50,6 +44,11 @@ senCSVcPretestResultPath = wd + ud + senCSVPretestResult
preTestIDsFakePath = wd + di + preTestIDsFake preTestIDsFakePath = wd + di + preTestIDsFake
preTestIDsNotPath = wd + di + preTestIDsNot preTestIDsNotPath = wd + di + preTestIDsNot
import sys
funs = wd+"funs"
sys.path.insert(1, funs)
import CleanTweets
# List of IDs to select # List of IDs to select
# Read the IDs from a file # Read the IDs from a file
preTestIDsFakeL = [] preTestIDsFakeL = []
@ -85,11 +84,7 @@ tokenizer = AutoTokenizer.from_pretrained("bvrau/covid-twitter-bert-v2-struth")
# Source https://www.kaggle.com/code/daotan/tweet-analysis-with-transformers-bert # Source https://www.kaggle.com/code/daotan/tweet-analysis-with-transformers-bert
dfPreTest['cleanContent'] = dfPreTest['rawContent'].apply(remove_URL) dfPreTest['cleanContent'] = dfPreTest['rawContent'].apply(CleanTweets.preprocess_text)
dfPreTest['cleanContent'] = dfPreTest['cleanContent'].apply(remove_emoji)
dfPreTest['cleanContent'] = dfPreTest['cleanContent'].apply(remove_html)
dfPreTest['cleanContent'] = dfPreTest['cleanContent'].apply(remove_punct)
dfPreTest['cleanContent'] = dfPreTest['cleanContent'].apply(lambda x: x.lower())
#%% #%%
timeStart = datetime.now() # start counting execution time timeStart = datetime.now() # start counting execution time