Corrects imports of own functions that no longer worked because of a newer Python version.

This commit is contained in:
Michael Beck 2023-08-30 21:45:27 +02:00
parent 1c6d9d5415
commit d8136909c8
3 changed files with 19 additions and 17 deletions

View File

@ -9,7 +9,8 @@ Created on Mon Jun 26 20:36:43 2023
import pandas as pd
# import pyreadstat
import numpy as np
from funs.ClearDupes import deDupe
import sys
# Seed for training dataset generation
seed = 86431891
@ -49,6 +50,11 @@ senDatasetPath = wd + di + senDataset
df = pd.read_csv(senCSVPath, dtype=(object))
## Import own functions
funs = wd+"funs"
sys.path.insert(1, funs)
from ClearDupes import deDupe
mixed_columns = df.columns[df.nunique() != len(df)]
print(mixed_columns)

View File

@ -66,7 +66,6 @@ which is the final output.
import os
import pandas as pd
import glob
import time
import sys
from datetime import datetime
import concurrent.futures
@ -149,10 +148,12 @@ tweetDFColumns = [
################## do NOT change anything below this line ###################
#############################################################################
## Import functions
from funs.TimeSlice import *
from funs.ClearDupes import deDupe
from funs.Scrape import scrapeTweets
## Import own functions
funs = wd+"funs"
sys.path.insert(1, funs)
from TimeSlice import get_Tslices
from ClearDupes import deDupe
from Scrape import scrapeTweets
###################
# Create logfile & log all outputs

View File

@ -1,13 +1,8 @@
import re
import string
import numpy as np
import pandas as pd
from datetime import datetime
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from datasets import load_dataset
from transformers.pipelines.pt_utils import KeyDataset
from funs.CleanTweets import remove_URL, remove_emoji, remove_html, remove_punct
#%%
# prepare
@ -40,7 +35,6 @@ senCSVPretest = "Pretest.csv"
senCSVPretestPrep = "Pretest-Prep.csv"
senCSVPretestResult = "Pretest-Results.csv"
# don't change this one
senCSVPath = wd + ud + senCSV
senCSVcPath = wd + ud + senCSVc
@ -50,6 +44,11 @@ senCSVcPretestResultPath = wd + ud + senCSVPretestResult
preTestIDsFakePath = wd + di + preTestIDsFake
preTestIDsNotPath = wd + di + preTestIDsNot
import sys
funs = wd+"funs"
sys.path.insert(1, funs)
import CleanTweets
# List of IDs to select
# Read the IDs from a file
preTestIDsFakeL = []
@ -85,11 +84,7 @@ tokenizer = AutoTokenizer.from_pretrained("bvrau/covid-twitter-bert-v2-struth")
# Source https://www.kaggle.com/code/daotan/tweet-analysis-with-transformers-bert
dfPreTest['cleanContent'] = dfPreTest['rawContent'].apply(remove_URL)
dfPreTest['cleanContent'] = dfPreTest['cleanContent'].apply(remove_emoji)
dfPreTest['cleanContent'] = dfPreTest['cleanContent'].apply(remove_html)
dfPreTest['cleanContent'] = dfPreTest['cleanContent'].apply(remove_punct)
dfPreTest['cleanContent'] = dfPreTest['cleanContent'].apply(lambda x: x.lower())
dfPreTest['cleanContent'] = dfPreTest['rawContent'].apply(CleanTweets.preprocess_text)
#%%
timeStart = datetime.now() # start counting execution time