Corrects the import of the project's own functions, which stopped working under a newer Python version.
This commit is contained in:
parent
1c6d9d5415
commit
d8136909c8
@ -9,7 +9,8 @@ Created on Mon Jun 26 20:36:43 2023
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
# import pyreadstat
|
# import pyreadstat
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from funs.ClearDupes import deDupe
|
import sys
|
||||||
|
|
||||||
|
|
||||||
# Seed for training dataset generation
|
# Seed for training dataset generation
|
||||||
seed = 86431891
|
seed = 86431891
|
||||||
@ -49,6 +50,11 @@ senDatasetPath = wd + di + senDataset
|
|||||||
|
|
||||||
df = pd.read_csv(senCSVPath, dtype=(object))
|
df = pd.read_csv(senCSVPath, dtype=(object))
|
||||||
|
|
||||||
|
## Import own functions
|
||||||
|
funs = wd+"funs"
|
||||||
|
sys.path.insert(1, funs)
|
||||||
|
from ClearDupes import deDupe
|
||||||
|
|
||||||
mixed_columns = df.columns[df.nunique() != len(df)]
|
mixed_columns = df.columns[df.nunique() != len(df)]
|
||||||
print(mixed_columns)
|
print(mixed_columns)
|
||||||
|
|
||||||
|
11
collect.py
11
collect.py
@ -66,7 +66,6 @@ which is the final output.
|
|||||||
import os
|
import os
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import glob
|
import glob
|
||||||
import time
|
|
||||||
import sys
|
import sys
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
@ -149,10 +148,12 @@ tweetDFColumns = [
|
|||||||
################## do NOT change anything below this line ###################
|
################## do NOT change anything below this line ###################
|
||||||
#############################################################################
|
#############################################################################
|
||||||
|
|
||||||
## Import functions
|
## Import own functions
|
||||||
from funs.TimeSlice import *
|
funs = wd+"funs"
|
||||||
from funs.ClearDupes import deDupe
|
sys.path.insert(1, funs)
|
||||||
from funs.Scrape import scrapeTweets
|
from TimeSlice import get_Tslices
|
||||||
|
from ClearDupes import deDupe
|
||||||
|
from Scrape import scrapeTweets
|
||||||
|
|
||||||
###################
|
###################
|
||||||
# Create logfile & log all outputs
|
# Create logfile & log all outputs
|
||||||
|
@ -1,13 +1,8 @@
|
|||||||
import re
|
|
||||||
import string
|
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
|
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
from transformers.pipelines.pt_utils import KeyDataset
|
from transformers.pipelines.pt_utils import KeyDataset
|
||||||
from funs.CleanTweets import remove_URL, remove_emoji, remove_html, remove_punct
|
|
||||||
|
|
||||||
|
|
||||||
#%%
|
#%%
|
||||||
# prepare
|
# prepare
|
||||||
@ -40,7 +35,6 @@ senCSVPretest = "Pretest.csv"
|
|||||||
senCSVPretestPrep = "Pretest-Prep.csv"
|
senCSVPretestPrep = "Pretest-Prep.csv"
|
||||||
senCSVPretestResult = "Pretest-Results.csv"
|
senCSVPretestResult = "Pretest-Results.csv"
|
||||||
|
|
||||||
|
|
||||||
# don't change this one
|
# don't change this one
|
||||||
senCSVPath = wd + ud + senCSV
|
senCSVPath = wd + ud + senCSV
|
||||||
senCSVcPath = wd + ud + senCSVc
|
senCSVcPath = wd + ud + senCSVc
|
||||||
@ -50,6 +44,11 @@ senCSVcPretestResultPath = wd + ud + senCSVPretestResult
|
|||||||
preTestIDsFakePath = wd + di + preTestIDsFake
|
preTestIDsFakePath = wd + di + preTestIDsFake
|
||||||
preTestIDsNotPath = wd + di + preTestIDsNot
|
preTestIDsNotPath = wd + di + preTestIDsNot
|
||||||
|
|
||||||
|
import sys
|
||||||
|
funs = wd+"funs"
|
||||||
|
sys.path.insert(1, funs)
|
||||||
|
import CleanTweets
|
||||||
|
|
||||||
# List of IDs to select
|
# List of IDs to select
|
||||||
# Read the IDs from a file
|
# Read the IDs from a file
|
||||||
preTestIDsFakeL = []
|
preTestIDsFakeL = []
|
||||||
@ -85,11 +84,7 @@ tokenizer = AutoTokenizer.from_pretrained("bvrau/covid-twitter-bert-v2-struth")
|
|||||||
|
|
||||||
# Source https://www.kaggle.com/code/daotan/tweet-analysis-with-transformers-bert
|
# Source https://www.kaggle.com/code/daotan/tweet-analysis-with-transformers-bert
|
||||||
|
|
||||||
dfPreTest['cleanContent'] = dfPreTest['rawContent'].apply(remove_URL)
|
dfPreTest['cleanContent'] = dfPreTest['rawContent'].apply(CleanTweets.preprocess_text)
|
||||||
dfPreTest['cleanContent'] = dfPreTest['cleanContent'].apply(remove_emoji)
|
|
||||||
dfPreTest['cleanContent'] = dfPreTest['cleanContent'].apply(remove_html)
|
|
||||||
dfPreTest['cleanContent'] = dfPreTest['cleanContent'].apply(remove_punct)
|
|
||||||
dfPreTest['cleanContent'] = dfPreTest['cleanContent'].apply(lambda x: x.lower())
|
|
||||||
|
|
||||||
#%%
|
#%%
|
||||||
timeStart = datetime.now() # start counting execution time
|
timeStart = datetime.now() # start counting execution time
|
||||||
|
Loading…
x
Reference in New Issue
Block a user