From 4e08cde317a7b79f81efb09f44a9d2d2bbf6e51a Mon Sep 17 00:00:00 2001 From: Michael Beck Date: Wed, 16 Aug 2023 10:06:16 +0200 Subject: [PATCH] finishes classification scripts --- [Review notes, not part of the change: the encode_labels helper added to ClassificationTopic.py maps 'real' to 'True' and 'fake' to 'False'; the encode_labels helper added to ClassificationFake.py then swaps 'True' and 'False', so once both scripts have run 'real' ends up as 'False' and 'fake' as 'True'. Both helpers return the integer 0 for any other value while the matched branches return strings. ClassificationFake.py afterwards keeps only rows whose output_label_topicCov equals 'True', so rows mapped to 'False' or 0 are dropped there. ClassificationFake.py writes its intermediate result to a hard-coded absolute path. NOTE(review): if senCSVPath refers to that same file, re-running ClassificationFake.py would swap the labels back; confirm against the rest of the script.] ClassificationFake.py | 10 ++++++++++ ClassificationTopic.py | 11 +++++++++++ 2 files changed, 21 insertions(+) diff --git a/ClassificationFake.py b/ClassificationFake.py index b48aa9e..dd0cda7 100644 --- a/ClassificationFake.py +++ b/ClassificationFake.py @@ -43,6 +43,16 @@ import CleanTweets #%% # get datafra,e dfClassify = pd.read_csv(senCSVPath, dtype=(object)) +def encode_labels(label): + if label == 'True': + return 'False' + elif label == 'False': + return 'True' + return 0 +dfClassify['output_label_topicCov'] = dfClassify['output_label_topicCov'].apply(encode_labels) +dfClassify.to_csv("/home/michael/Documents/PS/Data/collectTweets/data/OUT/Tweets-Classified-Topic-Results.csv", encoding='utf-8') + +dfClassify = dfClassify[dfClassify['output_label_topicCov']=='True'] # dataframe from csv dfClassify['fake'] = False diff --git a/ClassificationTopic.py b/ClassificationTopic.py index 4605834..b00a94f 100644 --- a/ClassificationTopic.py +++ b/ClassificationTopic.py @@ -110,3 +110,14 @@ print(f"Time per tweet classification: {timePerTweet}") dfClassify.to_csv(senCSVcClassificationResultPath, encoding='utf-8') # %% +## corrections +def encode_labels(label): + if label == 'real': + return 'True' + elif label == 'fake': + return 'False' + return 0 +dfClassify['output_label_topicCov'] = dfClassify['output_label_topicCov'].apply(encode_labels) +dfClassify.to_csv(senCSVcClassificationResultPath, encoding='utf-8') +#still wrong, will be corrected in ClassificationFake.py +