#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Rebuild the ``rawContent`` and ``date`` columns of the training working copy.

Created on Mon Aug 14 20:47:22 2023

@author: michael

Provenance: added as ``repairmystupidity.py`` in commit df5fd51
("repairs stupid", Michael Beck, 2023-08-15).

The script reads the semicolon-separated working copy
``SenatorsTweets-Training_WORKING-COPY-correct.csv``, replaces its
``rawContent`` and ``date`` columns with the values found in
``SenatorsTweets-Training.csv`` (rows are matched on ``tid``) and writes
the result to ``SenatorsTweets-Training_WORKING-COPY-correct2.csv``.
"""
from pathlib import Path

import pandas as pd

wd = "/home/michael/Documents/PS/Data/collectTweets/"

# datafile input directory (not used by this script)
di = "data/IN/"

# Tweet-datafile output directory
ud = "data/OUT/"

# Working copy whose rawContent/date columns are to be replaced.
falsch_path = Path(wd) / ud / "SenatorsTweets-Training_WORKING-COPY-correct.csv"
# File the replacement rawContent/date values are taken from.
richtig_path = Path(wd) / ud / "SenatorsTweets-Training.csv"
# Destination of the repaired working copy.
correct = Path(wd) / ud / "SenatorsTweets-Training_WORKING-COPY-correct2.csv"

# Name of new datafile generated (not used by this script)
senCSVprep = "SenatorsTweets-Training_WORKING-COPY-prepared"

# Columns whose values are re-taken from the reference file.
_REFRESHED_COLUMNS = ["rawContent", "date"]

# Column selection/order of the repaired frame, before the last rename.
_OUTPUT_COLUMNS = ["tid", "date", "topicCovid", "fake", "rawContent", "Unnamed: 6"]


def repair_training_data(
    working_copy: pd.DataFrame, reference: pd.DataFrame
) -> pd.DataFrame:
    """Return *working_copy* with ``rawContent``/``date`` taken from *reference*.

    Rows are matched on ``tid`` with a left join, so every row of
    *working_copy* is kept; a ``tid`` that does not occur in *reference*
    gets missing values in the refreshed columns. Neither input frame is
    modified.

    Parameters
    ----------
    working_copy
        Frame with at least the columns ``tid``, ``topicCovid``, ``fake``
        and ``Unnamed: 6``. Existing ``rawContent``/``date`` columns are
        discarded.
    reference
        Frame with at least the columns ``tid``, ``rawContent`` and
        ``date``.

    Returns
    -------
    pandas.DataFrame
        Columns ``tid, date, topicCovid, fake, rawContent, comment`` in
        that order, where ``comment`` is the former ``Unnamed: 6`` column.

    Raises
    ------
    KeyError
        If one of the required columns listed above is missing.
    """
    fresh_values = reference[["tid", *_REFRESHED_COLUMNS]]

    # Drop the stale columns *before* merging: the merge then produces no
    # ``_x``/``_y`` suffixes, and a working copy that lacks one of the
    # columns no longer triggers a KeyError while cleaning up suffixes.
    without_stale = working_copy.drop(columns=_REFRESHED_COLUMNS, errors="ignore")

    merged = without_stale.merge(fresh_values, on="tid", how="left")
    return merged[_OUTPUT_COLUMNS].rename(columns={"Unnamed: 6": "comment"})


def main() -> None:
    """Read both CSV files, repair the working copy and write the result."""
    # dtype=object keeps every cell exactly as read (no numeric inference).
    working_copy = pd.read_csv(falsch_path, dtype=object, sep=";")
    reference = pd.read_csv(richtig_path, dtype=object)

    repaired = repair_training_data(working_copy, reference)

    # ``index`` is left at its default, so the row index is written as the
    # first, unnamed column of the output file.
    repaired.to_csv(correct, encoding="utf-8", sep=";")


if __name__ == "__main__":
    main()