diff --git a/collect.py b/collect.py index 084da75..a8e7db9 100644 --- a/collect.py +++ b/collect.py @@ -69,8 +69,11 @@ wd = "/home/michael/Documents/PS/Data/collectTweets/" # WD Server # wd = '/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/' +# datafile input directory +di = "data/IN/" + # Tweet-datafile output directory -td = "data/tweets/" +td = "data/OUT/" # Name of file that all tweets will be written to file_alltweets = "ALL-SENATORS-TWEETS.csv" @@ -171,9 +174,9 @@ print("---") # read keywords from a file and write to list. keywords = [] # Remove duplicate Keywords and save all non-duplicates to 'data/keywords.txt' -deDupe("data/keywords-raw.txt", "data/keywords.txt") +deDupe(f"{di}keywords-raw.txt", f"{di}keywords.txt") # Read the keywords from a file -with open("data/keywords.txt", "r") as file: +with open(f"{di}keywords.txt", "r") as file: lines = file.readlines() for line in lines: keyword = line.strip() # Remove the newline character diff --git a/data/keywords-raw.txt b/data/IN/keywords-raw.txt similarity index 100% rename from data/keywords-raw.txt rename to data/IN/keywords-raw.txt diff --git a/data/senators-raw.csv b/data/IN/senators-raw.csv similarity index 100% rename from data/senators-raw.csv rename to data/IN/senators-raw.csv diff --git a/data/keywords.txt b/data/keywords.txt deleted file mode 100644 index b9752bb..0000000 --- a/data/keywords.txt +++ /dev/null @@ -1,140 +0,0 @@ -Coronavirus -Koronavirus -Corona -CDC -Wuhancoronavirus -Wuhanlockdown -Ncov -Wuhan -N95 -Kungflu -Epidemic -outbreak -Sinophobia -China -covid-19 -corona virus -covid -covid19 -sars-cov-2 -COVIDー19 -COVD -pandemic -coronapocalypse -canceleverything -Coronials -SocialDistancingNow -Social Distancing -SocialDistancing -panicbuy -panic buy -panicbuying -panic buying -14DayQuarantine -DuringMy14DayQuarantine -panic shop -panic shopping -panicshop -InMyQuarantineSurvivalKit -panic-buy -panic-shop -coronakindness -quarantinelife -chinese virus -chinesevirus -stayhomechallenge -stay home challenge -sflockdown -DontBeASpreader -lockdown -lock down -shelteringinplace -sheltering in place -staysafestayhome -stay safe stay home -trumppandemic -trump pandemic -flattenthecurve -flatten the curve -china virus -chinavirus -quarentinelife -PPEshortage -saferathome -stayathome -stay at home -stay home -stayhome -GetMePPE -covidiot -epitwitter -pandemie -wear a mask -wearamask -kung flu -covididiot -COVID__19 -omicron -variant -vaccine -travel ban -corona -coronavirus -sarscov2 -sars cov2 -sars cov 2 -covid_19 -ncov -ncov2019 -2019-ncov -pandemic 2019ncov -2019ncov -quarantine -flattening the curve -flatteningthecurve -flattenthecurve -hand sanitizer -handsanitizer -social distancing -socialdistancing -work from home -workfromhome -working from home -workingfromhome -ppe -n95 -covidiots -herd immunity -herdimmunity -pneumonia -wuhan virus -wuhanvirus -kungflu -vaccines -corona vaccine -corona vaccines -coronavaccine -coronavaccines -face shield -faceshield -face shields -faceshields -health worker -healthworker -health workers -healthworkers -stayhomestaysafe -coronaupdate -frontlineheroes -coronawarriors -homeschool -homeschooling -hometasking -masks4all -wfh -wash ur hands -wash your hands -washurhands -washyourhands -selfisolating -self isolating \ No newline at end of file diff --git a/data/tweets/.gitignore b/data/tweets/.gitignore deleted file mode 100644 index a6844c7..0000000 --- a/data/tweets/.gitignore +++ /dev/null @@ -1,24 +0,0 @@ -/ALL-SENATORS-LONG-LONG.csv -/ALL-SENATORS.csv -/CoryGardner-LONG.csv -/CoryGardner.csv -/DavidPerdueGA-LONG.csv -/DavidPerdueGA.csv -/DougJones-LONG.csv -/DougJones.csv -/KLoeffler-LONG.csv -/KLoeffler.csv -/MarthaMcSallyAZ-LONG.csv -/MarthaMcSallyAZ.csv -/SenAlexander-LONG.csv -/SenAlexander.csv -/SenPatRoberts-LONG.csv -/SenPatRoberts.csv -/SenatorEnzi-LONG.csv -/SenatorEnzi.csv -/SenatorIsakson-LONG.csv -/SenatorIsakson.csv -/SenatorTomUdall-LONG.csv -/SenatorTomUdall.csv -/VP-LONG.csv -/VP.csv