Adds TypeError controls

This commit is contained in:
Michael Beck 2023-08-15 14:19:33 +02:00
parent 7c6b618272
commit b89b5969ec

View File

@ -1,8 +1,19 @@
import re
import string
def preprocess_roberta(text):  # https://huggingface.co/cardiffnlp/twitter-roberta-base-sep2022
    """Mask user mentions and links in ``text``.

    The text is split on whitespace. Tokens longer than one character are
    rewritten as follows; every other token is kept unchanged:

    * a token that starts with ``@`` and contains exactly one ``@``
      becomes ``'@user'``;
    * a token that starts with ``http`` becomes ``'http'``.

    Args:
        text: The string to preprocess.

    Returns:
        The resulting tokens joined by single spaces (so runs of whitespace
        in the input collapse to one space).
    """
    preprocessed_text = []
    for t in text.split():
        if len(t) > 1:
            # A token cannot start with both '@' and 'http', so the two
            # rewrites are mutually exclusive.
            if t.startswith('@') and t.count('@') == 1:
                t = '@user'
            elif t.startswith('http'):
                t = 'http'
        # NOTE(review): indentation was lost in the displayed source. The
        # append is placed outside the length check so single-character
        # tokens are kept, which is assumed to match the reference
        # preprocessing on the linked model card - confirm.
        preprocessed_text.append(t)
    return ' '.join(preprocessed_text)
def remove_URL(text):
    """Remove URLs from ``text``.

    Every substring that starts with ``http://``, ``https://`` or ``www.``
    and runs up to the next whitespace character is deleted. Surrounding
    whitespace is left untouched.

    Args:
        text: The string to clean.

    Returns:
        ``text`` with all matching substrings removed.

    Raises:
        TypeError: If ``text`` is not a string. The offending value is
            printed before the exception is re-raised so it can be located
            in the input data.
    """
    # Compiling a constant pattern cannot fail; the call that can raise on
    # bad input is the substitution, so that is what gets guarded.
    url = re.compile(r'https?://\S+|www\.\S+')
    try:
        return url.sub(r'', text)
    except TypeError:
        print(text)
        raise
def remove_emoji(text):