Adds TypeError controls
This commit is contained in:
parent
7c6b618272
commit
b89b5969ec
@ -1,8 +1,19 @@
|
||||
import re
|
||||
import string
|
||||
|
||||
def preprocess_roberta(text):  # https://huggingface.co/cardiffnlp/twitter-roberta-base-sep2022
    """Mask user mentions and links in ``text`` before tokenisation.

    The text is split on whitespace and each token longer than one
    character is inspected:

    * a token that starts with ``@`` and contains exactly one ``@`` is
      replaced by the placeholder ``@user``;
    * a token that starts with ``http`` is replaced by ``http``.

    Tokens are then re-joined with single spaces, so runs of whitespace
    in the input collapse to one space.

    NOTE(review): the block structure follows the preprocessing snippet
    published on the model card linked above, where every token
    (including single-character ones) is kept in the output -- confirm
    this matches the intended behaviour of this project.

    Args:
        text: The raw string to normalise.

    Returns:
        The normalised string.
    """
    preprocessed_text = []
    for t in text.split():
        # Single-character tokens (a lone "@", "a", ...) are passed through
        # untouched; only longer tokens can be mentions or links.
        if len(t) > 1:
            t = '@user' if t[0] == '@' and t.count('@') == 1 else t
            t = 'http' if t.startswith('http') else t
        preprocessed_text.append(t)
    return ' '.join(preprocessed_text)
|
||||
|
||||
def remove_URL(text):
    """Return ``text`` with every URL removed.

    A URL is anything matching ``https?://\\S+`` or ``www\\.\\S+``; each
    match is replaced by the empty string, so surrounding whitespace is
    left as it was.

    Args:
        text: The string to clean.

    Returns:
        ``text`` without the matched URLs.

    Raises:
        TypeError: If ``text`` is not a string or bytes-like object. The
            offending value is printed first to help locate bad input.
    """
    url = re.compile(r'https?://\S+|www\.\S+')
    try:
        # The substitution, not the compilation of the constant pattern,
        # is the call that fails on non-string input.
        return url.sub(r'', text)
    except TypeError:
        # Show the value that could not be processed, then let the caller
        # decide how to handle it.
        print(text)
        raise
|
||||
|
||||
def remove_emoji(text):
|
||||
|
Loading…
x
Reference in New Issue
Block a user