Adds TypeError controls
This commit is contained in:
parent
7c6b618272
commit
b89b5969ec
@ -1,8 +1,19 @@
|
|||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
|
|
||||||
|
def preprocess_roberta(text):  # https://huggingface.co/cardiffnlp/twitter-roberta-base-sep2022
    """Mask user mentions and links in *text*.

    The text is split on whitespace and each token longer than one
    character is inspected:

    * a token that starts with ``@`` and contains exactly one ``@`` is
      replaced by ``'@user'``;
    * a token that starts with ``http`` is replaced by ``'http'``.

    Single-character tokens are kept as they are.

    Args:
        text: The string to preprocess.

    Returns:
        The processed tokens joined by single spaces. Because the input is
        split on arbitrary whitespace, runs of whitespace collapse to one
        space and leading/trailing whitespace is dropped.
    """
    preprocessed_text = []
    for t in text.split():
        # A lone '@' (or any one-character token) is never masked.
        if len(t) > 1:
            t = '@user' if t[0] == '@' and t.count('@') == 1 else t
            t = 'http' if t.startswith('http') else t
        preprocessed_text.append(t)
    return ' '.join(preprocessed_text)
|
||||||
|
|
||||||
def remove_URL(text):
    """Return *text* with URLs removed.

    Every substring matching ``https?://\\S+`` or ``www\\.\\S+`` is deleted;
    the surrounding whitespace is left in place.

    Args:
        text: The string to clean.

    Returns:
        The string with all matched URLs replaced by the empty string.

    Raises:
        TypeError: If *text* is not a string (or bytes-like) object. The
            offending value is printed before the error propagates, so the
            bad input can be identified.
    """
    url = re.compile(r'https?://\S+|www\.\S+')
    try:
        # The substitution, not the compile, is what fails on non-string
        # input, so this is the call that needs guarding.
        return url.sub(r'', text)
    except TypeError:
        print(text)
        raise
|
||||||
|
|
||||||
def remove_emoji(text):
|
def remove_emoji(text):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user