finishes classification scripts
This commit is contained in:
@@ -1,12 +1,9 @@
|
||||
import re
|
||||
import string
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
|
||||
from datasets import load_dataset
|
||||
from transformers.pipelines.pt_utils import KeyDataset
|
||||
from funs.CleanTweets import remove_URL, remove_emoji, remove_html, remove_punct
|
||||
|
||||
|
||||
#%%
|
||||
@@ -99,8 +96,8 @@ for out in pipe(KeyDataset(dataset['train'], "cleanContent"), batch_size=8, trun
|
||||
# Exactly the same output as before, but the contents are passed
|
||||
# as batches to the model
|
||||
# %%
|
||||
dfClassify['output_label'] = output_labels
|
||||
dfClassify['output_score'] = output_score
|
||||
dfClassify['output_label_topicCov'] = output_labels
|
||||
dfClassify['output_score_topicCov'] = output_score
|
||||
|
||||
timeEnd = datetime.now()
|
||||
timeTotal = timeEnd - timeStart
|
||||
|
||||
Reference in New Issue
Block a user