Fine-Tuning BERT for Hate Speech Classification¶
1. Data Pre-processing & EDA¶
In [ ]:
import pandas as pd
import re
import string
In [ ]:
# Read the dataset
df = pd.read_csv('HateSpeechDatasetBalanced.csv')
In [ ]:
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
import nltk
from nltk.corpus import stopwords
# Download stopwords if not already downloaded
nltk.download('stopwords')
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
True
In [ ]:
# Shuffle the dataset
df_sampled = df.sample(frac=1).reset_index(drop=True)
pd.set_option('display.max_colwidth', None)
df_sampled.head()
| | Content | Label |
|---|---|---|
| 0 | thanks this article on the office of pm is superb i am sure that it is correct and it is well written especially the opening summary from a historical perspective thanks to all involved suits my purpose very well indeed | 0 |
| 1 | but there were some shitty my ass fake chinky fireworks | 1 |
| 2 | it is easier to be stay in touch back then take to the beauties of the future the only issue with mine is there are many people about this world that are far from being beautiful in particular so i am targeting like local immigrants | 1 |
| 3 | nisu was be razor treble | 1 |
| 4 | decade nostalgia i believe that you deleted half the bloody article and yes i do consider that vandalism watch yourself and i do not think that there is a high chance of my being blocked what with this occurring quite a while ago anyway i am checking in on the decade nostalgia page as i am going to evaluate your latest edits i am also adding you to my watch list | 0 |
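Before visualizing, a quick data-quality pass is worthwhile; a minimal sketch checking for missing values, exact duplicates, and the overall label balance (using the same column names as above):

In [ ]:
# Quick data-quality check: missing values, duplicate rows, and label balance
print(df_sampled.isna().sum())
print(f"Duplicate rows: {df_sampled.duplicated().sum()}")
print(df_sampled['Label'].value_counts())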
In [ ]:
plt.figure(figsize=(6,4))
sns.countplot(x='Label', data=df_sampled, palette='viridis')
plt.title('Distribution of Labels')
plt.xlabel('Label')
plt.ylabel('Count')
plt.xticks([0, 1], ['Non-Hate Speech', 'Hate Speech'])
plt.show()
In [ ]:
# Calculate text lengths
df_sampled['Text_Length'] = df_sampled['Content'].apply(lambda x: len(x.split()))
plt.figure(figsize=(10,6))
sns.histplot(data=df_sampled, x='Text_Length', hue='Label', bins=50, palette='viridis', kde=True)
plt.title('Distribution of Text Lengths by Label')
plt.xlabel('Number of Words')
plt.ylabel('Frequency')
plt.legend(labels=['Non-Hate Speech', 'Hate Speech'])
plt.show()
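Since the tokenizer used later truncates inputs to 128 tokens, it helps to know how much of the corpus fits within that budget; a rough sketch using word counts as a proxy for WordPiece token counts (actual token counts run somewhat higher):

In [ ]:
# Rough truncation check: word counts approximate (and undercount) WordPiece tokens
print(df_sampled['Text_Length'].describe())
coverage = (df_sampled['Text_Length'] <= 128).mean()
print(f"Share of texts with at most 128 words: {coverage:.1%}")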
In [ ]:
# Function to preprocess text for the word clouds
stop_words = set(stopwords.words('english'))  # Build the stopword set once; calling stopwords.words() per word is very slow

def preprocess_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(f'[{re.escape(string.punctuation)}]', '', text)  # Remove punctuation
    tokens = text.split()
    tokens = [word for word in tokens if word not in stop_words]  # Remove stopwords
    return ' '.join(tokens)
# Preprocess texts
df_sampled['Clean_Content'] = df_sampled['Content'].apply(preprocess_text)
# Generate word clouds
hate_speech_text = ' '.join(df_sampled[df_sampled['Label'] == 1]['Clean_Content'])
non_hate_speech_text = ' '.join(df_sampled[df_sampled['Label'] == 0]['Clean_Content'])
# Create WordCloud objects
wordcloud_hate = WordCloud(width=800, height=400, background_color='white').generate(hate_speech_text)
wordcloud_non_hate = WordCloud(width=800, height=400, background_color='white').generate(non_hate_speech_text)
# Plot Word Clouds
plt.figure(figsize=(15,7))
plt.subplot(1, 2, 1)
plt.imshow(wordcloud_non_hate, interpolation='bilinear')
plt.title('Non-Hate Speech Word Cloud', fontsize=16)
plt.axis('off')
plt.subplot(1, 2, 2)
plt.imshow(wordcloud_hate, interpolation='bilinear')
plt.title('Hate Speech Word Cloud', fontsize=16)
plt.axis('off')
plt.show()
In [ ]:
from collections import Counter
# Function to get most common words
def get_most_common_words(text, n=20):
words = text.split()
counter = Counter(words)
common = counter.most_common(n)
return common
# Get most common words
common_non_hate = get_most_common_words(non_hate_speech_text)
common_hate = get_most_common_words(hate_speech_text)
# Convert to DataFrame
df_common_non_hate = pd.DataFrame(common_non_hate, columns=['Word', 'Frequency'])
df_common_hate = pd.DataFrame(common_hate, columns=['Word', 'Frequency'])
# Plot
plt.figure(figsize=(14,6))
plt.subplot(1, 2, 1)
sns.barplot(x='Frequency', y='Word', data=df_common_non_hate, palette='viridis')
plt.title('Top 20 Words in Non-Hate Speech')
plt.xlabel('Frequency')
plt.ylabel('Word')
plt.subplot(1, 2, 2)
sns.barplot(x='Frequency', y='Word', data=df_common_hate, palette='viridis')
plt.title('Top 20 Words in Hate Speech')
plt.xlabel('Frequency')
plt.ylabel('Word')
plt.tight_layout()
plt.show()
2. Data Preparation for Model Training and Testing¶
In [ ]:
# Uncomment this line when running the notebook for the first time
# !pip install optuna
Collecting optuna
  Downloading optuna-4.0.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.3-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)
Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from optuna) (1.26.4)
Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from optuna) (24.1)
Requirement already satisfied: sqlalchemy>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from optuna) (2.0.35)
Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from optuna) (4.66.5)
Requirement already satisfied: PyYAML in /usr/local/lib/python3.10/dist-packages (from optuna) (6.0.2)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.5-py3-none-any.whl.metadata (2.9 kB)
Requirement already satisfied: typing-extensions>=4 in /usr/local/lib/python3.10/dist-packages (from alembic>=1.5.0->optuna) (4.12.2)
Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from sqlalchemy>=1.3.0->optuna) (3.1.1)
Requirement already satisfied: MarkupSafe>=0.9.2 in /usr/local/lib/python3.10/dist-packages (from Mako->alembic>=1.5.0->optuna) (2.1.5)
Downloading optuna-4.0.0-py3-none-any.whl (362 kB)
Downloading alembic-1.13.3-py3-none-any.whl (233 kB)
Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.3.5 alembic-1.13.3 colorlog-6.8.2 optuna-4.0.0
In [ ]:
# Imports
import optuna
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import seaborn as sns
import pandas as pd
import re
import string
In [ ]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
Num GPUs Available: 1
2.1 Load and Preprocess Data¶
In [ ]:
# Load the dataset
df = pd.read_csv("HateSpeechDatasetBalanced.csv")
# Ensure the dataset has 'Content' and 'Label' columns
print(df.head())
# Preprocess text
def preprocess_text(text):
# Remove URLs
text = re.sub(r'http\S+', '', text)
# Remove mentions and hashtags
text = re.sub(r'@\w+|#\w+', '', text)
# Remove punctuation
text = text.translate(str.maketrans('', '', string.punctuation))
# Lowercase
text = text.lower()
# Remove extra whitespace
text = re.sub(r'\s+', ' ', text).strip()
return text
df['clean_text'] = df['Content'].apply(preprocess_text)
# Sample 25,000 observations from Label 0
df_label0 = df[df['Label'] == 0].sample(n=25000, random_state=42)
# Sample 25,000 observations from Label 1
df_label1 = df[df['Label'] == 1].sample(n=25000, random_state=42)
# Combine the sampled data into a balanced DataFrame
balanced_df = pd.concat([df_label0, df_label1]).reset_index(drop=True)
# Shuffle the balanced dataset to mix label 0 and label 1 samples
balanced_df = balanced_df.sample(frac=1, random_state=42).reset_index(drop=True)
balanced_df.head()
Content  Label
0  denial of normal the con be asked to comment o...  1
1  just by being able to tweet this insufferable ...  1
2  that is retarded you too cute to be single tha...  1
3  thought of a real badass mongol style declarat...  1
4  afro american basho  1
| | Content | Label | clean_text |
|---|---|---|---|
| 0 | my changes do not affect any of the concocted ... | 1 | my changes do not affect any of the concocted ... |
| 1 | rfc propose whatever rename the article with a... | 0 | rfc propose whatever rename the article with a... |
| 2 | i re wrote of your aids because you were askin... | 0 | i re wrote of your aids because you were askin... |
| 3 | googling is useless because of the absence of ... | 0 | googling is useless because of the absence of ... |
| 4 | martina garcia relic a mi hnas li ti key je en... | 1 | martina garcia relic a mi hnas li ti key je en... |
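A one-line sanity check confirms the subsample is still balanced after concatenation and shuffling (sketch):

In [ ]:
# Verify the 25,000/25,000 class balance
print(balanced_df['Label'].value_counts())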
2.2 Split the Data¶
In [ ]:
# Split into training and temp (which will be split into validation and test)
train_texts, temp_texts, train_labels, temp_labels = train_test_split(
balanced_df['clean_text'].tolist(), balanced_df['Label'].tolist(), test_size=0.3, random_state=42
)
# Split temp into validation and test sets
val_texts, test_texts, val_labels, test_labels = train_test_split(
temp_texts, temp_labels, test_size=0.5, random_state=42
)
print(f"Training samples: {len(train_texts)}")
print(f"Validation samples: {len(val_texts)}")
print(f"Testing samples: {len(test_texts)}")
Training samples: 35000
Validation samples: 7500
Testing samples: 7500
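Note that these splits are not stratified, so each subset's class ratio can drift slightly from 50/50. If exact balance per split matters, passing stratify is a small change; a sketch of the stratified variant:

In [ ]:
# Optional: stratified splits preserve the 50/50 label ratio in every subset
train_texts, temp_texts, train_labels, temp_labels = train_test_split(
    balanced_df['clean_text'].tolist(), balanced_df['Label'].tolist(),
    test_size=0.3, random_state=42, stratify=balanced_df['Label']
)
val_texts, test_texts, val_labels, test_labels = train_test_split(
    temp_texts, temp_labels, test_size=0.5, random_state=42, stratify=temp_labels
)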
3. Model Setup and Hyperparameter Tuning with Optuna¶
3.1 Load the Pretrained Model¶
In [ ]:
# Load the tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
3.2 Prepare the Data and Define the Objective Function¶
In [ ]:
def tokenize(texts):
return tokenizer(
texts,
padding=True,
truncation=True,
max_length=128,
return_tensors='tf'
)
# Tokenize the test texts
test_encodings = tokenize(test_texts)
# Create TensorFlow dataset for testing
test_dataset = tf.data.Dataset.from_tensor_slices((
dict(test_encodings),
test_labels
)).batch(16)
# Tokenize the training and validation texts
train_encodings = tokenize(train_texts)
val_encodings = tokenize(val_texts)
# Create TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((
dict(train_encodings),
train_labels
)).shuffle(1000).batch(16)
val_dataset = tf.data.Dataset.from_tensor_slices((
dict(val_encodings),
val_labels
)).batch(16)
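To see what the tokenizer actually feeds the model, it can help to inspect one encoded example; a small sketch that prints the first input IDs and the text they decode back to:

In [ ]:
# Peek at one tokenized example: input IDs and the recovered text
sample_ids = test_encodings['input_ids'][0]
print(sample_ids[:20])
print(tokenizer.decode(sample_ids, skip_special_tokens=True))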
In [ ]:
def objective(trial):
# Hyperparameters to tune
learning_rate = trial.suggest_float('learning_rate', 1e-5, 5e-5, log=True)
batch_size = trial.suggest_categorical('batch_size', [32,64])
freeze_layers = trial.suggest_int('freeze_layers', 3, 9, step=3)
# Re-initialize the model to ensure we're starting fresh
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
# Freeze the specified number of layers
for layer in model.bert.encoder.layer[:freeze_layers]:
layer.trainable = False
# Compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.metrics.SparseCategoricalAccuracy('accuracy')
model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
# Prepare data with the current batch size
train_dataset = tf.data.Dataset.from_tensor_slices((
dict(train_encodings),
train_labels
)).shuffle(1000).batch(batch_size)
val_dataset = tf.data.Dataset.from_tensor_slices((
dict(val_encodings),
val_labels
)).batch(batch_size)
# Train the model
history = model.fit(
train_dataset,
validation_data=val_dataset,
epochs=3,
callbacks=[
tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=1, restore_best_weights=True)
],
verbose=0 # Set verbose to 0 to reduce output during tuning
)
# Get the best validation accuracy of this trial
val_accuracy = max(history.history['val_accuracy'])
# Report the validation accuracy to Optuna
return val_accuracy
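An optional refinement, not used in the run below, is to let Optuna prune unpromising trials mid-training. A minimal sketch of a custom Keras callback (a hypothetical helper, not part of the original notebook) that reports validation accuracy after each epoch:

In [ ]:
# Hypothetical pruning callback: report val_accuracy to Optuna each epoch and
# abort the trial early if the study's pruner deems it unpromising
class OptunaPruningCallback(tf.keras.callbacks.Callback):
    def __init__(self, trial):
        super().__init__()
        self.trial = trial

    def on_epoch_end(self, epoch, logs=None):
        self.trial.report(logs['val_accuracy'], step=epoch)
        if self.trial.should_prune():
            raise optuna.TrialPruned()

Passed alongside EarlyStopping in the objective's callbacks list, this would cut short trials that lag the running best.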
3.3 Run the Optuna Study¶
In [ ]:
# Create an Optuna study
study = optuna.create_study(direction='maximize')
# Optimize the objective function
study.optimize(objective, n_trials=10)
[I 2024-10-01 15:29:36,353] A new study created in memory with name: no-name-c77fdf93-c2d7-460e-beb1-e8586925ef48
All PyTorch model weights were used when initializing TFBertForSequenceClassification.
Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
(The three warning lines above are repeated before every trial, since the model is re-initialized each time.)
[I 2024-10-01 15:57:25,899] Trial 0 finished with value: 0.8481333255767822 and parameters: {'learning_rate': 3.5724301244604476e-05, 'batch_size': 32, 'freeze_layers': 6}. Best is trial 0 with value: 0.8481333255767822.
[I 2024-10-01 16:39:01,442] Trial 1 finished with value: 0.8526666760444641 and parameters: {'learning_rate': 3.714740669592939e-05, 'batch_size': 64, 'freeze_layers': 3}. Best is trial 1 with value: 0.8526666760444641.
[I 2024-10-01 17:18:02,432] Trial 2 finished with value: 0.8346666693687439 and parameters: {'learning_rate': 1.0655247100708095e-05, 'batch_size': 32, 'freeze_layers': 9}. Best is trial 1 with value: 0.8526666760444641.
[I 2024-10-01 17:57:11,858] Trial 3 finished with value: 0.8573333621025085 and parameters: {'learning_rate': 3.812957299804759e-05, 'batch_size': 64, 'freeze_layers': 6}. Best is trial 3 with value: 0.8573333621025085.
[I 2024-10-01 18:39:03,714] Trial 4 finished with value: 0.8570666909217834 and parameters: {'learning_rate': 3.5541051355733926e-05, 'batch_size': 64, 'freeze_layers': 3}. Best is trial 3 with value: 0.8573333621025085.
[I 2024-10-01 19:06:57,049] Trial 5 finished with value: 0.8479999899864197 and parameters: {'learning_rate': 1.83360530592937e-05, 'batch_size': 64, 'freeze_layers': 3}. Best is trial 3 with value: 0.8573333621025085.
[I 2024-10-01 19:46:07,517] Trial 6 finished with value: 0.8493333458900452 and parameters: {'learning_rate': 2.0061597374699786e-05, 'batch_size': 64, 'freeze_layers': 6}. Best is trial 3 with value: 0.8573333621025085.
[I 2024-10-01 20:14:08,785] Trial 7 finished with value: 0.8546666502952576 and parameters: {'learning_rate': 2.731618478204917e-05, 'batch_size': 32, 'freeze_layers': 6}. Best is trial 3 with value: 0.8573333621025085.
[I 2024-10-01 20:50:59,587] Trial 8 finished with value: 0.8453333377838135 and parameters: {'learning_rate': 1.2644888785816918e-05, 'batch_size': 64, 'freeze_layers': 9}. Best is trial 3 with value: 0.8573333621025085.
[I 2024-10-01 21:31:57,211] Trial 9 finished with value: 0.8569333553314209 and parameters: {'learning_rate': 1.1722481227700947e-05, 'batch_size': 32, 'freeze_layers': 6}. Best is trial 3 with value: 0.8573333621025085.
3.4 Retrieve and Examine the Best Hyperparameters¶
In [ ]:
print('Number of finished trials:', len(study.trials))
print('Best trial:')
trial = study.best_trial
print(f" Value: {trial.value}")
print(" Params: ")
for key, value in trial.params.items():
print(f" {key}: {value}")
Number of finished trials: 10
Best trial:
  Value: 0.8573333621025085
  Params:
    learning_rate: 3.812957299804759e-05
    batch_size: 64
    freeze_layers: 6
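Optuna also ships plotting helpers for inspecting the search itself; assuming the study object from above, the matplotlib backend renders them inline (sketch):

In [ ]:
# Visualize the search: objective value per trial and parameter importances
from optuna.visualization.matplotlib import plot_optimization_history, plot_param_importances
plot_optimization_history(study)
plt.show()
plot_param_importances(study)
plt.show()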
4. Training the Final Model with Best Hyperparameters¶
In [ ]:
# Extract the best hyperparameters
best_learning_rate = trial.params['learning_rate']
best_batch_size = trial.params['batch_size']
best_freeze_layers = trial.params['freeze_layers']
print("\nTraining the final model with best hyperparameters...")
# Re-initialize the model
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
# Freeze the specified number of layers
for layer in model.bert.encoder.layer[:best_freeze_layers]:
layer.trainable = False
# Compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=best_learning_rate)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.metrics.SparseCategoricalAccuracy('accuracy')
model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
# Prepare data with the best batch size
train_dataset = tf.data.Dataset.from_tensor_slices((
dict(train_encodings),
train_labels
)).shuffle(1000).batch(best_batch_size)
val_dataset = tf.data.Dataset.from_tensor_slices((
dict(val_encodings),
val_labels
)).batch(best_batch_size)
# Train the model
history = model.fit(
train_dataset,
validation_data=val_dataset,
epochs=3,
callbacks=[
tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=1, restore_best_weights=True)
],
verbose=1
)
Training the final model with best hyperparameters...
Epoch 1/3
547/547 [==============================] - 805s 1s/step - loss: 0.4071 - accuracy: 0.8131 - val_loss: 0.3495 - val_accuracy: 0.8417
Epoch 2/3
547/547 [==============================] - 775s 1s/step - loss: 0.2775 - accuracy: 0.8818 - val_loss: 0.3549 - val_accuracy: 0.8539
5. Evaluation and Visualization¶
In [ ]:
# Extract accuracy values
train_acc = [acc * 100 for acc in history.history['accuracy']]
val_acc = [acc * 100 for acc in history.history['val_accuracy']]
# Epochs
epochs = range(1, len(train_acc) + 1)
# Plot
plt.figure(figsize=(10, 6))
plt.plot(epochs, train_acc, 'b-o', label='Training Accuracy')
plt.plot(epochs, val_acc, 'r-o', label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy (%)')
plt.legend()
plt.grid(True)
plt.show()
In [ ]:
# Prepare the test dataset with the best batch size
test_dataset = tf.data.Dataset.from_tensor_slices((
dict(test_encodings),
test_labels
)).batch(best_batch_size)
# Evaluate the model
test_loss, test_accuracy = model.evaluate(test_dataset)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")
# Make predictions
predictions = model.predict(test_dataset)
pred_logits = predictions.logits
pred_labels = np.argmax(pred_logits, axis=1)
# True labels
true_labels = np.array(test_labels)
# Compute confusion matrix
cm = confusion_matrix(true_labels, pred_labels)
# Classification report
report = classification_report(true_labels, pred_labels, target_names=['Not Hate Speech', 'Hate Speech'])
print("\nClassification Report:")
print(report)
# Plot confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
xticklabels=['Not Hate Speech', 'Hate Speech'],
yticklabels=['Not Hate Speech', 'Hate Speech'])
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.title('Confusion Matrix')
plt.show()
118/118 [==============================] - 62s 521ms/step - loss: 0.3421 - accuracy: 0.8456
Test Accuracy: 84.56%
118/118 [==============================] - 64s 520ms/step

Classification Report:
                 precision    recall  f1-score   support

Not Hate Speech       0.83      0.88      0.85      3813
    Hate Speech       0.87      0.81      0.84      3687

       accuracy                           0.85      7500
      macro avg       0.85      0.85      0.85      7500
   weighted avg       0.85      0.85      0.85      7500
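Since the model outputs logits, a threshold-free metric such as ROC-AUC is easy to add on top of the confusion matrix; a short sketch using the positive-class probabilities:

In [ ]:
# ROC-AUC from the positive-class (Hate Speech) probabilities
from sklearn.metrics import roc_auc_score
pred_probs = tf.nn.softmax(pred_logits, axis=1).numpy()
print(f"ROC-AUC: {roc_auc_score(true_labels, pred_probs[:, 1]):.4f}")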
In [ ]:
misclassified_indices = np.where(pred_labels != true_labels)[0]
# Decode test texts
def decode_text(encoded_text):
return tokenizer.decode(encoded_text, skip_special_tokens=True)
# Assuming 'test_encodings' contains the token IDs
# Extract the input_ids from test_encodings
test_text_ids = test_encodings['input_ids']
decoded_test_texts = [decode_text(text_ids) for text_ids in test_text_ids] # Decode token IDs to text
# Extract false positives and false negatives
false_positives = []
false_negatives = []
# Mapping of label indices to label names
label_map = {0: 'Not Hate Speech', 1: 'Hate Speech'}
for idx in misclassified_indices:
if true_labels[idx] == 0: # Not Hate Speech
false_positives.append((decoded_test_texts[idx], label_map[pred_labels[idx]], label_map[true_labels[idx]]))
else: # Hate Speech
false_negatives.append((decoded_test_texts[idx], label_map[pred_labels[idx]],
label_map[true_labels[idx]]))
# Display a few examples of false positives
print("\nExamples of False Positives:")
for text, pred_label, true_label in false_positives[:5]:
print(f"Text: {text}")
print(f"Predicted Label: {pred_label}")
print(f"True Label: {true_label}")
# Display a few examples of false negatives
print("\nExamples of False Negatives:")
for text, pred_label, true_label in false_negatives[:5]:
print(f"Text: {text}")
print(f"Predicted Label: {pred_label}")
print(f"True Label: {true_label}")
Examples of False Positives:
Text: no one gets preferential treatment at the best universities all their policies state that everyone is equal everywhere and any further disagreements like the jewish men are being propped up by society at the detriment of the white man is ridiculous
Predicted Label: Hate Speech
True Label: Not Hate Speech
Text: roland ti is tame
Predicted Label: Hate Speech
True Label: Not Hate Speech
Text: wikiproject lgbt studies class stub person project biography
Predicted Label: Hate Speech
True Label: Not Hate Speech
Text: golly gompers and dad gum son you lying cheating scoundrels fucking did it man up and own it ya chicken shit pussy republican
Predicted Label: Hate Speech
True Label: Not Hate Speech
Text: the meps bear that travelled to sandy hook in with hope that another gun massacre would never occur again
Predicted Label: Hate Speech
True Label: Not Hate Speech

Examples of False Negatives:
Text: retweet bitches be fallen so in a w niggass says she might get a new partner they try go get a good wife to find to forget him ya
Predicted Label: Not Hate Speech
True Label: Hate Speech
Text: i what a slut so hoping the girls were going to pull something out and they did a breast slightly unexpected mr
Predicted Label: Not Hate Speech
True Label: Hate Speech
Text: stop when you have family bitch face bitch you have have ass face
Predicted Label: Not Hate Speech
True Label: Hate Speech
Text: changing lives matter is a threat within this society
Predicted Label: Not Hate Speech
True Label: Hate Speech
Text: message of bob hope for newsier a business to terminate ted talks drugging
Predicted Label: Not Hate Speech
True Label: Hate Speech
In [ ]:
# Save the model and tokenizer
model.save_pretrained("finetuned_bert_hatespeech")
tokenizer.save_pretrained("finetuned_bert_hatespeech")
('finetuned_bert_hatespeech/tokenizer_config.json', 'finetuned_bert_hatespeech/special_tokens_map.json', 'finetuned_bert_hatespeech/vocab.txt', 'finetuned_bert_hatespeech/added_tokens.json')
In [ ]:
# Load the model and tokenizer
loaded_model = TFBertForSequenceClassification.from_pretrained("finetuned_bert_hatespeech")
loaded_tokenizer = BertTokenizer.from_pretrained("finetuned_bert_hatespeech")
In [ ]:
import numpy as np
import re
import string
# Example of an inference pipeline
def predict_hate_speech(text, loaded_model, loaded_tokenizer):
"""Predicts if a given text contains hate speech using a fine-tuned BERT model.
Args:
text (str): The input text to analyze.
loaded_model: The loaded fine-tuned BERT model.
loaded_tokenizer: The loaded tokenizer associated with the model.
Returns:
int: The predicted label (0 for "Not Hate Speech", 1 for "Hate Speech").
"""
def preprocess_text(text):
# Remove URLs
text = re.sub(r'http\S+', '', text)
# Remove mentions and hashtags
text = re.sub(r'@\w+|#\w+', '', text)
# Remove punctuation
text = text.translate(str.maketrans('', '', string.punctuation))
# Lowercase
text = text.lower()
# Remove extra whitespace
text = re.sub(r'\s+', ' ', text).strip()
return text
preprocessed_text = preprocess_text(text)
inputs = loaded_tokenizer(preprocessed_text, return_tensors="tf")
predictions = loaded_model(**inputs)
predicted_label = np.argmax(predictions.logits, axis=1)[0] # Extract the single predicted label
return predicted_label
In [ ]:
# Example of test messages
test_texts = [
"I absolutely love this new phone! It's so sleek and fast.", # Expected: Not Hate Speech
"All immigrants are criminals and should be deported.", # Expected: Hate Speech
"This movie was a complete waste of time and money.", # Expected: Not Hate Speech
"I hate people of that race, they're all disgusting.", # Expected: Hate Speech
]
label = ["Not Hate Speech","Hate Speech","Not Hate Speech","Hate Speech"]
for i in range(len(test_texts)):
prediction = predict_hate_speech(test_texts[i], loaded_model, loaded_tokenizer)
if prediction == 0:
print(f"'{test_texts[i]}' \nPredicted: Not Hate Speech")
else:
print(f"'{test_texts[i]}' \nPredicted: Hate Speech")
print(f"Expected: {label[i]}\n")
'I absolutely love this new phone! It's so sleek and fast.'
Predicted: Not Hate Speech
Expected: Not Hate Speech

'All immigrants are criminals and should be deported.'
Predicted: Hate Speech
Expected: Hate Speech

'This movie was a complete waste of time and money.'
Predicted: Not Hate Speech
Expected: Not Hate Speech

'I hate people of that race, they're all disgusting.'
Predicted: Hate Speech
Expected: Hate Speech
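For scoring many messages at once, tokenizing the whole batch in a single call is faster than looping over predict_hate_speech; a minimal sketch (predict_batch is a hypothetical helper that reuses the module-level preprocess_text from section 2.1):

In [ ]:
# Batched inference: preprocess and tokenize all texts in one call
def predict_batch(texts, model, tokenizer):
    cleaned = [preprocess_text(t) for t in texts]
    inputs = tokenizer(cleaned, padding=True, truncation=True,
                       max_length=128, return_tensors="tf")
    logits = model(**inputs).logits
    return np.argmax(logits, axis=1)

print(predict_batch(test_texts, loaded_model, loaded_tokenizer))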