In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
# 5cfcb5e8ef8458be6e85d57c45c7573477e2ad6a

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import warnings
warnings.filterwarnings("ignore")

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
df = pd.read_csv("/kaggle/input/2025-sep-dl-gen-ai-project/train.csv") #training set
dt = pd.read_csv("/kaggle/input/2025-sep-dl-gen-ai-project/test.csv")  #test set

In [4]:
# from kaggle_secrets import UserSecretsClient
# user_secrets = UserSecretsClient()
# wdb_t = user_secrets.get_secret("WB_TOKEN")
# import wandb
# wandb.login(key=wdb_t)
# # wandb.init(project="22f3001086-t32025", name = "BERT+Classifier head")

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mvaishnavib[0m ([33mvaishnavib-iitm-jntuh-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [8]:
# Imports and device setup
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForMaskedLM, get_linear_schedule_with_warmup
from sklearn.metrics import f1_score
from scipy.optimize import minimize
from sklearn.model_selection import train_test_split # Used for splitting later
import gc
from torch.cuda.amp import autocast, GradScaler
from nltk.corpus import wordnet
import warnings
warnings.filterwarnings("ignore")

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [10]:
## Data Loading, Preparation, and Configuration (Combined from Original Cells 2 & 12)

# Load data
df = pd.read_csv("/kaggle/input/2025-sep-dl-gen-ai-project/train.csv")
dt = pd.read_csv("/kaggle/input/2025-sep-dl-gen-ai-project/test.csv")

# Ensure 'disgust' exists and is 0 (Important for 6-label backbone)
df = df.drop(columns=["emotions"])
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
if 'disgust' not in df.columns:
    df['disgust'] = 0

base_label_cols = ['anger','fear','joy','sadness','surprise']
label_cols = base_label_cols + ['disgust']  # 6 for backbone

X = df['text'].values
y = df[label_cols].values

# Splitting data (Replaced missing iterative_train_test_split with standard train_test_split)
# Using 80/20 split as implied by the original code's intent (test_size=0.2)
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=True
) 

train_texts = X_train
val_texts = X_val
train_labels = y_train
val_labels = y_val
test_texts = dt['text'].values

# Configuration Constants (adjusted MAX_LEN_TAPT/FT to match usage later)
MODEL_NAME = "AnkitAI/deberta-v3-small-base-emotions-classifier"
MAX_LEN_TAPT = 64  # Short for TAPT to save memory
MAX_LEN_FT = 128   # Used for Fine-Tuning
BATCH_SIZE = 4     # Used for Fine-Tuning

In [11]:
## Data Augmentation Functions (Original Cell 13)

def synonym_replacement(text, n=1):
    words = text.split()
    new_words = words.copy()
    # Find words with synonyms
    random_word_list = list(set([word for word in words if wordnet.synsets(word)]))
    random.shuffle(random_word_list)
    num_replaced = 0
    for random_word in random_word_list:
        synonyms = wordnet.synsets(random_word)
        if len(synonyms) >= 1:
            # Replace with the first lemma/synonym
            synonym = synonyms[0].lemmas()[0].name()
            new_words = [synonym if word == random_word else word for word in new_words]
            num_replaced += 1
        if num_replaced >= n:
            break
    return ' '.join(new_words)

def random_deletion(text, p=0.1):
    words = text.split()
    if len(words) == 1:
        return text
    new_words = [word for word in words if random.uniform(0,1) > p]
    if len(new_words) == 0:
        new_words = [random.choice(words)] # ensure not empty
    return ' '.join(new_words)

def augment(text):
    if random.random() < 0.3:
        text = synonym_replacement(text, n=1)
    if random.random() < 0.3:
        text = random_deletion(text, p=0.1)
    return text

In [12]:
## TAPT - Text-Adaptive Pre-training (Original Cell 15)

# ---------- ULTRA-LIGHT TAPT (MLM, SAFE) ----------
mlm_epochs = 1
mlm_lr = 5e-5
BATCH_SIZE_TAPT = 4

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

class MLMDataset(Dataset):
    def __init__(self, texts, tokenizer, max_len):
        self.texts = texts
        self.tok = tokenizer
        self.max_len = max_len
    def __len__(self):
        return len(self.texts)
    def __getitem__(self, idx):
        enc = self.tok(
            self.texts[idx],
            truncation=True,
            padding='max_length',
            max_length=self.max_len,
            return_tensors='pt'
        )
        item = {k: v.squeeze(0) for k, v in enc.items()}
        return item

mlm_dataset = MLMDataset(train_texts, tokenizer, MAX_LEN_TAPT)
mlm_loader = DataLoader(mlm_dataset, batch_size=BATCH_SIZE_TAPT, shuffle=True)

mlm_model = AutoModelForMaskedLM.from_pretrained(MODEL_NAME).to(DEVICE)
mlm_model.config.use_cache = False  # small memory help

mlm_optimizer = AdamW(mlm_model.parameters(), lr=mlm_lr)
mlm_total_steps = len(mlm_loader) * mlm_epochs
mlm_scheduler = get_linear_schedule_with_warmup(
    mlm_optimizer, num_warmup_steps=100, num_training_steps=mlm_total_steps
)

# WANDB setup for TAPT (reinit=True to start a new run)
import wandb
wandb.init(project="22f3001086-t32025", name="TAPT-MLM-safe", reinit=True)
wandb.config.update({
    "phase": "TAPT",
    "epochs": mlm_epochs,
    "batch_size": BATCH_SIZE_TAPT,
    "lr": mlm_lr,
    "max_len": MAX_LEN_TAPT
})

mlm_model.train()
for epoch in range(mlm_epochs):
    total_loss = 0.0
    for batch in mlm_loader:
        batch = {k: v.to(DEVICE) for k, v in batch.items()}
        mlm_optimizer.zero_grad()
        # Use input_ids as labels for MLM
        outputs = mlm_model(**batch, labels=batch["input_ids"]) 
        loss = outputs.loss
        loss.backward()
        mlm_optimizer.step()
        mlm_scheduler.step()
        total_loss += loss.item()
        wandb.log({"tapt/train_loss": loss.item()})
    print(f"TAPT Epoch {epoch+1}, Loss {total_loss/len(mlm_loader):.4f}")

mlm_model.save_pretrained("tapt_deberta_emotions")
tokenizer.save_pretrained("tapt_deberta_emotions")
wandb.finish()

del mlm_model, mlm_dataset, mlm_loader
torch.cuda.empty_cache()

tokenizer_config.json: 0.00B [00:00, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/286 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

2025-11-30 09:22:43.136711: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1764494563.376405      47 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1764494563.441990      47 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

model.safetensors:   0%|          | 0.00/568M [00:00<?, ?B/s]

Some weights of DebertaV2ForMaskedLM were not initialized from the model checkpoint at AnkitAI/deberta-v3-small-base-emotions-classifier and are newly initialized: ['cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


TAPT Epoch 1, Loss 1.1689


0,1
tapt/train_loss,█▇▅▅▅▂▂▂▂▂▁▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
tapt/train_loss,0.02918


In [13]:
## Define Weighted BCE + Exclusivity Loss (Original Cell 20)

# Weights are already calculated in the combined data setup cell, but recalculated here for safety/modularity
pos_weights = []
for i, col in enumerate(label_cols):
    pos = train_labels[:, i].sum()
    neg = len(train_labels) - pos
    w = 1.0 if pos == 0 else neg / pos
    pos_weights.append(torch.tensor(w))
pos_weights = torch.stack(pos_weights).float().to(DEVICE)

class WeightedBCEWithExclusivityLoss(nn.Module):
    def __init__(self, class_weights, lambda_excl=0.1):
        super().__init__()
        # nn.BCEWithLogitsLoss expects pos_weight for class imbalance
        self.bce = nn.BCEWithLogitsLoss(pos_weight=class_weights) 
        self.lambda_excl = lambda_excl

    def forward(self, outputs, targets):
        logits = outputs
        probs = torch.sigmoid(logits)
        bce_loss = self.bce(logits, targets)
        # Exclusivity penalty: difference between the sum of predicted probabilities
        # and the sum of true labels (encourages predicting only the necessary number of labels)
        penalty = ((probs.sum(1) - targets.sum(1)).abs()).mean()
        loss = bce_loss + self.lambda_excl * penalty
        return loss

criterion = WeightedBCEWithExclusivityLoss(pos_weights)

In [14]:
## Load TAPT Checkpoint and Setup DataLoaders for Fine-Tuning (Combined from Original Cells 16, 19)

# Reload tokenizer and classification model starting from TAPT weights
tokenizer = AutoTokenizer.from_pretrained("tapt_deberta_emotions")
model = AutoModelForSequenceClassification.from_pretrained(
    "tapt_deberta_emotions",
    num_labels=len(label_cols)
).to(DEVICE)

MAX_LEN = MAX_LEN_FT # Using 128 for Fine-Tuning

class EmotionDataset(Dataset):
    def __init__(self, texts, labels=None, tokenizer=None, max_len=MAX_LEN, augment=False):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.augment = augment

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        if self.augment:
            text = augment(text)
        enc = self.tokenizer(
            text,
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt",
        )
        item = {k: v.squeeze(0) for k, v in enc.items()}
        if self.labels is not None:
            label = torch.tensor(self.labels[idx]).float()
            return item, label
        else:
            return item

train_dataset = EmotionDataset(train_texts, train_labels, tokenizer, MAX_LEN, augment=True)
val_dataset = EmotionDataset(val_texts, val_labels, tokenizer, MAX_LEN, augment=False)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at tapt_deberta_emotions and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [19]:
import gc
from torch.cuda.amp import autocast, GradScaler

# Free up memory before training
gc.collect()
torch.cuda.empty_cache()

def train_one_epoch(model, loader, optimizer, scheduler, scaler, accumulation_steps):
    model.train()
    total_loss = 0
    optimizer.zero_grad()
    for step, (inputs, labels) in enumerate(loader):
        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
        labels = labels.to(DEVICE)

        with autocast():
            logits = model(**inputs).logits
            loss = criterion(logits, labels)
            loss = loss / accumulation_steps

        scaler.scale(loss).backward()

        # Gradient accumulation step
        if (step + 1) % accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            scheduler.step()

        total_loss += loss.item() * accumulation_steps

    # Handle remaining gradients if batches not divisible by accumulation_steps
    if len(loader) % accumulation_steps != 0:
        # Only step if gradients exist
        if torch.is_grad_enabled():
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            scheduler.step()

    avg_loss = total_loss / len(loader)
    return avg_loss

def eval_model(model, loader):
    model.eval()
    all_probs = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in loader:
            inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
            outputs = model(**inputs).logits
            probs = torch.sigmoid(outputs).cpu().numpy()
            all_probs.append(probs)
            all_labels.append(labels.numpy())

    all_probs = np.vstack(all_probs)
    all_labels = np.vstack(all_labels)
    preds_bin = (all_probs > 0.5).astype(int)
    macro_f1 = f1_score(all_labels[:, :5], preds_bin[:, :5], average="macro")
    return macro_f1, all_probs, all_labels

In [None]:
## Full Training Loop for Multiple Seeds (Original Cell 25)

EPOCHS = 8
LEARNING_RATE = 3e-5
GRAD_ACCUM_STEPS = 4
seeds = [42, 7, 2026]

def train_loop(seed):
    # Set seed for reproducibility
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    
    # Reload model from TAPT checkpoint
    model = AutoModelForSequenceClassification.from_pretrained(
        "tapt_deberta_emotions", num_labels=len(label_cols)
    ).to(DEVICE)
    
    optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)
    # Calculate total steps for scheduler
    total_steps = len(train_loader) // GRAD_ACCUM_STEPS * EPOCHS
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=200, num_training_steps=total_steps)
    scaler = GradScaler()
    
    best_f1 = 0
    no_improve = 0
    patience = 1
    
    # WANDB setup for Fine-Tuning
    import wandb
    wandb.init(project="22f3001086-t32025", name=f"FT-Deberta-Seed{seed}", reinit=True)
    wandb.config.update({
        "phase": "Fine-Tuning",
        "seed": seed,
        "epochs": EPOCHS,
        "lr": LEARNING_RATE,
        "accum_steps": GRAD_ACCUM_STEPS,
        "max_len": MAX_LEN
    })
    
    for epoch in range(EPOCHS):
        train_loss = train_one_epoch(model, train_loader, optimizer, scheduler, scaler, GRAD_ACCUM_STEPS)
        val_f1, val_probs, val_labels_curr = eval_model(model, val_loader)
        
        wandb.log({"train/loss": train_loss, "val/macro_f1": val_f1, "epoch": epoch + 1})
        
        print(f"Seed {seed} Epoch {epoch + 1} - Train Loss: {train_loss:.4f}, Val F1: {val_f1:.4f}")
        
        if val_f1 > best_f1:
            best_f1 = val_f1
            no_improve = 0
            # Save the best model state dict for ensembling
            torch.save(model.state_dict(), f"best_model_seed{seed}.pt") 
        else:
            no_improve += 1
            if no_improve > patience:
                print("Early stopping.")
                break
                
    # Load the best model weights for the current seed
    model.load_state_dict(torch.load(f"best_model_seed{seed}.pt", map_location=DEVICE))
    model.eval()
    wandb.finish()
    
    # Recalculate validation probabilities using the best weights
    val_f1, val_probs, val_labels_curr = eval_model(model, val_loader) 
    
    return model, val_probs, val_labels_curr

models = []
val_probs_list = []
val_labels_list = []

for sd in seeds:
    model, val_probs, val_labels_curr = train_loop(sd)
    models.append(model)
    val_probs_list.append(val_probs)
    # The labels should be the same across runs, but keeping the check for safety
    val_labels_list.append(val_labels_curr) 

# Take the labels from the first run (they should all be the same)
val_labels = val_labels_list[0]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at tapt_deberta_emotions and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Seed 42 Epoch 1 - Train Loss: 0.8718, Val F1: 0.5713
Seed 42 Epoch 2 - Train Loss: 0.6149, Val F1: 0.7145
Seed 42 Epoch 3 - Train Loss: 0.4542, Val F1: 0.7586
Seed 42 Epoch 4 - Train Loss: 0.3543, Val F1: 0.7818
Seed 42 Epoch 5 - Train Loss: 0.2786, Val F1: 0.7990
Seed 42 Epoch 6 - Train Loss: 0.2296, Val F1: 0.8010
Seed 42 Epoch 7 - Train Loss: 0.1995, Val F1: 0.8087
Seed 42 Epoch 8 - Train Loss: 0.1822, Val F1: 0.8116


0,1
epoch,▁▂▃▄▅▆▇█
train/loss,█▅▄▃▂▁▁▁
val/macro_f1,▁▅▆▇████

0,1
epoch,8.0
train/loss,0.1822
val/macro_f1,0.81157


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at tapt_deberta_emotions and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Seed 7 Epoch 1 - Train Loss: 0.8642, Val F1: 0.6557
Seed 7 Epoch 2 - Train Loss: 0.6061, Val F1: 0.7098
Seed 7 Epoch 3 - Train Loss: 0.4608, Val F1: 0.7463
Seed 7 Epoch 4 - Train Loss: 0.3539, Val F1: 0.7778
Seed 7 Epoch 5 - Train Loss: 0.2834, Val F1: 0.7997
Seed 7 Epoch 6 - Train Loss: 0.2320, Val F1: 0.8036
Seed 7 Epoch 7 - Train Loss: 0.2057, Val F1: 0.8101
Seed 7 Epoch 8 - Train Loss: 0.1835, Val F1: 0.8084


0,1
epoch,▁▂▃▄▅▆▇█
train/loss,█▅▄▃▂▁▁▁
val/macro_f1,▁▃▅▇████

0,1
epoch,8.0
train/loss,0.18348
val/macro_f1,0.80841


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at tapt_deberta_emotions and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Seed 2026 Epoch 1 - Train Loss: 0.8685, Val F1: 0.6247
Seed 2026 Epoch 2 - Train Loss: 0.6143, Val F1: 0.7144
Seed 2026 Epoch 3 - Train Loss: 0.4554, Val F1: 0.7619
Seed 2026 Epoch 4 - Train Loss: 0.3600, Val F1: 0.7841
Seed 2026 Epoch 5 - Train Loss: 0.2907, Val F1: 0.7973


In [23]:
## Ensembling and Temperature Scaling (Combined from Original Cells 26, 27)

# --- Collect Logits for Ensembling ---

def collect_logits(model, loader):
    logits_list = []
    with torch.no_grad():
        # Iterate over validation loader with both inputs and labels
        for inputs, _ in loader: 
            inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
            logits = model(**inputs).logits
            logits_list.append(logits.cpu())
    return torch.cat(logits_list, dim=0)

# Ensemble logits for validation set (average across seeds)
val_logits_ensemble = torch.zeros(len(val_texts), len(label_cols))
for m in models:
    val_logits_ensemble += collect_logits(m, val_loader).cpu()
val_logits_ensemble /= len(models)
val_logits_ensemble = val_logits_ensemble.to(DEVICE)
val_labels_tensor = torch.tensor(val_labels).to(DEVICE).float()

# --- Temperature Scaling ---

class ModelWithTemperature(nn.Module):
    def __init__(self):
        super().__init__()
        self.temperature = nn.Parameter(torch.ones(1) * 1.0)
    def forward(self, logits):
        return logits / self.temperature

temp_model = ModelWithTemperature().to(DEVICE)
optimizer_t = AdamW(temp_model.parameters(), lr=0.01)

# Optimize temperature
for _ in range(200):
    optimizer_t.zero_grad()
    scaled_logits = temp_model(val_logits_ensemble)
    # Use BCEWithLogitsLoss (or equivalent) for calibration
    loss_t = nn.functional.binary_cross_entropy_with_logits(scaled_logits, val_labels_tensor) 
    loss_t.backward()
    optimizer_t.step()

T = temp_model.temperature.item()
print(f"Optimal temperature: {T:.4f}")

# Function to convert logits to calibrated probabilities
def logits_to_probs(logits, T):
    return torch.sigmoid(logits / T)

# Apply temperature to ensemble logits
val_probs_cal = logits_to_probs(val_logits_ensemble, T).cpu().numpy()

Optimal temperature: 1.3561


In [24]:
## Per-label Threshold Optimization (Original Cell 29)

def optimize_thresholds(y_true, y_probs, n_labels=5):
    # Search space for thresholds
    thresholds = np.linspace(0.05, 0.89, 85) 
    best_t = np.full(n_labels, 0.5)
    for i in range(n_labels):
        best_f1 = 0.0
        # Iterate through possible thresholds for label i
        for t in thresholds: 
            preds = (y_probs[:, i] > t).astype(int)
            f1 = f1_score(y_true[:, i], preds)
            if f1 > best_f1:
                best_f1 = f1
                best_t[i] = t
    return best_t

# Optimize on the first 5 labels (excluding 'disgust')
optimal_thresholds = optimize_thresholds(val_labels[:, :5], val_probs_cal[:, :5], n_labels=5) 
print(f"Optimal thresholds (anger, fear, joy, sadness, surprise): {optimal_thresholds}")

Optimal thresholds (anger, fear, joy, sadness, surprise): [0.87 0.43 0.57 0.64 0.3 ]


Optimal temperature: 1.6917624473571777
Optimal thresholds: [0.46 0.44 0.74 0.6  0.6 ]

Optimal thresholds (anger, fear, joy, sadness, surprise): [0.87 0.43 0.57 0.64 0.3 ]

In [28]:
## Test Data Preparation (Original Cell 30)

test_df = dt # Use dt (loaded test data)
test_texts = test_df["text"].tolist()

class TestDataset(Dataset):
    def __init__(self, texts, tokenizer, max_len=MAX_LEN):
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_len = max_len
    def __len__(self):
        return len(self.texts)
    def __getitem__(self, idx):
        enc = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt",
        )
        return {k: v.squeeze(0) for k, v in enc.items()}

test_dataset = TestDataset(test_texts, tokenizer)
# Use the same batch size as training/validation
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [29]:
# --- Inference on test set ---
test_logits_ensemble = []

with torch.no_grad():
    for inputs in test_loader:
        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
        logits_sum = 0
        for m in models:
            logits_sum += m(**inputs).logits
        logits_mean = logits_sum / len(models)
        test_logits_ensemble.append(logits_mean.cpu())

test_logits_ensemble = torch.cat(test_logits_ensemble, dim=0)

# --- Apply Calibration and Thresholds ---
test_probs_cal = logits_to_probs(test_logits_ensemble.to(DEVICE), T).cpu().numpy()

# Initialize binary predictions
test_preds_bin = np.zeros_like(test_probs_cal, dtype=int)
# The optimal_thresholds array only has 5 values, so we use a default for 'disgust' (index 5)
thresholds_for_inference = np.append(optimal_thresholds, 0.5)

# Apply thresholds to all 6 columns
for i in range(len(label_cols)):
    test_preds_bin[:, i] = (test_probs_cal[:, i] > thresholds_for_inference[i]).astype(int)

# --- Prepare and save submission ---
submission_data = {
    "id": test_df["id"],
    # Only include the 5 required emotions in the submission
    **{label: test_preds_bin[:, i] for i, label in enumerate(base_label_cols)}
}

submission = pd.DataFrame(submission_data)
submission.to_csv("submission.csv", index=False)