In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
# 5cfcb5e8ef8458be6e85d57c45c7573477e2ad6a

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import warnings
warnings.filterwarnings("ignore")

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/2025-sep-dl-gen-ai-project/sample_submission.csv
/kaggle/input/2025-sep-dl-gen-ai-project/train.csv
/kaggle/input/2025-sep-dl-gen-ai-project/test.csv


# Initial work

In [3]:
df = pd.read_csv("/kaggle/input/2025-sep-dl-gen-ai-project/train.csv") #training set
dt = pd.read_csv("/kaggle/input/2025-sep-dl-gen-ai-project/test.csv")  #test set

In [8]:
from sklearn.model_selection import train_test_split

label_cols = ['anger','fear','joy','sadness','surprise']

xtrain, xval, ytrain, yval = train_test_split(
    df['text'],
    df[label_cols].values,
    test_size=0.2,
    random_state=42,
    # shuffle=True
)

In [6]:
# from kaggle_secrets import UserSecretsClient
# user_secrets = UserSecretsClient()
# wdb_t = user_secrets.get_secret("WB_TOKEN")
# import wandb
# wandb.login(key=wdb_t)
# # wandb.init(project="22f3001086-t32025", name = "BERT+Classifier head")

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

# Model n Definitions

In [10]:
import torch
from tqdm import tqdm
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel, get_linear_schedule_with_warmup
from sklearn.metrics import f1_score
import numpy as np
import random
import wandb
print("imports done!")

imports done!


In [11]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

MODEL_NAME = "google/electra-base-discriminator"  # ‚Üê ELECTRA (was deberta-v3-base)
MAX_LEN = 160
BATCH_SIZE = 16  # ‚Üê ELECTRA is smaller ‚Üí can use 16 (was 8)
EPOCHS = 4
LR = 2e-5
label_cols = ['anger','fear','joy','sadness','surprise']
num_labels = len(label_cols)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def seed_everything(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(42)

In [12]:
# ytrain, yval already exist from your earlier split in final-robert.ipynb
ytrain_arr = np.array(ytrain)

# per-label pos_weight = (N_pos_reversed style) to upweight minority labels
label_counts = ytrain_arr.sum(axis=0)
total_counts = label_counts.sum()
pos_weight = (total_counts - label_counts) / (label_counts + 1e-6)
pos_weight = torch.tensor(pos_weight, dtype=torch.float32).to(DEVICE)

print("Label counts:", dict(zip(label_cols, label_counts)))
print("pos_weight:", pos_weight.cpu().numpy())

Label counts: {'anger': 647, 'fear': 3102, 'joy': 1339, 'sadness': 1740, 'surprise': 1611}
pos_weight: [12.043277   1.7205029  5.3024645  3.85       4.2383614]


In [14]:
class EmotionDataset(Dataset):
    def __init__(self, texts, labels):
        self.texts = list(texts)
        self.labels = np.array(labels, dtype=np.float32)

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        enc = tokenizer(
            text,
            truncation=True,
            padding="max_length",
            max_length=MAX_LEN,
            return_tensors="pt"
        )
        item = {
            "input_ids": enc["input_ids"].squeeze(0),
            "attention_mask": enc["attention_mask"].squeeze(0),
            "labels": torch.tensor(self.labels[idx], dtype=torch.float32),
        }
        return item

train_ds = EmotionDataset(xtrain, ytrain_arr)
val_ds   = EmotionDataset(xval,   np.array(yval, dtype=np.float32))

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False)

In [16]:
class RobertaClassifier(nn.Module):
    def __init__(self, model_name, num_labels, dropout=0.1):
        super().__init__()
        self.encoder = AutoModel.from_pretrained(model_name)
        hidden_size = self.encoder.config.hidden_size
        self.dropout = nn.Dropout(dropout)
        self.out = nn.Linear(hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        outputs = self.encoder(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        pooled = outputs.last_hidden_state[:, 0]  # CLS
        pooled = self.dropout(pooled)
        logits = self.out(pooled)
        return logits

model = RobertaClassifier(MODEL_NAME, num_labels).to(DEVICE)

In [17]:
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
num_train_steps = len(train_loader) * EPOCHS
num_warmup_steps = int(0.1 * num_train_steps)

scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_train_steps
)

In [18]:
wandb.init(
    project="22f3001086-t32025",
    name="final_electra_posweigh",
    config={
        "model_name": MODEL_NAME,
        "max_len": MAX_LEN,
        "batch_size": BATCH_SIZE,
        "epochs": EPOCHS,
        "lr": LR,
    }
)
wandb.watch(model, log="all", log_freq=100)

In [19]:
def train_one_epoch(epoch):
    model.train()
    total_loss = 0.0

    for step, batch in enumerate(tqdm(train_loader, desc=f"Train epoch {epoch+1}")):
        input_ids = batch["input_ids"].to(DEVICE)
        attention_mask = batch["attention_mask"].to(DEVICE)
        labels = batch["labels"].to(DEVICE)

        optimizer.zero_grad()
        logits = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        scheduler.step()

        total_loss += loss.item()

        if (step + 1) % 50 == 0:
            avg_loss = total_loss / (step + 1)
            print(f"Epoch {epoch+1} | Step {step+1}/{len(train_loader)} | Train loss: {avg_loss:.4f}")
            wandb.log({"train_step_loss": avg_loss})

    avg_epoch_loss = total_loss / len(train_loader)
    return avg_epoch_loss

In [20]:
from sklearn.metrics import precision_score, recall_score

def validate(epoch, thresholds=None):
    model.eval()
    total_loss = 0.0
    all_labels = []
    all_probs = []

    with torch.no_grad():
        for batch in tqdm(val_loader, desc=f"Valid epoch {epoch+1}"):
            input_ids = batch["input_ids"].to(DEVICE)
            attention_mask = batch["attention_mask"].to(DEVICE)
            labels = batch["labels"].to(DEVICE)

            logits = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = criterion(logits, labels)

            probs = torch.sigmoid(logits).cpu().numpy()
            all_probs.append(probs)
            all_labels.append(labels.cpu().numpy())

            total_loss += loss.item()

    avg_loss = total_loss / len(val_loader)
    all_probs = np.concatenate(all_probs, axis=0)
    all_labels = np.concatenate(all_labels, axis=0)

    if thresholds is None:
        thresholds = np.full(num_labels, 0.5, dtype=float)

    preds = (all_probs >= thresholds[None, :]).astype(int)

    macro_f1 = f1_score(all_labels, preds, average="macro")
    macro_precision = precision_score(all_labels, preds, average="macro", zero_division=0)
    macro_recall = recall_score(all_labels, preds, average="macro", zero_division=0)
    mean_support = all_labels.sum(axis=0).mean()  # avg positives per label

    print(
        f"Epoch {epoch+1} | Val loss: {avg_loss:.4f} | "
        f"Macro F1: {macro_f1:.4f} | Macro P: {macro_precision:.4f} | Macro R: {macro_recall:.4f}"
    )

    wandb.log({
        "val_loss": avg_loss,
        "val_macro_f1": macro_f1,
        "val_macro_precision": macro_precision,
        "val_macro_recall": macro_recall,
        "val_mean_support": mean_support,
    })

    per_label_f1 = f1_score(all_labels, preds, average=None)
    for lbl, score in zip(label_cols, per_label_f1):
        wandb.log({f"val_f1_{lbl}": score})

    return avg_loss, macro_f1, all_probs, all_labels

In [21]:
best_f1 = 0.0
best_state = None

for epoch in range(EPOCHS):
    print(f"\n=== Epoch {epoch+1}/{EPOCHS} ===")
    train_loss = train_one_epoch(epoch)
    val_loss, val_f1, val_probs, val_labels = validate(epoch)
    
    wandb.log({
        "train_epoch_loss": train_loss,
        "epoch": epoch + 1
    })
    
    if val_f1 > best_f1:
        best_f1 = val_f1
        best_state = {
            "model_state_dict": model.state_dict(),
            "model_name": MODEL_NAME,
            "label_cols": label_cols
        }
        torch.save(best_state, "electra_best.pt")
        print(f"‚úÖ New best model saved! Macro F1 = {best_f1:.4f}")
    
    # Save checkpoint every epoch to avoid losing progress
    torch.save({
        "epoch": epoch,
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "best_f1": best_f1
    }, "checkpoint_e.pt")
    
    print(f"Best validation Macro F1 so far: {best_f1:.4f}")

print("üèÅ Training complete!")


=== Epoch 1/4 ===


Train epoch 1:  15%|‚ñà‚ñç        | 50/342 [00:13<01:17,  3.77it/s]

Epoch 1 | Step 50/342 | Train loss: 1.3079


Train epoch 1:  29%|‚ñà‚ñà‚ñâ       | 100/342 [00:29<04:07,  1.02s/it]

Epoch 1 | Step 100/342 | Train loss: 1.2819


Train epoch 1:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 150/342 [00:42<00:50,  3.79it/s]

Epoch 1 | Step 150/342 | Train loss: 1.2530


Train epoch 1:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 200/342 [00:57<02:18,  1.03it/s]

Epoch 1 | Step 200/342 | Train loss: 1.2100


Train epoch 1:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 250/342 [01:11<00:24,  3.79it/s]

Epoch 1 | Step 250/342 | Train loss: 1.1580


Train epoch 1:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 300/342 [01:25<00:32,  1.29it/s]

Epoch 1 | Step 300/342 | Train loss: 1.1097


Train epoch 1: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 342/342 [01:36<00:00,  3.53it/s]
Valid epoch 1: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 86/86 [00:08<00:00, 10.20it/s]


Epoch 1 | Val loss: 0.7273 | Macro F1: 0.6793 | Macro P: 0.5459 | Macro R: 0.9122
‚úÖ New best model saved! Macro F1 = 0.6793
Best validation Macro F1 so far: 0.6793

=== Epoch 2/4 ===


Train epoch 2:  15%|‚ñà‚ñç        | 50/342 [00:13<01:16,  3.80it/s]

Epoch 2 | Step 50/342 | Train loss: 0.6728


Train epoch 2:  29%|‚ñà‚ñà‚ñâ       | 100/342 [00:28<01:03,  3.79it/s]

Epoch 2 | Step 100/342 | Train loss: 0.6711


Train epoch 2:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 150/342 [00:41<00:50,  3.79it/s]

Epoch 2 | Step 150/342 | Train loss: 0.6725


Train epoch 2:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 200/342 [00:55<00:37,  3.80it/s]

Epoch 2 | Step 200/342 | Train loss: 0.6576


Train epoch 2:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 250/342 [01:09<00:24,  3.79it/s]

Epoch 2 | Step 250/342 | Train loss: 0.6498


Train epoch 2:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 300/342 [01:23<00:11,  3.80it/s]

Epoch 2 | Step 300/342 | Train loss: 0.6427


Train epoch 2: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 342/342 [01:34<00:00,  3.61it/s]
Valid epoch 2: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 86/86 [00:08<00:00, 10.23it/s]


Epoch 2 | Val loss: 0.5945 | Macro F1: 0.7577 | Macro P: 0.6549 | Macro R: 0.9010
‚úÖ New best model saved! Macro F1 = 0.7577
Best validation Macro F1 so far: 0.7577

=== Epoch 3/4 ===


Train epoch 3:  15%|‚ñà‚ñç        | 50/342 [00:14<01:29,  3.27it/s]

Epoch 3 | Step 50/342 | Train loss: 0.4881


Train epoch 3:  29%|‚ñà‚ñà‚ñâ       | 100/342 [00:27<01:03,  3.80it/s]

Epoch 3 | Step 100/342 | Train loss: 0.4815


Train epoch 3:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 150/342 [00:42<00:59,  3.25it/s]

Epoch 3 | Step 150/342 | Train loss: 0.4691


Train epoch 3:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 200/342 [00:55<00:37,  3.79it/s]

Epoch 3 | Step 200/342 | Train loss: 0.4575


Train epoch 3:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 250/342 [01:10<00:28,  3.23it/s]

Epoch 3 | Step 250/342 | Train loss: 0.4532


Train epoch 3:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 300/342 [01:24<00:11,  3.79it/s]

Epoch 3 | Step 300/342 | Train loss: 0.4436


Train epoch 3: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 342/342 [01:35<00:00,  3.59it/s]
Valid epoch 3: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 86/86 [00:08<00:00, 10.27it/s]


Epoch 3 | Val loss: 0.5771 | Macro F1: 0.7633 | Macro P: 0.6575 | Macro R: 0.9190
‚úÖ New best model saved! Macro F1 = 0.7633
Best validation Macro F1 so far: 0.7633

=== Epoch 4/4 ===


Train epoch 4:  15%|‚ñà‚ñç        | 50/342 [00:14<01:16,  3.79it/s]

Epoch 4 | Step 50/342 | Train loss: 0.3770


Train epoch 4:  29%|‚ñà‚ñà‚ñâ       | 100/342 [00:27<01:03,  3.79it/s]

Epoch 4 | Step 100/342 | Train loss: 0.3695


Train epoch 4:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 150/342 [00:42<00:50,  3.80it/s]

Epoch 4 | Step 150/342 | Train loss: 0.3648


Train epoch 4:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 200/342 [00:55<00:37,  3.79it/s]

Epoch 4 | Step 200/342 | Train loss: 0.3607


Train epoch 4:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 250/342 [01:10<00:24,  3.80it/s]

Epoch 4 | Step 250/342 | Train loss: 0.3566


Train epoch 4:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 300/342 [01:24<00:11,  3.79it/s]

Epoch 4 | Step 300/342 | Train loss: 0.3556


Train epoch 4: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 342/342 [01:36<00:00,  3.55it/s]
Valid epoch 4: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 86/86 [00:08<00:00, 10.33it/s]


Epoch 4 | Val loss: 0.5707 | Macro F1: 0.7909 | Macro P: 0.7057 | Macro R: 0.8997
‚úÖ New best model saved! Macro F1 = 0.7909
Best validation Macro F1 so far: 0.7909
üèÅ Training complete!


# Inferance

In [22]:
import numpy as np
from sklearn.metrics import f1_score

# Get validation predictions (run this after training completes)
def find_optimal_thresholds(val_probs, val_labels, label_cols, num_trials=100):
    """Find per-label thresholds that maximize Macro F1"""
    best_f1 = 0.0
    best_thresholds = np.full(len(label_cols), 0.5)
    
    # Test thresholds from 0.1 to 0.9
    thresholds = np.linspace(0.1, 0.9, num_trials)
    
    for thresh_comb in thresholds:
        # Test same threshold for all labels first
        preds = (val_probs >= thresh_comb).astype(int)
        macro_f1 = f1_score(val_labels, preds, average='macro')
        
        if macro_f1 > best_f1:
            best_f1 = macro_f1
            best_thresholds = np.full(len(label_cols), thresh_comb)
    
    # Per-label optimization (more precise)
    for i, label in enumerate(label_cols):
        label_probs = val_probs[:, i]
        label_true = val_labels[:, i]
        
        label_f1s = []
        for thresh in thresholds:
            pred = (label_probs >= thresh).astype(int)
            f1 = f1_score(label_true, pred)
            label_f1s.append(f1)
        
        best_thresh_idx = np.argmax(label_f1s)
        best_thresholds[i] = thresholds[best_thresh_idx]
    
    final_preds = (val_probs >= best_thresholds).astype(int)
    final_macro_f1 = f1_score(val_labels, final_preds, average='macro')
    
    print("üéØ Optimal Thresholds Found:")
    for i, (label, thresh) in enumerate(zip(label_cols, best_thresholds)):
        print(f"  {label}: {thresh:.3f}")
    
    print(f"\nüìä Results Comparison:")
    print(f"Default (0.5):  {f1_score(val_labels, (val_probs>=0.5).astype(int), average='macro'):.4f}")
    print(f"Optimized:      {final_macro_f1:.4f} (+{final_macro_f1 - 0.8085:+.4f})")
    
    return best_thresholds, final_macro_f1

# Run optimization (uses val_probs, val_labels from your last epoch)
optimal_thresholds, opt_f1 = find_optimal_thresholds(val_probs, val_labels, label_cols)
wandb.log({"val_macro_f1_optimized": opt_f1})

üéØ Optimal Thresholds Found:
  anger: 0.852
  fear: 0.722
  joy: 0.884
  sadness: 0.682
  surprise: 0.730

üìä Results Comparison:
Default (0.5):  0.7909
Optimized:      0.8181 (++0.0096)


In [None]:
berta_thresholds = [0.9, 0.561, 0.625, 0.795, 0.771]
electra_thresholds = [0.852, 0.722, 0.884, 0.682, 0.730]

In [24]:
# Test predictions with optimal thresholds
test_ds = EmotionDataset(dt['text'], np.zeros((len(dt), num_labels)))
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)

all_test_probs = []
model.eval()
with torch.no_grad():
    for batch in tqdm(test_loader, desc="Test inference"):
        input_ids = batch["input_ids"].to(DEVICE)
        attention_mask = batch["attention_mask"].to(DEVICE)
        logits = model(input_ids, attention_mask)
        probs = torch.sigmoid(logits).cpu().numpy()
        all_test_probs.append(probs)

test_probs = np.concatenate(all_test_probs)

# Apply optimal thresholds
submission = dt[['id']].copy()
for i, col in enumerate(label_cols):
    submission[col] = (test_probs[:, i] >= optimal_thresholds[i]).astype(int)

submission.to_csv("submission_electra_optimized.csv", index=False)
print("‚úÖ Optimized submission saved!")
print("\nFirst 5 predictions:")
submission.head()

Test inference: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 107/107 [00:10<00:00, 10.61it/s]

‚úÖ Optimized submission saved!

First 5 predictions:





Unnamed: 0,id,anger,fear,joy,sadness,surprise
0,0,1,1,0,0,0
1,1,0,0,0,0,0
2,2,1,1,0,0,0
3,3,0,1,0,0,0
4,4,0,1,0,0,1


In [25]:
# 1. Delete current model completely
del model
torch.cuda.empty_cache()

# 2. Recreate FRESH model with EXACT training architecture 
MODEL_NAME = "google/electra-base-discriminator"  # Your DeBERTa training model or roberta-base
model = RobertaClassifier(MODEL_NAME, num_labels=num_labels).to(DEVICE)

# 3. Load with strict=False (ignores pooler mismatches)
best_state = torch.load("electra_best.pt")  # Your DeBERTa checkpoint
model.load_state_dict(best_state["model_state_dict"], strict=False)
model.eval()

print("‚úÖ Model loaded successfully!")

‚úÖ Model loaded successfully!


In [26]:
# DeBERTa test inference (same pattern)
MODEL_NAME = "google/electra-base-discriminator"
model = RobertaClassifier(MODEL_NAME, num_labels).to(DEVICE)
best_state = torch.load("electra_best.pt")
model.load_state_dict(best_state["model_state_dict"], strict=False)
model.eval()

test_ds = EmotionDataset(dt['text'], np.zeros((len(dt), num_labels)))
test_loader = DataLoader(test_ds, batch_size=8, shuffle=False)  # DeBERTa batch=8

all_test_probs = []
with torch.no_grad():
    for batch in tqdm(test_loader, desc="DeBERTa test"):
        input_ids = batch["input_ids"].to(DEVICE)
        attention_mask = batch["attention_mask"].to(DEVICE)
        logits = model(input_ids, attention_mask)
        probs = torch.sigmoid(logits).cpu().numpy()
        all_test_probs.append(probs)

np.save('electra_test_probs.npy', np.concatenate(all_test_probs))
print("‚úÖ ELECTRA probs saved!")

DeBERTa test: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [00:09<00:00, 22.36it/s]

‚úÖ ELECTRA probs saved!





In [None]:
# import pandas as pd
# import numpy as np

# # Load your submission
# submission = pd.read_csv("/kaggle/working/submission_roberta_optimized.csv")
# label_cols = ['anger', 'fear', 'joy', 'sadness', 'surprise']

# # Convert binary predictions to probability estimates
# # High confidence for predicted labels, low for others
# binary_preds = submission[label_cols].values
# roberta_test_probs = np.zeros_like(binary_preds, dtype=np.float32)

# # Map 0‚Üí0.25, 1‚Üí0.75 (good for ensemble weighting)
# roberta_test_probs[binary_preds == 1] = 0.75
# roberta_test_probs[binary_preds == 0] = 0.25

# # Save for ensemble
# np.save('roberta_test_probs.npy', roberta_test_probs)
# print("‚úÖ roberta_test_probs.npy created from submission.csv!")
# print(f"Shape: {roberta_test_probs.shape}")
# print(f"Sample:\n{roberta_test_probs[:3]}")
# print("\nFirst 5 rows match submission:")
# print(submission[label_cols].head())

In [27]:
wandb.finish()

0,1
epoch,‚ñÅ‚ñÉ‚ñÜ‚ñà
train_epoch_loss,‚ñà‚ñÑ‚ñÇ‚ñÅ
train_step_loss,‚ñà‚ñà‚ñà‚ñá‚ñá‚ñá‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
val_f1_anger,‚ñÅ‚ñá‚ñÖ‚ñà
val_f1_fear,‚ñÅ‚ñÜ‚ñÜ‚ñà
val_f1_joy,‚ñÅ‚ñÖ‚ñà‚ñà
val_f1_sadness,‚ñÅ‚ñÜ‚ñá‚ñà
val_f1_surprise,‚ñÅ‚ñÖ‚ñÖ‚ñà
val_loss,‚ñà‚ñÇ‚ñÅ‚ñÅ
val_macro_f1,‚ñÅ‚ñÜ‚ñÜ‚ñà

0,1
epoch,4.0
train_epoch_loss,0.35532
train_step_loss,0.35556
val_f1_anger,0.71545
val_f1_fear,0.83245
val_f1_joy,0.80166
val_f1_sadness,0.78884
val_f1_surprise,0.81591
val_loss,0.5707
val_macro_f1,0.79086


In [None]:
import pandas as pd
import os

# 1. Load some CSV from the input directory (example path)
input_path = "/kaggle/input/sub-files-dlgenai/submission_bbl_tuned.csv"
df_sub = pd.read_csv(input_path)

# Optional: quick sanity check
print(df_sub.head())
print("\nColumns :")
print(df_sub.columns)

# 2. Save it as the competition submission file in the working/output dir
df_sub.to_csv("submission.csv", index=False)

print("Done!")