{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.11.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"gpu","dataSources":[{"sourceId":115439,"databundleVersionId":13800781,"sourceType":"competition"}],"dockerImageVersionId":31193,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n# 5cfcb5e8ef8458be6e85d57c45c7573477e2ad6a\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\nimport warnings\nwarnings.filterwarnings(\"ignore\")\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true,"execution":{"iopub.status.busy":"2025-11-29T15:46:11.690202Z","iopub.execute_input":"2025-11-29T15:46:11.690758Z","iopub.status.idle":"2025-11-29T15:46:11.947963Z","shell.execute_reply.started":"2025-11-29T15:46:11.690736Z","shell.execute_reply":"2025-11-29T15:46:11.947356Z"}},"outputs":[],"execution_count":1},{"cell_type":"code","source":"df = pd.read_csv(\"/kaggle/input/2025-sep-dl-gen-ai-project/train.csv\") #training set\ndt = pd.read_csv(\"/kaggle/input/2025-sep-dl-gen-ai-project/test.csv\") #test set","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-11-29T15:46:15.098527Z","iopub.execute_input":"2025-11-29T15:46:15.099350Z","iopub.status.idle":"2025-11-29T15:46:15.142927Z","shell.execute_reply.started":"2025-11-29T15:46:15.099325Z","shell.execute_reply":"2025-11-29T15:46:15.142413Z"}},"outputs":[],"execution_count":2},{"cell_type":"code","source":"from sklearn.model_selection import train_test_split\n\nlabel_cols = ['anger','fear','joy','sadness','surprise']\n\nxtrain, xval, ytrain, yval = train_test_split(\n df['text'],\n df[label_cols].values,\n test_size=0.2,\n random_state=42,\n shuffle=True\n)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-11-29T15:46:17.795665Z","iopub.execute_input":"2025-11-29T15:46:17.796317Z","iopub.status.idle":"2025-11-29T15:46:18.381338Z","shell.execute_reply.started":"2025-11-29T15:46:17.796293Z","shell.execute_reply":"2025-11-29T15:46:18.380475Z"}},"outputs":[],"execution_count":3},{"cell_type":"code","source":"# from kaggle_secrets import UserSecretsClient\n# user_secrets = UserSecretsClient()\n# wdb_t = user_secrets.get_secret(\"WB_TOKEN\")\n# import wandb\n# wandb.login(key=wdb_t)\n# # wandb.init(project=\"22f3001086-t32025\", name = \"BERT+Classifier 
head\")","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-11-29T15:46:23.042168Z","iopub.execute_input":"2025-11-29T15:46:23.042858Z","iopub.status.idle":"2025-11-29T15:46:33.137045Z","shell.execute_reply.started":"2025-11-29T15:46:23.042832Z","shell.execute_reply":"2025-11-29T15:46:33.136427Z"}},"outputs":[{"name":"stderr","text":"\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m If you're specifying your api key in code, ensure this code is not shared publicly.\n\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.\n\u001b[34m\u001b[1mwandb\u001b[0m: No netrc file found, creating one.\n\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mvaishnavib\u001b[0m (\u001b[33mvaishnavib-iitm-jntuh-\u001b[0m) to \u001b[32mhttps://api.wandb.ai\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n","output_type":"stream"},{"execution_count":4,"output_type":"execute_result","data":{"text/plain":"True"},"metadata":{}}],"execution_count":4},{"cell_type":"code","source":"# !pip install nlpaug","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-11-29T15:46:47.972454Z","iopub.execute_input":"2025-11-29T15:46:47.973200Z","iopub.status.idle":"2025-11-29T15:46:52.889050Z","shell.execute_reply.started":"2025-11-29T15:46:47.973175Z","shell.execute_reply":"2025-11-29T15:46:52.888303Z"}},"outputs":[{"name":"stdout","text":"Collecting nlpaug\n Downloading nlpaug-1.1.11-py3-none-any.whl.metadata (14 kB)\nRequirement already satisfied: numpy>=1.16.2 in /usr/local/lib/python3.11/dist-packages (from nlpaug) (1.26.4)\nRequirement already satisfied: pandas>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from nlpaug) (2.2.3)\nRequirement already satisfied: requests>=2.22.0 in /usr/local/lib/python3.11/dist-packages (from nlpaug) (2.32.5)\nRequirement already satisfied: gdown>=4.0.0 in /usr/local/lib/python3.11/dist-packages (from nlpaug) (5.2.0)\nRequirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.11/dist-packages (from gdown>=4.0.0->nlpaug) (4.13.4)\nRequirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from gdown>=4.0.0->nlpaug) (3.20.0)\nRequirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from gdown>=4.0.0->nlpaug) (4.67.1)\nRequirement already satisfied: mkl_fft in /usr/local/lib/python3.11/dist-packages (from numpy>=1.16.2->nlpaug) (1.3.8)\nRequirement already satisfied: mkl_random in /usr/local/lib/python3.11/dist-packages (from numpy>=1.16.2->nlpaug) (1.2.4)\nRequirement already satisfied: mkl_umath in /usr/local/lib/python3.11/dist-packages (from numpy>=1.16.2->nlpaug) (0.1.1)\nRequirement already satisfied: mkl in /usr/local/lib/python3.11/dist-packages (from numpy>=1.16.2->nlpaug) (2025.3.0)\nRequirement already satisfied: tbb4py in /usr/local/lib/python3.11/dist-packages (from numpy>=1.16.2->nlpaug) (2022.3.0)\nRequirement already satisfied: mkl-service in /usr/local/lib/python3.11/dist-packages (from numpy>=1.16.2->nlpaug) (2.4.1)\nRequirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.2.0->nlpaug) (2.9.0.post0)\nRequirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.2.0->nlpaug) (2025.2)\nRequirement already satisfied: tzdata>=2022.7 
in /usr/local/lib/python3.11/dist-packages (from pandas>=1.2.0->nlpaug) (2025.2)\nRequirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.22.0->nlpaug) (3.4.4)\nRequirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests>=2.22.0->nlpaug) (3.11)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.22.0->nlpaug) (2.5.0)\nRequirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests>=2.22.0->nlpaug) (2025.10.5)\nRequirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas>=1.2.0->nlpaug) (1.17.0)\nRequirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.11/dist-packages (from beautifulsoup4->gdown>=4.0.0->nlpaug) (2.7)\nRequirement already satisfied: typing-extensions>=4.0.0 in /usr/local/lib/python3.11/dist-packages (from beautifulsoup4->gdown>=4.0.0->nlpaug) (4.15.0)\nRequirement already satisfied: onemkl-license==2025.3.0 in /usr/local/lib/python3.11/dist-packages (from mkl->numpy>=1.16.2->nlpaug) (2025.3.0)\nRequirement already satisfied: intel-openmp<2026,>=2024 in /usr/local/lib/python3.11/dist-packages (from mkl->numpy>=1.16.2->nlpaug) (2024.2.0)\nRequirement already satisfied: tbb==2022.* in /usr/local/lib/python3.11/dist-packages (from mkl->numpy>=1.16.2->nlpaug) (2022.3.0)\nRequirement already satisfied: tcmlib==1.* in /usr/local/lib/python3.11/dist-packages (from tbb==2022.*->mkl->numpy>=1.16.2->nlpaug) (1.4.0)\nRequirement already satisfied: intel-cmplr-lib-rt in /usr/local/lib/python3.11/dist-packages (from mkl_umath->numpy>=1.16.2->nlpaug) (2024.2.0)\nRequirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.11/dist-packages (from requests[socks]->gdown>=4.0.0->nlpaug) (1.7.1)\nRequirement already satisfied: intel-cmplr-lib-ur==2024.2.0 in /usr/local/lib/python3.11/dist-packages (from intel-openmp<2026,>=2024->mkl->numpy>=1.16.2->nlpaug) (2024.2.0)\nDownloading nlpaug-1.1.11-py3-none-any.whl (410 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.5/410.5 kB\u001b[0m \u001b[31m13.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hInstalling collected packages: nlpaug\nSuccessfully installed nlpaug-1.1.11\n","output_type":"stream"}],"execution_count":5},{"cell_type":"code","source":"# # --- SETUP AND IMPORTS ---\n# import torch\n# import torch.nn as nn\n# from torch.utils.data import Dataset, DataLoader\n# from sklearn.metrics import f1_score\n# from transformers import AutoTokenizer, AutoModel, get_linear_schedule_with_warmup\n# from torch.optim import AdamW\n# from tqdm.auto import tqdm\n# import nlpaug.augmenter.word as naw\n# import wandb\n\n# DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n# print(f\"Using device: {DEVICE}\")\n\n# CONFIG = {\n# 'MODEL_NAME': 'microsoft/deberta-v3-base', # DeBERTa\n# 'LEARNING_RATE': 2e-5,\n# 'BATCH_SIZE': 32,\n# 'EPOCHS': 6,\n# 'MAX_LEN': 128,\n# 'RANDOM_SEED': 42,\n# 'DROPOUT': 0.1,\n# 'WEIGHT_DECAY': 0.01,\n# 'WARMUP_RATIO': 0.1,\n# 'OUTPUT_DIM': 5\n# }\n\n# torch.manual_seed(CONFIG['RANDOM_SEED'])\n# np.random.seed(CONFIG['RANDOM_SEED'])\n\n# # --- NLP AUGMENTER (same as RoBERTa) ---\n# import nltk\n# nltk.download('averaged_perceptron_tagger_eng')\n# syn_aug = naw.SynonymAug(aug_max=1, stopwords=['i', 'am', 'the', 'a', 'to', 'is'])\n\n# def augment_sample(row, 
aug_count=1):\n# new_rows = []\n# text = row['text']\n# for _ in range(aug_count):\n# aug_text = syn_aug.augment(text)[0]\n# new_row = row.copy()\n# new_row['text'] = aug_text\n# new_rows.append(new_row)\n# return pd.DataFrame(new_rows)\n\n# # Build full train/val dataframes\n# df_full = df.copy()\n# df_train, df_val = train_test_split(\n# df_full,\n# test_size=0.2,\n# random_state=CONFIG['RANDOM_SEED'],\n# shuffle=True\n# )\n\n# print(f\"Initial Training Samples: {len(df_train)}\")\n\n# # --- TARGETED AUGMENTATION FOR ANGER & JOY ---\n# anger_samples = df_train[df_train['anger'] == 1]\n# joy_samples = df_train[df_train['joy'] == 1]\n\n# print(f\"Initial Anger Samples: {len(anger_samples)}\")\n# print(f\"Initial Joy Samples: {len(joy_samples)}\")\n\n# augmented_parts = []\n\n# # anger: 2x augmentation\n# for _, row in anger_samples.iterrows():\n# augmented_parts.append(augment_sample(row, aug_count=2))\n\n# # joy: 1x augmentation\n# for _, row in joy_samples.iterrows():\n# augmented_parts.append(augment_sample(row, aug_count=1))\n\n# if augmented_parts:\n# df_aug = pd.concat(augmented_parts, ignore_index=True)\n# df_train_aug = pd.concat([df_train, df_aug], ignore_index=True)\n# else:\n# df_train_aug = df_train.copy()\n\n# print(f\"Training samples after augmentation: {len(df_train_aug)}\")\n\n# # --- POSITIVE CLASS WEIGHTS FOR BCE ---\n# CLASS_COUNTS = {\n# 'anger': 808,\n# 'fear': 3860,\n# 'joy': 1660,\n# 'sadness': 2171,\n# 'surprise': 1999\n# }\n\n# N_TRAIN = len(df_train_aug)\n# pos_weight_list = []\n# for label in label_cols:\n# total_count_i = CLASS_COUNTS[label]\n# count_i = max(1, int(total_count_i * 0.8)) # same heuristic as RoBERTa script\n# weight = (N_TRAIN - count_i) / count_i\n# pos_weight_list.append(weight)\n\n# POS_WEIGHTS = torch.tensor(pos_weight_list, dtype=torch.float).to(DEVICE)\n# print(\"pos_weights:\", POS_WEIGHTS.tolist())","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-11-29T15:52:43.417561Z","iopub.execute_input":"2025-11-29T15:52:43.418196Z","iopub.status.idle":"2025-11-29T15:52:46.673150Z","shell.execute_reply.started":"2025-11-29T15:52:43.418171Z","shell.execute_reply":"2025-11-29T15:52:46.672415Z"}},"outputs":[{"name":"stdout","text":"Using device: cuda\nInitial Training Samples: 5461\nInitial Anger Samples: 647\nInitial Joy Samples: 1339\n","output_type":"stream"},{"name":"stderr","text":"[nltk_data] Downloading package averaged_perceptron_tagger_eng to\n[nltk_data] /usr/share/nltk_data...\n[nltk_data] Package averaged_perceptron_tagger_eng is already up-to-\n[nltk_data] date!\n","output_type":"stream"},{"name":"stdout","text":"Training samples after augmentation: 8094\npos_weights: [11.529411315917969, 1.6211140155792236, 5.094879627227783, 3.662442445755005, 4.06191349029541]\n","output_type":"stream"}],"execution_count":7},{"cell_type":"markdown","source":"Training samples after augmentation: 8094\npos_weights: [11.529411315917969, 1.6211140155792236, 5.094879627227783, 3.662442445755005, 4.06191349029541]","metadata":{}},{"cell_type":"code","source":"# # --- DATASET ---\n# class EmotionDataset(Dataset):\n# def __init__(self, df, tokenizer, max_len):\n# self.texts = df['text'].values\n# self.labels = df[label_cols].values\n# self.tokenizer = tokenizer\n# self.max_len = max_len\n\n# def __len__(self):\n# return len(self.texts)\n\n# def __getitem__(self, index):\n# text = str(self.texts[index])\n# labels = self.labels[index]\n\n# enc = self.tokenizer.encode_plus(\n# text,\n# add_special_tokens=True,\n# max_length=self.max_len,\n# 
padding='max_length',\n# truncation=True,\n# return_attention_mask=True,\n# return_tensors='pt',\n# )\n\n# return {\n# 'input_ids': enc['input_ids'].flatten(),\n# 'attention_mask': enc['attention_mask'].flatten(),\n# 'labels': torch.tensor(labels, dtype=torch.float)\n# }\n\n# tokenizer = AutoTokenizer.from_pretrained(CONFIG['MODEL_NAME'])\n\n# train_ds = EmotionDataset(df_train_aug, tokenizer, CONFIG['MAX_LEN'])\n# val_ds = EmotionDataset(df_val, tokenizer, CONFIG['MAX_LEN'])\n\n# train_loader = DataLoader(train_ds, batch_size=CONFIG['BATCH_SIZE'], shuffle=True, num_workers=2)\n# val_loader = DataLoader(val_ds, batch_size=CONFIG['BATCH_SIZE'], shuffle=False, num_workers=2)\n\n# # --- DEBERTA CLASSIFIER ---\n# class DebertaClassifier(nn.Module):\n# def __init__(self, n_classes, model_name, dropout):\n# super(DebertaClassifier, self).__init__()\n# self.backbone = AutoModel.from_pretrained(model_name)\n# self.drop = nn.Dropout(p=dropout)\n# self.classifier = nn.Linear(self.backbone.config.hidden_size, n_classes)\n\n# def forward(self, input_ids, attention_mask):\n# out = self.backbone(input_ids=input_ids, attention_mask=attention_mask)\n# pooled = out.last_hidden_state[:, 0, :]\n# pooled = self.drop(pooled)\n# logits = self.classifier(pooled)\n# return logits\n\n# # --- THRESHOLD UTILITIES ---\n# def find_optimal_thresholds(y_true, y_probs, label_cols, num_thresholds=100):\n# optimal_thresholds = {}\n# for i, label in enumerate(label_cols):\n# best_f1, best_t = 0.0, 0.5\n# for t in np.linspace(0.0, 1.0, num_thresholds):\n# y_pred_l = (y_probs[:, i] > t).astype(int)\n# f1 = f1_score(y_true[:, i], y_pred_l, zero_division=0)\n# if f1 > best_f1:\n# best_f1, best_t = f1, t\n# optimal_thresholds[label] = best_t\n# print(f\" > Optimal threshold for {label}: {best_t:.4f} (Label F1: {best_f1:.4f})\")\n# return optimal_thresholds\n\n# def calculate_macro_f1(y_true, y_probs, optimal_thresholds, label_cols):\n# y_pred = np.zeros_like(y_true)\n# for i, label in enumerate(label_cols):\n# t = optimal_thresholds[label]\n# y_pred[:, i] = (y_probs[:, i] > t).astype(int)\n# return f1_score(y_true, y_pred, average='macro', zero_division=0)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-11-29T15:54:06.363823Z","iopub.execute_input":"2025-11-29T15:54:06.364392Z","iopub.status.idle":"2025-11-29T15:54:07.353163Z","shell.execute_reply.started":"2025-11-29T15:54:06.364368Z","shell.execute_reply":"2025-11-29T15:54:07.352293Z"}},"outputs":[],"execution_count":9},{"cell_type":"code","source":"# def train_model():\n# LABEL_COLS = label_cols\n# wandb.init(project=\"22f3001086-t32025\", name = \"Finetuned DeBERTa\", config=CONFIG)\n# cfg = wandb.config\n\n# model = DebertaClassifier(\n# n_classes=cfg.OUTPUT_DIM,\n# model_name=cfg.MODEL_NAME,\n# dropout=cfg.DROPOUT\n# ).to(DEVICE)\n\n# loss_fn = nn.BCEWithLogitsLoss(pos_weight=POS_WEIGHTS).to(DEVICE)\n\n# optimizer = AdamW(\n# model.parameters(),\n# lr=cfg.LEARNING_RATE,\n# weight_decay=cfg.WEIGHT_DECAY\n# )\n\n# total_steps = len(train_loader) * cfg.EPOCHS\n# warmup_steps = int(total_steps * cfg.WARMUP_RATIO)\n# scheduler = get_linear_schedule_with_warmup(\n# optimizer,\n# num_warmup_steps=warmup_steps,\n# num_training_steps=total_steps\n# )\n\n# best_macro_f1 = -1.0\n\n# for epoch in range(cfg.EPOCHS):\n# # TRAIN\n# model.train()\n# total_loss = 0.0\n# for batch in tqdm(train_loader, desc=f\"Epoch {epoch+1} Training\", leave=False):\n# optimizer.zero_grad()\n# input_ids = batch['input_ids'].to(DEVICE)\n# attention_mask = 
batch['attention_mask'].to(DEVICE)\n# labels = batch['labels'].to(DEVICE)\n\n# logits = model(input_ids=input_ids, attention_mask=attention_mask)\n# loss = loss_fn(logits, labels)\n# total_loss += loss.item()\n\n# loss.backward()\n# nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)\n# optimizer.step()\n# scheduler.step()\n\n# avg_train_loss = total_loss / len(train_loader)\n\n# # VALIDATION\n# model.eval()\n# all_labels, all_probs = [], []\n# with torch.no_grad():\n# for batch in tqdm(val_loader, desc=\"Validation\", leave=False):\n# input_ids = batch['input_ids'].to(DEVICE)\n# attention_mask = batch['attention_mask'].to(DEVICE)\n# labels = batch['labels'].cpu().numpy()\n\n# logits = model(input_ids=input_ids, attention_mask=attention_mask)\n# probs = torch.sigmoid(logits).cpu().numpy()\n\n# all_labels.append(labels)\n# all_probs.append(probs)\n\n# y_true = np.vstack(all_labels)\n# y_probs = np.vstack(all_probs)\n\n# print(f\"\\n--- Finding Optimal Thresholds for Epoch {epoch+1} ---\")\n# optimal_thresholds = find_optimal_thresholds(y_true, y_probs, LABEL_COLS)\n# macro_f1_val = calculate_macro_f1(y_true, y_probs, optimal_thresholds, LABEL_COLS)\n\n# print(f\"\\n--- Epoch {epoch+1} Results ---\")\n# print(f\"Train Loss: {avg_train_loss:.4f}\")\n# print(f\"Validation Macro F1 (Optimal Thresholds): {macro_f1_val:.4f}\")\n\n# wandb.log({\n# \"epoch\": epoch + 1,\n# \"train_loss\": avg_train_loss,\n# \"val_macro_f1\": macro_f1_val,\n# \"learning_rate\": optimizer.param_groups[0][\"lr\"],\n# **{f\"T_{k}\": v for k, v in optimal_thresholds.items()}\n# })\n\n# if macro_f1_val > best_macro_f1:\n# best_macro_f1 = macro_f1_val\n# print(f\"New best model! Saving with Macro F1: {best_macro_f1:.4f}\")\n# torch.save(model.state_dict(), f\"best_deberta_{best_macro_f1:.4f}.pt\")\n# np.save(\"optimal_thresholds_deberta.npy\", optimal_thresholds)\n\n# wandb.finish()\n\n# train_model()\n# print(\"\\nTraining complete.\")","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-11-29T15:57:13.693197Z","iopub.execute_input":"2025-11-29T15:57:13.693509Z","iopub.status.idle":"2025-11-29T16:10:49.726763Z","shell.execute_reply.started":"2025-11-29T15:57:13.693476Z","shell.execute_reply":"2025-11-29T16:10:49.725860Z"}},"outputs":[{"output_type":"display_data","data":{"text/plain":"","text/html":"Finishing previous runs because reinit is set to 'default'."},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":""},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":" View run Finetuned DeBERTa at: https://wandb.ai/vaishnavib-iitm-jntuh-/22f3001086-t32025/runs/6wqjilmc
View project at: https://wandb.ai/vaishnavib-iitm-jntuh-/22f3001086-t32025
Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":"Find logs at: ./wandb/run-20251129_155414-6wqjilmc/logs"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":"Tracking run with wandb version 0.21.0"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":"Run data is saved locally in /kaggle/working/wandb/run-20251129_155713-4vxh17pt"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":"Syncing run Finetuned DeBERTa to Weights & Biases (docs)
"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":" View project at https://wandb.ai/vaishnavib-iitm-jntuh-/22f3001086-t32025"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":" View run at https://wandb.ai/vaishnavib-iitm-jntuh-/22f3001086-t32025/runs/4vxh17pt"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Epoch 1 Training: 0%| | 0/253 [00:00 Optimal threshold for anger: 0.9091 (Label F1: 0.7173)\n > Optimal threshold for fear: 0.5253 (Label F1: 0.8120)\n > Optimal threshold for joy: 0.8485 (Label F1: 0.7515)\n > Optimal threshold for sadness: 0.6667 (Label F1: 0.7670)\n > Optimal threshold for surprise: 0.7879 (Label F1: 0.7581)\n\n--- Epoch 1 Results ---\nTrain Loss: 1.0864\nValidation Macro F1 (Optimal Thresholds): 0.7612\nNew best model! Saving with Macro F1: 0.7612\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"Epoch 2 Training: 0%| | 0/253 [00:00 Optimal threshold for anger: 0.8788 (Label F1: 0.7601)\n > Optimal threshold for fear: 0.7677 (Label F1: 0.8465)\n > Optimal threshold for joy: 0.5051 (Label F1: 0.8051)\n > Optimal threshold for sadness: 0.8182 (Label F1: 0.8000)\n > Optimal threshold for surprise: 0.6566 (Label F1: 0.8177)\n\n--- Epoch 2 Results ---\nTrain Loss: 0.5477\nValidation Macro F1 (Optimal Thresholds): 0.8059\nNew best model! Saving with Macro F1: 0.8059\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"Epoch 3 Training: 0%| | 0/253 [00:00 Optimal threshold for anger: 0.6768 (Label F1: 0.7697)\n > Optimal threshold for fear: 0.6869 (Label F1: 0.8701)\n > Optimal threshold for joy: 0.9091 (Label F1: 0.8406)\n > Optimal threshold for sadness: 0.8788 (Label F1: 0.8331)\n > Optimal threshold for surprise: 0.5556 (Label F1: 0.8418)\n\n--- Epoch 3 Results ---\nTrain Loss: 0.3380\nValidation Macro F1 (Optimal Thresholds): 0.8311\nNew best model! Saving with Macro F1: 0.8311\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"Epoch 4 Training: 0%| | 0/253 [00:00 Optimal threshold for anger: 0.7475 (Label F1: 0.8091)\n > Optimal threshold for fear: 0.6768 (Label F1: 0.8797)\n > Optimal threshold for joy: 0.9091 (Label F1: 0.8540)\n > Optimal threshold for sadness: 0.6465 (Label F1: 0.8521)\n > Optimal threshold for surprise: 0.9091 (Label F1: 0.8533)\n\n--- Epoch 4 Results ---\nTrain Loss: 0.2248\nValidation Macro F1 (Optimal Thresholds): 0.8496\nNew best model! Saving with Macro F1: 0.8496\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"Epoch 5 Training: 0%| | 0/253 [00:00 Optimal threshold for anger: 0.5758 (Label F1: 0.7962)\n > Optimal threshold for fear: 0.6263 (Label F1: 0.8817)\n > Optimal threshold for joy: 0.9192 (Label F1: 0.8668)\n > Optimal threshold for sadness: 0.7980 (Label F1: 0.8548)\n > Optimal threshold for surprise: 0.7576 (Label F1: 0.8622)\n\n--- Epoch 5 Results ---\nTrain Loss: 0.1617\nValidation Macro F1 (Optimal Thresholds): 0.8523\nNew best model! 
Saving with Macro F1: 0.8523\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"Epoch 6 Training: 0%| | 0/253 [00:00\nTraceback (most recent call last):\nException ignored in: File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1618, in __del__\n\n Traceback (most recent call last):\nself._shutdown_workers()\n File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1618, in __del__\n File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1601, in _shutdown_workers\nself._shutdown_workers()\n File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1601, in _shutdown_workers\nif w.is_alive():\n if w.is_alive():\n ^ ^^^^^^^^^^^^^^^^^^^^^^^\n\n File \"/usr/lib/python3.11/multiprocessing/process.py\", line 160, in is_alive\n assert self._parent_pid == os.getpid(), 'can only test a child process' File \"/usr/lib/python3.11/multiprocessing/process.py\", line 160, in is_alive\n\n assert self._parent_pid == os.getpid(), 'can only test a child process' \n ^ ^ ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n^^AssertionError: ^can only test a child process\n\nAssertionError: can only test a child processException ignored in: \nException ignored in: \nTraceback (most recent call last):\n File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1618, in __del__\n\n Traceback (most recent call last):\n File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1618, in __del__\nself._shutdown_workers() \nself._shutdown_workers() File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1601, in _shutdown_workers\n\n File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1601, in _shutdown_workers\n if w.is_alive():if w.is_alive():\n \n ^^^^^^^^^^^^^^^^^^^^^^^\n File \"/usr/lib/python3.11/multiprocessing/process.py\", line 160, in is_alive\n^ \nassert self._parent_pid == os.getpid(), 'can only test a child process' File \"/usr/lib/python3.11/multiprocessing/process.py\", line 160, in is_alive\n\n assert self._parent_pid == os.getpid(), 'can only test a child process'\n ^ ^ ^ ^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n^AssertionError^: can only test a child process^\n^Exception ignored in: \n^Traceback (most recent call last):\n^^ File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1618, in __del__\n^ ^^self._shutdown_workers()\nAssertionError\n: File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1601, in _shutdown_workers\ncan only test a child process\n if w.is_alive():Exception ignored in: \nTraceback (most recent call last):\n File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1618, in __del__\n self._shutdown_workers()\n\n File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1601, in _shutdown_workers\n if w.is_alive(): \n ^ ^^ ^^^^^^^^^^^^^^^^^^\n^^ File \"/usr/lib/python3.11/multiprocessing/process.py\", line 160, in is_alive\n^ assert self._parent_pid == os.getpid(), 'can only test a child process'\n File \"/usr/lib/python3.11/multiprocessing/process.py\", line 160, in is_alive\n\n assert self._parent_pid == os.getpid(), 'can only test a child process' \n ^ ^ ^ ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n^AssertionError^: ^can only test a child process\n^^Exception 
ignored in: \nAssertionError\n: Traceback (most recent call last):\n File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1618, in __del__\ncan only test a child process \nself._shutdown_workers()Exception ignored in: \n\nTraceback (most recent call last):\n File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1601, in _shutdown_workers\n File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1618, in __del__\n self._shutdown_workers()if w.is_alive():\n\n File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1601, in _shutdown_workers\n if w.is_alive(): \n ^^ ^ ^ ^ ^^^^^^^^^^^^^^^\n^ File \"/usr/lib/python3.11/multiprocessing/process.py\", line 160, in is_alive\n^ assert self._parent_pid == os.getpid(), 'can only test a child process'^\n^ \n File \"/usr/lib/python3.11/multiprocessing/process.py\", line 160, in is_alive\n assert self._parent_pid == os.getpid(), 'can only test a child process' \n ^^ ^ ^ ^ ^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n^^AssertionError^: \nAssertionErrorcan only test a child process: \ncan only test a child process\nException ignored in: \nTraceback (most recent call last):\n File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1618, in __del__\nException ignored in: self._shutdown_workers()\n\nTraceback (most recent call last):\n File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1618, in __del__\n File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1601, in _shutdown_workers\n if w.is_alive():self._shutdown_workers()\n\n File \"/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py\", line 1601, in _shutdown_workers\n if w.is_alive():\n ^ ^^^^^^^^^^^^^^^^^^^^^^\n^\n File \"/usr/lib/python3.11/multiprocessing/process.py\", line 160, in is_alive\n assert self._parent_pid == os.getpid(), 'can only test a child process' File \"/usr/lib/python3.11/multiprocessing/process.py\", line 160, in is_alive\n \nassert self._parent_pid == os.getpid(), 'can only test a child process' \n ^ ^^ ^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n^^AssertionError^: ^can only test a child process^\n^^^^^^^^\nAssertionError: can only test a child process\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"Validation: 0%| | 0/43 [00:00 Optimal threshold for anger: 0.8586 (Label F1: 0.7987)\n > Optimal threshold for fear: 0.6162 (Label F1: 0.8850)\n > Optimal threshold for joy: 0.8687 (Label F1: 0.8754)\n > Optimal threshold for sadness: 0.7778 (Label F1: 0.8552)\n > Optimal threshold for surprise: 0.8283 (Label F1: 0.8646)\n\n--- Epoch 6 Results ---\nTrain Loss: 0.1260\nValidation Macro F1 (Optimal Thresholds): 0.8558\nNew best model! Saving with Macro F1: 0.8558\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"","text/html":""},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":"

Run history:

T_anger        █▇▃▅▁▇
T_fear         ▁█▆▅▄▄
T_joy          ▇▁███▇
T_sadness      ▂▆█▁▆▅
T_surprise     ▆▃▁█▅▆
epoch          ▁▂▄▅▇█
learning_rate  █▇▅▄▂▁
train_loss     █▄▃▂▁▁
val_macro_f1   ▁▄▆███

Run summary:

T_anger        0.85859
T_fear         0.61616
T_joy          0.86869
T_sadness      0.77778
T_surprise     0.82828
epoch          6
learning_rate  0
train_loss     0.12599
val_macro_f1   0.85576

"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":" View run Finetuned DeBERTa at: https://wandb.ai/vaishnavib-iitm-jntuh-/22f3001086-t32025/runs/4vxh17pt
View project at: https://wandb.ai/vaishnavib-iitm-jntuh-/22f3001086-t32025
Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":"Find logs at: ./wandb/run-20251129_155713-4vxh17pt/logs"},"metadata":{}},{"name":"stdout","text":"\nTraining complete.\n","output_type":"stream"}],"execution_count":11},{"cell_type":"markdown","source":"--- Finding Optimal Thresholds for Epoch 1 ---\n > Optimal threshold for anger: 0.9091 (Label F1: 0.7173)\n > Optimal threshold for fear: 0.5253 (Label F1: 0.8120)\n > Optimal threshold for joy: 0.8485 (Label F1: 0.7515)\n > Optimal threshold for sadness: 0.6667 (Label F1: 0.7670)\n > Optimal threshold for surprise: 0.7879 (Label F1: 0.7581)\n\n--- Epoch 1 Results ---\nTrain Loss: 1.0864\nValidation Macro F1 (Optimal Thresholds): 0.7612\nNew best model! Saving with Macro F1: 0.7612\n\n--- Finding Optimal Thresholds for Epoch 2 ---\n > Optimal threshold for anger: 0.8788 (Label F1: 0.7601)\n > Optimal threshold for fear: 0.7677 (Label F1: 0.8465)\n > Optimal threshold for joy: 0.5051 (Label F1: 0.8051)\n > Optimal threshold for sadness: 0.8182 (Label F1: 0.8000)\n > Optimal threshold for surprise: 0.6566 (Label F1: 0.8177)\n\n--- Epoch 2 Results ---\nTrain Loss: 0.5477\nValidation Macro F1 (Optimal Thresholds): 0.8059\nNew best model! Saving with Macro F1: 0.8059\n\n--- Finding Optimal Thresholds for Epoch 3 ---\n > Optimal threshold for anger: 0.6768 (Label F1: 0.7697)\n > Optimal threshold for fear: 0.6869 (Label F1: 0.8701)\n > Optimal threshold for joy: 0.9091 (Label F1: 0.8406)\n > Optimal threshold for sadness: 0.8788 (Label F1: 0.8331)\n > Optimal threshold for surprise: 0.5556 (Label F1: 0.8418)\n\n--- Epoch 3 Results ---\nTrain Loss: 0.3380\nValidation Macro F1 (Optimal Thresholds): 0.8311\nNew best model! Saving with Macro F1: 0.8311\n\n--- Finding Optimal Thresholds for Epoch 4 ---\n > Optimal threshold for anger: 0.7475 (Label F1: 0.8091)\n > Optimal threshold for fear: 0.6768 (Label F1: 0.8797)\n > Optimal threshold for joy: 0.9091 (Label F1: 0.8540)\n > Optimal threshold for sadness: 0.6465 (Label F1: 0.8521)\n > Optimal threshold for surprise: 0.9091 (Label F1: 0.8533)\n\n--- Epoch 4 Results ---\nTrain Loss: 0.2248\nValidation Macro F1 (Optimal Thresholds): 0.8496\nNew best model! Saving with Macro F1: 0.8496\n\n--- Finding Optimal Thresholds for Epoch 5 ---\n > Optimal threshold for anger: 0.5758 (Label F1: 0.7962)\n > Optimal threshold for fear: 0.6263 (Label F1: 0.8817)\n > Optimal threshold for joy: 0.9192 (Label F1: 0.8668)\n > Optimal threshold for sadness: 0.7980 (Label F1: 0.8548)\n > Optimal threshold for surprise: 0.7576 (Label F1: 0.8622)\n\n--- Epoch 5 Results ---\nTrain Loss: 0.1617\nValidation Macro F1 (Optimal Thresholds): 0.8523\nNew best model! Saving with Macro F1: 0.8523\n\n--- Finding Optimal Thresholds for Epoch 6 ---\n > Optimal threshold for anger: 0.8586 (Label F1: 0.7987)\n > Optimal threshold for fear: 0.6162 (Label F1: 0.8850)\n > Optimal threshold for joy: 0.8687 (Label F1: 0.8754)\n > Optimal threshold for sadness: 0.7778 (Label F1: 0.8552)\n > Optimal threshold for surprise: 0.8283 (Label F1: 0.8646)\n\n--- Epoch 6 Results ---\nTrain Loss: 0.1260\nValidation Macro F1 (Optimal Thresholds): 0.8558\nNew best model! 
Saving with Macro F1: 0.8558","metadata":{}},{"cell_type":"code","source":"# import torch\n# import pandas as pd\n# import numpy as np\n# from torch.utils.data import DataLoader\n# from transformers import AutoTokenizer, AutoModel\n# import torch.nn as nn\n# from tqdm.auto import tqdm\n\n# CONFIG_INF = {\n# 'MODEL_NAME': 'microsoft/deberta-v3-base',\n# 'MAX_LEN': 128,\n# 'OUTPUT_DIM': 5\n# }\n# DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n# LABEL_COLS = ['anger','fear','joy','sadness','surprise']\n\n# class DebertaClassifier(nn.Module):\n# def __init__(self, n_classes, model_name, dropout):\n# super(DebertaClassifier, self).__init__()\n# self.backbone = AutoModel.from_pretrained(model_name)\n# self.drop = nn.Dropout(p=dropout)\n# self.classifier = nn.Linear(self.backbone.config.hidden_size, n_classes)\n\n# def forward(self, input_ids, attention_mask):\n# out = self.backbone(input_ids=input_ids, attention_mask=attention_mask)\n# pooled = out.last_hidden_state[:, 0, :]\n# pooled = self.drop(pooled)\n# logits = self.classifier(pooled)\n# return logits\n\n# class EmotionTestDataset(torch.utils.data.Dataset):\n# def __init__(self, df, tokenizer, max_len):\n# self.texts = df['text'].values\n# self.tokenizer = tokenizer\n# self.max_len = max_len\n\n# def __len__(self):\n# return len(self.texts)\n\n# def __getitem__(self, index):\n# text = str(self.texts[index])\n# enc = self.tokenizer.encode_plus(\n# text,\n# add_special_tokens=True,\n# max_length=self.max_len,\n# padding='max_length',\n# truncation=True,\n# return_attention_mask=True,\n# return_tensors='pt',\n# )\n# return {\n# 'input_ids': enc['input_ids'].flatten(),\n# 'attention_mask': enc['attention_mask'].flatten()\n# }\n\n# def predict_test_probs(model_path, batch_size=64):\n# print(f\"Loading tokenizer and model from {model_path}...\")\n# tokenizer = AutoTokenizer.from_pretrained(CONFIG_INF['MODEL_NAME'])\n\n# model = DebertaClassifier(\n# n_classes=CONFIG_INF['OUTPUT_DIM'],\n# model_name=CONFIG_INF['MODEL_NAME'],\n# dropout=0.1\n# )\n# model.load_state_dict(torch.load(model_path, map_location=DEVICE))\n# model.to(DEVICE)\n# model.eval()\n\n# df_test = dt.copy()\n# test_ds = EmotionTestDataset(df_test, tokenizer, CONFIG_INF['MAX_LEN'])\n# test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=2)\n\n# all_probs = []\n# with torch.no_grad():\n# for batch in tqdm(test_loader, desc='Generating Test Predictions', leave=False):\n# input_ids = batch['input_ids'].to(DEVICE)\n# attention_mask = batch['attention_mask'].to(DEVICE)\n# logits = model(input_ids=input_ids, attention_mask=attention_mask)\n# probs = torch.sigmoid(logits).cpu().numpy()\n# all_probs.append(probs)\n\n# test_probs = np.vstack(all_probs)\n# np.save(\"deberta_test_probs.npy\", test_probs)\n# print(f\"\\nPredictions saved to deberta_test_probs.npy. 
Shape: {test_probs.shape}\")\n# return test_probs, df_test\n\n# # use the best checkpoint name printed during training\n# BEST_MODEL_PATH = \"/kaggle/working/best_deberta_0.8558.pt\" \n# deberta_test_probs, df_test = predict_test_probs(BEST_MODEL_PATH)\n\n# try:\n# optimal_thresholds = np.load(\"/kaggle/working/optimal_thresholds_deberta.npy\", allow_pickle=True).item()\n# print(\"\\nLoaded optimal thresholds:\", optimal_thresholds)\n# except FileNotFoundError:\n# print(\"\\nWARNING: thresholds file not found, using 0.5.\")\n# optimal_thresholds = {k: 0.5 for k in LABEL_COLS}\n\n# df_submission = df_test[['id']].copy()\n# for i, label in enumerate(LABEL_COLS):\n# thr = optimal_thresholds.get(label, 0.5)\n# df_submission[label] = (deberta_test_probs[:, i] > thr).astype(int)\n\n# df_submission.to_csv(\"submission_deberta.csv\", index=False)\n# df_submission.head()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-11-29T16:16:55.251681Z","iopub.execute_input":"2025-11-29T16:16:55.252229Z","iopub.status.idle":"2025-11-29T16:17:06.158186Z","shell.execute_reply.started":"2025-11-29T16:16:55.252204Z","shell.execute_reply":"2025-11-29T16:17:06.157170Z"}},"outputs":[{"name":"stdout","text":"Loading tokenizer and model from /kaggle/working/best_deberta_0.8558.pt...\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"Generating Test Predictions: 0%| | 0/27 [00:00\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
     id  anger  fear  joy  sadness  surprise
0     0      1     1    0        1         0
1     1      0     0    0        0         0
2     2      1     1    0        0         0
3     3      0     1    0        0         0
4     4      0     1    0        0         1
\n"},"metadata":{}}],"execution_count":13},{"cell_type":"code","source":"# # --- DEBERTA INFERENCE: SELF-CONTAINED BLOCK ---\n\n# import numpy as np\n# import pandas as pd\n# import torch\n# import torch.nn as nn\n# from torch.utils.data import DataLoader, Dataset\n# from transformers import AutoTokenizer, AutoModel\n# from tqdm.auto import tqdm\n\n# # Paths you MUST set correctly:\n# BEST_MODEL_PATH = \"/kaggle/input/sub-files-dlgenai/best_deberta_0.8594.pt\" # update\n# THRESH_PATH = \"/kaggle/input/sub-files-dlgenai/optimal_thresholds_deberta.npy\" # update\n# TEST_PATH = \"/kaggle/input/2025-sep-dl-gen-ai-project/test.csv\" # competition test\n\n# LABEL_COLS = ['anger','fear','joy','sadness','surprise']\n\n# CONFIG_INF = {\n# 'MODEL_NAME': 'microsoft/deberta-v3-base',\n# 'MAX_LEN': 128,\n# 'OUTPUT_DIM': len(LABEL_COLS),\n# }\n# DEVICE = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n# print(\"Using device:\", DEVICE)\n\n# # ---- Model definition ----\n# class DebertaClassifier(nn.Module):\n# def __init__(self, n_classes, model_name, dropout):\n# super(DebertaClassifier, self).__init__()\n# self.backbone = AutoModel.from_pretrained(model_name)\n# self.drop = nn.Dropout(p=dropout)\n# self.classifier = nn.Linear(self.backbone.config.hidden_size, n_classes)\n\n# def forward(self, input_ids, attention_mask):\n# out = self.backbone(input_ids=input_ids, attention_mask=attention_mask)\n# pooled = out.last_hidden_state[:, 0, :]\n# pooled = self.drop(pooled)\n# logits = self.classifier(pooled)\n# return logits\n\n# # ---- Dataset for test ----\n# class EmotionTestDataset(Dataset):\n# def __init__(self, texts, tokenizer, max_len):\n# self.texts = list(texts)\n# self.tokenizer = tokenizer\n# self.max_len = max_len\n\n# def __len__(self):\n# return len(self.texts)\n\n# def __getitem__(self, index):\n# text = str(self.texts[index])\n# enc = self.tokenizer.encode_plus(\n# text,\n# add_special_tokens=True,\n# max_length=self.max_len,\n# padding=\"max_length\",\n# truncation=True,\n# return_attention_mask=True,\n# return_tensors=\"pt\",\n# )\n# return {\n# \"input_ids\": enc[\"input_ids\"].squeeze(0),\n# \"attention_mask\": enc[\"attention_mask\"].squeeze(0),\n# }\n\n# # ---- Prediction helper ----\n# def predict_test_probs(model_path, test_path, batch_size=64):\n# print(f\"Loading test data from {test_path}\")\n# df_test = pd.read_csv(test_path)\n\n# tokenizer = AutoTokenizer.from_pretrained(CONFIG_INF[\"MODEL_NAME\"])\n\n# model = DebertaClassifier(\n# n_classes=CONFIG_INF[\"OUTPUT_DIM\"],\n# model_name=CONFIG_INF[\"MODEL_NAME\"],\n# dropout=0.1\n# )\n# model.load_state_dict(torch.load(model_path, map_location=DEVICE))\n# model.to(DEVICE)\n# model.eval()\n\n# test_ds = EmotionTestDataset(df_test[\"text\"], tokenizer, CONFIG_INF[\"MAX_LEN\"])\n# test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=2)\n\n# all_probs = []\n# with torch.no_grad():\n# for batch in tqdm(test_loader, desc=\"DeBERTa Test Predictions\", leave=False):\n# input_ids = batch[\"input_ids\"].to(DEVICE)\n# attention_mask = batch[\"attention_mask\"].to(DEVICE)\n# logits = model(input_ids=input_ids, attention_mask=attention_mask)\n# probs = torch.sigmoid(logits).cpu().numpy()\n# all_probs.append(probs)\n\n# test_probs = np.vstack(all_probs)\n# print(\"Probabilities shape:\", test_probs.shape)\n# return test_probs, df_test\n\n# # ---- Run inference ----\n# deberta_test_probs, df_test = predict_test_probs(BEST_MODEL_PATH, TEST_PATH)\n\n# # Load label-wise optimal thresholds\n# try:\n# 
optimal_thresholds = np.load(THRESH_PATH, allow_pickle=True).item()\n# print(\"Loaded optimal thresholds:\", optimal_thresholds)\n# except FileNotFoundError:\n# print(\"WARNING: thresholds file not found, using 0.5 for all labels.\")\n# optimal_thresholds = {k: 0.5 for k in LABEL_COLS}\n\n# # Build submission\n# df_submission = df_test[[\"id\"]].copy()\n# for i, label in enumerate(LABEL_COLS):\n# thr = optimal_thresholds.get(label, 0.5)\n# df_submission[label] = (deberta_test_probs[:, i] > thr).astype(int)\n\n# df_submission.to_csv(\"submission_deberta.csv\", index=False)\n# df_submission.head()","metadata":{"trusted":true},"outputs":[],"execution_count":null}]}