mamoeed
/

apebench-pde-simulation-data

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "d865b0fe-d47f-424c-9f19-51647414f06f",
+   "metadata": {},
+   "source": [
+    "### Notebook for generating datasets \n",
+    "\n",
+    "Create datasets using apebench (JAX) to be used by torch models for training.\n",
+    "Agree on a fixed naming convention, e.g. `<scenario>_<split>_data_<grid>_<#ICs>ic_<#steps>t.npy`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "472f2bd0-64b3-44f8-b930-03e64a776144",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/moeed/miniconda3/envs/apebench/lib/python3.12/site-packages/trainax/_general_trainer.py:7: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from tqdm.autonotebook import tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "import apebench\n",
+    "import numpy as np\n",
+    "import jax.numpy as jnp\n",
+    "import seaborn as sns\n",
+    "from scipy import stats\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "2197b3c3-093d-46cc-92f1-fd0d5b144b68",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# data generation configs\n",
+    "spatial_grid_points = 128\n",
+    "spatial_dims = 2\n",
+    "num_train_samples = 100\n",
+    "train_time_steps = 50\n",
+    "num_test_samples = 40\n",
+    "test_time_steps = 200\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e95ac47f-49ef-4c9e-aaf4-26744007d4ba",
+   "metadata": {},
+   "source": [
+    "Data from advection scenario"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1aaa1495-dc7e-4a32-b046-65f8ec1d62dc",
+   "metadata": {},
+   "source": [
+    "Advection hard for 2D"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "d7aef0d2-f151-4ef1-a591-829acfb6d246",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# more difficult advection equation\n",
+    "advection2D_hard_scenario = apebench.scenarios.difficulty.Advection(\n",
+    "    # optimizer / reporting setup\n",
+    "    optim_config=\"adam;10_000;constant;1e-4\",\n",
+    "    report_metrics=\"mean_nRMSE\",  # alternative: \"mean_nRMSE,mean_fourier_nRMSE;0;5;0,mean_H1_nRMSE\"\n",
+    "    # spatial discretization\n",
+    "    num_points=spatial_grid_points,  # discretized points on the spatial grid\n",
+    "    num_spatial_dims=spatial_dims,  # spatial dimensions\n",
+    "    # train set\n",
+    "    num_train_samples=num_train_samples,  # number of ICs for train set\n",
+    "    train_temporal_horizon=train_time_steps,  # consecutive time steps the simulator is run\n",
+    "    train_seed=0,\n",
+    "    # test set\n",
+    "    num_test_samples=num_test_samples,  # number of ICs for test set\n",
+    "    test_temporal_horizon=test_time_steps,  # consecutive time steps the simulator is run for test set\n",
+    "    test_seed=773,\n",
+    "    # advection-specific setting\n",
+    "    advection_gamma=-10.5,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "92855a20-0981-4d91-be73-86b95b83cdf5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "train data shape (100, 51, 1, 128, 128)\n",
+      "test ICs shape (40, 1, 128, 128)\n",
+      "complete test data shape (40, 201, 1, 128, 128)\n",
+      "val data (10, 51, 1, 128, 128)\n",
+      "test data after removing val (30, 201, 1, 128, 128)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Advection (hard): fetch train/test trajectories and hold out part of the test set for validation.\n",
+    "num_val_samples = 10  # leading test ICs that are held out as the validation set\n",
+    "\n",
+    "train_data = advection2D_hard_scenario.get_train_data()\n",
+    "print('train data shape', train_data.shape)\n",
+    "\n",
+    "test_ic_set = advection2D_hard_scenario.get_test_ic_set()\n",
+    "print('test ICs shape', test_ic_set.shape)\n",
+    "\n",
+    "full_test_data = advection2D_hard_scenario.get_test_data()\n",
+    "print('complete test data shape', full_test_data.shape)\n",
+    "\n",
+    "# validation uses the training horizon; +1 keeps the initial condition stored as frame 0\n",
+    "val_data = full_test_data[:num_val_samples, : train_time_steps + 1]\n",
+    "print('val data', val_data.shape)\n",
+    "\n",
+    "# the remaining ICs keep the full test horizon\n",
+    "test_data = full_test_data[num_val_samples:]\n",
+    "print('test data after removing val', test_data.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "b5bf0add-1b5d-48fe-8463-5477d7fda33a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# jnp.save(\"advection2D_hard_train_data_128x128_100ic_50t.npy\", train_data)\n",
+    "# jnp.save(\"advection2D_hard_val_data_128x128_10ic_50t.npy\", val_data)\n",
+    "# jnp.save(\"advection2D_hard_test_data_128x128_30ic_200t.npy\", test_data)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3562aa84-7db2-4e94-bfb6-0bd99b83db67",
+   "metadata": {},
+   "source": [
+    "Data from Kuramoto-Sivashinsky scenario"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "86b4595a-1371-4f6f-8faa-7bfe2d649276",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kuramoto2D_scenario = apebench.scenarios.difficulty.KuramotoSivashinsky(\n",
+    "    # optimizer / reporting setup\n",
+    "    optim_config=\"adam;10_000;constant;1e-4\",\n",
+    "    report_metrics=\"mean_nRMSE\",  # alternative: \"mean_nRMSE,mean_fourier_nRMSE;0;5;0,mean_H1_nRMSE\"\n",
+    "    # spatial discretization\n",
+    "    num_points=spatial_grid_points,  # discretized points on the spatial grid\n",
+    "    num_spatial_dims=spatial_dims,  # spatial dimensions\n",
+    "    # train set\n",
+    "    num_train_samples=num_train_samples,  # number of ICs for train set\n",
+    "    train_temporal_horizon=train_time_steps,  # consecutive time steps the simulator is run\n",
+    "    train_seed=0,\n",
+    "    # test set\n",
+    "    num_test_samples=num_test_samples,  # number of ICs for test set\n",
+    "    test_temporal_horizon=test_time_steps,  # consecutive time steps the simulator is run for test set\n",
+    "    test_seed=773,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "dccb6e26-4835-4d76-ab38-c06995cf7e51",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "train data shape (100, 51, 1, 128, 128)\n",
+      "test ICs shape (40, 1, 128, 128)\n",
+      "complete test data shape (40, 201, 1, 128, 128)\n",
+      "val data (10, 51, 1, 128, 128)\n",
+      "test data after removing val (30, 201, 1, 128, 128)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Kuramoto-Sivashinsky: fetch train/test trajectories and hold out part of the test set for validation.\n",
+    "num_val_samples = 10  # leading test ICs that are held out as the validation set\n",
+    "\n",
+    "train_data = kuramoto2D_scenario.get_train_data()\n",
+    "print('train data shape', train_data.shape)\n",
+    "\n",
+    "test_ic_set = kuramoto2D_scenario.get_test_ic_set()\n",
+    "print('test ICs shape', test_ic_set.shape)\n",
+    "\n",
+    "full_test_data = kuramoto2D_scenario.get_test_data()\n",
+    "print('complete test data shape', full_test_data.shape)\n",
+    "\n",
+    "# validation uses the training horizon; +1 keeps the initial condition stored as frame 0\n",
+    "val_data = full_test_data[:num_val_samples, : train_time_steps + 1]\n",
+    "print('val data', val_data.shape)\n",
+    "\n",
+    "# the remaining ICs keep the full test horizon\n",
+    "test_data = full_test_data[num_val_samples:]\n",
+    "print('test data after removing val', test_data.shape)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "0233bd94-e51d-4b89-89ad-0f83ccaec2ef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# jnp.save(\"kuramoto2D_train_data_128x128_100ic_50t.npy\", train_data)\n",
+    "# jnp.save(\"kuramoto2D_val_data_128x128_10ic_50t.npy\", val_data)\n",
+    "# jnp.save(\"kuramoto2D_test_data_128x128_30ic_200t.npy\", test_data)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1e51cde4-241b-4c79-8c11-791e375687b4",
+   "metadata": {},
+   "source": [
+    "Data from advection diffusion scenario"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "bb39fe39-e208-4fa8-8688-53035c9aecfa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "advdiff2D_scenario = apebench.scenarios.difficulty.AdvectionDiffusion(\n",
+    "    # optimizer / reporting setup\n",
+    "    optim_config=\"adam;10_000;constant;1e-4\",\n",
+    "    report_metrics=\"mean_nRMSE\",  # alternative: \"mean_nRMSE,mean_fourier_nRMSE;0;5;0,mean_H1_nRMSE\"\n",
+    "    # spatial discretization\n",
+    "    num_points=spatial_grid_points,  # discretized points on the spatial grid\n",
+    "    num_spatial_dims=spatial_dims,  # spatial dimensions\n",
+    "    # train set\n",
+    "    num_train_samples=num_train_samples,  # number of ICs for train set\n",
+    "    train_temporal_horizon=train_time_steps,  # consecutive time steps the simulator is run\n",
+    "    train_seed=0,\n",
+    "    # test set\n",
+    "    num_test_samples=num_test_samples,  # number of ICs for test set\n",
+    "    test_temporal_horizon=test_time_steps,  # consecutive time steps the simulator is run for test set\n",
+    "    test_seed=773,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "ac69e48e-7bba-45a7-89f8-1add76e87a8f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "train data shape (100, 51, 1, 128, 128)\n",
+      "test ICs shape (40, 1, 128, 128)\n",
+      "complete test data shape (40, 201, 1, 128, 128)\n",
+      "val data (10, 51, 1, 128, 128)\n",
+      "test data after removing val (30, 201, 1, 128, 128)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Advection-diffusion: fetch train/test trajectories and hold out part of the test set for validation.\n",
+    "num_val_samples = 10  # leading test ICs that are held out as the validation set\n",
+    "\n",
+    "train_data = advdiff2D_scenario.get_train_data()\n",
+    "print('train data shape', train_data.shape)\n",
+    "\n",
+    "test_ic_set = advdiff2D_scenario.get_test_ic_set()\n",
+    "print('test ICs shape', test_ic_set.shape)\n",
+    "\n",
+    "full_test_data = advdiff2D_scenario.get_test_data()\n",
+    "print('complete test data shape', full_test_data.shape)\n",
+    "\n",
+    "# validation uses the training horizon; +1 keeps the initial condition stored as frame 0\n",
+    "val_data = full_test_data[:num_val_samples, : train_time_steps + 1]\n",
+    "print('val data', val_data.shape)\n",
+    "\n",
+    "# the remaining ICs keep the full test horizon\n",
+    "test_data = full_test_data[num_val_samples:]\n",
+    "print('test data after removing val', test_data.shape)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "98b2ad28-243f-4a87-ac30-7d7fc0873fd9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# jnp.save(\"advdiff2D_train_data_128x128_100ic_50t.npy\", train_data)\n",
+    "# jnp.save(\"advdiff2D_val_data_128x128_10ic_50t.npy\", val_data)\n",
+    "# jnp.save(\"advdiff2D_test_data_128x128_30ic_200t.npy\", test_data)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "55fb63eb-9155-4817-b0d2-cb2dd0b33878",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "burgers2D_scenario = apebench.scenarios.difficulty.Burgers(\n",
+    "    # optimizer / reporting setup\n",
+    "    optim_config=\"adam;10_000;constant;1e-4\",\n",
+    "    report_metrics=\"mean_nRMSE\",  # alternative: \"mean_nRMSE,mean_fourier_nRMSE;0;5;0,mean_H1_nRMSE\"\n",
+    "    # spatial discretization\n",
+    "    num_points=spatial_grid_points,  # discretized points on the spatial grid\n",
+    "    num_spatial_dims=spatial_dims,  # spatial dimensions\n",
+    "    # train set\n",
+    "    num_train_samples=num_train_samples,  # number of ICs for train set\n",
+    "    train_temporal_horizon=train_time_steps,  # consecutive time steps the simulator is run\n",
+    "    train_seed=0,\n",
+    "    # test set\n",
+    "    num_test_samples=num_test_samples,  # number of ICs for test set\n",
+    "    test_temporal_horizon=test_time_steps,  # consecutive time steps the simulator is run for test set\n",
+    "    test_seed=773,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "f8654e86-250f-462b-8f8c-099e95855043",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "train data shape (100, 51, 2, 128, 128)\n",
+      "test ICs shape (40, 2, 128, 128)\n",
+      "complete test data shape (40, 201, 2, 128, 128)\n",
+      "val data (10, 51, 2, 128, 128)\n",
+      "test data after removing val (30, 201, 2, 128, 128)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Burgers: fetch train/test trajectories and hold out part of the test set for validation.\n",
+    "num_val_samples = 10  # leading test ICs that are held out as the validation set\n",
+    "\n",
+    "train_data = burgers2D_scenario.get_train_data()\n",
+    "print('train data shape', train_data.shape)\n",
+    "\n",
+    "test_ic_set = burgers2D_scenario.get_test_ic_set()\n",
+    "print('test ICs shape', test_ic_set.shape)\n",
+    "\n",
+    "full_test_data = burgers2D_scenario.get_test_data()\n",
+    "print('complete test data shape', full_test_data.shape)\n",
+    "\n",
+    "# validation uses the training horizon; +1 keeps the initial condition stored as frame 0\n",
+    "val_data = full_test_data[:num_val_samples, : train_time_steps + 1]\n",
+    "print('val data', val_data.shape)\n",
+    "\n",
+    "# the remaining ICs keep the full test horizon\n",
+    "test_data = full_test_data[num_val_samples:]\n",
+    "print('test data after removing val', test_data.shape)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "19bc4cf4-af8b-4ad9-bb73-76f98b20e43e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# jnp.save(\"burgers2D_train_data_128x128_100ic_50t.npy\", train_data)\n",
+    "# jnp.save(\"burgers2D_val_data_128x128_10ic_50t.npy\", val_data)\n",
+    "# jnp.save(\"burgers2D_test_data_128x128_30ic_200t.npy\", test_data)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2e176902-3626-4634-9447-d65167de7688",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "e2b7d30d-4e16-4b9f-b544-d7e3e050a096",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fisher2D_scenario = apebench.scenarios.difficulty.FisherKPP(\n",
+    "    # optimizer / reporting setup\n",
+    "    optim_config=\"adam;10_000;constant;1e-4\",\n",
+    "    report_metrics=\"mean_nRMSE\",  # alternative: \"mean_nRMSE,mean_fourier_nRMSE;0;5;0,mean_H1_nRMSE\"\n",
+    "    # spatial discretization\n",
+    "    num_points=spatial_grid_points,  # discretized points on the spatial grid\n",
+    "    num_spatial_dims=spatial_dims,  # spatial dimensions\n",
+    "    # train set\n",
+    "    num_train_samples=num_train_samples,  # number of ICs for train set\n",
+    "    train_temporal_horizon=train_time_steps,  # consecutive time steps the simulator is run\n",
+    "    train_seed=0,\n",
+    "    # test set\n",
+    "    num_test_samples=num_test_samples,  # number of ICs for test set\n",
+    "    test_temporal_horizon=test_time_steps,  # consecutive time steps the simulator is run for test set\n",
+    "    test_seed=773,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "154d74a7-3fd1-45db-acee-f1508ee75f04",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "train data shape (100, 51, 1, 128, 128)\n",
+      "test ICs shape (40, 1, 128, 128)\n",
+      "complete test data shape (40, 201, 1, 128, 128)\n",
+      "val data (10, 51, 1, 128, 128)\n",
+      "test data after removing val (30, 201, 1, 128, 128)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Fisher-KPP: fetch train/test trajectories and hold out part of the test set for validation.\n",
+    "num_val_samples = 10  # leading test ICs that are held out as the validation set\n",
+    "\n",
+    "train_data = fisher2D_scenario.get_train_data()\n",
+    "print('train data shape', train_data.shape)\n",
+    "\n",
+    "test_ic_set = fisher2D_scenario.get_test_ic_set()\n",
+    "print('test ICs shape', test_ic_set.shape)\n",
+    "\n",
+    "full_test_data = fisher2D_scenario.get_test_data()\n",
+    "print('complete test data shape', full_test_data.shape)\n",
+    "\n",
+    "# validation uses the training horizon; +1 keeps the initial condition stored as frame 0\n",
+    "val_data = full_test_data[:num_val_samples, : train_time_steps + 1]\n",
+    "print('val data', val_data.shape)\n",
+    "\n",
+    "# the remaining ICs keep the full test horizon\n",
+    "test_data = full_test_data[num_val_samples:]\n",
+    "print('test data after removing val', test_data.shape)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "9cd14e27-23ed-4e05-b63a-aa38df53a945",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# jnp.save(\"fisher2D_train_data_128x128_100ic_50t.npy\", train_data)\n",
+    "# jnp.save(\"fisher2D_val_data_128x128_10ic_50t.npy\", val_data)\n",
+    "# jnp.save(\"fisher2D_test_data_128x128_30ic_200t.npy\", test_data)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "18d43b36-1c1e-4ec6-8bd1-532f08e334ff",
+   "metadata": {},
+   "source": [
+    "2D Kolmogorov flow (Navier-Stokes with Kolmogorov forcing):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ac7c2025-c7d5-4141-837d-c34b6defcbf7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "abf9d28d-8903-41ca-829f-8632d35b9b20",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kolmflow2D_scenario = apebench.scenarios.physical.KolmogorovFlow(\n",
+    "    # optimizer / reporting setup\n",
+    "    optim_config=\"adam;10_000;constant;1e-4\",\n",
+    "    report_metrics=\"mean_nRMSE\",  # alternative: \"mean_nRMSE,mean_fourier_nRMSE;0;5;0,mean_H1_nRMSE\"\n",
+    "    # spatial discretization\n",
+    "    num_points=spatial_grid_points,  # discretized points on the spatial grid\n",
+    "    num_spatial_dims=spatial_dims,  # spatial dimensions\n",
+    "    # train set\n",
+    "    num_train_samples=num_train_samples,  # number of ICs for train set\n",
+    "    train_temporal_horizon=train_time_steps,  # consecutive time steps the simulator is run\n",
+    "    train_seed=0,\n",
+    "    # test set\n",
+    "    num_test_samples=num_test_samples,  # number of ICs for test set\n",
+    "    test_temporal_horizon=test_time_steps,  # consecutive time steps the simulator is run for test set\n",
+    "    test_seed=773,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "8c1d6fa7-d085-49cf-8bc9-67f391d8f95f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "train data shape (100, 51, 1, 128, 128)\n",
+      "test ICs shape (40, 1, 128, 128)\n",
+      "complete test data shape (40, 201, 1, 128, 128)\n",
+      "val data (10, 51, 1, 128, 128)\n",
+      "test data after removing val (30, 201, 1, 128, 128)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Kolmogorov flow: fetch train/test trajectories and hold out part of the test set for validation.\n",
+    "num_val_samples = 10  # leading test ICs that are held out as the validation set\n",
+    "\n",
+    "train_data = kolmflow2D_scenario.get_train_data()\n",
+    "print('train data shape', train_data.shape)\n",
+    "\n",
+    "test_ic_set = kolmflow2D_scenario.get_test_ic_set()\n",
+    "print('test ICs shape', test_ic_set.shape)\n",
+    "\n",
+    "full_test_data = kolmflow2D_scenario.get_test_data()\n",
+    "print('complete test data shape', full_test_data.shape)\n",
+    "\n",
+    "# validation uses the training horizon; +1 keeps the initial condition stored as frame 0\n",
+    "val_data = full_test_data[:num_val_samples, : train_time_steps + 1]\n",
+    "print('val data', val_data.shape)\n",
+    "\n",
+    "# the remaining ICs keep the full test horizon\n",
+    "test_data = full_test_data[num_val_samples:]\n",
+    "print('test data after removing val', test_data.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "fe45d76b-9a8f-4998-aba9-2685f8ba1ad8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# jnp.save(\"kolmflow2D_train_data_128x128_100ic_50t.npy\", train_data)\n",
+    "# jnp.save(\"kolmflow2D_val_data_128x128_10ic_50t.npy\", val_data)\n",
+    "# jnp.save(\"kolmflow2D_test_data_128x128_30ic_200t.npy\", test_data)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e8111538-d4b8-464f-9c0e-e3758d89a5d6",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "51fbdc8d-4a44-465b-85e1-19e15d965350",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "grayscott2D_scenario = apebench.scenarios.physical.GrayScott(\n",
+    "    # optimizer / reporting setup\n",
+    "    optim_config=\"adam;10_000;constant;1e-4\",\n",
+    "    report_metrics=\"mean_nRMSE\",  # alternative: \"mean_nRMSE,mean_fourier_nRMSE;0;5;0,mean_H1_nRMSE\"\n",
+    "    # spatial discretization\n",
+    "    num_points=spatial_grid_points,  # discretized points on the spatial grid\n",
+    "    num_spatial_dims=spatial_dims,  # spatial dimensions\n",
+    "    # train set\n",
+    "    num_train_samples=num_train_samples,  # number of ICs for train set\n",
+    "    train_temporal_horizon=train_time_steps,  # consecutive time steps the simulator is run\n",
+    "    train_seed=0,\n",
+    "    # test set\n",
+    "    num_test_samples=num_test_samples,  # number of ICs for test set\n",
+    "    test_temporal_horizon=test_time_steps,  # consecutive time steps the simulator is run for test set\n",
+    "    test_seed=773,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "d1482cd8-1651-4bd0-9ed3-52729d424f6f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "train data shape (100, 51, 2, 128, 128)\n",
+      "test ICs shape (40, 2, 128, 128)\n",
+      "complete test data shape (40, 201, 2, 128, 128)\n",
+      "val data (10, 51, 2, 128, 128)\n",
+      "test data after removing val (30, 201, 2, 128, 128)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Gray-Scott: fetch train/test trajectories and hold out part of the test set for validation.\n",
+    "num_val_samples = 10  # leading test ICs that are held out as the validation set\n",
+    "\n",
+    "train_data = grayscott2D_scenario.get_train_data()\n",
+    "print('train data shape', train_data.shape)\n",
+    "\n",
+    "test_ic_set = grayscott2D_scenario.get_test_ic_set()\n",
+    "print('test ICs shape', test_ic_set.shape)\n",
+    "\n",
+    "# append [:, 1:] here to remove the initial condition from the test set\n",
+    "full_test_data = grayscott2D_scenario.get_test_data()\n",
+    "print('complete test data shape', full_test_data.shape)\n",
+    "\n",
+    "# validation uses the training horizon; +1 keeps the initial condition stored as frame 0\n",
+    "val_data = full_test_data[:num_val_samples, : train_time_steps + 1]\n",
+    "print('val data', val_data.shape)\n",
+    "\n",
+    "# the remaining ICs keep the full test horizon\n",
+    "test_data = full_test_data[num_val_samples:]\n",
+    "print('test data after removing val', test_data.shape)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "id": "8e53499e-2242-4de9-8738-be9cf4b93ea1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# jnp.save(\"grayscott2D_train_data_128x128_100ic_50t.npy\", train_data)\n",
+    "# jnp.save(\"grayscott2D_val_data_128x128_10ic_50t.npy\", val_data)\n",
+    "# jnp.save(\"grayscott2D_test_data_128x128_30ic_200t.npy\", test_data)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "id": "39124165-aa63-433b-b81f-6ca560db10ed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE(review): `dypdiff` looks like a typo for `hypdiff`; the name is kept because later cells reference it.\n",
+    "dypdiff2D_scenario = apebench.scenarios.difficulty.HyperDiffusion(\n",
+    "    # optimizer / reporting setup\n",
+    "    optim_config=\"adam;10_000;constant;1e-4\",\n",
+    "    report_metrics=\"mean_nRMSE\",  # alternative: \"mean_nRMSE,mean_fourier_nRMSE;0;5;0,mean_H1_nRMSE\"\n",
+    "    # spatial discretization\n",
+    "    num_points=spatial_grid_points,  # discretized points on the spatial grid\n",
+    "    num_spatial_dims=spatial_dims,  # spatial dimensions\n",
+    "    # train set\n",
+    "    num_train_samples=num_train_samples,  # number of ICs for train set\n",
+    "    train_temporal_horizon=train_time_steps,  # consecutive time steps the simulator is run\n",
+    "    train_seed=0,\n",
+    "    # test set\n",
+    "    num_test_samples=num_test_samples,  # number of ICs for test set\n",
+    "    test_temporal_horizon=test_time_steps,  # consecutive time steps the simulator is run for test set\n",
+    "    test_seed=773,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "id": "9e5a8c4c-fe32-4e49-9504-bd423d5deb95",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "train data shape (100, 51, 1, 128, 128)\n",
+      "test ICs shape (40, 1, 128, 128)\n",
+      "complete test data shape (30, 201, 2, 128, 128)\n",
+      "val data (10, 51, 1, 128, 128)\n",
+      "test data after removing val (30, 201, 1, 128, 128)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Hyper-diffusion: fetch train/test trajectories and hold out part of the test set for validation.\n",
+    "num_val_samples = 10  # leading test ICs that are held out as the validation set\n",
+    "\n",
+    "train_data = dypdiff2D_scenario.get_train_data()\n",
+    "print('train data shape', train_data.shape)\n",
+    "\n",
+    "test_ic_set = dypdiff2D_scenario.get_test_ic_set()\n",
+    "print('test ICs shape', test_ic_set.shape)\n",
+    "\n",
+    "# append [:, 1:] here to remove the initial condition from the test set\n",
+    "full_test_data = dypdiff2D_scenario.get_test_data()\n",
+    "# report the freshly fetched array, not the `test_data` left over from the previous scenario's cell\n",
+    "print('complete test data shape', full_test_data.shape)\n",
+    "\n",
+    "# validation uses the training horizon; +1 keeps the initial condition stored as frame 0\n",
+    "val_data = full_test_data[:num_val_samples, : train_time_steps + 1]\n",
+    "print('val data', val_data.shape)\n",
+    "\n",
+    "# the remaining ICs keep the full test horizon\n",
+    "test_data = full_test_data[num_val_samples:]\n",
+    "print('test data after removing val', test_data.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "id": "1916cc3d-342c-4aaa-8e0a-48fdea75ced7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# jnp.save(\"dypdiff2D_train_data_128x128_100ic_50t.npy\", train_data)\n",
+    "# jnp.save(\"dypdiff2D_val_data_128x128_10ic_50t.npy\", val_data)\n",
+    "# jnp.save(\"dypdiff2D_test_data_128x128_30ic_200t.npy\", test_data)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (apebench)",
+   "language": "python",
+   "name": "apebench"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}