Upload folder using huggingface_hub
Browse files
- .DS_Store +0 -0
- README.md +13 -1
- args.yaml +0 -0
- ckpt/info.yaml +1 -0
- ckpt/model.pt +3 -0
- ckpt/optimizer.pt +3 -0
- evaluation.yaml +17 -0
- evaluation_type.yaml +54 -0
- model.txt +368 -0
.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
README.md
CHANGED
|
@@ -1,3 +1,15 @@
|
|
| 1 |
---
|
| 2 |
license: mit
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
license: mit
|
| 3 |
+
tags:
|
| 4 |
+
- neuroai
|
| 5 |
+
- neuro-ai
|
| 6 |
+
- neural-encoding
|
| 7 |
+
- transformer
|
| 8 |
+
- viv1t
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# ViV1T model checkpoint
|
| 12 |
+
|
| 13 |
+
Model checkpoints used in the paper "[Movie-trained transformer reveals novel response properties to dynamic stimuli in mouse visual cortex](https://www.biorxiv.org/content/10.1101/2025.09.16.676524v1)"
|
| 14 |
+
|
| 15 |
+
Please check [github.com/bryanlimy/ViV1T-closed-loop](https://github.com/bryanlimy/ViV1T-closed-loop) for more information.
|
args.yaml
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ckpt/info.yaml
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{best_corr: 0.28211894631385803, epoch: 85}
|
ckpt/model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4f3ab3414311c1a157913709a0e5ccc8fe8d805fb282bc7543d7e961f4f6512
|
| 3 |
+
size 52809566
|
ckpt/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87e8beedbe57eb96e7cdee60b4295399e50c5aa1d32ba9a144654661e08ab21b
|
| 3 |
+
size 104273914
|
evaluation.yaml
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
live_bonus:
|
| 2 |
+
correlation: {A: 0.15000689029693604, B: 0.18395531177520752, C: 0.16595186293125153,
|
| 3 |
+
D: 0.2351299375295639, E: 0.1381269246339798, average: 0.17463418543338777}
|
| 4 |
+
normalized_correlation: {A: 0.5566717982292175, B: 0.5185837149620056, C: 0.5679521560668945,
|
| 5 |
+
D: 0.656633734703064, E: 0.5679119825363159, average: 0.5735506772994995}
|
| 6 |
+
live_main:
|
| 7 |
+
correlation: {A: 0.2739085555076599, B: 0.3003808259963989, C: 0.301272451877594,
|
| 8 |
+
D: 0.28224700689315796, E: 0.2678496837615967, average: 0.2851317048072815}
|
| 9 |
+
normalized_correlation: {A: 0.6592996716499329, B: 0.7056032419204712, C: 0.6867263317108154,
|
| 10 |
+
D: 0.6743381023406982, E: 0.6981702446937561, average: 0.6848275184631347}
|
| 11 |
+
validation:
|
| 12 |
+
correlation: {A: 0.27861830592155457, B: 0.313666433095932, C: 0.3097257614135742,
|
| 13 |
+
D: 0.26396533846855164, E: 0.2649754285812378, F: 0.259592205286026, G: 0.288699746131897,
|
| 14 |
+
H: 0.26274827122688293, I: 0.28843459486961365, J: 0.2907619774341583, average: 0.28211880624294283}
|
| 15 |
+
normalized_correlation: {A: 0.6839460730552673, B: 0.6934837102890015, C: 0.6661377549171448,
|
| 16 |
+
D: 0.6311413645744324, E: 0.6495935320854187, F: 0.6747240424156189, G: 0.6918221712112427,
|
| 17 |
+
H: 0.7596278786659241, I: 0.6281053423881531, J: 0.6616817116737366, average: 0.674026358127594}
|
evaluation_type.yaml
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
directional pink noise:
|
| 2 |
+
correlation: {A: 0.1499825417995453, B: 0.17215222120285034, D: 0.15980379283428192,
|
| 3 |
+
average: 0.16064618527889252}
|
| 4 |
+
correlation_to_average: {A: 0.288217693567276, B: 0.2948967516422272, D: 0.30728060007095337,
|
| 5 |
+
average: 0.29679834842681885}
|
| 6 |
+
normalized_correlation: {A: 0.5767839550971985, B: 0.600197434425354, D: 0.5932188630104065,
|
| 7 |
+
average: 0.5900667508443197}
|
| 8 |
+
single_trial_correlation: {A: 0.12998300790786743, B: 0.13862085342407227, D: 0.13875971734523773,
|
| 9 |
+
average: 0.13578785955905914}
|
| 10 |
+
drifting gabor:
|
| 11 |
+
correlation: {B: 0.18396437168121338, C: 0.190323606133461, E: 0.18708060681819916,
|
| 12 |
+
average: 0.18712286154429117}
|
| 13 |
+
correlation_to_average: {B: 0.259581059217453, C: 0.2872157692909241, E: 0.2909420132637024,
|
| 14 |
+
average: 0.2792462805906932}
|
| 15 |
+
normalized_correlation: {B: 0.5216432213783264, C: 0.5274460315704346, E: 0.5603039860725403,
|
| 16 |
+
average: 0.5364644130071005}
|
| 17 |
+
single_trial_correlation: {B: 0.12222522497177124, C: 0.14061908423900604, E: 0.1370350569486618,
|
| 18 |
+
average: 0.13329312205314636}
|
| 19 |
+
gaussian dots:
|
| 20 |
+
correlation: {A: 0.16155534982681274, D: 0.1673295497894287, E: 0.1381046324968338,
|
| 21 |
+
average: 0.15566317737102509}
|
| 22 |
+
correlation_to_average: {A: 0.305797278881073, D: 0.309158593416214, E: 0.2392762452363968,
|
| 23 |
+
average: 0.2847440391778946}
|
| 24 |
+
normalized_correlation: {A: 0.656486988067627, D: 0.6579489707946777, E: 0.5892219543457031,
|
| 25 |
+
average: 0.6345526377360026}
|
| 26 |
+
single_trial_correlation: {A: 0.13023220002651215, D: 0.13202743232250214, E: 0.10221488773822784,
|
| 27 |
+
average: 0.12149150669574738}
|
| 28 |
+
image:
|
| 29 |
+
correlation: {B: 0.28715696930885315, C: 0.2836993634700775, D: 0.23508329689502716,
|
| 30 |
+
average: 0.2686465432246526}
|
| 31 |
+
correlation_to_average: {B: 0.4189091622829437, C: 0.42463061213493347, D: 0.36522963643074036,
|
| 32 |
+
average: 0.4029231369495392}
|
| 33 |
+
normalized_correlation: {B: 0.7015156149864197, C: 0.727063775062561, D: 0.6713369488716125,
|
| 34 |
+
average: 0.6999721129735311}
|
| 35 |
+
single_trial_correlation: {B: 0.21168261766433716, C: 0.20740419626235962, D: 0.16876332461833954,
|
| 36 |
+
average: 0.19595004618167877}
|
| 37 |
+
movie:
|
| 38 |
+
correlation: {A: 0.2817181348800659, B: 0.3070089519023895, C: 0.3215413987636566,
|
| 39 |
+
D: 0.2975461781024933, E: 0.2799873352050781, average: 0.29756039977073667}
|
| 40 |
+
correlation_to_average: {A: 0.3629347085952759, B: 0.381259948015213, C: 0.39735254645347595,
|
| 41 |
+
D: 0.401260644197464, E: 0.36520883440971375, average: 0.3816033363342285}
|
| 42 |
+
normalized_correlation: {A: 0.6725828647613525, B: 0.722639799118042, C: 0.6913962364196777,
|
| 43 |
+
D: 0.6852651834487915, E: 0.7059605121612549, average: 0.6955689191818237}
|
| 44 |
+
single_trial_correlation: {A: 0.17680174112319946, B: 0.18513968586921692, C: 0.1966484636068344,
|
| 45 |
+
D: 0.1971893310546875, E: 0.17419464886188507, average: 0.18599477410316467}
|
| 46 |
+
random dot kinematogram:
|
| 47 |
+
correlation: {A: 0.12593674659729004, C: 0.1659228652715683, E: 0.16469404101371765,
|
| 48 |
+
average: 0.15218455096085867}
|
| 49 |
+
correlation_to_average: {A: 0.24701885879039764, C: 0.285869836807251, E: 0.27381202578544617,
|
| 50 |
+
average: 0.2689002404610316}
|
| 51 |
+
normalized_correlation: {A: 0.5424525141716003, C: 0.5914567708969116, E: 0.6179284453392029,
|
| 52 |
+
average: 0.5839459101359049}
|
| 53 |
+
single_trial_correlation: {A: 0.10958757251501083, C: 0.13489359617233276, E: 0.1264951229095459,
|
| 54 |
+
average: 0.12365876386562984}
|
model.txt
ADDED
|
@@ -0,0 +1,368 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
=================================================================================================================================================
|
| 2 |
+
Layer (type:depth-idx) Input Shape Output Shape Param #
|
| 3 |
+
=================================================================================================================================================
|
| 4 |
+
Model -- [1, 7440, 300] --
|
| 5 |
+
├─ViViTCore: 1-1 -- [1, 112, 300, 18, 32] --
|
| 6 |
+
│ └─Tokenizer: 2-1 [1, 1, 300, 36, 64] [1, 300, 576, 112] --
|
| 7 |
+
│ │ └─ConcatBehaviors: 3-1 [1, 1, 300, 36, 64] [1, 5, 300, 36, 64] --
|
| 8 |
+
│ │ └─ZeroPad3d: 3-2 [1, 5, 300, 36, 64] [1, 5, 312, 41, 69] --
|
| 9 |
+
│ │ └─Unfold3d: 3-3 [1, 5, 312, 41, 69] [1, 300, 576, 112] --
|
| 10 |
+
│ │ │ └─Rearrange: 4-1 [1, 5, 300, 18, 32, 13, 7, 7] [1, 300, 576, 3185] --
|
| 11 |
+
│ │ │ └─LayerNorm: 4-2 [1, 300, 576, 3185] [1, 300, 576, 3185] 6,370
|
| 12 |
+
│ │ │ └─Linear: 4-3 [1, 300, 576, 3185] [1, 300, 576, 112] 356,832
|
| 13 |
+
│ │ └─LayerNorm: 3-4 [1, 300, 576, 112] [1, 300, 576, 112] 224
|
| 14 |
+
│ │ └─DropPatch: 3-5 [1, 300, 576, 112] [1, 300, 576, 112] --
|
| 15 |
+
│ └─ViViT: 2-2 [1, 300, 576, 112] [1, 300, 576, 112] 5,376
|
| 16 |
+
│ │ └─Transformer: 3-6 [300, 600, 112] [300, 600, 112] --
|
| 17 |
+
│ │ │ └─ModuleList: 4-4 -- -- --
|
| 18 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-1 [300, 600, 112] [300, 600, 112] --
|
| 19 |
+
│ │ │ │ │ └─LayerNorm: 6-1 [300, 600, 112] [300, 600, 112] 224
|
| 20 |
+
│ │ │ │ │ └─Linear: 6-2 [300, 600, 112] [300, 600, 704] 78,848
|
| 21 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-3 -- [300, 2, 600, 80] --
|
| 22 |
+
│ │ │ │ │ └─Linear: 6-4 [300, 600, 160] [300, 600, 112] 17,920
|
| 23 |
+
│ │ │ │ │ └─DropPath: 6-5 [300, 600, 112] [300, 600, 112] --
|
| 24 |
+
│ │ │ │ │ └─Sequential: 6-6 [300, 600, 224] [300, 600, 112] --
|
| 25 |
+
│ │ │ │ │ │ └─ReLU: 7-1 [300, 600, 224] [300, 600, 224] --
|
| 26 |
+
│ │ │ │ │ │ └─Dropout: 7-2 [300, 600, 224] [300, 600, 224] --
|
| 27 |
+
│ │ │ │ │ │ └─Linear: 7-3 [300, 600, 224] [300, 600, 112] 25,088
|
| 28 |
+
│ │ │ │ │ └─DropPath: 6-7 [300, 600, 112] [300, 600, 112] --
|
| 29 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-2 [300, 600, 112] [300, 600, 112] --
|
| 30 |
+
│ │ │ │ │ └─LayerNorm: 6-8 [300, 600, 112] [300, 600, 112] 224
|
| 31 |
+
│ │ │ │ │ └─Linear: 6-9 [300, 600, 112] [300, 600, 704] 78,848
|
| 32 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-10 -- [300, 2, 600, 80] --
|
| 33 |
+
│ │ │ │ │ └─Linear: 6-11 [300, 600, 160] [300, 600, 112] 17,920
|
| 34 |
+
│ │ │ │ │ └─DropPath: 6-12 [300, 600, 112] [300, 600, 112] --
|
| 35 |
+
│ │ │ │ │ └─Sequential: 6-13 [300, 600, 224] [300, 600, 112] --
|
| 36 |
+
│ │ │ │ │ │ └─ReLU: 7-4 [300, 600, 224] [300, 600, 224] --
|
| 37 |
+
│ │ │ │ │ │ └─Dropout: 7-5 [300, 600, 224] [300, 600, 224] --
|
| 38 |
+
│ │ │ │ │ │ └─Linear: 7-6 [300, 600, 224] [300, 600, 112] 25,088
|
| 39 |
+
│ │ │ │ │ └─DropPath: 6-14 [300, 600, 112] [300, 600, 112] --
|
| 40 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-3 [300, 600, 112] [300, 600, 112] --
|
| 41 |
+
│ │ │ │ │ └─LayerNorm: 6-15 [300, 600, 112] [300, 600, 112] 224
|
| 42 |
+
│ │ │ │ │ └─Linear: 6-16 [300, 600, 112] [300, 600, 704] 78,848
|
| 43 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-17 -- [300, 2, 600, 80] --
|
| 44 |
+
│ │ │ │ │ └─Linear: 6-18 [300, 600, 160] [300, 600, 112] 17,920
|
| 45 |
+
│ │ │ │ │ └─DropPath: 6-19 [300, 600, 112] [300, 600, 112] --
|
| 46 |
+
│ │ │ │ │ └─Sequential: 6-20 [300, 600, 224] [300, 600, 112] --
|
| 47 |
+
│ │ │ │ │ │ └─ReLU: 7-7 [300, 600, 224] [300, 600, 224] --
|
| 48 |
+
│ │ │ │ │ │ └─Dropout: 7-8 [300, 600, 224] [300, 600, 224] --
|
| 49 |
+
│ │ │ │ │ │ └─Linear: 7-9 [300, 600, 224] [300, 600, 112] 25,088
|
| 50 |
+
│ │ │ │ │ └─DropPath: 6-21 [300, 600, 112] [300, 600, 112] --
|
| 51 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-4 [300, 600, 112] [300, 600, 112] --
|
| 52 |
+
│ │ │ │ │ └─LayerNorm: 6-22 [300, 600, 112] [300, 600, 112] 224
|
| 53 |
+
│ │ │ │ │ └─Linear: 6-23 [300, 600, 112] [300, 600, 704] 78,848
|
| 54 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-24 -- [300, 2, 600, 80] --
|
| 55 |
+
│ │ │ │ │ └─Linear: 6-25 [300, 600, 160] [300, 600, 112] 17,920
|
| 56 |
+
│ │ │ │ │ └─DropPath: 6-26 [300, 600, 112] [300, 600, 112] --
|
| 57 |
+
│ │ │ │ │ └─Sequential: 6-27 [300, 600, 224] [300, 600, 112] --
|
| 58 |
+
│ │ │ │ │ │ └─ReLU: 7-10 [300, 600, 224] [300, 600, 224] --
|
| 59 |
+
│ │ │ │ │ │ └─Dropout: 7-11 [300, 600, 224] [300, 600, 224] --
|
| 60 |
+
│ │ │ │ │ │ └─Linear: 7-12 [300, 600, 224] [300, 600, 112] 25,088
|
| 61 |
+
│ │ │ │ │ └─DropPath: 6-28 [300, 600, 112] [300, 600, 112] --
|
| 62 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-5 [300, 600, 112] [300, 600, 112] --
|
| 63 |
+
│ │ │ │ │ └─LayerNorm: 6-29 [300, 600, 112] [300, 600, 112] 224
|
| 64 |
+
│ │ │ │ │ └─Linear: 6-30 [300, 600, 112] [300, 600, 704] 78,848
|
| 65 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-31 -- [300, 2, 600, 80] --
|
| 66 |
+
│ │ │ │ │ └─Linear: 6-32 [300, 600, 160] [300, 600, 112] 17,920
|
| 67 |
+
│ │ │ │ │ └─DropPath: 6-33 [300, 600, 112] [300, 600, 112] --
|
| 68 |
+
│ │ │ │ │ └─Sequential: 6-34 [300, 600, 224] [300, 600, 112] --
|
| 69 |
+
│ │ │ │ │ │ └─ReLU: 7-13 [300, 600, 224] [300, 600, 224] --
|
| 70 |
+
│ │ │ │ │ │ └─Dropout: 7-14 [300, 600, 224] [300, 600, 224] --
|
| 71 |
+
│ │ │ │ │ │ └─Linear: 7-15 [300, 600, 224] [300, 600, 112] 25,088
|
| 72 |
+
│ │ │ │ │ └─DropPath: 6-35 [300, 600, 112] [300, 600, 112] --
|
| 73 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-6 [300, 600, 112] [300, 600, 112] --
|
| 74 |
+
│ │ │ │ │ └─LayerNorm: 6-36 [300, 600, 112] [300, 600, 112] 224
|
| 75 |
+
│ │ │ │ │ └─Linear: 6-37 [300, 600, 112] [300, 600, 704] 78,848
|
| 76 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-38 -- [300, 2, 600, 80] --
|
| 77 |
+
│ │ │ │ │ └─Linear: 6-39 [300, 600, 160] [300, 600, 112] 17,920
|
| 78 |
+
│ │ │ │ │ └─DropPath: 6-40 [300, 600, 112] [300, 600, 112] --
|
| 79 |
+
│ │ │ │ │ └─Sequential: 6-41 [300, 600, 224] [300, 600, 112] --
|
| 80 |
+
│ │ │ │ │ │ └─ReLU: 7-16 [300, 600, 224] [300, 600, 224] --
|
| 81 |
+
│ │ │ │ │ │ └─Dropout: 7-17 [300, 600, 224] [300, 600, 224] --
|
| 82 |
+
│ │ │ │ │ │ └─Linear: 7-18 [300, 600, 224] [300, 600, 112] 25,088
|
| 83 |
+
│ │ │ │ │ └─DropPath: 6-42 [300, 600, 112] [300, 600, 112] --
|
| 84 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-7 [300, 600, 112] [300, 600, 112] --
|
| 85 |
+
│ │ │ │ │ └─LayerNorm: 6-43 [300, 600, 112] [300, 600, 112] 224
|
| 86 |
+
│ │ │ │ │ └─Linear: 6-44 [300, 600, 112] [300, 600, 704] 78,848
|
| 87 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-45 -- [300, 2, 600, 80] --
|
| 88 |
+
│ │ │ │ │ └─Linear: 6-46 [300, 600, 160] [300, 600, 112] 17,920
|
| 89 |
+
│ │ │ │ │ └─DropPath: 6-47 [300, 600, 112] [300, 600, 112] --
|
| 90 |
+
│ │ │ │ │ └─Sequential: 6-48 [300, 600, 224] [300, 600, 112] --
|
| 91 |
+
│ │ │ │ │ │ └─ReLU: 7-19 [300, 600, 224] [300, 600, 224] --
|
| 92 |
+
│ │ │ │ │ │ └─Dropout: 7-20 [300, 600, 224] [300, 600, 224] --
|
| 93 |
+
│ │ │ │ │ │ └─Linear: 7-21 [300, 600, 224] [300, 600, 112] 25,088
|
| 94 |
+
│ │ │ │ │ └─DropPath: 6-49 [300, 600, 112] [300, 600, 112] --
|
| 95 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-8 [300, 600, 112] [300, 600, 112] --
|
| 96 |
+
│ │ │ │ │ └─LayerNorm: 6-50 [300, 600, 112] [300, 600, 112] 224
|
| 97 |
+
│ │ │ │ │ └─Linear: 6-51 [300, 600, 112] [300, 600, 704] 78,848
|
| 98 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-52 -- [300, 2, 600, 80] --
|
| 99 |
+
│ │ │ │ │ └─Linear: 6-53 [300, 600, 160] [300, 600, 112] 17,920
|
| 100 |
+
│ │ │ │ │ └─DropPath: 6-54 [300, 600, 112] [300, 600, 112] --
|
| 101 |
+
│ │ │ │ │ └─Sequential: 6-55 [300, 600, 224] [300, 600, 112] --
|
| 102 |
+
│ │ │ │ │ │ └─ReLU: 7-22 [300, 600, 224] [300, 600, 224] --
|
| 103 |
+
│ │ │ │ │ │ └─Dropout: 7-23 [300, 600, 224] [300, 600, 224] --
|
| 104 |
+
│ │ │ │ │ │ └─Linear: 7-24 [300, 600, 224] [300, 600, 112] 25,088
|
| 105 |
+
│ │ │ │ │ └─DropPath: 6-56 [300, 600, 112] [300, 600, 112] --
|
| 106 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-9 [300, 600, 112] [300, 600, 112] --
|
| 107 |
+
│ │ │ │ │ └─LayerNorm: 6-57 [300, 600, 112] [300, 600, 112] 224
|
| 108 |
+
│ │ │ │ │ └─Linear: 6-58 [300, 600, 112] [300, 600, 704] 78,848
|
| 109 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-59 -- [300, 2, 600, 80] --
|
| 110 |
+
│ │ │ │ │ └─Linear: 6-60 [300, 600, 160] [300, 600, 112] 17,920
|
| 111 |
+
│ │ │ │ │ └─DropPath: 6-61 [300, 600, 112] [300, 600, 112] --
|
| 112 |
+
│ │ │ │ │ └─Sequential: 6-62 [300, 600, 224] [300, 600, 112] --
|
| 113 |
+
│ │ │ │ │ │ └─ReLU: 7-25 [300, 600, 224] [300, 600, 224] --
|
| 114 |
+
│ │ │ │ │ │ └─Dropout: 7-26 [300, 600, 224] [300, 600, 224] --
|
| 115 |
+
│ │ │ │ │ │ └─Linear: 7-27 [300, 600, 224] [300, 600, 112] 25,088
|
| 116 |
+
│ │ │ │ │ └─DropPath: 6-63 [300, 600, 112] [300, 600, 112] --
|
| 117 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-10 [300, 600, 112] [300, 600, 112] --
|
| 118 |
+
│ │ │ │ │ └─LayerNorm: 6-64 [300, 600, 112] [300, 600, 112] 224
|
| 119 |
+
│ │ │ │ │ └─Linear: 6-65 [300, 600, 112] [300, 600, 704] 78,848
|
| 120 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-66 -- [300, 2, 600, 80] --
|
| 121 |
+
│ │ │ │ │ └─Linear: 6-67 [300, 600, 160] [300, 600, 112] 17,920
|
| 122 |
+
│ │ │ │ │ └─DropPath: 6-68 [300, 600, 112] [300, 600, 112] --
|
| 123 |
+
│ │ │ │ │ └─Sequential: 6-69 [300, 600, 224] [300, 600, 112] --
|
| 124 |
+
│ │ │ │ │ │ └─ReLU: 7-28 [300, 600, 224] [300, 600, 224] --
|
| 125 |
+
│ │ │ │ │ │ └─Dropout: 7-29 [300, 600, 224] [300, 600, 224] --
|
| 126 |
+
│ │ │ │ │ │ └─Linear: 7-30 [300, 600, 224] [300, 600, 112] 25,088
|
| 127 |
+
│ │ │ │ │ └─DropPath: 6-70 [300, 600, 112] [300, 600, 112] --
|
| 128 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-11 [300, 600, 112] [300, 600, 112] --
|
| 129 |
+
│ │ │ │ │ └─LayerNorm: 6-71 [300, 600, 112] [300, 600, 112] 224
|
| 130 |
+
│ │ │ │ │ └─Linear: 6-72 [300, 600, 112] [300, 600, 704] 78,848
|
| 131 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-73 -- [300, 2, 600, 80] --
|
| 132 |
+
│ │ │ │ │ └─Linear: 6-74 [300, 600, 160] [300, 600, 112] 17,920
|
| 133 |
+
│ │ │ │ │ └─DropPath: 6-75 [300, 600, 112] [300, 600, 112] --
|
| 134 |
+
│ │ │ │ │ └─Sequential: 6-76 [300, 600, 224] [300, 600, 112] --
|
| 135 |
+
│ │ │ │ │ │ └─ReLU: 7-31 [300, 600, 224] [300, 600, 224] --
|
| 136 |
+
│ │ │ │ │ │ └─Dropout: 7-32 [300, 600, 224] [300, 600, 224] --
|
| 137 |
+
│ │ │ │ │ │ └─Linear: 7-33 [300, 600, 224] [300, 600, 112] 25,088
|
| 138 |
+
│ │ │ │ │ └─DropPath: 6-77 [300, 600, 112] [300, 600, 112] --
|
| 139 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-12 [300, 600, 112] [300, 600, 112] --
|
| 140 |
+
│ │ │ │ │ └─LayerNorm: 6-78 [300, 600, 112] [300, 600, 112] 224
|
| 141 |
+
│ │ │ │ │ └─Linear: 6-79 [300, 600, 112] [300, 600, 704] 78,848
|
| 142 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-80 -- [300, 2, 600, 80] --
|
| 143 |
+
│ │ │ │ │ └─Linear: 6-81 [300, 600, 160] [300, 600, 112] 17,920
|
| 144 |
+
│ │ │ │ │ └─DropPath: 6-82 [300, 600, 112] [300, 600, 112] --
|
| 145 |
+
│ │ │ │ │ └─Sequential: 6-83 [300, 600, 224] [300, 600, 112] --
|
| 146 |
+
│ │ │ │ │ │ └─ReLU: 7-34 [300, 600, 224] [300, 600, 224] --
|
| 147 |
+
│ │ │ │ │ │ └─Dropout: 7-35 [300, 600, 224] [300, 600, 224] --
|
| 148 |
+
│ │ │ │ │ │ └─Linear: 7-36 [300, 600, 224] [300, 600, 112] 25,088
|
| 149 |
+
│ │ │ │ │ └─DropPath: 6-84 [300, 600, 112] [300, 600, 112] --
|
| 150 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-13 [300, 600, 112] [300, 600, 112] --
|
| 151 |
+
│ │ │ │ │ └─LayerNorm: 6-85 [300, 600, 112] [300, 600, 112] 224
|
| 152 |
+
│ │ │ │ │ └─Linear: 6-86 [300, 600, 112] [300, 600, 704] 78,848
|
| 153 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-87 -- [300, 2, 600, 80] --
|
| 154 |
+
│ │ │ │ │ └─Linear: 6-88 [300, 600, 160] [300, 600, 112] 17,920
|
| 155 |
+
│ │ │ │ │ └─DropPath: 6-89 [300, 600, 112] [300, 600, 112] --
|
| 156 |
+
│ │ │ │ │ └─Sequential: 6-90 [300, 600, 224] [300, 600, 112] --
|
| 157 |
+
│ │ │ │ │ │ └─ReLU: 7-37 [300, 600, 224] [300, 600, 224] --
|
| 158 |
+
│ │ │ │ │ │ └─Dropout: 7-38 [300, 600, 224] [300, 600, 224] --
|
| 159 |
+
│ │ │ │ │ │ └─Linear: 7-39 [300, 600, 224] [300, 600, 112] 25,088
|
| 160 |
+
│ │ │ │ │ └─DropPath: 6-91 [300, 600, 112] [300, 600, 112] --
|
| 161 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-14 [300, 600, 112] [300, 600, 112] --
|
| 162 |
+
│ │ │ │ │ └─LayerNorm: 6-92 [300, 600, 112] [300, 600, 112] 224
|
| 163 |
+
│ │ │ │ │ └─Linear: 6-93 [300, 600, 112] [300, 600, 704] 78,848
|
| 164 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-94 -- [300, 2, 600, 80] --
|
| 165 |
+
│ │ │ │ │ └─Linear: 6-95 [300, 600, 160] [300, 600, 112] 17,920
|
| 166 |
+
│ │ │ │ │ └─DropPath: 6-96 [300, 600, 112] [300, 600, 112] --
|
| 167 |
+
│ │ │ │ │ └─Sequential: 6-97 [300, 600, 224] [300, 600, 112] --
|
| 168 |
+
│ │ │ │ │ │ └─ReLU: 7-40 [300, 600, 224] [300, 600, 224] --
|
| 169 |
+
│ │ │ │ │ │ └─Dropout: 7-41 [300, 600, 224] [300, 600, 224] --
|
| 170 |
+
│ │ │ │ │ │ └─Linear: 7-42 [300, 600, 224] [300, 600, 112] 25,088
|
| 171 |
+
│ │ │ │ │ └─DropPath: 6-98 [300, 600, 112] [300, 600, 112] --
|
| 172 |
+
│ │ └─Transformer: 3-7 [576, 324, 112] [576, 324, 112] --
|
| 173 |
+
│ │ │ └─ModuleList: 4-5 -- -- --
|
| 174 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-15 [576, 324, 112] [576, 324, 112] --
|
| 175 |
+
│ │ │ │ │ └─LayerNorm: 6-99 [576, 324, 112] [576, 324, 112] 224
|
| 176 |
+
│ │ │ │ │ └─Linear: 6-100 [576, 324, 112] [576, 324, 704] 78,848
|
| 177 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-101 -- [576, 2, 324, 80] --
|
| 178 |
+
│ │ │ │ │ └─Linear: 6-102 [576, 324, 160] [576, 324, 112] 17,920
|
| 179 |
+
│ │ │ │ │ └─DropPath: 6-103 [576, 324, 112] [576, 324, 112] --
|
| 180 |
+
│ │ │ │ │ └─Sequential: 6-104 [576, 324, 224] [576, 324, 112] --
|
| 181 |
+
│ │ │ │ │ │ └─ReLU: 7-43 [576, 324, 224] [576, 324, 224] --
|
| 182 |
+
│ │ │ │ │ │ └─Dropout: 7-44 [576, 324, 224] [576, 324, 224] --
|
| 183 |
+
│ │ │ │ │ │ └─Linear: 7-45 [576, 324, 224] [576, 324, 112] 25,088
|
| 184 |
+
│ │ │ │ │ └─DropPath: 6-105 [576, 324, 112] [576, 324, 112] --
|
| 185 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-16 [576, 324, 112] [576, 324, 112] --
|
| 186 |
+
│ │ │ │ │ └─LayerNorm: 6-106 [576, 324, 112] [576, 324, 112] 224
|
| 187 |
+
│ │ │ │ │ └─Linear: 6-107 [576, 324, 112] [576, 324, 704] 78,848
|
| 188 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-108 -- [576, 2, 324, 80] --
|
| 189 |
+
│ │ │ │ │ └─Linear: 6-109 [576, 324, 160] [576, 324, 112] 17,920
|
| 190 |
+
│ │ │ │ │ └─DropPath: 6-110 [576, 324, 112] [576, 324, 112] --
|
| 191 |
+
│ │ │ │ │ └─Sequential: 6-111 [576, 324, 224] [576, 324, 112] --
|
| 192 |
+
│ │ │ │ │ │ └─ReLU: 7-46 [576, 324, 224] [576, 324, 224] --
|
| 193 |
+
│ │ │ │ │ │ └─Dropout: 7-47 [576, 324, 224] [576, 324, 224] --
|
| 194 |
+
│ │ │ │ │ │ └─Linear: 7-48 [576, 324, 224] [576, 324, 112] 25,088
|
| 195 |
+
│ │ │ │ │ └─DropPath: 6-112 [576, 324, 112] [576, 324, 112] --
|
| 196 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-17 [576, 324, 112] [576, 324, 112] --
|
| 197 |
+
│ │ │ │ │ └─LayerNorm: 6-113 [576, 324, 112] [576, 324, 112] 224
|
| 198 |
+
│ │ │ │ │ └─Linear: 6-114 [576, 324, 112] [576, 324, 704] 78,848
|
| 199 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-115 -- [576, 2, 324, 80] --
|
| 200 |
+
│ │ │ │ │ └─Linear: 6-116 [576, 324, 160] [576, 324, 112] 17,920
|
| 201 |
+
│ │ │ │ │ └─DropPath: 6-117 [576, 324, 112] [576, 324, 112] --
|
| 202 |
+
│ │ │ │ │ └─Sequential: 6-118 [576, 324, 224] [576, 324, 112] --
|
| 203 |
+
│ │ │ │ │ │ └─ReLU: 7-49 [576, 324, 224] [576, 324, 224] --
|
| 204 |
+
│ │ │ │ │ │ └─Dropout: 7-50 [576, 324, 224] [576, 324, 224] --
|
| 205 |
+
│ │ │ │ │ │ └─Linear: 7-51 [576, 324, 224] [576, 324, 112] 25,088
|
| 206 |
+
│ │ │ │ │ └─DropPath: 6-119 [576, 324, 112] [576, 324, 112] --
|
| 207 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-18 [576, 324, 112] [576, 324, 112] --
|
| 208 |
+
│ │ │ │ │ └─LayerNorm: 6-120 [576, 324, 112] [576, 324, 112] 224
|
| 209 |
+
│ │ │ │ │ └─Linear: 6-121 [576, 324, 112] [576, 324, 704] 78,848
|
| 210 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-122 -- [576, 2, 324, 80] --
|
| 211 |
+
│ │ │ │ │ └─Linear: 6-123 [576, 324, 160] [576, 324, 112] 17,920
|
| 212 |
+
│ │ │ │ │ └─DropPath: 6-124 [576, 324, 112] [576, 324, 112] --
|
| 213 |
+
│ │ │ │ │ └─Sequential: 6-125 [576, 324, 224] [576, 324, 112] --
|
| 214 |
+
│ │ │ │ │ │ └─ReLU: 7-52 [576, 324, 224] [576, 324, 224] --
|
| 215 |
+
│ │ │ │ │ │ └─Dropout: 7-53 [576, 324, 224] [576, 324, 224] --
|
| 216 |
+
│ │ │ │ │ │ └─Linear: 7-54 [576, 324, 224] [576, 324, 112] 25,088
|
| 217 |
+
│ │ │ │ │ └─DropPath: 6-126 [576, 324, 112] [576, 324, 112] --
|
| 218 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-19 [576, 324, 112] [576, 324, 112] --
|
| 219 |
+
│ │ │ │ │ └─LayerNorm: 6-127 [576, 324, 112] [576, 324, 112] 224
|
| 220 |
+
│ │ │ │ │ └─Linear: 6-128 [576, 324, 112] [576, 324, 704] 78,848
|
| 221 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-129 -- [576, 2, 324, 80] --
|
| 222 |
+
│ │ │ │ │ └─Linear: 6-130 [576, 324, 160] [576, 324, 112] 17,920
|
| 223 |
+
│ │ │ │ │ └─DropPath: 6-131 [576, 324, 112] [576, 324, 112] --
|
| 224 |
+
│ │ │ │ │ └─Sequential: 6-132 [576, 324, 224] [576, 324, 112] --
|
| 225 |
+
│ │ │ │ │ │ └─ReLU: 7-55 [576, 324, 224] [576, 324, 224] --
|
| 226 |
+
│ │ │ │ │ │ └─Dropout: 7-56 [576, 324, 224] [576, 324, 224] --
|
| 227 |
+
│ │ │ │ │ │ └─Linear: 7-57 [576, 324, 224] [576, 324, 112] 25,088
|
| 228 |
+
│ │ │ │ │ └─DropPath: 6-133 [576, 324, 112] [576, 324, 112] --
|
| 229 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-20 [576, 324, 112] [576, 324, 112] --
|
| 230 |
+
│ │ │ │ │ └─LayerNorm: 6-134 [576, 324, 112] [576, 324, 112] 224
|
| 231 |
+
│ │ │ │ │ └─Linear: 6-135 [576, 324, 112] [576, 324, 704] 78,848
|
| 232 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-136 -- [576, 2, 324, 80] --
|
| 233 |
+
│ │ │ │ │ └─Linear: 6-137 [576, 324, 160] [576, 324, 112] 17,920
|
| 234 |
+
│ │ │ │ │ └─DropPath: 6-138 [576, 324, 112] [576, 324, 112] --
|
| 235 |
+
│ │ │ │ │ └─Sequential: 6-139 [576, 324, 224] [576, 324, 112] --
|
| 236 |
+
│ │ │ │ │ │ └─ReLU: 7-58 [576, 324, 224] [576, 324, 224] --
|
| 237 |
+
│ │ │ │ │ │ └─Dropout: 7-59 [576, 324, 224] [576, 324, 224] --
|
| 238 |
+
│ │ │ │ │ │ └─Linear: 7-60 [576, 324, 224] [576, 324, 112] 25,088
|
| 239 |
+
│ │ │ │ │ └─DropPath: 6-140 [576, 324, 112] [576, 324, 112] --
|
| 240 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-21 [576, 324, 112] [576, 324, 112] --
|
| 241 |
+
│ │ │ │ │ └─LayerNorm: 6-141 [576, 324, 112] [576, 324, 112] 224
|
| 242 |
+
│ │ │ │ │ └─Linear: 6-142 [576, 324, 112] [576, 324, 704] 78,848
|
| 243 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-143 -- [576, 2, 324, 80] --
|
| 244 |
+
│ │ │ │ │ └─Linear: 6-144 [576, 324, 160] [576, 324, 112] 17,920
|
| 245 |
+
│ │ │ │ │ └─DropPath: 6-145 [576, 324, 112] [576, 324, 112] --
|
| 246 |
+
│ │ │ │ │ └─Sequential: 6-146 [576, 324, 224] [576, 324, 112] --
|
| 247 |
+
│ │ │ │ │ │ └─ReLU: 7-61 [576, 324, 224] [576, 324, 224] --
|
| 248 |
+
│ │ │ │ │ │ └─Dropout: 7-62 [576, 324, 224] [576, 324, 224] --
|
| 249 |
+
│ │ │ │ │ │ └─Linear: 7-63 [576, 324, 224] [576, 324, 112] 25,088
|
| 250 |
+
│ │ │ │ │ └─DropPath: 6-147 [576, 324, 112] [576, 324, 112] --
|
| 251 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-22 [576, 324, 112] [576, 324, 112] --
|
| 252 |
+
│ │ │ │ │ └─LayerNorm: 6-148 [576, 324, 112] [576, 324, 112] 224
|
| 253 |
+
│ │ │ │ │ └─Linear: 6-149 [576, 324, 112] [576, 324, 704] 78,848
|
| 254 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-150 -- [576, 2, 324, 80] --
|
| 255 |
+
│ │ │ │ │ └─Linear: 6-151 [576, 324, 160] [576, 324, 112] 17,920
|
| 256 |
+
│ │ │ │ │ └─DropPath: 6-152 [576, 324, 112] [576, 324, 112] --
|
| 257 |
+
│ │ │ │ │ └─Sequential: 6-153 [576, 324, 224] [576, 324, 112] --
|
| 258 |
+
│ │ │ │ │ │ └─ReLU: 7-64 [576, 324, 224] [576, 324, 224] --
|
| 259 |
+
│ │ │ │ │ │ └─Dropout: 7-65 [576, 324, 224] [576, 324, 224] --
|
| 260 |
+
│ │ │ │ │ │ └─Linear: 7-66 [576, 324, 224] [576, 324, 112] 25,088
|
| 261 |
+
│ │ │ │ │ └─DropPath: 6-154 [576, 324, 112] [576, 324, 112] --
|
| 262 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-23 [576, 324, 112] [576, 324, 112] --
|
| 263 |
+
│ │ │ │ │ └─LayerNorm: 6-155 [576, 324, 112] [576, 324, 112] 224
|
| 264 |
+
│ │ │ │ │ └─Linear: 6-156 [576, 324, 112] [576, 324, 704] 78,848
|
| 265 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-157 -- [576, 2, 324, 80] --
|
| 266 |
+
│ │ │ │ │ └─Linear: 6-158 [576, 324, 160] [576, 324, 112] 17,920
|
| 267 |
+
│ │ │ │ │ └─DropPath: 6-159 [576, 324, 112] [576, 324, 112] --
|
| 268 |
+
│ │ │ │ │ └─Sequential: 6-160 [576, 324, 224] [576, 324, 112] --
|
| 269 |
+
│ │ │ │ │ │ └─ReLU: 7-67 [576, 324, 224] [576, 324, 224] --
|
| 270 |
+
│ │ │ │ │ │ └─Dropout: 7-68 [576, 324, 224] [576, 324, 224] --
|
| 271 |
+
│ │ │ │ │ │ └─Linear: 7-69 [576, 324, 224] [576, 324, 112] 25,088
|
| 272 |
+
│ │ │ │ │ └─DropPath: 6-161 [576, 324, 112] [576, 324, 112] --
|
| 273 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-24 [576, 324, 112] [576, 324, 112] --
|
| 274 |
+
│ │ │ │ │ └─LayerNorm: 6-162 [576, 324, 112] [576, 324, 112] 224
|
| 275 |
+
│ │ │ │ │ └─Linear: 6-163 [576, 324, 112] [576, 324, 704] 78,848
|
| 276 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-164 -- [576, 2, 324, 80] --
|
| 277 |
+
│ │ │ │ │ └─Linear: 6-165 [576, 324, 160] [576, 324, 112] 17,920
|
| 278 |
+
│ │ │ │ │ └─DropPath: 6-166 [576, 324, 112] [576, 324, 112] --
|
| 279 |
+
│ │ │ │ │ └─Sequential: 6-167 [576, 324, 224] [576, 324, 112] --
|
| 280 |
+
│ │ │ │ │ │ └─ReLU: 7-70 [576, 324, 224] [576, 324, 224] --
|
| 281 |
+
│ │ │ │ │ │ └─Dropout: 7-71 [576, 324, 224] [576, 324, 224] --
|
| 282 |
+
│ │ │ │ │ │ └─Linear: 7-72 [576, 324, 224] [576, 324, 112] 25,088
|
| 283 |
+
│ │ │ │ │ └─DropPath: 6-168 [576, 324, 112] [576, 324, 112] --
|
| 284 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-25 [576, 324, 112] [576, 324, 112] --
|
| 285 |
+
│ │ │ │ │ └─LayerNorm: 6-169 [576, 324, 112] [576, 324, 112] 224
|
| 286 |
+
│ │ │ │ │ └─Linear: 6-170 [576, 324, 112] [576, 324, 704] 78,848
|
| 287 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-171 -- [576, 2, 324, 80] --
|
| 288 |
+
│ │ │ │ │ └─Linear: 6-172 [576, 324, 160] [576, 324, 112] 17,920
|
| 289 |
+
│ │ │ │ │ └─DropPath: 6-173 [576, 324, 112] [576, 324, 112] --
|
| 290 |
+
│ │ │ │ │ └─Sequential: 6-174 [576, 324, 224] [576, 324, 112] --
|
| 291 |
+
│ │ │ │ │ │ └─ReLU: 7-73 [576, 324, 224] [576, 324, 224] --
|
| 292 |
+
│ │ │ │ │ │ └─Dropout: 7-74 [576, 324, 224] [576, 324, 224] --
|
| 293 |
+
│ │ │ │ │ │ └─Linear: 7-75 [576, 324, 224] [576, 324, 112] 25,088
|
| 294 |
+
│ │ │ │ │ └─DropPath: 6-175 [576, 324, 112] [576, 324, 112] --
|
| 295 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-26 [576, 324, 112] [576, 324, 112] --
|
| 296 |
+
│ │ │ │ │ └─LayerNorm: 6-176 [576, 324, 112] [576, 324, 112] 224
|
| 297 |
+
│ │ │ │ │ └─Linear: 6-177 [576, 324, 112] [576, 324, 704] 78,848
|
| 298 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-178 -- [576, 2, 324, 80] --
|
| 299 |
+
│ │ │ │ │ └─Linear: 6-179 [576, 324, 160] [576, 324, 112] 17,920
|
| 300 |
+
│ │ │ │ │ └─DropPath: 6-180 [576, 324, 112] [576, 324, 112] --
|
| 301 |
+
│ │ │ │ │ └─Sequential: 6-181 [576, 324, 224] [576, 324, 112] --
|
| 302 |
+
│ │ │ │ │ │ └─ReLU: 7-76 [576, 324, 224] [576, 324, 224] --
|
| 303 |
+
│ │ │ │ │ │ └─Dropout: 7-77 [576, 324, 224] [576, 324, 224] --
|
| 304 |
+
│ │ │ │ │ │ └─Linear: 7-78 [576, 324, 224] [576, 324, 112] 25,088
|
| 305 |
+
│ │ │ │ │ └─DropPath: 6-182 [576, 324, 112] [576, 324, 112] --
|
| 306 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-27 [576, 324, 112] [576, 324, 112] --
|
| 307 |
+
│ │ │ │ │ └─LayerNorm: 6-183 [576, 324, 112] [576, 324, 112] 224
|
| 308 |
+
│ │ │ │ │ └─Linear: 6-184 [576, 324, 112] [576, 324, 704] 78,848
|
| 309 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-185 -- [576, 2, 324, 80] --
|
| 310 |
+
│ │ │ │ │ └─Linear: 6-186 [576, 324, 160] [576, 324, 112] 17,920
|
| 311 |
+
│ │ │ │ │ └─DropPath: 6-187 [576, 324, 112] [576, 324, 112] --
|
| 312 |
+
│ │ │ │ │ └─Sequential: 6-188 [576, 324, 224] [576, 324, 112] --
|
| 313 |
+
│ │ │ │ │ │ └─ReLU: 7-79 [576, 324, 224] [576, 324, 224] --
|
| 314 |
+
│ │ │ │ │ │ └─Dropout: 7-80 [576, 324, 224] [576, 324, 224] --
|
| 315 |
+
│ │ │ │ │ │ └─Linear: 7-81 [576, 324, 224] [576, 324, 112] 25,088
|
| 316 |
+
│ │ │ │ │ └─DropPath: 6-189 [576, 324, 112] [576, 324, 112] --
|
| 317 |
+
│ │ │ │ └─ParallelAttentionBlock: 5-28 [576, 324, 112] [576, 324, 112] --
|
| 318 |
+
│ │ │ │ │ └─LayerNorm: 6-190 [576, 324, 112] [576, 324, 112] 224
|
| 319 |
+
│ │ │ │ │ └─Linear: 6-191 [576, 324, 112] [576, 324, 704] 78,848
|
| 320 |
+
│ │ │ │ │ └─RotaryPosEmb: 6-192 -- [576, 2, 324, 80] --
|
| 321 |
+
│ │ │ │ │ └─Linear: 6-193 [576, 324, 160] [576, 324, 112] 17,920
|
| 322 |
+
│ │ │ │ │ └─DropPath: 6-194 [576, 324, 112] [576, 324, 112] --
|
| 323 |
+
│ │ │ │ │ └─Sequential: 6-195 [576, 324, 224] [576, 324, 112] --
|
| 324 |
+
│ │ │ │ │ │ └─ReLU: 7-82 [576, 324, 224] [576, 324, 224] --
|
| 325 |
+
│ │ │ │ │ │ └─Dropout: 7-83 [576, 324, 224] [576, 324, 224] --
|
| 326 |
+
│ │ │ │ │ │ └─Linear: 7-84 [576, 324, 224] [576, 324, 112] 25,088
|
| 327 |
+
│ │ │ │ │ └─DropPath: 6-196 [576, 324, 112] [576, 324, 112] --
|
| 328 |
+
│ └─Rearrange: 2-3 [1, 300, 576, 112] [1, 112, 300, 18, 32] --
|
| 329 |
+
│ └─Identity: 2-4 [1, 112, 300, 18, 32] [1, 112, 300, 18, 32] --
|
| 330 |
+
├─MLPShifters: 1-2 -- [1, 2, 300] 513
|
| 331 |
+
│ └─MLPShifter: 2-5 [300, 2] [300, 2] --
|
| 332 |
+
│ │ └─Sequential: 3-8 [300, 2] [300, 2] --
|
| 333 |
+
│ │ │ └─Linear: 4-6 [300, 2] [300, 5] 15
|
| 334 |
+
│ │ │ └─Tanh: 4-7 [300, 5] [300, 5] --
|
| 335 |
+
│ │ │ └─Linear: 4-8 [300, 5] [300, 5] 30
|
| 336 |
+
│ │ │ └─Tanh: 4-9 [300, 5] [300, 5] --
|
| 337 |
+
│ │ │ └─Linear: 4-10 [300, 5] [300, 2] 12
|
| 338 |
+
│ │ │ └─Tanh: 4-11 [300, 2] [300, 2] --
|
| 339 |
+
├─GaussianReadouts: 1-3 [1, 112, 300, 18, 32] [1, 7440, 300] 8,356,689
|
| 340 |
+
│ └─GaussianReadout: 2-6 [1, 112, 300, 18, 32] [1, 7440, 300] 870,480
|
| 341 |
+
│ │ └─Sequential: 3-9 [7440, 2] [7440, 2] --
|
| 342 |
+
│ │ │ └─Linear: 4-12 [7440, 2] [7440, 30] 90
|
| 343 |
+
│ │ │ └─ELU: 4-13 [7440, 30] [7440, 30] --
|
| 344 |
+
│ │ │ └─Linear: 4-14 [7440, 30] [7440, 2] 62
|
| 345 |
+
│ │ │ └─Tanh: 4-15 [7440, 2] [7440, 2] --
|
| 346 |
+
│ │ └─Sequential: 3-10 [7440, 2] [7440, 2] (recursive)
|
| 347 |
+
│ │ │ └─Linear: 4-16 [7440, 2] [7440, 30] (recursive)
|
| 348 |
+
│ │ │ └─ELU: 4-17 [7440, 30] [7440, 30] --
|
| 349 |
+
│ │ │ └─Linear: 4-18 [7440, 30] [7440, 2] (recursive)
|
| 350 |
+
│ │ │ └─Tanh: 4-19 [7440, 2] [7440, 2] --
|
| 351 |
+
│ │ └─Sequential: 3-11 [7440, 2] [7440, 2] (recursive)
|
| 352 |
+
│ │ │ └─Linear: 4-20 [7440, 2] [7440, 30] (recursive)
|
| 353 |
+
│ │ │ └─ELU: 4-21 [7440, 30] [7440, 30] --
|
| 354 |
+
│ │ │ └─Linear: 4-22 [7440, 30] [7440, 2] (recursive)
|
| 355 |
+
│ │ │ └─Tanh: 4-23 [7440, 2] [7440, 2] --
|
| 356 |
+
├─OutputActivation: 1-4 [1, 7440, 300] [1, 7440, 300] --
|
| 357 |
+
│ └─ELU1: 2-7 [1, 7440, 300] [1, 7440, 300] --
|
| 358 |
+
=================================================================================================================================================
|
| 359 |
+
Total params: 13,014,933
|
| 360 |
+
Trainable params: 13,014,933
|
| 361 |
+
Non-trainable params: 0
|
| 362 |
+
Total mult-adds (Units.GIGABYTES): 1.50
|
| 363 |
+
=================================================================================================================================================
|
| 364 |
+
Input size (MB): 2.77
|
| 365 |
+
Forward/backward pass size (MB): 47422.71
|
| 366 |
+
Params size (MB): 15.13
|
| 367 |
+
Estimated Total Size (MB): 47440.61
|
| 368 |
+
=================================================================================================================================================
|