Upload transformerdecoder.py
transformerdecoder.py  +3 -1
@@ -47,6 +47,7 @@ class AugmentedPositionGPTConfig(PretrainedConfig):
         d_model=128,
         num_heads=2,
         num_layers=1,
+        num_hidden_layers=128 * 4,
         max_position_embeddings=512,
         **kwargs,
     ):
@@ -188,7 +189,8 @@ class AugmentedPositionGPTForCausalLM(PreTrainedModel, GenerationMixin):



-    def forward(self, input_ids=None, attention_mask=None, input_embeds=None, labels=None, output_hidden_states=False, return_dict=True):
+    def forward(self, input_ids=None, attention_mask=None, input_embeds=None, labels=None, output_hidden_states=False, return_dict=True, cache_position=None, position_ids=None):
+        cache_position = None
         outputs = self.transformerdecoder(
             input_ids=input_ids,
             attention_mask=attention_mask,
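
A minimal usage sketch of the updated pieces, assuming transformerdecoder.py is importable from the working directory and that the config and model build cleanly from just the arguments shown in the diff (the class names come from the hunk headers above; everything else is illustrative):

from transformerdecoder import AugmentedPositionGPTConfig, AugmentedPositionGPTForCausalLM

# Config including the field introduced in this commit.
config = AugmentedPositionGPTConfig(
    d_model=128,
    num_heads=2,
    num_layers=1,
    num_hidden_layers=128 * 4,   # new in this commit
    max_position_embeddings=512,
)
model = AugmentedPositionGPTForCausalLM(config)

# With the widened signature, forward() no longer raises a TypeError when the
# transformers generation loop passes cache_position / position_ids through
# (recent releases do this). Per the second hunk, cache_position is immediately
# overwritten with None, so any value supplied by generate() is discarded.

Widening the signature like this is a common way to keep a custom forward() compatible with newer transformers generation code without reworking the decoder call itself.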