brandonbeiler committed on
Commit
6c37ff7
·
verified ·
1 Parent(s): 95beb4e

Adding in missing fields from original config.json

Browse files
Files changed (1) hide show
  1. config.json +110 -0
config.json CHANGED
@@ -13,24 +13,67 @@
13
  "hidden_size": 8192,
14
  "image_fold": null,
15
  "llm_config": {
 
16
  "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
 
17
  "architectures": [
18
  "Qwen2ForCausalLM"
19
  ],
20
  "attention_dropout": 0.0,
 
 
21
  "bos_token_id": 151643,
 
 
 
 
 
 
 
22
  "eos_token_id": 151643,
 
 
 
 
23
  "hidden_act": "silu",
24
  "hidden_size": 8192,
 
 
 
 
25
  "initializer_range": 0.02,
26
  "intermediate_size": 29568,
 
 
 
 
 
 
 
 
27
  "max_position_embeddings": 32768,
28
  "max_window_layers": 70,
 
29
  "model_type": "qwen2",
30
  "moe_config": null,
 
31
  "num_attention_heads": 64,
 
 
32
  "num_hidden_layers": 80,
33
  "num_key_value_heads": 8,
 
 
 
 
 
 
 
 
 
 
 
 
34
  "rms_norm_eps": 1e-06,
35
  "rope_scaling": {
36
  "factor": 2.0,
@@ -38,8 +81,19 @@
38
  "type": "dynamic"
39
  },
40
  "rope_theta": 1000000.0,
 
41
  "sliding_window": null,
 
 
 
 
 
 
 
 
 
42
  "torch_dtype": "bfloat16",
 
43
  "use_bfloat16": true,
44
  "use_cache": false,
45
  "use_sliding_window": false,
@@ -284,7 +338,9 @@
284
  "use_llm_lora": 0,
285
  "use_thumbnail": true,
286
  "vision_config": {
 
287
  "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
 
288
  "architectures": [
289
  "InternVisionModel"
290
  ],
@@ -293,35 +349,89 @@
293
  "AutoConfig": "configuration_intern_vit.InternVisionConfig",
294
  "AutoModel": "modeling_intern_vit.InternVisionModel"
295
  },
 
 
 
296
  "capacity_factor": 1.2,
 
 
 
 
 
297
  "drop_path_rate": 0.4,
298
  "dropout": 0.0,
 
 
 
299
  "eval_capacity_factor": 1.4,
 
 
 
 
300
  "hidden_act": "gelu",
301
  "hidden_size": 3200,
 
 
 
 
302
  "image_size": 448,
303
  "initializer_factor": 0.1,
304
  "initializer_range": 1e-10,
305
  "intermediate_size": 12800,
 
 
 
 
 
 
306
  "laux_allreduce": "all_nodes",
307
  "layer_norm_eps": 1e-06,
 
 
 
308
  "model_type": "intern_vit_6b",
309
  "moe_coeff_ratio": 0.5,
310
  "moe_intermediate_size": 3200,
311
  "moe_output_scale": 4.0,
 
312
  "noisy_gate_policy": "RSample_before",
313
  "norm_type": "rms_norm",
314
  "num_attention_heads": 25,
 
 
315
  "num_channels": 3,
316
  "num_experts": 8,
317
  "num_hidden_layers": 45,
 
318
  "num_routed_experts": 4,
319
  "num_shared_experts": 4,
 
 
 
 
320
  "patch_size": 14,
 
 
 
321
  "qk_normalization": true,
322
  "qkv_bias": false,
 
 
 
 
 
323
  "shared_expert_intermediate_size": 12800,
 
 
 
 
 
 
 
 
 
324
  "torch_dtype": "bfloat16",
 
325
  "use_bfloat16": true,
326
  "use_flash_attn": true,
327
  "use_moe": false,
 
13
  "hidden_size": 8192,
14
  "image_fold": null,
15
  "llm_config": {
16
+ "_attn_implementation_autoset": true,
17
  "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
18
+ "add_cross_attention": false,
19
  "architectures": [
20
  "Qwen2ForCausalLM"
21
  ],
22
  "attention_dropout": 0.0,
23
+ "bad_words_ids": null,
24
+ "begin_suppress_tokens": null,
25
  "bos_token_id": 151643,
26
+ "chunk_size_feed_forward": 0,
27
+ "cross_attention_hidden_size": null,
28
+ "decoder_start_token_id": null,
29
+ "diversity_penalty": 0,
30
+ "do_sample": false,
31
+ "early_stopping": false,
32
+ "encoder_no_repeat_ngram_size": 0,
33
  "eos_token_id": 151643,
34
+ "exponential_decay_length_penalty": null,
35
+ "finetuning_task": null,
36
+ "forced_bos_token_id": null,
37
+ "forced_eos_token_id": null,
38
  "hidden_act": "silu",
39
  "hidden_size": 8192,
40
+ "id2label": {
41
+ "0": "LABEL_0",
42
+ "1": "LABEL_1"
43
+ },
44
  "initializer_range": 0.02,
45
  "intermediate_size": 29568,
46
+ "is_decoder": false,
47
+ "is_encoder_decoder": false,
48
+ "label2id": {
49
+ "LABEL_0": 0,
50
+ "LABEL_1": 1
51
+ },
52
+ "length_penalty": 1,
53
+ "max_length": 20,
54
  "max_position_embeddings": 32768,
55
  "max_window_layers": 70,
56
+ "min_length": 0,
57
  "model_type": "qwen2",
58
  "moe_config": null,
59
+ "no_repeat_ngram_size": 0,
60
  "num_attention_heads": 64,
61
+ "num_beam_groups": 1,
62
+ "num_beams": 1,
63
  "num_hidden_layers": 80,
64
  "num_key_value_heads": 8,
65
+ "num_return_sequences": 1,
66
+ "output_attentions": false,
67
+ "output_hidden_states": false,
68
+ "output_scores": false,
69
+ "pad_token_id": null,
70
+ "prefix": null,
71
+ "problem_type": null,
72
+ "pruned_heads": {},
73
+ "remove_invalid_values": false,
74
+ "repetition_penalty": 1,
75
+ "return_dict": true,
76
+ "return_dict_in_generate": false,
77
  "rms_norm_eps": 1e-06,
78
  "rope_scaling": {
79
  "factor": 2.0,
 
81
  "type": "dynamic"
82
  },
83
  "rope_theta": 1000000.0,
84
+ "sep_token_id": null,
85
  "sliding_window": null,
86
+ "suppress_tokens": null,
87
+ "task_specific_params": null,
88
+ "temperature": 1,
89
+ "tf_legacy_loss": false,
90
+ "tie_encoder_decoder": false,
91
+ "tie_word_embeddings": false,
92
+ "tokenizer_class": null,
93
+ "top_k": 50,
94
+ "top_p": 1,
95
  "torch_dtype": "bfloat16",
96
+ "typical_p": 1,
97
  "use_bfloat16": true,
98
  "use_cache": false,
99
  "use_sliding_window": false,
 
338
  "use_llm_lora": 0,
339
  "use_thumbnail": true,
340
  "vision_config": {
341
+ "_attn_implementation_autoset": true,
342
  "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
343
+ "add_cross_attention": false,
344
  "architectures": [
345
  "InternVisionModel"
346
  ],
 
349
  "AutoConfig": "configuration_intern_vit.InternVisionConfig",
350
  "AutoModel": "modeling_intern_vit.InternVisionModel"
351
  },
352
+ "bad_words_ids": null,
353
+ "begin_suppress_tokens": null,
354
+ "bos_token_id": null,
355
  "capacity_factor": 1.2,
356
+ "chunk_size_feed_forward": 0,
357
+ "cross_attention_hidden_size": null,
358
+ "decoder_start_token_id": null,
359
+ "diversity_penalty": 0,
360
+ "do_sample": false,
361
  "drop_path_rate": 0.4,
362
  "dropout": 0.0,
363
+ "early_stopping": false,
364
+ "encoder_no_repeat_ngram_size": 0,
365
+ "eos_token_id": null,
366
  "eval_capacity_factor": 1.4,
367
+ "exponential_decay_length_penalty": null,
368
+ "finetuning_task": null,
369
+ "forced_bos_token_id": null,
370
+ "forced_eos_token_id": null,
371
  "hidden_act": "gelu",
372
  "hidden_size": 3200,
373
+ "id2label": {
374
+ "0": "LABEL_0",
375
+ "1": "LABEL_1"
376
+ },
377
  "image_size": 448,
378
  "initializer_factor": 0.1,
379
  "initializer_range": 1e-10,
380
  "intermediate_size": 12800,
381
+ "is_decoder": false,
382
+ "is_encoder_decoder": false,
383
+ "label2id": {
384
+ "LABEL_0": 0,
385
+ "LABEL_1": 1
386
+ },
387
  "laux_allreduce": "all_nodes",
388
  "layer_norm_eps": 1e-06,
389
+ "length_penalty": 1,
390
+ "max_length": 20,
391
+ "min_length": 0,
392
  "model_type": "intern_vit_6b",
393
  "moe_coeff_ratio": 0.5,
394
  "moe_intermediate_size": 3200,
395
  "moe_output_scale": 4.0,
396
+ "no_repeat_ngram_size": 0,
397
  "noisy_gate_policy": "RSample_before",
398
  "norm_type": "rms_norm",
399
  "num_attention_heads": 25,
400
+ "num_beam_groups": 1,
401
+ "num_beams": 1,
402
  "num_channels": 3,
403
  "num_experts": 8,
404
  "num_hidden_layers": 45,
405
+ "num_return_sequences": 1,
406
  "num_routed_experts": 4,
407
  "num_shared_experts": 4,
408
+ "output_attentions": false,
409
+ "output_hidden_states": false,
410
+ "output_scores": false,
411
+ "pad_token_id": null,
412
  "patch_size": 14,
413
+ "prefix": null,
414
+ "problem_type": null,
415
+ "pruned_heads": {},
416
  "qk_normalization": true,
417
  "qkv_bias": false,
418
+ "remove_invalid_values": false,
419
+ "repetition_penalty": 1,
420
+ "return_dict": true,
421
+ "return_dict_in_generate": false,
422
+ "sep_token_id": null,
423
  "shared_expert_intermediate_size": 12800,
424
+ "suppress_tokens": null,
425
+ "task_specific_params": null,
426
+ "temperature": 1,
427
+ "tf_legacy_loss": false,
428
+ "tie_encoder_decoder": false,
429
+ "tie_word_embeddings": true,
430
+ "tokenizer_class": null,
431
+ "top_k": 50,
432
+ "top_p": 1,
433
  "torch_dtype": "bfloat16",
434
+ "typical_p": 1,
435
  "use_bfloat16": true,
436
  "use_flash_attn": true,
437
  "use_moe": false,