chunhuizng committed on
Commit
66fa2b5
·
verified ·
1 Parent(s): 53eaeb3

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2_5OmniThinkerForConditionalGeneration"
4
+ ],
5
+ "audio_config": {
6
+ "_attn_implementation_autoset": true,
7
+ "activation_dropout": 0.0,
8
+ "activation_function": "gelu",
9
+ "attention_dropout": 0.0,
10
+ "d_model": 1280,
11
+ "dropout": 0.0,
12
+ "encoder_attention_heads": 20,
13
+ "encoder_ffn_dim": 5120,
14
+ "encoder_layerdrop": 0.0,
15
+ "encoder_layers": 32,
16
+ "init_std": 0.02,
17
+ "max_source_positions": 1500,
18
+ "model_type": "qwen2_5_omni_audio_encoder",
19
+ "n_window": 100,
20
+ "num_hidden_layers": 32,
21
+ "num_mel_bins": 128,
22
+ "output_dim": 3584,
23
+ "scale_embedding": false
24
+ },
25
+ "audio_end_token_id": 151648,
26
+ "audio_start_token_id": 151647,
27
+ "audio_token_index": 151646,
28
+ "bos_token_id": 151644,
29
+ "eos_token_id": 151645,
30
+ "ignore_index": -100,
31
+ "image_token_index": 151655,
32
+ "init_std": 0.02,
33
+ "model_type": "qwen2_5_omni_thinker",
34
+ "pad_token_id": 151643,
35
+ "position_id_per_seconds": 25,
36
+ "seconds_per_chunk": 2,
37
+ "text_config": {
38
+ "attention_dropout": 0.0,
39
+ "hidden_act": "silu",
40
+ "hidden_size": 3584,
41
+ "init_std": 0.02,
42
+ "intermediate_size": 18944,
43
+ "max_position_embeddings": 32768,
44
+ "max_window_layers": 28,
45
+ "model_type": "qwen2_5_omni_text",
46
+ "num_attention_heads": 28,
47
+ "num_hidden_layers": 28,
48
+ "num_key_value_heads": 4,
49
+ "rms_norm_eps": 1e-06,
50
+ "rope_scaling": {
51
+ "mrope_section": [
52
+ 16,
53
+ 24,
54
+ 24
55
+ ],
56
+ "rope_type": "default",
57
+ "type": "default"
58
+ },
59
+ "rope_theta": 1000000.0,
60
+ "sliding_window": 32768,
61
+ "tie_word_embeddings": false,
62
+ "use_cache": true,
63
+ "use_sliding_window": false,
64
+ "vocab_size": 152064
65
+ },
66
+ "tie_word_embeddings": false,
67
+ "torch_dtype": "bfloat16",
68
+ "transformers_version": "4.50.0.dev0",
69
+ "user_token_id": 872,
70
+ "video_token_index": 151656,
71
+ "vision_config": {
72
+ "_attn_implementation_autoset": true,
73
+ "depth": 32,
74
+ "embed_dim": 1280,
75
+ "fullatt_block_indexes": [
76
+ 7,
77
+ 15,
78
+ 23,
79
+ 31
80
+ ],
81
+ "hidden_act": "silu",
82
+ "hidden_size": 1280,
83
+ "in_channels": 3,
84
+ "in_chans": 3,
85
+ "init_std": 0.02,
86
+ "intermediate_size": 3420,
87
+ "model_type": "qwen2_5_omni_vision_encoder",
88
+ "num_heads": 16,
89
+ "out_hidden_size": 3584,
90
+ "patch_size": 14,
91
+ "spatial_merge_size": 2,
92
+ "spatial_patch_size": 14,
93
+ "temporal_patch_size": 2,
94
+ "tokens_per_second": 25,
95
+ "window_size": 112
96
+ },
97
+ "vision_end_token_id": 151653,
98
+ "vision_start_token_id": 151652,
99
+ "vision_token_id": 151654
100
+ }
generation_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "transformers_version": "4.50.0.dev0"
4
+ }
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6b4b07b81813480912347e72f42e66c85c21730af8cb4a15e231011be4bd1b8
3
+ size 4985047208
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb2c14b80bbc3e2ebd75873b29b92136ed44310643a9a0de10c46b5886c4c89e
3
+ size 4991495784
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bfc0ec435bca3f1b87b727b6dc30c318adf84f8adef246aa271cf5765464a05
3
+ size 4991495888
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8f52529980dcccaf409baf4e1efb8eab733fe6dd98669067ca4d8aadb04db2b
3
+ size 2895739720
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff