optimum-neuron-cache/neuronxcc-2.15.128.0+56dc5a86/0_REGISTRY/0.0.25.dev0/training/llama/meta-llama/Llama-3.2-1B/09f3cdf1e695f554bbfb.json
{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "head_dim": 64, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "max_position_embeddings": 131072, "mlp_bias": false, "model_type": "llama", "neuron": {"compiler_version": "2.15.128.0+56dc5a86", "input_specs": {"chosen": ["The Harry Potter series, written by J.K. Rowling, follows the life of a young wizard, Harry Potter, and his friends Hermione Granger and Ron Weasley, all of whom are students at Hogwarts School of Witchcraft and Wizardry. This beloved book series consists of seven books, beginning with Harry's acceptance into Hogwarts and culminating with his final battle against the dark wizard, Lord Voldemort."], "chosen_attention_mask": [1, 640], "chosen_input_ids": [1, 640], "chosen_labels": [1, 640], "prompt": ["What popular children's book series features a young orphaned wizard attending a magical school called Hogwarts?"], "prompt_attention_mask": [1, 640], "prompt_input_ids": [1, 640], "question": ["What popular children's book series features a young orphaned wizard attending a magical school called Hogwarts?"], "rejected": ["The Harry Potter series, written by J.K. Rowling, follows the life of a young wizard, Harry Potter, and his friends Hermione Granger and Ron Weasley, all of whom are students at Hogwarts School of Witchcraft and Wizardry. This beloved book series consists of seven books, beginning with Harry's acceptance into Hogwarts and culminating with his final battle against the dark wizard, Lord Voldemort."], "rejected_attention_mask": [1, 640], "rejected_input_ids": [1, 640], "rejected_labels": [1, 640], "source": ["Airoboros"], "system": [null]}, "model_class": "PeftModelForCausalLM", "num_neuron_cores_per_node": 2, "pipeline_parallel_size": 1, "precision": "bfloat16", "tensor_parallel_size": 1, "training": true}, "num_attention_heads": 32, "num_hidden_layers": 16, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": {"factor": 32.0, "high_freq_factor": 4.0, "low_freq_factor": 1.0, "original_max_position_embeddings": 8192, "rope_type": "llama3"}, "rope_theta": 500000.0, "tie_word_embeddings": true, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} |