import os
import subprocess

# Install flash-attn at runtime. FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE tells the
# package's setup to skip compiling the CUDA kernels (a prebuilt binary is used
# instead), which is necessary on ZeroGPU Spaces where CUDA is unavailable at
# build time. os.environ is merged in so pip keeps PATH and friends.
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
)

import gradio as gr
import torch
import torchvision  # noqa: F401 -- needed by some trust_remote_code models
import einops  # noqa: F401 -- needed by some trust_remote_code models
import spaces
from transformers import AutoModelForCausalLM
from huggingface_hub import login
# Authenticate with the Hub so gated checkpoints (e.g. Llama 3, Gemma) can be
# downloaded; HF_TOKEN is expected to be set as a Space secret.
hf_token = os.getenv("HF_TOKEN")
login(token=hf_token, add_to_git_credential=True)
@spaces.GPU
def get_model_summary(model_name):
    # Check if CUDA is available and set the device accordingly
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the model (trust_remote_code allows custom architectures) and move
    # it to the selected device
    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).to(device)

    # Return the model's architecture as a string
    return str(model)
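# Hypothetical usage sketch (not part of the original Space): the function can
# also be called directly to print a model's module tree, assuming the
# checkpoint fits in memory. Outside a ZeroGPU environment the @spaces.GPU
# decorator is effectively a no-op, so the same code runs on a local GPU or CPU:
#
#     print(get_model_summary("vikhyatk/moondream2"))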
# Create the Gradio interface
interface = gr.Interface(
    fn=get_model_summary,
    inputs="text",
    outputs="text",
    examples=[
        ["google/gemma-7b"],
        ["microsoft/Phi-3-mini-4k-instruct"],
        ["meta-llama/Meta-Llama-3-8B"],
        ["mistralai/Mistral-7B-Instruct-v0.3"],
        ["vikhyatk/moondream2"],
        ["microsoft/Phi-3-vision-128k-instruct"],
        ["openbmb/MiniCPM-Llama3-V-2_5"],
        ["google/paligemma-3b-mix-224"],
        ["HuggingFaceM4/idefics2-8b-chatty"],
        ["mistralai/Codestral-22B-v0.1"],
    ],
)

interface.launch()
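# Hedged variant (an assumption, not in the original app): large checkpoints
# can exceed ZeroGPU's default GPU allocation window, in which case the
# decorator accepts a longer duration in seconds, e.g.:
#
#     @spaces.GPU(duration=120)
#     def get_model_summary(model_name):
#         ...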