import os

import streamlit as st
from transformers import pipeline

hf_token = os.getenv("HF_API_TOKEN")

# Load the model once per server process; st.cache_resource is the
# right cache for non-serializable objects like a pipeline.
@st.cache_resource
def load_generator():
    return pipeline(
        "text-generation",
        model="meta-llama/Meta-Llama-3.1-8B",
        token=hf_token,
    )

generator = load_generator()

# Cache generations per prompt string; st.cache_data (not
# st.cache_resource) is meant for serializable return values.
@st.cache_data
def predict(inputs: str) -> str:
    return generator(inputs, max_length=512, top_p=0.9, temperature=0.8)[0]["generated_text"]

# Emulate an "API route": read the prompt from the ?inputs= query
# parameter and write the result back as a query parameter.
# st.query_params replaces the deprecated
# st.experimental_get/set_query_params helpers.
inputs = st.query_params.get("inputs", "")
if inputs:
    st.query_params["result"] = predict(inputs)

st.title("Llama 3.1 API is Running")
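For completeness, a minimal sketch of a client-side call, assuming the app runs at Streamlit's default address http://localhost:8501 (the host, port, and prompt below are hypothetical). Note that Streamlit serves a rendered HTML page rather than JSON, so the generated text is not machine-readable from this response; a purpose-built API framework such as FastAPI would be a better fit for programmatic access.

# Hypothetical client call -- host/port assume Streamlit's defaults.
import urllib.parse
import urllib.request

prompt = "Write a haiku about GPUs."
url = "http://localhost:8501/?" + urllib.parse.urlencode({"inputs": prompt})
with urllib.request.urlopen(url) as resp:
    # Streamlit returns the app's HTML shell, not the generated text;
    # a 200 status only confirms the app received the request.
    print(resp.status)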