Spaces:

osidenna
/

test

Sleeping

App Files Files Community

Oumoukelthoum sidenna committed on Jan 1

Commit

b157648

verified ·

1 Parent(s): f412d3b

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -2

app.py CHANGED Viewed

@@ -1,4 +1,60 @@
 import streamlit as st
-x = st.slider('Select a value')
-st.write(x, 'squared is', x * x)

+# app.py
 import streamlit as st
+from unsloth import FastLanguageModel
+from transformers import TextStreamer
+# To speed up model loading in repeated queries, you can use st.cache_resource (Streamlit 1.18+).
+@st.cache_resource
+def load_unsloth_model(
+    model_name="azizsi/model2",
+    max_seq_length=4096,
+    dtype="float16",
+    load_in_4bit=False
+):
+    """
+    Loads and prepares the model for inference using FastLanguageModel from Unsloth.
+    Returns (model, tokenizer).
+    """
+    model, tokenizer = FastLanguageModel.from_pretrained(
+        model_name=model_name,
+        max_seq_length=max_seq_length,
+        dtype=dtype,
+        load_in_4bit=load_in_4bit
+    )
+    # Enable 2x faster inference (per Unsloth docs)
+    FastLanguageModel.for_inference(model)
+    return model, tokenizer
+def main():
+    st.title("Unsloth Model Demo")
+    # Provide a text input area for the user
+    user_input = st.text_area("Enter your prompt:", "")
+    # Generate button
+    if st.button("Generate"):
+        with st.spinner("Generating response..."):
+            # Load the model & tokenizer
+            model, tokenizer = load_unsloth_model()
+            # Create a TextStreamer to stream tokens or capture final text
+            streamer = TextStreamer(tokenizer)
+            # Tokenize user prompt and move to GPU (or the model's device)
+            inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
+            # Generate up to 128 new tokens (modify as desired)
+            outputs = model.generate(**inputs, streamer=streamer, max_new_tokens=128)
+            # If you want to display the entire response at once:
+            generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+            st.markdown("**Response:**")
+            st.write(generated_text)
+# Build the page only when this file is executed as the main script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()