refactor: Update Llama app to use local model path instead of model ID

This commit updates the Llama app to use a local model path instead of a Hugging Face Hub model ID. The model path is set to "/app/Models/Meta-Llama-3-8B-Instruct". This change improves the reliability and startup performance of the app by loading the model directly from a mounted local directory instead of resolving and downloading it from the Hub at runtime.
This commit is contained in:
Simon Larsen
2024-06-18 21:41:29 +01:00
parent 095493cec9
commit 3f315be279
5 changed files with 19 additions and 24 deletions

View File

@@ -1,6 +1,4 @@
from transformers import AutoTokenizer
import transformers
import torch
from fastapi import FastAPI
from pydantic import BaseModel
@@ -9,14 +7,9 @@ from pydantic import BaseModel
class Prompt(BaseModel):
prompt: str
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
model_path = "/app/Models/Meta-Llama-3-8B-Instruct"
pipeline = transformers.pipeline(
"text-generation",
model=model_id,
model_kwargs={"torch_dtype": torch.bfloat16},
device_map="auto",
)
pipe = transformers.pipeline("text-generation", model=model_path)
app = FastAPI()
@@ -28,23 +21,10 @@ async def create_item(prompt: Prompt):
return {"error": "Prompt is required"}
messages = [
{"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
{"role": "user", "content": "Who are you?"},
]
terminators = [
pipeline.tokenizer.eos_token_id,
pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
]
outputs = pipeline(
messages,
max_new_tokens=256,
eos_token_id=terminators,
do_sample=True,
temperature=0.6,
top_p=0.9,
)
outputs = pipe(messages)
output = outputs[0]["generated_text"][-1]

View File

@@ -1,5 +1,5 @@
# Hugging Face Transformers
transformers==4.36.0
transformers==4.41.2
# Rest api related stuff.
fastapi==0.109.1

11
docker-compose.ai.yml Normal file
View File

@@ -0,0 +1,11 @@
services:
llama:
extends:
file: ./docker-compose.base.yml
service: llama
ports:
- '8547:8547'
build:
network: host
context: .
dockerfile: ./Llama/Dockerfile

View File

@@ -185,6 +185,8 @@ services:
environment:
<<: *common-server-variables
PORT: 8547
volumes:
- ./Llama/Models:/app/Models
logging:
driver: "local"
options:

View File

@@ -17,6 +17,8 @@ services:
extends:
file: ./docker-compose.base.yml
service: llama
ports:
- '8547:8547'
build:
network: host
context: .