From 3f315be27994239a4b0a4a055e776a1afdb8e9c9 Mon Sep 17 00:00:00 2001 From: Simon Larsen Date: Tue, 18 Jun 2024 21:41:29 +0100 Subject: [PATCH] refactor: Update Llama app to use local model path instead of model ID This commit updates the Llama app to use a local model path instead of a model ID. The model path is set to "/app/Models/Meta-Llama-3-8B-Instruct". This change improves the reliability and performance of the app by directly referencing the model file instead of relying on an external model ID. --- Llama/app.py | 26 +++----------------------- Llama/requirements.txt | 2 +- docker-compose.ai.yml | 11 +++++++++++ docker-compose.base.yml | 2 ++ docker-compose.dev.yml | 2 ++ 5 files changed, 19 insertions(+), 24 deletions(-) create mode 100644 docker-compose.ai.yml diff --git a/Llama/app.py b/Llama/app.py index 882bd72a15..149d17cd21 100644 --- a/Llama/app.py +++ b/Llama/app.py @@ -1,6 +1,4 @@ -from transformers import AutoTokenizer import transformers -import torch from fastapi import FastAPI from pydantic import BaseModel @@ -9,14 +7,9 @@ from pydantic import BaseModel class Prompt(BaseModel): prompt: str -model_id = "meta-llama/Meta-Llama-3-8B-Instruct" +model_path = "/app/Models/Meta-Llama-3-8B-Instruct" -pipeline = transformers.pipeline( - "text-generation", - model=model_id, - model_kwargs={"torch_dtype": torch.bfloat16}, - device_map="auto", -) +pipe = transformers.pipeline("text-generation", model=model_path) app = FastAPI() @@ -28,23 +21,10 @@ async def create_item(prompt: Prompt): return {"error": "Prompt is required"} messages = [ - {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"}, {"role": "user", "content": "Who are you?"}, ] - terminators = [ - pipeline.tokenizer.eos_token_id, - pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>") - ] - - outputs = pipeline( - messages, - max_new_tokens=256, - eos_token_id=terminators, - do_sample=True, - temperature=0.6, - top_p=0.9, - ) + outputs = pipe(messages) output = outputs[0]["generated_text"][-1] diff --git a/Llama/requirements.txt b/Llama/requirements.txt index e303aab12f..5453440ba9 100644 --- a/Llama/requirements.txt +++ b/Llama/requirements.txt @@ -1,5 +1,5 @@ # Hugging Face Transformers -transformers==4.36.0 +transformers==4.41.2 # Rest api related stuff. fastapi===0.109.1 diff --git a/docker-compose.ai.yml b/docker-compose.ai.yml new file mode 100644 index 0000000000..5692519a30 --- /dev/null +++ b/docker-compose.ai.yml @@ -0,0 +1,11 @@ +services: + llama: + extends: + file: ./docker-compose.base.yml + service: llama + ports: + - '8547:8547' + build: + network: host + context: . + dockerfile: ./Llama/Dockerfile \ No newline at end of file diff --git a/docker-compose.base.yml b/docker-compose.base.yml index 386b5420a7..cb4c08d947 100644 --- a/docker-compose.base.yml +++ b/docker-compose.base.yml @@ -185,6 +185,8 @@ services: environment: <<: *common-server-variables PORT: 8547 + volumes: + - ./Llama/Models:/app/Models logging: driver: "local" options: diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index d6b74510e0..42af48d96e 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -17,6 +17,8 @@ services: extends: file: ./docker-compose.base.yml service: llama + ports: + - '8547:8547' build: network: host context: .