refactor: Update Llama app to use local model path instead of model ID

This commit updates the Llama app to use a local model path instead of a Hugging Face Hub model ID. The model path is set to "/app/Models/Meta-Llama-3-8B-Instruct". This change improves the reliability and startup performance of the app by loading the model directly from a mounted local directory instead of resolving and downloading it from the Hub at runtime.
This commit is contained in:
Simon Larsen
2024-06-18 21:41:29 +01:00
parent 095493cec9
commit 3f315be279
5 changed files with 19 additions and 24 deletions

View File

@@ -1,6 +1,4 @@
from transformers import AutoTokenizer
import transformers
import torch
from fastapi import FastAPI
from pydantic import BaseModel
@@ -9,14 +7,9 @@ from pydantic import BaseModel
class Prompt(BaseModel):
prompt: str
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
model_path = "/app/Models/Meta-Llama-3-8B-Instruct"
pipeline = transformers.pipeline(
"text-generation",
model=model_id,
model_kwargs={"torch_dtype": torch.bfloat16},
device_map="auto",
)
pipe = transformers.pipeline("text-generation", model=model_path)
app = FastAPI()
@@ -28,23 +21,10 @@ async def create_item(prompt: Prompt):
return {"error": "Prompt is required"}
messages = [
{"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
{"role": "user", "content": "Who are you?"},
]
terminators = [
pipeline.tokenizer.eos_token_id,
pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
]
outputs = pipeline(
messages,
max_new_tokens=256,
eos_token_id=terminators,
do_sample=True,
temperature=0.6,
top_p=0.9,
)
outputs = pipe(messages)
output = outputs[0]["generated_text"][-1]

View File

@@ -1,5 +1,5 @@
# Hugging Face Transformers
transformers==4.36.0
transformers==4.41.2
# Rest api related stuff.
fastapi==0.109.1

11
docker-compose.ai.yml Normal file
View File

@@ -0,0 +1,11 @@
services:
llama:
extends:
file: ./docker-compose.base.yml
service: llama
ports:
- '8547:8547'
build:
network: host
context: .
dockerfile: ./Llama/Dockerfile

View File

@@ -185,6 +185,8 @@ services:
environment:
<<: *common-server-variables
PORT: 8547
volumes:
- ./Llama/Models:/app/Models
logging:
driver: "local"
options:

View File

@@ -17,6 +17,8 @@ services:
extends:
file: ./docker-compose.base.yml
service: llama
ports:
- '8547:8547'
build:
network: host
context: .