devopsdocker
+++++ api/core/model_handler.py
import os
import logging
from typing import Any, Dict, List, Optional

import torch
from huggingface_hub import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

from api.core.config import settings
from api.core.utils import get_device

logger = logging.getLogger(__name__)


class ModelHandler:
    _instance = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialize()
        return cls._instance

    def _initialize(self):
        self.models: Dict[str, Any] = {}
        self.tokenizers: Dict[str, Any] = {}
        self.model_configs: Dict[str, Dict[str, Any]] = {}
        self.device = get_device()
        self.load_models()

    def load_models(self):
        """Load all models specified in the configuration."""
        for model_name, model_config in settings.MODELS.items():
            try:
                self.load_model(model_name, model_config)
            except Exception as e:
                logger.error(f"Failed to load model {model_name}: {str(e)}")
                continue

    def load_model(self, model_name: str, model_config: Dict[str, Any]):
        """Load a single model and its tokenizer."""
        if model_name in self.models:
            logger.warning(f"Model {model_name} already loaded. Skipping.")
            return
        logger.info(f"Loading model: {model_name}")
        # Download the model from the Hugging Face Hub if not already cached
        model_path = self._download_model(model_config["repo_id"])
        # Configure 4-bit quantization if requested; bitsandbytes requires CUDA
        quantization_config = None
        if model_config.get("quantization", False) and self.device.type == "cuda":
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_use_double_quant=True
            )
        # Load the model
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.float16 if self.device.type == "cuda" else torch.float32,
            quantization_config=quantization_config,
            device_map="auto" if self.device.type == "cuda" else None,
            trust_remote_code=True
        )
        # Load the tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            model_path,
            trust_remote_code=True
        )
        # Set a pad token if the tokenizer does not define one
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        # Move the model to the target device; on CUDA, device_map="auto" has
        # already placed it, and calling .to() on a dispatched model would fail
        if self.device.type != "cuda":
            model.to(self.device)
        self.models[model_name] = model
        self.tokenizers[model_name] = tokenizer
        self.model_configs[model_name] = model_config
        logger.info(f"Successfully loaded model: {model_name}")

    def _download_model(self, repo_id: str) -> str:
        """Download the model from the Hugging Face Hub if not already cached."""
        cache_dir = os.path.join(settings.MODEL_CACHE_DIR, repo_id.replace("/", "--"))
        if not os.path.exists(cache_dir):
            logger.info(f"Downloading model {repo_id} to {cache_dir}")
            snapshot_download(
                repo_id=repo_id,
                local_dir=cache_dir,
                ignore_patterns=["*.h5", "*.tflite", "*.msgpack"]  # skip non-PyTorch weight formats
            )
        return cache_dir

    def get_model(self, model_name: str) -> Optional[Any]:
        """Get a loaded model by name."""
        return self.models.get(model_name)

    def get_tokenizer(self, model_name: str) -> Optional[Any]:
        """Get a loaded tokenizer by name."""
        return self.tokenizers.get(model_name)

    def get_model_config(self, model_name: str) -> Optional[Dict[str, Any]]:
        """Get the configuration for a specific model."""
        return self.model_configs.get(model_name)

    def get_available_models(self) -> List[str]:
        """Get the list of loaded model names."""
        return list(self.models.keys())

    def unload_model(self, model_name: str):
        """Unload a model to free up memory."""
        if model_name in self.models:
            del self.models[model_name]
            del self.tokenizers[model_name]
            del self.model_configs[model_name]
            logger.info(f"Unloaded model: {model_name}")
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
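The settings object is imported throughout but api/core/config.py is not included above. The following sketch is illustrative only: the field names (MODELS, DEFAULT_MODEL, MODEL_CACHE_DIR, and so on) are inferred from how the code accesses them, it assumes pydantic v2's pydantic-settings package, and every concrete value (repo_id, host, port, languages) is a placeholder rather than the original configuration.
+++++ examples/config_sketch.py
# Illustrative only: a pydantic-settings sketch of the fields the code
# above requires from api.core.config. All concrete values are assumptions.
from typing import Any, Dict, List

from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    PROJECT_NAME: str = "translation-service"
    PROJECT_DESCRIPTION: str = "LLM-backed translation API"
    VERSION: str = "0.1.0"
    DEBUG: bool = True
    HOST: str = "0.0.0.0"
    PORT: int = 8000
    LOG_LEVEL: str = "INFO"
    CORS_ORIGINS: List[str] = ["*"]
    MODEL_CACHE_DIR: str = "/models"
    DEFAULT_MODEL: str = "default"
    DEFAULT_PROMPT_TEMPLATE: str = "Translate the following text to {target_lang}: {text}"
    # One entry per servable model; the repo_id is a placeholder
    MODELS: Dict[str, Dict[str, Any]] = {
        "default": {
            "repo_id": "org/placeholder-model",
            "quantization": False,
            "prompt_template": "Translate the following text to {target_lang}: {text}",
            "supported_languages": ["en", "es", "eu"],
        }
    }


settings = Settings()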
+++++ api/core/translation_engine.py
import logging
from typing import Any, Dict, List

from transformers import pipeline

from api.core.config import settings
from api.core.model_handler import ModelHandler
from api.core.utils import get_device

logger = logging.getLogger(__name__)


class TranslationEngine:
    def __init__(self):
        self.model_handler = ModelHandler()
        self.device = get_device()
        self.translation_pipelines: Dict[str, Any] = {}

    def get_translation_pipeline(self, model_name: str):
        """Get or create a text-generation pipeline for the specified model."""
        if model_name not in self.translation_pipelines:
            model = self.model_handler.get_model(model_name)
            tokenizer = self.model_handler.get_tokenizer(model_name)
            if not model or not tokenizer:
                raise ValueError(f"Model {model_name} not loaded")
            # Models dispatched with device_map="auto" are already placed;
            # passing a device for them would conflict with accelerate
            pipeline_device = None if getattr(model, "hf_device_map", None) else self.device
            self.translation_pipelines[model_name] = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                device=pipeline_device
            )
        return self.translation_pipelines[model_name]

    def translate(
        self,
        text: str,
        target_lang: str,
        model_name: str = settings.DEFAULT_MODEL,
        max_length: int = 512,
        temperature: float = 0.7,
        top_p: float = 0.9,
        **kwargs
    ) -> str:
        """
        Translate text to the target language using the specified model.

        Args:
            text: Text to translate
            target_lang: Target language code
            model_name: Model to use for translation
            max_length: Maximum length of the generated text
            temperature: Sampling temperature
            top_p: Nucleus sampling parameter
            **kwargs: Additional model-specific parameters

        Returns:
            Translated text
        """
        try:
            # Get the translation pipeline (named to avoid shadowing transformers.pipeline)
            generator = self.get_translation_pipeline(model_name)
            # Get the model configuration
            model_config = self.model_handler.get_model_config(model_name)
            if not model_config:
                raise ValueError(f"Model configuration not found for {model_name}")
            # Prepare the prompt; pop source_lang so it is not forwarded
            # to the pipeline as an unknown generation parameter
            source_lang = kwargs.pop("source_lang", "auto")
            prompt_template = model_config.get("prompt_template", settings.DEFAULT_PROMPT_TEMPLATE)
            prompt = prompt_template.format(
                text=text,
                target_lang=target_lang,
                source_lang=source_lang
            )
            # Generate the translation
            tokenizer = self.model_handler.get_tokenizer(model_name)
            result = generator(
                prompt,
                max_length=max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                num_return_sequences=1,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.pad_token_id,
                **kwargs
            )
            # Extract the translated text
            translated_text = result[0]["generated_text"]
            # Remove the prompt from the response if it is included
            if translated_text.startswith(prompt):
                translated_text = translated_text[len(prompt):].strip()
            return translated_text
        except Exception as e:
            logger.error(f"Translation failed: {str(e)}")
            raise

    def get_supported_languages(self, model_name: str = settings.DEFAULT_MODEL) -> List[str]:
        """Get the list of supported languages for a specific model."""
        model_config = self.model_handler.get_model_config(model_name)
        if not model_config:
            return []
        return model_config.get("supported_languages", [])
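A minimal sketch of driving the engine directly, outside FastAPI. It assumes at least one model from settings.MODELS loads successfully on your hardware; the language codes and sample text are illustrative.
+++++ examples/engine_demo.py
# Illustrative only: use TranslationEngine without the HTTP layer.
# Assumes api.core.config defines at least one loadable model.
from api.core.translation_engine import TranslationEngine

engine = TranslationEngine()
translated = engine.translate(
    text="Hello, how are you?",
    target_lang="eu",   # Basque, per ISO 639-1
    source_lang="en",
    max_length=128,
    temperature=0.3     # lower temperature for more literal output
)
print(translated)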
+++++ api/core/utils.py
import logging
from typing import Optional

import torch

logger = logging.getLogger(__name__)


def get_device() -> torch.device:
    """
    Get the appropriate device for model computation.

    Returns:
        torch.device: The device to use (CUDA if available, then MPS, otherwise CPU)
    """
    if torch.cuda.is_available():
        device = torch.device("cuda")
        logger.info(f"Using CUDA device: {torch.cuda.get_device_name(0)}")
    elif torch.backends.mps.is_available():
        device = torch.device("mps")
        logger.info("Using MPS device (Apple Silicon)")
    else:
        device = torch.device("cpu")
        logger.info("Using CPU device")
    return device


def validate_language_code(lang_code: str) -> bool:
    """
    Validate that a language code is in ISO 639-1 format.

    Args:
        lang_code: Language code to validate

    Returns:
        bool: True if valid, False otherwise
    """
    if not lang_code or not isinstance(lang_code, str):
        return False
    # Basic check for ISO 639-1 format (two lowercase letters)
    return len(lang_code) == 2 and lang_code.isalpha() and lang_code.islower()


def format_translation_prompt(
    text: str,
    target_lang: str,
    source_lang: Optional[str] = None,
    prompt_template: Optional[str] = None
) -> str:
    """
    Format the translation prompt according to the specified template.

    Args:
        text: Text to translate
        target_lang: Target language code
        source_lang: Source language code (optional)
        prompt_template: Custom prompt template (optional)

    Returns:
        str: Formatted prompt
    """
    if not prompt_template:
        prompt_template = "Translate the following text to {target_lang}: {text}"
    # Prepend the source language to the text placeholder when one is given,
    # without repeating the target language the template already mentions
    if source_lang:
        prompt_template = prompt_template.replace("{text}", "from {source_lang}: {text}")
    return prompt_template.format(
        text=text,
        target_lang=target_lang,
        source_lang=source_lang
    )
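A quick sketch of the helpers in action; the printed outputs follow directly from the implementations above and mirror the assertions in tests/test_utils.py.
+++++ examples/utils_demo.py
# Illustrative only: exercise the helpers from api/core/utils.py.
from api.core.utils import format_translation_prompt, validate_language_code

assert validate_language_code("eu") is True
assert validate_language_code("EU") is False  # must be two lowercase letters

print(format_translation_prompt("Hello", "eu"))
# Translate the following text to eu: Hello
print(format_translation_prompt("Hello", "eu", source_lang="en"))
# Translate the following text to eu: from en: Hello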
+++++ api/endpoints/health.py
from fastapi import APIRouter, Depends
from fastapi.responses import JSONResponse

from api.core.config import settings
from api.core.model_handler import ModelHandler

router = APIRouter()


@router.get("/health")
async def health_check(model_handler: ModelHandler = Depends()):
    """
    Health check endpoint to verify service status and model availability.
    """
    available_models = model_handler.get_available_models()
    return JSONResponse(
        content={
            "status": "healthy",
            "version": settings.VERSION,
            "available_models": available_models,
            "default_model": settings.DEFAULT_MODEL
        }
    )
+++++ api/endpoints/translate.py
from typing import Optional

from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel

from api.core.config import settings
from api.core.translation_engine import TranslationEngine
from api.core.utils import validate_language_code

router = APIRouter()


class TranslationRequest(BaseModel):
    text: str
    target_lang: str
    source_lang: Optional[str] = None
    model: Optional[str] = settings.DEFAULT_MODEL
    max_length: Optional[int] = 512
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 0.9


class TranslationResponse(BaseModel):
    translated_text: str
    model: str
    target_lang: str
    source_lang: Optional[str] = None


@router.post("/translate", response_model=TranslationResponse)
async def translate(
    request: TranslationRequest,
    translation_engine: TranslationEngine = Depends()
):
    """
    Translate text from the source language to the target language.

    Args:
        request: TranslationRequest containing the text and language information

    Returns:
        TranslationResponse with the translated text and metadata
    """
    # Validate the language codes
    if not validate_language_code(request.target_lang):
        raise HTTPException(
            status_code=400,
            detail=f"Invalid target language code: {request.target_lang}"
        )
    if request.source_lang and not validate_language_code(request.source_lang):
        raise HTTPException(
            status_code=400,
            detail=f"Invalid source language code: {request.source_lang}"
        )
    # Check that the model is available
    available_models = translation_engine.model_handler.get_available_models()
    if request.model not in available_models:
        raise HTTPException(
            status_code=400,
            detail=f"Model {request.model} not available. Available models: {available_models}"
        )
    # Check that the target language is supported by the model
    supported_languages = translation_engine.get_supported_languages(request.model)
    if supported_languages and request.target_lang not in supported_languages:
        raise HTTPException(
            status_code=400,
            detail=(
                f"Target language {request.target_lang} not supported by model {request.model}. "
                f"Supported languages: {supported_languages}"
            )
        )
    try:
        # Perform the translation
        translated_text = translation_engine.translate(
            text=request.text,
            target_lang=request.target_lang,
            model_name=request.model,
            source_lang=request.source_lang,
            max_length=request.max_length,
            temperature=request.temperature,
            top_p=request.top_p
        )
        return TranslationResponse(
            translated_text=translated_text,
            model=request.model,
            target_lang=request.target_lang,
            source_lang=request.source_lang
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Translation failed: {str(e)}"
        )


@router.get("/languages")
async def get_supported_languages(
    model: Optional[str] = settings.DEFAULT_MODEL,
    translation_engine: TranslationEngine = Depends()
):
    """
    Get the list of supported languages for a specific model.

    Args:
        model: Model name to check supported languages for

    Returns:
        The model name and its supported language codes
    """
    if model not in translation_engine.model_handler.get_available_models():
        raise HTTPException(
            status_code=400,
            detail=f"Model {model} not available"
        )
    return {
        "model": model,
        "supported_languages": translation_engine.get_supported_languages(model)
    }
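A sketch of calling both endpoints once the service is running. The base URL assumes local defaults and the /api/v1 prefix wired up in api/main.py; adjust the host and port to your settings.
+++++ examples/client_demo.py
# Illustrative only: call the running service over HTTP with httpx.
import httpx

BASE_URL = "http://localhost:8000/api/v1"  # assumed local defaults

# List the languages the default model supports
languages = httpx.get(f"{BASE_URL}/languages").json()
print(languages)

# Request a translation
response = httpx.post(
    f"{BASE_URL}/translate",
    json={"text": "Hello, how are you?", "target_lang": "eu", "source_lang": "en"},
    timeout=120.0,  # generation can be slow on CPU
)
response.raise_for_status()
print(response.json()["translated_text"])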
+++++ api/main.py
import logging

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from api.core.config import settings
from api.core.model_handler import ModelHandler
from api.endpoints import health, translate

# Configure logging
logging.basicConfig(
    level=settings.LOG_LEVEL,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

app = FastAPI(
    title=settings.PROJECT_NAME,
    version=settings.VERSION,
    description=settings.PROJECT_DESCRIPTION,
    docs_url="/docs" if settings.DEBUG else None,
    redoc_url="/redoc" if settings.DEBUG else None
)

# Set up CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize the model handler (loads all configured models)
model_handler = ModelHandler()

# Include the routers
app.include_router(translate.router, prefix="/api/v1", tags=["translation"])
app.include_router(health.router, prefix="/api/v1", tags=["health"])


@app.on_event("startup")
async def startup_event():
    """Initialize resources on startup."""
    logger.info("Starting up translation service")
    logger.info(f"Available models: {model_handler.get_available_models()}")
    logger.info(f"Default model: {settings.DEFAULT_MODEL}")


@app.on_event("shutdown")
async def shutdown_event():
    """Clean up resources on shutdown."""
    logger.info("Shutting down translation service")
    # Clean up any resources if needed


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        "api.main:app",
        host=settings.HOST,
        port=settings.PORT,
        reload=settings.DEBUG,
        log_level=settings.LOG_LEVEL.lower()
    )
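Recent FastAPI versions deprecate @app.on_event in favor of a lifespan context manager. The original file above is kept as written since on_event still works; the following is a separate sketch of the equivalent lifespan wiring, with app.state usage as an assumption rather than part of the original service.
+++++ examples/lifespan_sketch.py
# Illustrative only: the lifespan-based equivalent of the startup/shutdown
# hooks in api/main.py, for FastAPI versions where @app.on_event is deprecated.
from contextlib import asynccontextmanager

from fastapi import FastAPI

from api.core.config import settings
from api.core.model_handler import ModelHandler


@asynccontextmanager
async def lifespan(app: FastAPI):
    model_handler = ModelHandler()        # loads all configured models once
    app.state.model_handler = model_handler
    yield                                 # the application serves requests here
    # Place any teardown (e.g. unloading models) after the yield


app = FastAPI(title=settings.PROJECT_NAME, lifespan=lifespan)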
+++++ tests/test_translation_engine.py
import pytest
from unittest.mock import MagicMock, patch

from api.core.translation_engine import TranslationEngine


@pytest.fixture
def mock_model_handler():
    mock = MagicMock()
    mock.get_model.return_value = MagicMock()
    mock.get_tokenizer.return_value = MagicMock()
    mock.get_model_config.return_value = {
        "repo_id": "test/model",
        "prompt_template": "Translate to {target_lang}: {text}"
    }
    return mock


@pytest.fixture
def translation_engine(mock_model_handler):
    with patch('api.core.translation_engine.ModelHandler', return_value=mock_model_handler):
        engine = TranslationEngine()
    return engine


def test_get_translation_pipeline(translation_engine, mock_model_handler):
    # Patch the transformers pipeline factory: it cannot build a real
    # pipeline from MagicMock model and tokenizer objects
    with patch('api.core.translation_engine.pipeline', return_value=MagicMock()):
        result = translation_engine.get_translation_pipeline("test_model")
    assert result is not None
    mock_model_handler.get_model.assert_called_once_with("test_model")
    mock_model_handler.get_tokenizer.assert_called_once_with("test_model")


def test_translate(translation_engine, mock_model_handler):
    # Mock the pipeline
    mock_pipeline = MagicMock()
    mock_pipeline.return_value = [{
        "generated_text": "Translate to eu: Hello\nKaixo"
    }]
    translation_engine.get_translation_pipeline = MagicMock(return_value=mock_pipeline)
    result = translation_engine.translate("Hello", "eu")
    assert result == "Kaixo"
    mock_pipeline.assert_called_once()


def test_translate_with_source_lang(translation_engine, mock_model_handler):
    # Use a template with a source-language slot so the formatted prompt
    # matches the mocked output and gets stripped from it
    mock_model_handler.get_model_config.return_value = {
        "repo_id": "test/model",
        "prompt_template": "Translate from {source_lang} to {target_lang}: {text}"
    }
    mock_pipeline = MagicMock()
    mock_pipeline.return_value = [{
        "generated_text": "Translate from en to eu: Hello\nKaixo"
    }]
    translation_engine.get_translation_pipeline = MagicMock(return_value=mock_pipeline)
    result = translation_engine.translate("Hello", "eu", source_lang="en")
    assert result == "Kaixo"


def test_get_supported_languages(translation_engine, mock_model_handler):
    mock_model_handler.get_model_config.return_value = {
        "supported_languages": ["en", "es", "eu"]
    }
    languages = translation_engine.get_supported_languages("test_model")
    assert languages == ["en", "es", "eu"]
+++++ tests/test_utils.py
import pytest
import torch

from api.core.utils import format_translation_prompt, get_device, validate_language_code


def test_get_device():
    device = get_device()
    assert isinstance(device, torch.device)
    # We cannot predict the exact device, but it must be one of the expected types
    assert device.type in ["cuda", "mps", "cpu"]


def test_validate_language_code():
    assert validate_language_code("en") is True
    assert validate_language_code("EU") is False   # uppercase
    assert validate_language_code("eng") is False  # too long
    assert validate_language_code("e") is False    # too short
    assert validate_language_code("") is False
    assert validate_language_code(None) is False
    assert validate_language_code(123) is False


def test_format_translation_prompt():
    # Test the default template
    result = format_translation_prompt("Hello", "eu")
    assert result == "Translate the following text to eu: Hello"
    # Test with a source language
    result = format_translation_prompt("Hello", "eu", "en")
    assert result == "Translate the following text to eu: from en: Hello"
    # Test with a custom template
    custom_template = "Please translate this {text} to {target_lang}"
    result = format_translation_prompt("Hello", "eu", prompt_template=custom_template)
    assert result == "Please translate this Hello to eu"
Docker vs Virtual Machines
Explains the differences between Docker containers and virtual machines by examining architecture, resource usage, performance, isolation, scalability, and common use cases, helping teams decide which virtualization approach fits their modern development and infrastructure needs.