+++++ api/core/model_handler.py
import os
import logging
from typing import Dict, Any, Optional
from huggingface_hub import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
from api.core.config import settings
from api.core.utils import get_device
logger = logging.getLogger(__name__)
class ModelHandler:
_instance = None
def __new__(cls):
if cls._instance is None:
cls._instance = super(ModelHandler, cls).__new__(cls)
cls._instance._initialize()
return cls._instance
def _initialize(self):
self.models: Dict[str, Any] = {}
self.tokenizers: Dict[str, Any] = {}
self.model_configs: Dict[str, Dict[str, Any]] = {}
self.device = get_device()
self.load_models()
def load_models(self):
"""Load all models specified in the configuration."""
for model_name, model_config in settings.MODELS.items():
try:
self.load_model(model_name, model_config)
except Exception as e:
logger.error(f"Failed to load model {model_name}: {str(e)}")
continue
def load_model(self, model_name: str, model_config: Dict[str, Any]):
"""Load a single model and its tokenizer."""
if model_name in self.models:
logger.warning(f"Model {model_name} already loaded. Skipping.")
return
logger.info(f"Loading model: {model_name}")
# Download model from Hugging Face Hub if not already cached
model_path = self._download_model(model_config["repo_id"])
# Configure quantization if specified
quantization_config = None
if model_config.get("quantization", False):
quantization_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.float16,
bnb_4bit_quant_type="nf4",
bnb_4bit_use_double_quant=True
)
# Load model
model = AutoModelForCausalLM.from_pretrained(
model_path,
torch_dtype=torch.float16 if self.device.type == "cuda" else torch.float32,
quantization_config=quantization_config,
device_map="auto" if self.device.type == "cuda" else None,
trust_remote_code=True
)
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
model_path,
trust_remote_code=True
)
# Set pad token if not set
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
        # device_map="auto" already placed the model when running on CUDA,
        # so only move it manually for CPU/MPS
        if self.device.type != "cuda":
            model.to(self.device)
self.models[model_name] = model
self.tokenizers[model_name] = tokenizer
self.model_configs[model_name] = model_config
logger.info(f"Successfully loaded model: {model_name}")
def _download_model(self, repo_id: str) -> str:
"""Download model from Hugging Face Hub if not already cached."""
cache_dir = os.path.join(settings.MODEL_CACHE_DIR, repo_id.replace("/", "--"))
if not os.path.exists(cache_dir):
logger.info(f"Downloading model {repo_id} to {cache_dir}")
            snapshot_download(
                repo_id=repo_id,
                local_dir=cache_dir,
                local_dir_use_symlinks=False,
                # Prefer safetensors; skip weight formats this service never loads
                # (relax this for repos that only ship *.bin weights)
                ignore_patterns=["*.bin", "*.h5", "*.tflite"]
            )
return cache_dir
def get_model(self, model_name: str) -> Optional[Any]:
"""Get a loaded model by name."""
return self.models.get(model_name)
def get_tokenizer(self, model_name: str) -> Optional[Any]:
"""Get a loaded tokenizer by name."""
return self.tokenizers.get(model_name)
def get_model_config(self, model_name: str) -> Optional[Dict[str, Any]]:
"""Get the configuration for a specific model."""
return self.model_configs.get(model_name)
def get_available_models(self) -> list:
"""Get list of available model names."""
return list(self.models.keys())
def unload_model(self, model_name: str):
"""Unload a model to free up memory."""
if model_name in self.models:
del self.models[model_name]
del self.tokenizers[model_name]
del self.model_configs[model_name]
logger.info(f"Unloaded model: {model_name}")
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
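Note: api/core/config.py is imported by every module above but is not part of this listing. The sketch below is an assumption reconstructed only from the settings fields the code references (MODELS, MODEL_CACHE_DIR, DEFAULT_MODEL, DEFAULT_PROMPT_TEMPLATE, CORS_ORIGINS, and so on); the real file may differ, and the model name and repo_id are placeholders.
# Hypothetical sketch of api/core/config.py -- not part of the original listing
class Settings:
    PROJECT_NAME = "Translation Service"
    PROJECT_DESCRIPTION = "LLM-backed translation API"
    VERSION = "0.1.0"
    DEBUG = True
    HOST = "0.0.0.0"
    PORT = 8000
    LOG_LEVEL = "INFO"
    CORS_ORIGINS = ["*"]
    MODEL_CACHE_DIR = "/models"
    DEFAULT_MODEL = "example-model"  # placeholder name
    DEFAULT_PROMPT_TEMPLATE = "Translate the following text to {target_lang}: {text}"
    MODELS = {
        "example-model": {
            "repo_id": "org/model",  # placeholder repo
            "quantization": False,
            "prompt_template": "Translate the following text to {target_lang}: {text}",
            "supported_languages": ["en", "es", "eu"],
        }
    }
settings = Settings()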
+++++ api/core/translation_engine.py
import logging
from typing import Dict, Any, Optional
from transformers import pipeline
from api.core.model_handler import ModelHandler
from api.core.config import settings
from api.core.utils import get_device
logger = logging.getLogger(__name__)
class TranslationEngine:
def __init__(self):
self.model_handler = ModelHandler()
self.device = get_device()
self.translation_pipelines: Dict[str, Any] = {}
def get_translation_pipeline(self, model_name: str):
"""Get or create a translation pipeline for the specified model."""
if model_name not in self.translation_pipelines:
model = self.model_handler.get_model(model_name)
tokenizer = self.model_handler.get_tokenizer(model_name)
if not model or not tokenizer:
raise ValueError(f"Model {model_name} not loaded")
            # Models dispatched with device_map="auto" are already placed by
            # accelerate; passing a device to pipeline() would then conflict
            pipeline_kwargs = {}
            if getattr(model, "hf_device_map", None) is None:
                pipeline_kwargs["device"] = self.device
            self.translation_pipelines[model_name] = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                **pipeline_kwargs
            )
return self.translation_pipelines[model_name]
def translate(
self,
text: str,
target_lang: str,
model_name: str = settings.DEFAULT_MODEL,
max_length: int = 512,
temperature: float = 0.7,
top_p: float = 0.9,
**kwargs
) -> str:
"""
Translate text to the target language using the specified model.
Args:
text: Text to translate
target_lang: Target language code
model_name: Model to use for translation
max_length: Maximum length of generated text
temperature: Sampling temperature
top_p: Nucleus sampling parameter
**kwargs: Additional model-specific parameters
Returns:
Translated text
"""
        try:
            # Get the translation pipeline (named to avoid shadowing
            # transformers.pipeline, which is imported above)
            generator = self.get_translation_pipeline(model_name)
            # Get model configuration
            model_config = self.model_handler.get_model_config(model_name)
            if not model_config:
                raise ValueError(f"Model configuration not found for {model_name}")
            # Pop source_lang so it is not forwarded to the pipeline as a
            # generation parameter, then build the prompt from the template
            source_lang = kwargs.pop("source_lang", "auto")
            prompt_template = model_config.get("prompt_template", settings.DEFAULT_PROMPT_TEMPLATE)
            prompt = prompt_template.format(
                text=text,
                target_lang=target_lang,
                source_lang=source_lang
            )
            # Generate translation
            tokenizer = self.model_handler.get_tokenizer(model_name)
            result = generator(
                prompt,
                max_length=max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                num_return_sequences=1,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.pad_token_id,
                **kwargs
            )
# Extract the translated text
translated_text = result[0]["generated_text"]
# Remove the prompt from the response if it's included
if translated_text.startswith(prompt):
translated_text = translated_text[len(prompt):].strip()
return translated_text
except Exception as e:
logger.error(f"Translation failed: {str(e)}")
raise
def get_supported_languages(self, model_name: str = settings.DEFAULT_MODEL) -> list:
"""Get list of supported languages for a specific model."""
model_config = self.model_handler.get_model_config(model_name)
if not model_config:
return []
return model_config.get("supported_languages", [])
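For reference, a hypothetical usage sketch; the model name is a placeholder and must match an entry in settings.MODELS:
# Hypothetical usage -- "example-model" is assumed to be configured
engine = TranslationEngine()
translated = engine.translate("Hello, world!", target_lang="eu", model_name="example-model")
print(translated)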
+++++ api/core/utils.py
import torch
import logging
from typing import Optional
logger = logging.getLogger(__name__)
def get_device() -> torch.device:
"""
Get the appropriate device for model computation.
Returns:
torch.device: The device to use (cuda if available, otherwise cpu)
"""
if torch.cuda.is_available():
device = torch.device("cuda")
logger.info(f"Using CUDA device: {torch.cuda.get_device_name(0)}")
elif torch.backends.mps.is_available():
device = torch.device("mps")
logger.info("Using MPS device (Apple Silicon)")
else:
device = torch.device("cpu")
logger.info("Using CPU device")
return device
def validate_language_code(lang_code: str) -> bool:
"""
Validate if a language code is in the correct format (ISO 639-1).
Args:
lang_code: Language code to validate
Returns:
bool: True if valid, False otherwise
"""
if not lang_code or not isinstance(lang_code, str):
return False
# Basic check for ISO 639-1 format (2 lowercase letters)
return len(lang_code) == 2 and lang_code.isalpha() and lang_code.islower()
def format_translation_prompt(
text: str,
target_lang: str,
source_lang: Optional[str] = None,
prompt_template: Optional[str] = None
) -> str:
"""
Format the translation prompt according to the specified template.
Args:
text: Text to translate
target_lang: Target language code
source_lang: Source language code (optional)
prompt_template: Custom prompt template (optional)
Returns:
str: Formatted prompt
"""
    # The previous implementation spliced a second language-direction phrase
    # into the text slot, producing prompts like
    # "Translate the following text to eu: to eu: Hello"
    if source_lang:
        default_template = "Translate the following text from {source_lang} to {target_lang}: {text}"
    else:
        default_template = "Translate the following text to {target_lang}: {text}"
    template = prompt_template or default_template
    return template.format(
        text=text,
        target_lang=target_lang,
        source_lang=source_lang
    )
+++++ api/endpoints/health.py
from fastapi import APIRouter, Depends
from fastapi.responses import JSONResponse
from api.core.model_handler import ModelHandler
from api.core.config import settings
router = APIRouter()
@router.get("/health")
async def health_check(model_handler: ModelHandler = Depends()):
"""
Health check endpoint to verify service status and model availability.
"""
available_models = model_handler.get_available_models()
return JSONResponse(
content={
"status": "healthy",
"version": settings.VERSION,
"available_models": available_models,
"default_model": settings.DEFAULT_MODEL
}
)
+++++ api/endpoints/translate.py
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from typing import Optional
from api.core.translation_engine import TranslationEngine
from api.core.config import settings
from api.core.utils import validate_language_code
router = APIRouter()
# Reuse one engine so per-model pipelines are cached across requests instead of
# being rebuilt by a fresh TranslationEngine on every call
_translation_engine = TranslationEngine()
def get_translation_engine() -> TranslationEngine:
    return _translation_engine
class TranslationRequest(BaseModel):
text: str
target_lang: str
source_lang: Optional[str] = None
model: Optional[str] = settings.DEFAULT_MODEL
max_length: Optional[int] = 512
temperature: Optional[float] = 0.7
top_p: Optional[float] = 0.9
class TranslationResponse(BaseModel):
translated_text: str
model: str
target_lang: str
source_lang: Optional[str] = None
@router.post("/translate", response_model=TranslationResponse)
async def translate(
request: TranslationRequest,
    translation_engine: TranslationEngine = Depends(get_translation_engine)
):
"""
Translate text from source language to target language.
Args:
request: TranslationRequest containing text and language information
Returns:
TranslationResponse with translated text and metadata
"""
# Validate language codes
if not validate_language_code(request.target_lang):
raise HTTPException(
status_code=400,
detail=f"Invalid target language code: {request.target_lang}"
)
if request.source_lang and not validate_language_code(request.source_lang):
raise HTTPException(
status_code=400,
detail=f"Invalid source language code: {request.source_lang}"
)
# Check if model is available
if request.model not in translation_engine.model_handler.get_available_models():
raise HTTPException(
status_code=400,
detail=f"Model {request.model} not available. Available models: {translation_engine.model_handler.get_available_models()}"
)
# Check if target language is supported by the model
supported_languages = translation_engine.get_supported_languages(request.model)
if supported_languages and request.target_lang not in supported_languages:
raise HTTPException(
status_code=400,
detail=f"Target language {request.target_lang} not supported by model {request.model}. Supported languages: {supported_languages}"
)
try:
# Perform translation
translated_text = translation_engine.translate(
text=request.text,
target_lang=request.target_lang,
model_name=request.model,
source_lang=request.source_lang,
max_length=request.max_length,
temperature=request.temperature,
top_p=request.top_p
)
return TranslationResponse(
translated_text=translated_text,
model=request.model,
target_lang=request.target_lang,
source_lang=request.source_lang
)
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Translation failed: {str(e)}"
)
@router.get("/languages")
async def get_supported_languages(
model: Optional[str] = settings.DEFAULT_MODEL,
    translation_engine: TranslationEngine = Depends(get_translation_engine)
):
"""
Get list of supported languages for a specific model.
Args:
model: Model name to check supported languages for
Returns:
List of supported language codes
"""
if model not in translation_engine.model_handler.get_available_models():
raise HTTPException(
status_code=400,
detail=f"Model {model} not available"
)
return {
"model": model,
"supported_languages": translation_engine.get_supported_languages(model)
}
+++++ api/main.py
import logging
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from api.core.config import settings
from api.endpoints import translate, health
from api.core.model_handler import ModelHandler
# Configure logging
logging.basicConfig(
level=settings.LOG_LEVEL,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
app = FastAPI(
title=settings.PROJECT_NAME,
version=settings.VERSION,
description=settings.PROJECT_DESCRIPTION,
docs_url="/docs" if settings.DEBUG else None,
redoc_url="/redoc" if settings.DEBUG else None
)
# Set up CORS
app.add_middleware(
CORSMiddleware,
allow_origins=settings.CORS_ORIGINS,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Initialize model handler
model_handler = ModelHandler()
# Include routers
app.include_router(translate.router, prefix="/api/v1", tags=["translation"])
app.include_router(health.router, prefix="/api/v1", tags=["health"])
@app.on_event("startup")
async def startup_event():
"""Initialize resources on startup."""
logger.info("Starting up translation service")
logger.info(f"Available models: {model_handler.get_available_models()}")
logger.info(f"Default model: {settings.DEFAULT_MODEL}")
@app.on_event("shutdown")
async def shutdown_event():
"""Clean up resources on shutdown."""
logger.info("Shutting down translation service")
# Clean up any resources if needed
if __name__ == "__main__":
import uvicorn
uvicorn.run(
"api.main:app",
host=settings.HOST,
port=settings.PORT,
reload=settings.DEBUG,
log_level=settings.LOG_LEVEL.lower()
)
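Once the service is running, it can be exercised with any HTTP client. A minimal sketch using requests, assuming the service listens on localhost:8000 (settings.HOST/PORT may differ) and the /api/v1 prefix configured above:
import requests
# Assumes the service is up on localhost:8000
resp = requests.post(
    "http://localhost:8000/api/v1/translate",
    json={"text": "Hello, world!", "target_lang": "eu", "source_lang": "en"},
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["translated_text"])
# List the languages the default model supports
print(requests.get("http://localhost:8000/api/v1/languages", timeout=10).json())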
+++++ tests/test_translation_engine.py
import pytest
from unittest.mock import MagicMock, patch
from api.core.translation_engine import TranslationEngine
from api.core.config import settings
@pytest.fixture
def mock_model_handler():
mock = MagicMock()
mock.get_model.return_value = MagicMock()
mock.get_tokenizer.return_value = MagicMock()
mock.get_model_config.return_value = {
"repo_id": "test/model",
"prompt_template": "Translate to {target_lang}: {text}"
}
return mock
@pytest.fixture
def translation_engine(mock_model_handler):
with patch('api.core.translation_engine.ModelHandler', return_value=mock_model_handler):
engine = TranslationEngine()
return engine
def test_get_translation_pipeline(translation_engine, mock_model_handler):
pipeline = translation_engine.get_translation_pipeline("test_model")
assert pipeline is not None
mock_model_handler.get_model.assert_called_once_with("test_model")
mock_model_handler.get_tokenizer.assert_called_once_with("test_model")
def test_translate(translation_engine, mock_model_handler):
# Mock the pipeline
mock_pipeline = MagicMock()
mock_pipeline.return_value = [{
"generated_text": "Translate to eu: Hello\nKaixo"
}]
translation_engine.get_translation_pipeline = MagicMock(return_value=mock_pipeline)
result = translation_engine.translate("Hello", "eu")
assert result == "Kaixo"
mock_pipeline.assert_called_once()
def test_translate_with_source_lang(translation_engine, mock_model_handler):
    mock_pipeline = MagicMock()
    # The mocked prompt template has no {source_lang} placeholder, so the
    # engine still builds "Translate to eu: Hello" as the prompt; the generated
    # text must start with that prompt for the strip logic to apply
    mock_pipeline.return_value = [{
        "generated_text": "Translate to eu: Hello\nKaixo"
    }]
translation_engine.get_translation_pipeline = MagicMock(return_value=mock_pipeline)
result = translation_engine.translate("Hello", "eu", source_lang="en")
assert result == "Kaixo"
def test_get_supported_languages(translation_engine, mock_model_handler):
mock_model_handler.get_model_config.return_value = {
"supported_languages": ["en", "es", "eu"]
}
languages = translation_engine.get_supported_languages("test_model")
assert languages == ["en", "es", "eu"]
+++++ tests/test_utils.py
import pytest
import torch
from api.core.utils import get_device, validate_language_code, format_translation_prompt
def test_get_device():
device = get_device()
assert isinstance(device, torch.device)
# We can't predict the exact device, but we can check it's one of the expected types
assert device.type in ["cuda", "mps", "cpu"]
def test_validate_language_code():
    assert validate_language_code("en")
    assert not validate_language_code("EU")  # uppercase
    assert not validate_language_code("eng")  # too long
    assert not validate_language_code("e")  # too short
    assert not validate_language_code("")
    assert not validate_language_code(None)
    assert not validate_language_code(123)
def test_format_translation_prompt():
    # Test basic template
    result = format_translation_prompt("Hello", "eu")
    assert result == "Translate the following text to eu: Hello"
    # Test with source language
    result = format_translation_prompt("Hello", "eu", "en")
    assert result == "Translate the following text from en to eu: Hello"
    # Test with custom template
    custom_template = "Please translate this {text} to {target_lang}"
    result = format_translation_prompt("Hello", "eu", prompt_template=custom_template)
    assert result == "Please translate this Hello to eu"
Docker vs. Virtual Machines
This comparison explains the differences between Docker containers and virtual machines across architecture, resource usage, performance, isolation, scalability, and common use cases, to help teams decide which virtualization approach fits their modern development and infrastructure needs.
Highlights
Docker shares the host operating system's kernel for efficiency.
Virtual machines run complete operating systems.
Containers start much faster than virtual machines.
Virtual machines provide stronger isolation boundaries.
What is Docker?
A containerization platform that packages applications and their dependencies while sharing the host operating system's kernel.
Technology type: Containerization
First release: 2013
Isolation level: Process-level
OS dependency: Shares the host kernel
Typical startup time: Seconds
What are virtual machines?
A virtualization method that runs complete operating systems on virtualized hardware managed by a hypervisor.
Technology type: Hardware virtualization
First release: 1960s (modern form later)
Isolation level: Full OS isolation
OS dependency: Independent guest operating system
Typical startup time: Minutes
Comparison Table
Feature | Docker | Virtual machines
Virtualization level | Application-level | Hardware-level
Operating system | Shared kernel | Separate OS per VM
Resource usage | Lightweight | Resource-intensive
Startup speed | Very fast | Slower
Isolation strength | Moderate | Strong
Scalability | Highly scalable | Scalable, but more slowly
Deployment size | Small images | Large disk images
Typical use cases | Microservices, CI/CD | Legacy applications, isolation
Detailed Comparison
Architecture
Docker containers run on top of a single host operating system and isolate applications at the process level. Virtual machines each contain a complete guest operating system that runs on virtualized hardware provided by a hypervisor. The sketch below makes the kernel-sharing side of this concrete.
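A minimal sketch using the Docker SDK for Python (the docker package): uname -r inside a container reports the host's kernel release, because containers share the host kernel rather than booting their own; a VM would report its guest kernel instead. The Alpine image tag is just an example.
import platform
import docker  # pip install docker
client = docker.from_env()
# The container prints the *host* kernel release
container_kernel = client.containers.run("alpine:3.19", "uname -r", remove=True)
print("container kernel:", container_kernel.decode().strip())
print("host kernel:     ", platform.release())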
Performance and Efficiency
Docker containers carry minimal overhead because they share the host kernel, achieving near-native performance. Virtual machines consume more CPU, memory, and storage because each one runs its own operating system.
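The startup gap is easy to measure with the same SDK. A rough sketch; the first run is excluded because it also pays for the image pull:
import time
import docker
client = docker.from_env()
client.containers.run("alpine:3.19", "true", remove=True)  # warm-up: pulls the image
start = time.perf_counter()
client.containers.run("alpine:3.19", "true", remove=True)  # full create/run/remove cycle
print(f"container round trip: {time.perf_counter() - start:.2f}s")  # typically around a second, versus minutes to boot a VM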
Isolation and Security
Virtual machines offer stronger isolation because each VM is fully separated at the operating-system level. Docker provides adequate isolation for many workloads, but it relies on kernel-level separation, which is less strict.
Scalability and Deployment
Docker enables rapid scaling and deployment, making it well suited to dynamic environments and microservices; see the sketch below. Virtual machines scale more slowly because of longer boot times and heavier resource requirements.
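As an illustration, starting ten identical containers from one image takes seconds, whereas provisioning ten VMs would typically take minutes:
import docker
client = docker.from_env()
# detach=True returns immediately with a Container handle
workers = [
    client.containers.run("alpine:3.19", "sleep 30", detach=True)
    for _ in range(10)
]
print(f"{len(workers)} containers running")
for c in workers:
    c.stop(timeout=1)
    c.remove()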
Development and Operations
Docker simplifies development workflows by guaranteeing consistency across environments. Virtual machines are often preferred for running multiple operating systems or for safeguarding legacy applications.
Pros and Cons
Docker
Pros
+ Fast startup
+ Low overhead
+ Easy to scale
+ Consistent environments
Cons
− Weaker isolation
− Dependent on the host OS kernel
− Security depends on the host
− Limited choice of operating system
Virtual machines
Pros
+ Strong isolation
+ Multi-OS support
+ Mature security model
+ Good fit for legacy applications
Cons
− High resource usage
− Slow startup
− Larger images
− Operational overhead
Common Misconceptions
Myth
Docker completely replaces virtual machines.
Reality
Docker and virtual machines solve different problems and are often used together in modern infrastructure.
Myth
Containers are not secure.
Reality
Containers can be secure when properly configured, but they offer weaker isolation than VMs.
Myth
Virtual machines are obsolete.
Reality
Virtual machines remain essential for workloads that require strong isolation or complete operating-system environments.
Myth
Docker containers are just lightweight VMs.
Reality
Containers do not include a full operating system and, unlike virtual machines, depend on the host kernel.
Frequently Asked Questions
Is Docker faster than virtual machines?
Docker containers typically start and run faster because they avoid the overhead of booting a full operating system.
Can Docker run inside a virtual machine?
Yes, Docker often runs inside virtual machines, especially in cloud environments.
Which is more secure, Docker or virtual machines?
Virtual machines provide stronger isolation, but Docker can be secure when best practices are followed.
Do containers replace the need for hypervisors?
No, containers and hypervisors serve different purposes and often complement each other.
Which is better for microservices?
Docker is generally preferred for microservices because of its fast deployment and efficient scaling.
Can virtual machines run different operating systems?
Yes, each virtual machine can run its own operating system independently.
Are containers suitable for production?
Yes, containers are widely used in production environments across many industries.
Which consumes more resources?
Virtual machines typically use more CPU, memory, and storage than Docker containers.
Verdict
Choose Docker when you need lightweight applications, rapid scaling, and modern cloud-native architectures. Choose virtual machines when you need strong isolation, complete operating systems, or compatibility with legacy software.