· Java Machine Learning · 15 min read
Java Model Deployment - Production ML Systems
This tutorial covers comprehensive model deployment strategies using SuperML Java 2.1.0. You’ll learn how to deploy machine learning models to production environments with containerization, cloud platforms, monitoring, and auto-scaling capabilities.
What You’ll Learn
- Containerization - Docker containers for ML models
- Cloud Deployment - AWS, Azure, and GCP deployment strategies
- Kubernetes - Container orchestration for scalable ML services
- CI/CD Pipelines - Automated deployment pipelines
- Monitoring - Model performance and system health monitoring
- Auto-scaling - Dynamic scaling based on demand
- Security - Authentication, authorization, and data protection
- A/B Testing - Gradual model rollouts and testing
Prerequisites
- Completion of “Java Inference Engine” tutorial
- Docker and container knowledge
- Cloud platform familiarity (AWS/Azure/GCP)
- Kubernetes basics
- CI/CD pipeline experience
Containerization with Docker
Dockerfile for ML Models
This example shows how to create optimized Docker containers for machine learning models with SuperML Java 2.1.0.
# syntax=docker/dockerfile:1
# Multi-stage build for an optimized production image.

# ---- Build stage ----
# NOTE: the legacy "openjdk" images are deprecated and never published a
# 17-jre variant; eclipse-temurin provides maintained JDK and JRE images.
FROM eclipse-temurin:17-jdk AS builder

# Set working directory
WORKDIR /app

# Copy Maven wrapper and descriptor first so dependency resolution is cached
COPY pom.xml .
COPY .mvn .mvn
COPY mvnw .

# Pre-fetch dependencies (layer is reused unless pom.xml changes)
RUN ./mvnw dependency:go-offline

# Copy source code
COPY src ./src

# Build application
RUN ./mvnw clean package -DskipTests

# ---- Production stage ----
FROM eclipse-temurin:17-jre

# curl is required by the HEALTHCHECK below; avoid pulling in extras
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Create a dedicated non-root user for the application
RUN groupadd -r appuser && useradd -r -g appuser appuser

# Set working directory
WORKDIR /app

# Copy built application
COPY --from=builder /app/target/*.jar app.jar

# Create directories for models and data, owned by the app user
RUN mkdir -p /app/models /app/data /app/logs && \
    chown -R appuser:appuser /app

# Copy models (if bundled with image)
COPY models/ /app/models/

# Switch to non-root user
USER appuser

# Expose HTTP port
EXPOSE 8080

# Health check against the application's health endpoint
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
    CMD curl -f http://localhost:8080/health || exit 1

# Container-friendly JVM defaults (overridable at run time)
ENV JAVA_OPTS="-Xmx2g -Xms1g -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:+UseStringDeduplication"

# sh -c is needed so $JAVA_OPTS is expanded at startup
ENTRYPOINT ["sh", "-c", "java $JAVA_OPTS -jar app.jar"]
Docker Compose for Development
# docker-compose.yml -- local development stack for the ML service.
# Includes the service itself, its backing stores (Redis, Postgres) and a
# Prometheus + Grafana monitoring pair.
version: '3.8'

services:
  ml-service:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "8080:8080"
    environment:
      - SPRING_PROFILES_ACTIVE=development
      - JAVA_OPTS=-Xmx1g -Xms512m
    volumes:
      # Bind-mount models/data/logs so they survive container rebuilds
      - ./models:/app/models
      - ./data:/app/data
      - ./logs:/app/logs
    depends_on:
      - redis
      - postgres
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 30s
      timeout: 10s
      retries: 3
    restart: unless-stopped

  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    # Append-only file enables persistence across restarts
    command: redis-server --appendonly yes
    restart: unless-stopped

  postgres:
    image: postgres:15-alpine
    environment:
      - POSTGRES_DB=mlservice
      - POSTGRES_USER=mluser
      - POSTGRES_PASSWORD=mlpass  # development only -- use secrets in production
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    restart: unless-stopped

  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    restart: unless-stopped

  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=admin  # development only
    volumes:
      - grafana_data:/var/lib/grafana
      - ./monitoring/grafana:/etc/grafana/provisioning
    depends_on:
      - prometheus
    restart: unless-stopped

volumes:
  redis_data:
  postgres_data:
  prometheus_data:
  grafana_data:
Container Optimization Script
package com.company.deployment;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.TimeUnit;
/**
 * Container optimization utilities for ML model deployment.
 *
 * <p>Provides JVM sizing recommendations derived from cgroup memory limits,
 * container health checks (JVM, disk, memory, application), a graceful
 * shutdown routine, and a snapshot of container-level metrics.
 */
@Component
public class ContainerOptimizer {

    private static final Logger logger = LoggerFactory.getLogger(ContainerOptimizer.class);

    // cgroup v1 reports a huge sentinel value (near Long.MAX_VALUE, rounded to
    // the page size) when no memory limit is configured. Anything above this
    // threshold is treated as "no limit".
    private static final long NO_LIMIT_THRESHOLD = Long.MAX_VALUE / 2;

    /**
     * Optimize JVM settings for container deployment.
     *
     * <p>Reads the container memory limit from the cgroup filesystem and logs
     * recommended heap/GC settings. Note that {@code -XX} flags are JVM
     * <em>launch</em> options: they cannot be enabled through
     * {@link System#setProperty} once the JVM is running, so this method only
     * logs the recommendation; the flags themselves must be supplied on the
     * command line (e.g. via the JAVA_OPTS environment variable).
     */
    public void optimizeJvmSettings() {
        logger.info("Optimizing JVM settings for container deployment");
        try {
            long containerMemoryLimit = getContainerMemoryLimit();
            if (containerMemoryLimit > 0) {
                // Recommend a heap of 70% of the container memory, leaving
                // headroom for metaspace, thread stacks and native buffers.
                long heapSize = (long) (containerMemoryLimit * 0.7);
                // Headless mode is a real runtime system property and is safe
                // to set here (no display is available inside a container).
                System.setProperty("java.awt.headless", "true");
                logger.info("Container memory limit: {} MB", containerMemoryLimit / (1024 * 1024));
                logger.info("Recommended heap size: {} MB", heapSize / (1024 * 1024));
                // -XX flags must be passed at JVM startup; log them so operators
                // can apply them via JAVA_OPTS in the container image.
                logger.info("Recommended JVM launch flags (set via JAVA_OPTS): "
                        + "-Xmx{}m -XX:+UseG1GC -XX:MaxGCPauseMillis=200 "
                        + "-XX:+UseStringDeduplication -XX:+UseContainerSupport",
                        heapSize / (1024 * 1024));
            } else {
                logger.info("Container memory limit not detected, using default JVM settings");
            }
        } catch (Exception e) {
            logger.error("Error optimizing JVM settings", e);
        }
    }

    /**
     * Read the container memory limit from the cgroup filesystem.
     *
     * <p>Checks cgroup v2 ({@code /sys/fs/cgroup/memory.max}) first, then
     * falls back to the legacy v1 path. Returns -1 when no limit is
     * configured or the limit cannot be determined.
     *
     * @return memory limit in bytes, or -1 if unlimited/unknown
     */
    private long getContainerMemoryLimit() {
        try {
            // cgroup v2 (unified hierarchy) -- the value is "max" when unlimited
            Path v2Path = Paths.get("/sys/fs/cgroup/memory.max");
            if (Files.exists(v2Path)) {
                String value = Files.readString(v2Path).trim();
                if ("max".equals(value)) {
                    return -1;
                }
                return Long.parseLong(value);
            }
            // cgroup v1 (legacy hierarchy)
            Path v1Path = Paths.get("/sys/fs/cgroup/memory/memory.limit_in_bytes");
            if (Files.exists(v1Path)) {
                long limit = Long.parseLong(Files.readString(v1Path).trim());
                // v1 uses a near-Long.MAX_VALUE sentinel for "no limit"
                return limit >= NO_LIMIT_THRESHOLD ? -1 : limit;
            }
        } catch (IOException | NumberFormatException e) {
            logger.debug("Could not read container memory limit: {}", e.getMessage());
        }
        return -1;
    }

    /**
     * Perform a full container health check.
     *
     * @return {@code true} when all sub-checks (JVM, disk, memory,
     *         application) pass
     */
    public boolean performHealthCheck() {
        logger.debug("Performing container health check");
        try {
            if (!isJvmHealthy()) {
                logger.warn("JVM health check failed");
                return false;
            }
            if (!isDiskSpaceHealthy()) {
                logger.warn("Disk space health check failed");
                return false;
            }
            if (!isMemoryHealthy()) {
                logger.warn("Memory health check failed");
                return false;
            }
            if (!isApplicationHealthy()) {
                logger.warn("Application health check failed");
                return false;
            }
            logger.debug("All health checks passed");
            return true;
        } catch (Exception e) {
            logger.error("Error during health check", e);
            return false;
        }
    }

    /**
     * Check JVM health: heap usage below 90% of the maximum and at least one
     * available processor.
     */
    private boolean isJvmHealthy() {
        Runtime runtime = Runtime.getRuntime();
        long maxMemory = runtime.maxMemory();
        long usedMemory = runtime.totalMemory() - runtime.freeMemory();
        double memoryUsagePercent = (double) usedMemory / maxMemory * 100;
        if (memoryUsagePercent > 90) {
            logger.warn("High memory usage: {}%", memoryUsagePercent);
            return false;
        }
        if (runtime.availableProcessors() < 1) {
            logger.warn("No available processors");
            return false;
        }
        return true;
    }

    /**
     * Check that disk usage of the application volume is below 85%.
     */
    private boolean isDiskSpaceHealthy() {
        try {
            Path appPath = Paths.get("/app");
            var store = Files.getFileStore(appPath);
            long totalSpace = store.getTotalSpace();
            long usableSpace = store.getUsableSpace();
            double usagePercent = (double) (totalSpace - usableSpace) / totalSpace * 100;
            if (usagePercent > 85) {
                logger.warn("High disk usage: {}%", usagePercent);
                return false;
            }
            return true;
        } catch (IOException e) {
            logger.error("Error checking disk space", e);
            return false;
        }
    }

    /**
     * Check that heap usage after a GC cycle stays below 80%.
     *
     * <p>NOTE: forcing a GC from a health check is expensive and only a hint
     * to the JVM ({@code System.gc()} may be ignored); acceptable here because
     * health checks run at a low frequency.
     */
    private boolean isMemoryHealthy() {
        try {
            System.gc();
            // Give the collector a moment to run (best effort)
            Thread.sleep(100);
            Runtime runtime = Runtime.getRuntime();
            long maxMemory = runtime.maxMemory();
            long usedMemory = runtime.totalMemory() - runtime.freeMemory();
            double memoryUsagePercent = (double) usedMemory / maxMemory * 100;
            if (memoryUsagePercent > 80) {
                logger.warn("High memory usage after GC: {}%", memoryUsagePercent);
                return false;
            }
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /**
     * Check application-specific health (model availability, database
     * connectivity, downstream services). Placeholder: implement per
     * application requirements.
     */
    private boolean isApplicationHealthy() {
        return true;
    }

    /**
     * Graceful shutdown routine: stop accepting requests, drain in-flight
     * work, close resources and persist state.
     *
     * <p>NOTE: the fixed 30-second drain below is a tutorial placeholder; a
     * real implementation should wait on actual in-flight request counters.
     */
    public void gracefulShutdown() {
        logger.info("Initiating graceful shutdown");
        try {
            logger.info("Stopping new request acceptance");
            logger.info("Waiting for existing requests to complete");
            Thread.sleep(TimeUnit.SECONDS.toMillis(30));
            logger.info("Closing application resources");
            logger.info("Persisting in-memory state");
            logger.info("Performing final cleanup");
            logger.info("Graceful shutdown completed");
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.error("Graceful shutdown interrupted", e);
        } catch (Exception e) {
            logger.error("Error during graceful shutdown", e);
        }
    }

    /**
     * Collect a point-in-time snapshot of container metrics (memory, CPU,
     * disk, cgroup limit).
     *
     * @return populated {@link ContainerMetrics}; disk fields stay zero when
     *         the filesystem cannot be inspected
     */
    public ContainerMetrics getContainerMetrics() {
        ContainerMetrics metrics = new ContainerMetrics();
        Runtime runtime = Runtime.getRuntime();
        // Memory metrics
        metrics.setMaxMemory(runtime.maxMemory());
        metrics.setTotalMemory(runtime.totalMemory());
        metrics.setFreeMemory(runtime.freeMemory());
        metrics.setUsedMemory(metrics.getTotalMemory() - metrics.getFreeMemory());
        // CPU metrics
        metrics.setAvailableProcessors(runtime.availableProcessors());
        // Disk metrics (best effort)
        try {
            Path appPath = Paths.get("/app");
            var store = Files.getFileStore(appPath);
            metrics.setTotalDiskSpace(store.getTotalSpace());
            metrics.setUsableDiskSpace(store.getUsableSpace());
        } catch (IOException e) {
            logger.warn("Could not get disk metrics", e);
        }
        // Container-specific metrics (-1 when no limit is detected)
        metrics.setContainerMemoryLimit(getContainerMemoryLimit());
        return metrics;
    }

    /**
     * Container metrics data holder. Plain mutable bean; all sizes are in
     * bytes.
     */
    public static class ContainerMetrics {
        private long maxMemory;
        private long totalMemory;
        private long freeMemory;
        private long usedMemory;
        private int availableProcessors;
        private long totalDiskSpace;
        private long usableDiskSpace;
        private long containerMemoryLimit;

        public long getMaxMemory() { return maxMemory; }
        public void setMaxMemory(long maxMemory) { this.maxMemory = maxMemory; }
        public long getTotalMemory() { return totalMemory; }
        public void setTotalMemory(long totalMemory) { this.totalMemory = totalMemory; }
        public long getFreeMemory() { return freeMemory; }
        public void setFreeMemory(long freeMemory) { this.freeMemory = freeMemory; }
        public long getUsedMemory() { return usedMemory; }
        public void setUsedMemory(long usedMemory) { this.usedMemory = usedMemory; }
        public int getAvailableProcessors() { return availableProcessors; }
        public void setAvailableProcessors(int availableProcessors) { this.availableProcessors = availableProcessors; }
        public long getTotalDiskSpace() { return totalDiskSpace; }
        public void setTotalDiskSpace(long totalDiskSpace) { this.totalDiskSpace = totalDiskSpace; }
        public long getUsableDiskSpace() { return usableDiskSpace; }
        public void setUsableDiskSpace(long usableDiskSpace) { this.usableDiskSpace = usableDiskSpace; }
        public long getContainerMemoryLimit() { return containerMemoryLimit; }
        public void setContainerMemoryLimit(long containerMemoryLimit) { this.containerMemoryLimit = containerMemoryLimit; }

        /** Heap usage as a percentage of max heap; 0 when max is unknown. */
        public double getMemoryUsagePercent() {
            return maxMemory > 0 ? (double) usedMemory / maxMemory * 100 : 0;
        }

        /** Disk usage as a percentage of total space; 0 when total is unknown. */
        public double getDiskUsagePercent() {
            return totalDiskSpace > 0 ? (double) (totalDiskSpace - usableDiskSpace) / totalDiskSpace * 100 : 0;
        }
    }
}
Kubernetes Deployment
Kubernetes Manifests
# deployment.yaml -- ML service Deployment (3 replicas, probes, PVC-backed models)
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ml-service
  namespace: production
  labels:
    app: ml-service
    version: v1.0.0
spec:
  replicas: 3
  selector:
    matchLabels:
      app: ml-service
  template:
    metadata:
      labels:
        app: ml-service
        version: v1.0.0
    spec:
      containers:
        - name: ml-service
          image: your-registry/ml-service:v1.0.0
          ports:
            - containerPort: 8080
              name: http
          env:
            - name: SPRING_PROFILES_ACTIVE
              value: "production"
            - name: JAVA_OPTS
              value: "-Xmx2g -Xms1g -XX:+UseG1GC"
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
            limits:
              memory: "3Gi"
              cpu: "2000m"
          livenessProbe:
            httpGet:
              path: /actuator/health
              port: 8080
            initialDelaySeconds: 60
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /actuator/health/readiness
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          volumeMounts:
            - name: models
              mountPath: /app/models
            - name: config
              mountPath: /app/config
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: ml-models-pvc
        - name: config
          configMap:
            name: ml-service-config
      imagePullSecrets:
        - name: registry-secret
---
# service.yaml -- cluster-internal Service in front of the Deployment
apiVersion: v1
kind: Service
metadata:
  name: ml-service
  namespace: production
  labels:
    app: ml-service
spec:
  selector:
    app: ml-service
  ports:
    - port: 80
      targetPort: 8080
      name: http
  type: ClusterIP
---
# hpa.yaml -- autoscale on CPU (70%) and memory (80%) utilization
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: ml-service-hpa
  namespace: production
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: ml-service
  minReplicas: 3
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    scaleUp:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 100
          periodSeconds: 15
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
---
# configmap.yaml -- Spring Boot application configuration for the service
apiVersion: v1
kind: ConfigMap
metadata:
  name: ml-service-config
  namespace: production
data:
  application.yml: |
    spring:
      profiles:
        active: production
      datasource:
        url: jdbc:postgresql://postgres:5432/mlservice
        username: ${DB_USERNAME}
        password: ${DB_PASSWORD}
      redis:
        host: redis
        port: 6379
    ml:
      model:
        path: /app/models
        cache-size: 1000
        cache-ttl: 3600
      inference:
        max-concurrent-requests: 100
        timeout: 30000
        batch-size: 32
      monitoring:
        enabled: true
        metrics-interval: 60
    management:
      endpoints:
        web:
          exposure:
            include: health,info,metrics,prometheus
      endpoint:
        health:
          show-details: always
      metrics:
        export:
          prometheus:
            enabled: true
---
# persistent-volume.yaml -- shared model storage (ReadWriteMany for all pods)
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ml-models-pvc
  namespace: production
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 10Gi
  storageClassName: fast-ssd
Kubernetes Deployment Controller
package com.company.deployment.kubernetes;
import io.kubernetes.client.openapi.ApiClient;
import io.kubernetes.client.openapi.ApiException;
import io.kubernetes.client.openapi.Configuration;
import io.kubernetes.client.openapi.apis.AppsV1Api;
import io.kubernetes.client.openapi.apis.CoreV1Api;
import io.kubernetes.client.openapi.models.*;
import io.kubernetes.client.util.Config;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
 * Kubernetes deployment controller for ML services.
 *
 * <p>Manages deployment, scaling, and monitoring of ML models in Kubernetes
 * via the official Java client. All create* methods are idempotent: a 409
 * (Conflict) response triggers a replace of the existing resource.
 *
 * <p>NOTE(review): the positional null arguments passed to the client API
 * (pretty/dryRun/fieldManager/...) match a specific io.kubernetes:client-java
 * version -- verify against the version on the classpath.
 */
@Component
public class KubernetesDeploymentController {

    private static final Logger logger = LoggerFactory.getLogger(KubernetesDeploymentController.class);

    private final ApiClient apiClient;
    private final AppsV1Api appsV1Api;
    private final CoreV1Api coreV1Api;

    /**
     * Build the controller using the default client configuration
     * (in-cluster service account, or local kubeconfig when run outside
     * a cluster).
     *
     * @throws IOException when the client configuration cannot be loaded
     */
    public KubernetesDeploymentController() throws IOException {
        this.apiClient = Config.defaultClient();
        Configuration.setDefaultApiClient(apiClient);
        this.appsV1Api = new AppsV1Api();
        this.coreV1Api = new CoreV1Api();
        logger.info("Kubernetes deployment controller initialized");
    }

    /**
     * Deploy an ML model service to Kubernetes: namespace, ConfigMap,
     * Deployment, Service and HPA, in dependency order.
     *
     * @param deployment the service deployment descriptor
     * @throws RuntimeException wrapping any API failure
     */
    public void deployMLService(MLServiceDeployment deployment) {
        logger.info("Deploying ML service: {}", deployment.getName());
        try {
            createNamespaceIfNotExists(deployment.getNamespace());
            createConfigMap(deployment);
            createDeployment(deployment);
            createService(deployment);
            createHPA(deployment);
            logger.info("ML service deployed successfully: {}", deployment.getName());
        } catch (Exception e) {
            logger.error("Failed to deploy ML service: {}", deployment.getName(), e);
            throw new RuntimeException("Deployment failed", e);
        }
    }

    /**
     * Create the namespace when it does not already exist.
     * Uses read-then-create: a 404 on read means "create it"; any other
     * API error is propagated.
     */
    private void createNamespaceIfNotExists(String namespace) throws ApiException {
        try {
            coreV1Api.readNamespace(namespace, null);
            logger.info("Namespace {} already exists", namespace);
        } catch (ApiException e) {
            if (e.getCode() == 404) {
                V1Namespace ns = new V1Namespace()
                        .metadata(new V1ObjectMeta().name(namespace));
                coreV1Api.createNamespace(ns, null, null, null, null);
                logger.info("Created namespace: {}", namespace);
            } else {
                throw e;
            }
        }
    }

    /**
     * Create (or replace on 409) the ConfigMap "{name}-config" carrying the
     * application and model configuration files.
     */
    private void createConfigMap(MLServiceDeployment deployment) throws ApiException {
        String configMapName = deployment.getName() + "-config";
        Map<String, String> data = new HashMap<>();
        data.put("application.yml", deployment.getApplicationConfig());
        data.put("model.properties", deployment.getModelConfig());
        V1ConfigMap configMap = new V1ConfigMap()
                .metadata(new V1ObjectMeta()
                        .name(configMapName)
                        .namespace(deployment.getNamespace())
                        .labels(deployment.getLabels()))
                .data(data);
        try {
            coreV1Api.createNamespacedConfigMap(
                    deployment.getNamespace(), configMap, null, null, null, null);
            logger.info("Created ConfigMap: {}", configMapName);
        } catch (ApiException e) {
            if (e.getCode() == 409) {
                // Already exists -> replace with the new content
                coreV1Api.replaceNamespacedConfigMap(
                        configMapName, deployment.getNamespace(), configMap, null, null, null, null);
                logger.info("Updated ConfigMap: {}", configMapName);
            } else {
                throw e;
            }
        }
    }

    /**
     * Create (or replace on 409) the Deployment. The pod template is built
     * by {@link #createPodSpec(MLServiceDeployment)}; the selector reuses the
     * deployment labels, so labels must stay stable across updates.
     */
    private void createDeployment(MLServiceDeployment deployment) throws ApiException {
        V1Deployment k8sDeployment = new V1Deployment()
                .metadata(new V1ObjectMeta()
                        .name(deployment.getName())
                        .namespace(deployment.getNamespace())
                        .labels(deployment.getLabels()))
                .spec(new V1DeploymentSpec()
                        .replicas(deployment.getReplicas())
                        .selector(new V1LabelSelector()
                                .matchLabels(deployment.getLabels()))
                        .template(new V1PodTemplateSpec()
                                .metadata(new V1ObjectMeta()
                                        .labels(deployment.getLabels()))
                                .spec(createPodSpec(deployment))));
        try {
            appsV1Api.createNamespacedDeployment(
                    deployment.getNamespace(), k8sDeployment, null, null, null, null);
            logger.info("Created deployment: {}", deployment.getName());
        } catch (ApiException e) {
            if (e.getCode() == 409) {
                // Already exists -> replace with the new spec
                appsV1Api.replaceNamespacedDeployment(
                        deployment.getName(), deployment.getNamespace(), k8sDeployment, null, null, null, null);
                logger.info("Updated deployment: {}", deployment.getName());
            } else {
                throw e;
            }
        }
    }

    /**
     * Build the pod spec: one container on port 8080 with liveness/readiness
     * probes, resource requests/limits, a PVC-backed models volume, and a
     * ConfigMap-backed config volume.
     *
     * <p>NOTE(review): the PVC claim name "ml-models-pvc" and the image pull
     * secret "registry-secret" are hard-coded -- they must exist in the
     * target namespace.
     */
    private V1PodSpec createPodSpec(MLServiceDeployment deployment) {
        // Container specification
        V1Container container = new V1Container()
                .name(deployment.getName())
                .image(deployment.getImage())
                .addPortsItem(new V1ContainerPort().containerPort(8080))
                .env(deployment.getEnvironmentVariables())
                .resources(new V1ResourceRequirements()
                        .requests(deployment.getResourceRequests())
                        .limits(deployment.getResourceLimits()))
                .livenessProbe(new V1Probe()
                        .httpGet(new V1HTTPGetAction()
                                .path("/actuator/health")
                                .port(new IntOrString(8080)))
                        .initialDelaySeconds(60)
                        .periodSeconds(30)
                        .timeoutSeconds(10)
                        .failureThreshold(3))
                .readinessProbe(new V1Probe()
                        .httpGet(new V1HTTPGetAction()
                                .path("/actuator/health/readiness")
                                .port(new IntOrString(8080)))
                        .initialDelaySeconds(30)
                        .periodSeconds(10)
                        .timeoutSeconds(5)
                        .failureThreshold(3))
                .addVolumeMountsItem(new V1VolumeMount()
                        .name("models")
                        .mountPath("/app/models"))
                .addVolumeMountsItem(new V1VolumeMount()
                        .name("config")
                        .mountPath("/app/config"));
        // Pod specification
        return new V1PodSpec()
                .addContainersItem(container)
                .addVolumesItem(new V1Volume()
                        .name("models")
                        .persistentVolumeClaim(new V1PersistentVolumeClaimVolumeSource()
                                .claimName("ml-models-pvc")))
                .addVolumesItem(new V1Volume()
                        .name("config")
                        .configMap(new V1ConfigMapVolumeSource()
                                .name(deployment.getName() + "-config")))
                .addImagePullSecretsItem(new V1LocalObjectReference()
                        .name("registry-secret"));
    }

    /**
     * Create (or replace on 409) a ClusterIP Service exposing port 80 and
     * targeting container port 8080, selected by the deployment labels.
     */
    private void createService(MLServiceDeployment deployment) throws ApiException {
        V1Service service = new V1Service()
                .metadata(new V1ObjectMeta()
                        .name(deployment.getName())
                        .namespace(deployment.getNamespace())
                        .labels(deployment.getLabels()))
                .spec(new V1ServiceSpec()
                        .selector(deployment.getLabels())
                        .type("ClusterIP")
                        .addPortsItem(new V1ServicePort()
                                .port(80)
                                .targetPort(new IntOrString(8080))
                                .name("http")));
        try {
            coreV1Api.createNamespacedService(
                    deployment.getNamespace(), service, null, null, null, null);
            logger.info("Created service: {}", deployment.getName());
        } catch (ApiException e) {
            if (e.getCode() == 409) {
                // Already exists -> replace with the new spec
                coreV1Api.replaceNamespacedService(
                        deployment.getName(), deployment.getNamespace(), service, null, null, null, null);
                logger.info("Updated service: {}", deployment.getName());
            } else {
                throw e;
            }
        }
    }

    /**
     * Create the Horizontal Pod Autoscaler.
     * Placeholder: a real implementation needs the AutoscalingV2Api from the
     * Kubernetes client.
     */
    private void createHPA(MLServiceDeployment deployment) throws ApiException {
        logger.info("HPA creation for {} (implementation depends on autoscaling API)", deployment.getName());
    }

    /**
     * Scale a deployment to the given replica count using
     * read-modify-replace.
     *
     * <p>NOTE(review): replace can fail on a concurrent update (optimistic
     * locking via resourceVersion); a patch of the scale subresource would
     * avoid that race.
     *
     * @throws RuntimeException wrapping any API failure
     */
    public void scaleDeployment(String name, String namespace, int replicas) {
        logger.info("Scaling deployment {} to {} replicas", name, replicas);
        try {
            V1Deployment deployment = appsV1Api.readNamespacedDeployment(
                    name, namespace, null);
            deployment.getSpec().setReplicas(replicas);
            appsV1Api.replaceNamespacedDeployment(
                    name, namespace, deployment, null, null, null, null);
            logger.info("Scaled deployment {} to {} replicas", name, replicas);
        } catch (ApiException e) {
            logger.error("Failed to scale deployment: {}", name, e);
            throw new RuntimeException("Scaling failed", e);
        }
    }

    /**
     * Read the current replica counts of a deployment.
     * Individual counts may be null when the cluster has not reported them
     * yet; {@link DeploymentStatus} handles that.
     *
     * @throws RuntimeException wrapping any API failure
     */
    public DeploymentStatus getDeploymentStatus(String name, String namespace) {
        try {
            V1Deployment deployment = appsV1Api.readNamespacedDeployment(
                    name, namespace, null);
            V1DeploymentStatus status = deployment.getStatus();
            return new DeploymentStatus(
                    status.getReplicas(),
                    status.getReadyReplicas(),
                    status.getAvailableReplicas(),
                    status.getUnavailableReplicas()
            );
        } catch (ApiException e) {
            logger.error("Failed to get deployment status: {}", name, e);
            throw new RuntimeException("Failed to get deployment status", e);
        }
    }

    /**
     * Delete a deployed service: Deployment, Service and ConfigMap.
     *
     * <p>NOTE(review): not transactional -- a failure partway through leaves
     * earlier deletions in place, and the first ApiException aborts the rest.
     *
     * @throws RuntimeException wrapping any API failure
     */
    public void deleteDeployment(String name, String namespace) {
        logger.info("Deleting deployment: {}", name);
        try {
            appsV1Api.deleteNamespacedDeployment(
                    name, namespace, null, null, null, null, null, null);
            coreV1Api.deleteNamespacedService(
                    name, namespace, null, null, null, null, null, null);
            coreV1Api.deleteNamespacedConfigMap(
                    name + "-config", namespace, null, null, null, null, null, null);
            logger.info("Deleted deployment: {}", name);
        } catch (ApiException e) {
            logger.error("Failed to delete deployment: {}", name, e);
            throw new RuntimeException("Deletion failed", e);
        }
    }

    /**
     * ML service deployment descriptor: image, replica count, labels,
     * resources, environment and configuration payloads.
     * Defaults: 3 replicas and an "app"={name} label.
     */
    public static class MLServiceDeployment {
        private String name;
        private String namespace;
        private String image;
        private int replicas;
        private Map<String, String> labels;
        private Map<String, String> resourceRequests;
        private Map<String, String> resourceLimits;
        private java.util.List<V1EnvVar> environmentVariables;
        private String applicationConfig;
        private String modelConfig;

        public MLServiceDeployment(String name, String namespace, String image) {
            this.name = name;
            this.namespace = namespace;
            this.image = image;
            this.replicas = 3;
            this.labels = new HashMap<>();
            this.labels.put("app", name);
            this.resourceRequests = new HashMap<>();
            this.resourceLimits = new HashMap<>();
            this.environmentVariables = new java.util.ArrayList<>();
        }

        // Getters and setters
        public String getName() { return name; }
        public void setName(String name) { this.name = name; }
        public String getNamespace() { return namespace; }
        public void setNamespace(String namespace) { this.namespace = namespace; }
        public String getImage() { return image; }
        public void setImage(String image) { this.image = image; }
        public int getReplicas() { return replicas; }
        public void setReplicas(int replicas) { this.replicas = replicas; }
        public Map<String, String> getLabels() { return labels; }
        public void setLabels(Map<String, String> labels) { this.labels = labels; }
        public Map<String, String> getResourceRequests() { return resourceRequests; }
        public void setResourceRequests(Map<String, String> resourceRequests) { this.resourceRequests = resourceRequests; }
        public Map<String, String> getResourceLimits() { return resourceLimits; }
        public void setResourceLimits(Map<String, String> resourceLimits) { this.resourceLimits = resourceLimits; }
        public java.util.List<V1EnvVar> getEnvironmentVariables() { return environmentVariables; }
        public void setEnvironmentVariables(java.util.List<V1EnvVar> environmentVariables) { this.environmentVariables = environmentVariables; }
        public String getApplicationConfig() { return applicationConfig; }
        public void setApplicationConfig(String applicationConfig) { this.applicationConfig = applicationConfig; }
        public String getModelConfig() { return modelConfig; }
        public void setModelConfig(String modelConfig) { this.modelConfig = modelConfig; }
    }

    /**
     * Immutable snapshot of a deployment's replica counts. Fields are
     * boxed Integers because the Kubernetes API may omit any of them.
     */
    public static class DeploymentStatus {
        private final Integer replicas;
        private final Integer readyReplicas;
        private final Integer availableReplicas;
        private final Integer unavailableReplicas;

        public DeploymentStatus(Integer replicas, Integer readyReplicas,
                                Integer availableReplicas, Integer unavailableReplicas) {
            this.replicas = replicas;
            this.readyReplicas = readyReplicas;
            this.availableReplicas = availableReplicas;
            this.unavailableReplicas = unavailableReplicas;
        }

        public Integer getReplicas() { return replicas; }
        public Integer getReadyReplicas() { return readyReplicas; }
        public Integer getAvailableReplicas() { return availableReplicas; }
        public Integer getUnavailableReplicas() { return unavailableReplicas; }

        /** True when every desired replica reports ready. */
        public boolean isHealthy() {
            return readyReplicas != null && replicas != null && readyReplicas.equals(replicas);
        }
    }
}
Key Learning Points:
- Container Optimization: JVM tuning and resource optimization for containers
- Health Checks: Comprehensive health checking for container environments
- Kubernetes Integration: Native Kubernetes deployment and management
- Auto-scaling: Horizontal Pod Autoscaler for dynamic scaling
- Resource Management: Proper resource requests and limits
- Configuration Management: ConfigMaps and environment variables
- Monitoring: Built-in monitoring and metrics collection
This concludes the containerization and Kubernetes deployment portion of the series. The follow-up tutorials cover enterprise patterns and ML optimization.