docker-compose-production by thebushidocollective/han
npx skills add https://github.com/thebushidocollective/han --skill docker-compose-production
具备安全性、可靠性和可扩展性最佳实践的生产就绪 Docker Compose 配置。
包含基本配置的全面生产模板:
version: '3.8'
services:
nginx:
image: nginx:1.25-alpine
container_name: production-nginx
restart: unless-stopped
ports:
- "80:80"
- "443:443"
volumes:
- ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
- ./nginx/ssl:/etc/nginx/ssl:ro
- nginx-cache:/var/cache/nginx
- nginx-logs:/var/log/nginx
networks:
- frontend
healthcheck:
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost/health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "3"
deploy:
resources:
limits:
cpus: '1.0'
memory: 512M
reservations:
cpus: '0.5'
memory: 256M
api:
image: mycompany/api:${API_VERSION:-latest}
container_name: production-api
restart: unless-stopped
networks:
- frontend
- backend
environment:
NODE_ENV: production
DATABASE_URL: postgresql://database:5432/production_db
REDIS_URL: redis://cache:6379
LOG_LEVEL: ${LOG_LEVEL:-info}
PORT: 3000
env_file:
- .env.production
secrets:
- db_password
- jwt_secret
healthcheck:
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:3000/health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 60s
depends_on:
database:
condition: service_healthy
cache:
condition: service_healthy
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "5"
deploy:
resources:
limits:
cpus: '2.0'
memory: 2G
reservations:
cpus: '1.0'
memory: 1G
worker:
image: mycompany/worker:${WORKER_VERSION:-latest}
container_name: production-worker
restart: unless-stopped
networks:
- backend
environment:
NODE_ENV: production
DATABASE_URL: postgresql://database:5432/production_db
REDIS_URL: redis://cache:6379
QUEUE_NAME: ${QUEUE_NAME:-default}
env_file:
- .env.production
secrets:
- db_password
depends_on:
database:
condition: service_healthy
cache:
condition: service_healthy
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "5"
deploy:
replicas: 3
resources:
limits:
cpus: '1.0'
memory: 1G
reservations:
cpus: '0.5'
memory: 512M
database:
image: postgres:15-alpine
container_name: production-db
restart: unless-stopped
networks:
- backend
environment:
POSTGRES_DB: production_db
POSTGRES_USER: postgres
POSTGRES_PASSWORD_FILE: /run/secrets/db_password
POSTGRES_INITDB_ARGS: "-E UTF8 --locale=en_US.UTF-8"
secrets:
- db_password
volumes:
- postgres-data:/var/lib/postgresql/data
- ./db/init:/docker-entrypoint-initdb.d:ro
- postgres-logs:/var/log/postgresql
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres -d production_db"]
interval: 10s
timeout: 5s
retries: 5
start_period: 30s
command:
- "postgres"
- "-c"
- "max_connections=200"
- "-c"
- "shared_buffers=256MB"
- "-c"
- "effective_cache_size=1GB"
- "-c"
- "maintenance_work_mem=64MB"
- "-c"
- "checkpoint_completion_target=0.9"
- "-c"
- "wal_buffers=16MB"
- "-c"
- "default_statistics_target=100"
- "-c"
- "random_page_cost=1.1"
- "-c"
- "effective_io_concurrency=200"
- "-c"
- "work_mem=1MB"
- "-c"
- "min_wal_size=1GB"
- "-c"
- "max_wal_size=4GB"
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "5"
deploy:
resources:
limits:
cpus: '2.0'
memory: 2G
reservations:
cpus: '1.0'
memory: 1G
cache:
image: redis:7-alpine
container_name: production-cache
restart: unless-stopped
networks:
- backend
command: >
redis-server
--appendonly yes
--appendfsync everysec
--maxmemory 512mb
--maxmemory-policy allkeys-lru
--requirepass ${REDIS_PASSWORD}
volumes:
- redis-data:/data
healthcheck:
test: ["CMD-SHELL", "redis-cli -a $$REDIS_PASSWORD ping | grep PONG"]
interval: 10s
timeout: 5s
retries: 5
start_period: 20s
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "3"
deploy:
resources:
limits:
cpus: '1.0'
memory: 768M
reservations:
cpus: '0.5'
memory: 512M
backup:
image: prodrigestivill/postgres-backup-local:15-alpine
container_name: production-backup
restart: unless-stopped
networks:
- backend
environment:
POSTGRES_HOST: database
POSTGRES_DB: production_db
POSTGRES_USER: postgres
POSTGRES_PASSWORD_FILE: /run/secrets/db_password
SCHEDULE: "@daily"
BACKUP_KEEP_DAYS: 7
BACKUP_KEEP_WEEKS: 4
BACKUP_KEEP_MONTHS: 6
HEALTHCHECK_PORT: 8080
secrets:
- db_password
volumes:
- ./backups:/backups
depends_on:
database:
condition: service_healthy
networks:
frontend:
driver: bridge
backend:
driver: bridge
internal: true
volumes:
postgres-data:
driver: local
driver_opts:
type: none
o: bind
device: /data/postgres
redis-data:
driver: local
nginx-cache:
driver: local
nginx-logs:
driver: local
postgres-logs:
driver: local
secrets:
db_password:
file: ./secrets/db_password.txt
jwt_secret:
file: ./secrets/jwt_secret.txt
广告位招租
在这里展示您的产品或服务
触达数万 AI 开发者,精准高效
生产安全配置:
version: '3.8'
services:
web:
image: nginx:1.25-alpine
restart: unless-stopped
read_only: true
tmpfs:
- /var/cache/nginx
- /var/run
cap_drop:
- ALL
cap_add:
- NET_BIND_SERVICE
security_opt:
- no-new-privileges:true
- seccomp:./security/seccomp-profile.json
user: "nginx:nginx"
networks:
- frontend
ports:
- "80:80"
- "443:443"
volumes:
- ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
- ./nginx/ssl:/etc/nginx/ssl:ro
api:
image: mycompany/api:${VERSION}
restart: unless-stopped
read_only: true
tmpfs:
- /tmp
cap_drop:
- ALL
security_opt:
- no-new-privileges:true
- seccomp:./security/seccomp-profile.json
user: "1000:1000"
networks:
- frontend
- backend
environment:
NODE_ENV: production
env_file:
- .env.production
secrets:
- source: db_password
target: /run/secrets/db_password
mode: 0400
- source: api_key
target: /run/secrets/api_key
mode: 0400
database:
image: postgres:15-alpine
restart: unless-stopped
read_only: true
tmpfs:
- /tmp
- /run/postgresql
cap_drop:
- ALL
cap_add:
- CHOWN
- DAC_OVERRIDE
- FOWNER
- SETGID
- SETUID
security_opt:
- no-new-privileges:true
user: "postgres:postgres"
networks:
- backend
environment:
POSTGRES_PASSWORD_FILE: /run/secrets/db_password
secrets:
- source: db_password
mode: 0400
volumes:
- postgres-data:/var/lib/postgresql/data
networks:
frontend:
driver: bridge
driver_opts:
com.docker.network.bridge.enable_icc: "false"
backend:
driver: bridge
internal: true
volumes:
postgres-data:
secrets:
db_password:
file: ./secrets/db_password.txt
api_key:
file: ./secrets/api_key.txt
全面的资源管理:
version: '3.8'
services:
web:
image: nginx:alpine
restart: unless-stopped
deploy:
resources:
limits:
cpus: '0.50'
memory: 256M
pids: 100
reservations:
cpus: '0.25'
memory: 128M
ulimits:
nofile:
soft: 1024
hard: 2048
nproc:
soft: 64
hard: 128
api:
image: node:18-alpine
restart: unless-stopped
deploy:
resources:
limits:
cpus: '2.0'
memory: 2G
pids: 200
reservations:
cpus: '1.0'
memory: 1G
ulimits:
nofile:
soft: 4096
hard: 8192
nproc:
soft: 256
hard: 512
database:
image: postgres:15-alpine
restart: unless-stopped
deploy:
resources:
limits:
cpus: '4.0'
memory: 4G
pids: 500
reservations:
cpus: '2.0'
memory: 2G
ulimits:
nofile:
soft: 8192
hard: 16384
shm_size: '256mb'
volumes:
- postgres-data:/var/lib/postgresql/data
cache:
image: redis:7-alpine
restart: unless-stopped
deploy:
resources:
limits:
cpus: '1.0'
memory: 1G
reservations:
cpus: '0.5'
memory: 512M
sysctls:
net.core.somaxconn: 1024
volumes:
- redis-data:/data
volumes:
postgres-data:
redis-data:
具有负载均衡的多个副本:
version: '3.8'
services:
loadbalancer:
image: nginx:alpine
restart: unless-stopped
ports:
- "80:80"
- "443:443"
volumes:
- ./nginx/nginx-lb.conf:/etc/nginx/nginx.conf:ro
- ./nginx/ssl:/etc/nginx/ssl:ro
networks:
- frontend
healthcheck:
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost/health"]
interval: 10s
timeout: 5s
retries: 3
deploy:
resources:
limits:
cpus: '1.0'
memory: 512M
api:
image: mycompany/api:${VERSION}
restart: unless-stopped
networks:
- frontend
- backend
environment:
NODE_ENV: production
DATABASE_URL: postgresql://database:5432/app
INSTANCE_ID: "{{.Task.Slot}}"
deploy:
replicas: 5
update_config:
parallelism: 2
delay: 10s
order: start-first
failure_action: rollback
rollback_config:
parallelism: 2
delay: 10s
restart_policy:
condition: on-failure
delay: 5s
max_attempts: 3
window: 120s
resources:
limits:
cpus: '1.0'
memory: 1G
reservations:
cpus: '0.5'
memory: 512M
healthcheck:
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:3000/health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 60s
database:
image: postgres:15-alpine
restart: unless-stopped
networks:
- backend
environment:
POSTGRES_PASSWORD_FILE: /run/secrets/db_password
secrets:
- db_password
volumes:
- postgres-data:/var/lib/postgresql/data
healthcheck:
test: ["CMD-SHELL", "pg_isready"]
interval: 10s
timeout: 5s
retries: 5
deploy:
resources:
limits:
cpus: '4.0'
memory: 4G
database-replica:
image: postgres:15-alpine
restart: unless-stopped
networks:
- backend
environment:
POSTGRES_PASSWORD_FILE: /run/secrets/db_password
POSTGRES_PRIMARY_HOST: database
POSTGRES_PRIMARY_PORT: 5432
secrets:
- db_password
volumes:
- postgres-replica-data:/var/lib/postgresql/data
- ./db/replica-setup.sh:/docker-entrypoint-initdb.d/replica-setup.sh:ro
depends_on:
database:
condition: service_healthy
deploy:
resources:
limits:
cpus: '2.0'
memory: 2G
networks:
frontend:
driver: bridge
backend:
driver: bridge
internal: true
volumes:
postgres-data:
postgres-replica-data:
secrets:
db_password:
file: ./secrets/db_password.txt
生产监控栈:
version: '3.8'
services:
prometheus:
image: prom/prometheus:latest
container_name: prometheus
restart: unless-stopped
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--storage.tsdb.retention.time=30d'
- '--web.console.libraries=/usr/share/prometheus/console_libraries'
- '--web.console.templates=/usr/share/prometheus/consoles'
- '--web.enable-lifecycle'
volumes:
- ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
- ./monitoring/alerts:/etc/prometheus/alerts:ro
- prometheus-data:/prometheus
networks:
- monitoring
ports:
- "9090:9090"
healthcheck:
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:9090/-/healthy"]
interval: 30s
timeout: 10s
retries: 3
deploy:
resources:
limits:
cpus: '1.0'
memory: 2G
grafana:
image: grafana/grafana:latest
container_name: grafana
restart: unless-stopped
environment:
GF_SECURITY_ADMIN_PASSWORD__FILE: /run/secrets/grafana_password
GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-simple-json-datasource
GF_SERVER_ROOT_URL: https://monitoring.example.com
secrets:
- grafana_password
volumes:
- ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro
- ./monitoring/grafana/dashboards:/var/lib/grafana/dashboards:ro
- grafana-data:/var/lib/grafana
networks:
- monitoring
- frontend
ports:
- "3001:3000"
depends_on:
- prometheus
healthcheck:
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:3000/api/health"]
interval: 30s
timeout: 10s
retries: 3
deploy:
resources:
limits:
cpus: '0.5'
memory: 512M
node-exporter:
image: prom/node-exporter:latest
container_name: node-exporter
restart: unless-stopped
command:
- '--path.rootfs=/host'
- '--path.procfs=/host/proc'
- '--path.sysfs=/host/sys'
- '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
volumes:
- /:/host:ro,rslave
networks:
- monitoring
ports:
- "9100:9100"
deploy:
resources:
limits:
cpus: '0.2'
memory: 128M
cadvisor:
image: gcr.io/cadvisor/cadvisor:latest
container_name: cadvisor
restart: unless-stopped
privileged: true
devices:
- /dev/kmsg
volumes:
- /:/rootfs:ro
- /var/run:/var/run:ro
- /sys:/sys:ro
- /var/lib/docker:/var/lib/docker:ro
- /dev/disk:/dev/disk:ro
networks:
- monitoring
ports:
- "8080:8080"
deploy:
resources:
limits:
cpus: '0.3'
memory: 256M
loki:
image: grafana/loki:latest
container_name: loki
restart: unless-stopped
command: -config.file=/etc/loki/local-config.yaml
volumes:
- ./monitoring/loki-config.yml:/etc/loki/local-config.yaml:ro
- loki-data:/loki
networks:
- monitoring
ports:
- "3100:3100"
deploy:
resources:
limits:
cpus: '1.0'
memory: 1G
promtail:
image: grafana/promtail:latest
container_name: promtail
restart: unless-stopped
command: -config.file=/etc/promtail/config.yml
volumes:
- ./monitoring/promtail-config.yml:/etc/promtail/config.yml:ro
- /var/log:/var/log:ro
- /var/lib/docker/containers:/var/lib/docker/containers:ro
networks:
- monitoring
depends_on:
- loki
deploy:
resources:
limits:
cpus: '0.2'
memory: 256M
networks:
monitoring:
driver: bridge
frontend:
driver: bridge
volumes:
prometheus-data:
grafana-data:
loki-data:
secrets:
grafana_password:
file: ./secrets/grafana_password.txt
集中式日志设置:
version: '3.8'
services:
app:
image: myapp:latest
restart: unless-stopped
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "5"
labels: "app,environment,version"
tag: "{{.Name}}/{{.ID}}"
labels:
app: "myapp"
environment: "production"
version: "${VERSION}"
nginx:
image: nginx:alpine
restart: unless-stopped
logging:
driver: "syslog"
options:
syslog-address: "tcp://logserver:514"
tag: "nginx"
syslog-format: "rfc5424micro"
api:
image: api:latest
restart: unless-stopped
logging:
driver: "fluentd"
options:
fluentd-address: "localhost:24224"
tag: "docker.{{.Name}}"
fluentd-async-connect: "true"
fluentd-retry-wait: "1s"
fluentd-max-retries: "30"
database:
image: postgres:15-alpine
restart: unless-stopped
logging:
driver: "json-file"
options:
max-size: "50m"
max-file: "10"
compress: "true"
volumes:
- postgres-data:/var/lib/postgresql/data
volumes:
postgres-data:
多环境设置:
version: '3.8'
services:
app:
image: myapp:${VERSION:-latest}
restart: unless-stopped
environment:
NODE_ENV: ${NODE_ENV:-production}
LOG_LEVEL: ${LOG_LEVEL:-info}
PORT: ${APP_PORT:-3000}
DATABASE_URL: postgresql://${DB_USER}:${DB_PASSWORD}@database:5432/${DB_NAME}
REDIS_URL: redis://:${REDIS_PASSWORD}@cache:6379
JWT_SECRET: ${JWT_SECRET}
API_TIMEOUT: ${API_TIMEOUT:-30000}
MAX_CONNECTIONS: ${MAX_CONNECTIONS:-100}
env_file:
- .env.${ENVIRONMENT:-production}
- .env.secrets
networks:
- app-network
database:
image: postgres:${POSTGRES_VERSION:-15}-alpine
restart: unless-stopped
environment:
POSTGRES_DB: ${DB_NAME}
POSTGRES_USER: ${DB_USER}
POSTGRES_PASSWORD: ${DB_PASSWORD}
POSTGRES_INITDB_ARGS: ${POSTGRES_INITDB_ARGS:--E UTF8}
volumes:
- postgres-data:/var/lib/postgresql/data
networks:
- app-network
cache:
image: redis:${REDIS_VERSION:-7}-alpine
restart: unless-stopped
command: redis-server --requirepass ${REDIS_PASSWORD} --maxmemory ${REDIS_MAX_MEMORY:-256mb}
volumes:
- redis-data:/data
networks:
- app-network
networks:
app-network:
driver: bridge
volumes:
postgres-data:
redis-data:
全面的健康监控:
version: '3.8'
services:
web:
image: nginx:alpine
restart: unless-stopped
healthcheck:
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost/health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
api:
image: node:18-alpine
restart: unless-stopped
healthcheck:
test: ["CMD", "node", "healthcheck.js"]
interval: 30s
timeout: 10s
retries: 3
start_period: 60s
depends_on:
database:
condition: service_healthy
cache:
condition: service_healthy
database:
image: postgres:15-alpine
restart: unless-stopped
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres -d production_db || exit 1"]
interval: 10s
timeout: 5s
retries: 5
start_period: 30s
volumes:
- postgres-data:/var/lib/postgresql/data
cache:
image: redis:7-alpine
restart: unless-stopped
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 10s
timeout: 5s
retries: 5
start_period: 20s
volumes:
- redis-data:/data
queue:
image: rabbitmq:3-management-alpine
restart: unless-stopped
healthcheck:
test: ["CMD", "rabbitmq-diagnostics", "ping"]
interval: 30s
timeout: 10s
retries: 5
start_period: 60s
volumes:
- rabbitmq-data:/var/lib/rabbitmq
volumes:
postgres-data:
redis-data:
rabbitmq-data:
自动化备份配置:
version: '3.8'
services:
database:
image: postgres:15-alpine
restart: unless-stopped
environment:
POSTGRES_PASSWORD_FILE: /run/secrets/db_password
secrets:
- db_password
volumes:
- postgres-data:/var/lib/postgresql/data
networks:
- backend
db-backup:
image: prodrigestivill/postgres-backup-local:15-alpine
restart: unless-stopped
environment:
POSTGRES_HOST: database
POSTGRES_DB: ${DB_NAME}
POSTGRES_USER: ${DB_USER}
POSTGRES_PASSWORD_FILE: /run/secrets/db_password
SCHEDULE: "@daily"
BACKUP_KEEP_DAYS: 7
BACKUP_KEEP_WEEKS: 4
BACKUP_KEEP_MONTHS: 6
BACKUP_DIR: /backups
HEALTHCHECK_PORT: 8080
secrets:
- db_password
volumes:
- ./backups:/backups
- ./backup-scripts:/scripts:ro
networks:
- backend
depends_on:
database:
condition: service_healthy
volume-backup:
image: futurice/docker-volume-backup:2.6.0
restart: unless-stopped
environment:
BACKUP_CRON_EXPRESSION: "0 2 * * *"
BACKUP_FILENAME: "backup-%Y-%m-%d_%H-%M-%S.tar.gz"
BACKUP_RETENTION_DAYS: 30
AWS_S3_BUCKET_NAME: ${S3_BACKUP_BUCKET}
AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID}
AWS_SECRET_ACCESS_KEY_FILE: /run/secrets/aws_secret
secrets:
- aws_secret
volumes:
- postgres-data:/backup/postgres-data:ro
- redis-data:/backup/redis-data:ro
- /var/run/docker.sock:/var/run/docker.sock:ro
- ./backup-archive:/archive
networks:
backend:
driver: bridge
volumes:
postgres-data:
redis-data:
secrets:
db_password:
file: ./secrets/db_password.txt
aws_secret:
file: ./secrets/aws_secret.txt
当您需要时,使用 docker-compose-production:
始终使用版本固定:固定特定镜像版本,而不是使用 latest,以确保可重复的部署。
实施健康检查:为所有服务配置健康检查,以实现自动恢复和适当的依赖管理。
设置资源限制:始终定义 CPU 和内存限制,以防止资源耗尽并确保可预测的性能。
使用密钥管理:切勿将密钥存储在环境变量或 compose 文件中;使用 Docker 密钥或外部密钥管理器。
配置重启策略:对生产服务使用 restart: unless-stopped,以确保从故障中自动恢复。
实施适当的日志记录:配置具有轮换和保留策略的结构化日志记录,以管理磁盘空间。
使用只读文件系统:尽可能设置 read_only: true,并对临时数据使用 tmpfs 以提高安全性。
丢弃不必要的权限:使用 cap_drop: ALL 并仅添加所需权限,遵循最小权限原则。
启用监控:部署监控和可观测性工具,以跟踪应用程序健康状态和性能指标。
实施自动化备份:配置具有保留策略的定期自动化备份,并测试恢复流程。
使用内部网络:将后端网络标记为内部网络,以防止外部直接访问数据库和缓存。
配置更新策略:为零停机部署定义更新和回滚配置。
实施资源预留:设置资源预留,以保证关键服务的最低资源。
使用多阶段依赖:配置带有健康检查条件的 depends_on,以确保正确的启动顺序。
记录配置:维护生产配置和部署流程的全面文档。
使用最新标签:使用 latest 或未版本化的镜像可能导致镜像更新时出现意外行为;始终固定版本。
忽略资源限制:不设置资源限制可能导致一个服务消耗所有可用资源并使其他服务崩溃。
缺少健康检查:没有健康检查,Docker 无法确定服务是否真正就绪或需要重启。
以明文存储密钥:将密钥提交到版本控制或存储在环境变量中会暴露敏感数据。
不测试备份:创建备份而不定期测试恢复流程,会导致实际事件中的数据丢失。
暴露不必要的端口:将所有服务端口发布到主机增加了攻击面;仅暴露所需内容。
以 root 身份运行:不指定非 root 用户会使容器容易受到权限提升攻击。
忽略日志轮换:没有日志轮换,日志可能会填满磁盘空间并导致服务或主机崩溃。
缺少监控:部署时没有监控,无法在问题影响用户之前检测和诊断问题。
不使用网络:在默认网络上运行所有服务会阻止适当的网络分段并增加安全风险。
忘记就绪检查:在依赖项就绪之前启动依赖服务会导致连接失败和重启。
硬编码配置:在 compose 文件中嵌入环境特定值,使得难以部署到多个环境。
忽略安全更新:不定期更新基础镜像会使服务容易受到已知安全问题的影响。
启动周期不足:将健康检查启动周期设置得太短,会导致应用程序启动缓慢时出现误报。
不考虑扩展性:设计服务时不考虑水平扩展,使得难以处理增加的负载。
每周安装次数
97
仓库
GitHub 星标数
128
首次出现
2026年1月22日
安全审计
已安装于
opencode87
codex86
gemini-cli85
cursor85
github-copilot81
cline70
Production-ready Docker Compose configurations with security, reliability, and scalability best practices.
A comprehensive production template with essential configurations:
version: '3.8'
services:
nginx:
image: nginx:1.25-alpine
container_name: production-nginx
restart: unless-stopped
ports:
- "80:80"
- "443:443"
volumes:
- ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
- ./nginx/ssl:/etc/nginx/ssl:ro
- nginx-cache:/var/cache/nginx
- nginx-logs:/var/log/nginx
networks:
- frontend
healthcheck:
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost/health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "3"
deploy:
resources:
limits:
cpus: '1.0'
memory: 512M
reservations:
cpus: '0.5'
memory: 256M
api:
image: mycompany/api:${API_VERSION:-latest}
container_name: production-api
restart: unless-stopped
networks:
- frontend
- backend
environment:
NODE_ENV: production
DATABASE_URL: postgresql://database:5432/production_db
REDIS_URL: redis://cache:6379
LOG_LEVEL: ${LOG_LEVEL:-info}
PORT: 3000
env_file:
- .env.production
secrets:
- db_password
- jwt_secret
healthcheck:
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:3000/health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 60s
depends_on:
database:
condition: service_healthy
cache:
condition: service_healthy
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "5"
deploy:
resources:
limits:
cpus: '2.0'
memory: 2G
reservations:
cpus: '1.0'
memory: 1G
worker:
image: mycompany/worker:${WORKER_VERSION:-latest}
container_name: production-worker
restart: unless-stopped
networks:
- backend
environment:
NODE_ENV: production
DATABASE_URL: postgresql://database:5432/production_db
REDIS_URL: redis://cache:6379
QUEUE_NAME: ${QUEUE_NAME:-default}
env_file:
- .env.production
secrets:
- db_password
depends_on:
database:
condition: service_healthy
cache:
condition: service_healthy
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "5"
deploy:
replicas: 3
resources:
limits:
cpus: '1.0'
memory: 1G
reservations:
cpus: '0.5'
memory: 512M
database:
image: postgres:15-alpine
container_name: production-db
restart: unless-stopped
networks:
- backend
environment:
POSTGRES_DB: production_db
POSTGRES_USER: postgres
POSTGRES_PASSWORD_FILE: /run/secrets/db_password
POSTGRES_INITDB_ARGS: "-E UTF8 --locale=en_US.UTF-8"
secrets:
- db_password
volumes:
- postgres-data:/var/lib/postgresql/data
- ./db/init:/docker-entrypoint-initdb.d:ro
- postgres-logs:/var/log/postgresql
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres -d production_db"]
interval: 10s
timeout: 5s
retries: 5
start_period: 30s
command:
- "postgres"
- "-c"
- "max_connections=200"
- "-c"
- "shared_buffers=256MB"
- "-c"
- "effective_cache_size=1GB"
- "-c"
- "maintenance_work_mem=64MB"
- "-c"
- "checkpoint_completion_target=0.9"
- "-c"
- "wal_buffers=16MB"
- "-c"
- "default_statistics_target=100"
- "-c"
- "random_page_cost=1.1"
- "-c"
- "effective_io_concurrency=200"
- "-c"
- "work_mem=1MB"
- "-c"
- "min_wal_size=1GB"
- "-c"
- "max_wal_size=4GB"
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "5"
deploy:
resources:
limits:
cpus: '2.0'
memory: 2G
reservations:
cpus: '1.0'
memory: 1G
cache:
image: redis:7-alpine
container_name: production-cache
restart: unless-stopped
networks:
- backend
command: >
redis-server
--appendonly yes
--appendfsync everysec
--maxmemory 512mb
--maxmemory-policy allkeys-lru
--requirepass ${REDIS_PASSWORD}
volumes:
- redis-data:/data
healthcheck:
test: ["CMD-SHELL", "redis-cli -a $$REDIS_PASSWORD ping | grep PONG"]
interval: 10s
timeout: 5s
retries: 5
start_period: 20s
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "3"
deploy:
resources:
limits:
cpus: '1.0'
memory: 768M
reservations:
cpus: '0.5'
memory: 512M
backup:
image: prodrigestivill/postgres-backup-local:15-alpine
container_name: production-backup
restart: unless-stopped
networks:
- backend
environment:
POSTGRES_HOST: database
POSTGRES_DB: production_db
POSTGRES_USER: postgres
POSTGRES_PASSWORD_FILE: /run/secrets/db_password
SCHEDULE: "@daily"
BACKUP_KEEP_DAYS: 7
BACKUP_KEEP_WEEKS: 4
BACKUP_KEEP_MONTHS: 6
HEALTHCHECK_PORT: 8080
secrets:
- db_password
volumes:
- ./backups:/backups
depends_on:
database:
condition: service_healthy
networks:
frontend:
driver: bridge
backend:
driver: bridge
internal: true
volumes:
postgres-data:
driver: local
driver_opts:
type: none
o: bind
device: /data/postgres
redis-data:
driver: local
nginx-cache:
driver: local
nginx-logs:
driver: local
postgres-logs:
driver: local
secrets:
db_password:
file: ./secrets/db_password.txt
jwt_secret:
file: ./secrets/jwt_secret.txt
Production security configurations:
version: '3.8'
services:
web:
image: nginx:1.25-alpine
restart: unless-stopped
read_only: true
tmpfs:
- /var/cache/nginx
- /var/run
cap_drop:
- ALL
cap_add:
- NET_BIND_SERVICE
security_opt:
- no-new-privileges:true
- seccomp:./security/seccomp-profile.json
user: "nginx:nginx"
networks:
- frontend
ports:
- "80:80"
- "443:443"
volumes:
- ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
- ./nginx/ssl:/etc/nginx/ssl:ro
api:
image: mycompany/api:${VERSION}
restart: unless-stopped
read_only: true
tmpfs:
- /tmp
cap_drop:
- ALL
security_opt:
- no-new-privileges:true
- seccomp:./security/seccomp-profile.json
user: "1000:1000"
networks:
- frontend
- backend
environment:
NODE_ENV: production
env_file:
- .env.production
secrets:
- source: db_password
target: /run/secrets/db_password
mode: 0400
- source: api_key
target: /run/secrets/api_key
mode: 0400
database:
image: postgres:15-alpine
restart: unless-stopped
read_only: true
tmpfs:
- /tmp
- /run/postgresql
cap_drop:
- ALL
cap_add:
- CHOWN
- DAC_OVERRIDE
- FOWNER
- SETGID
- SETUID
security_opt:
- no-new-privileges:true
user: "postgres:postgres"
networks:
- backend
environment:
POSTGRES_PASSWORD_FILE: /run/secrets/db_password
secrets:
- source: db_password
mode: 0400
volumes:
- postgres-data:/var/lib/postgresql/data
networks:
frontend:
driver: bridge
driver_opts:
com.docker.network.bridge.enable_icc: "false"
backend:
driver: bridge
internal: true
volumes:
postgres-data:
secrets:
db_password:
file: ./secrets/db_password.txt
api_key:
file: ./secrets/api_key.txt
Comprehensive resource management:
version: '3.8'
services:
web:
image: nginx:alpine
restart: unless-stopped
deploy:
resources:
limits:
cpus: '0.50'
memory: 256M
pids: 100
reservations:
cpus: '0.25'
memory: 128M
ulimits:
nofile:
soft: 1024
hard: 2048
nproc:
soft: 64
hard: 128
api:
image: node:18-alpine
restart: unless-stopped
deploy:
resources:
limits:
cpus: '2.0'
memory: 2G
pids: 200
reservations:
cpus: '1.0'
memory: 1G
ulimits:
nofile:
soft: 4096
hard: 8192
nproc:
soft: 256
hard: 512
database:
image: postgres:15-alpine
restart: unless-stopped
deploy:
resources:
limits:
cpus: '4.0'
memory: 4G
pids: 500
reservations:
cpus: '2.0'
memory: 2G
ulimits:
nofile:
soft: 8192
hard: 16384
shm_size: '256mb'
volumes:
- postgres-data:/var/lib/postgresql/data
cache:
image: redis:7-alpine
restart: unless-stopped
deploy:
resources:
limits:
cpus: '1.0'
memory: 1G
reservations:
cpus: '0.5'
memory: 512M
sysctls:
net.core.somaxconn: 1024
volumes:
- redis-data:/data
volumes:
postgres-data:
redis-data:
Multiple replicas with load balancing:
version: '3.8'
services:
loadbalancer:
image: nginx:alpine
restart: unless-stopped
ports:
- "80:80"
- "443:443"
volumes:
- ./nginx/nginx-lb.conf:/etc/nginx/nginx.conf:ro
- ./nginx/ssl:/etc/nginx/ssl:ro
networks:
- frontend
healthcheck:
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost/health"]
interval: 10s
timeout: 5s
retries: 3
deploy:
resources:
limits:
cpus: '1.0'
memory: 512M
api:
image: mycompany/api:${VERSION}
restart: unless-stopped
networks:
- frontend
- backend
environment:
NODE_ENV: production
DATABASE_URL: postgresql://database:5432/app
INSTANCE_ID: "{{.Task.Slot}}"
deploy:
replicas: 5
update_config:
parallelism: 2
delay: 10s
order: start-first
failure_action: rollback
rollback_config:
parallelism: 2
delay: 10s
restart_policy:
condition: on-failure
delay: 5s
max_attempts: 3
window: 120s
resources:
limits:
cpus: '1.0'
memory: 1G
reservations:
cpus: '0.5'
memory: 512M
healthcheck:
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:3000/health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 60s
database:
image: postgres:15-alpine
restart: unless-stopped
networks:
- backend
environment:
POSTGRES_PASSWORD_FILE: /run/secrets/db_password
secrets:
- db_password
volumes:
- postgres-data:/var/lib/postgresql/data
healthcheck:
test: ["CMD-SHELL", "pg_isready"]
interval: 10s
timeout: 5s
retries: 5
deploy:
resources:
limits:
cpus: '4.0'
memory: 4G
database-replica:
image: postgres:15-alpine
restart: unless-stopped
networks:
- backend
environment:
POSTGRES_PASSWORD_FILE: /run/secrets/db_password
POSTGRES_PRIMARY_HOST: database
POSTGRES_PRIMARY_PORT: 5432
secrets:
- db_password
volumes:
- postgres-replica-data:/var/lib/postgresql/data
- ./db/replica-setup.sh:/docker-entrypoint-initdb.d/replica-setup.sh:ro
depends_on:
database:
condition: service_healthy
deploy:
resources:
limits:
cpus: '2.0'
memory: 2G
networks:
frontend:
driver: bridge
backend:
driver: bridge
internal: true
volumes:
postgres-data:
postgres-replica-data:
secrets:
db_password:
file: ./secrets/db_password.txt
Production monitoring stack:
version: '3.8'
services:
prometheus:
image: prom/prometheus:latest
container_name: prometheus
restart: unless-stopped
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--storage.tsdb.retention.time=30d'
- '--web.console.libraries=/usr/share/prometheus/console_libraries'
- '--web.console.templates=/usr/share/prometheus/consoles'
- '--web.enable-lifecycle'
volumes:
- ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
- ./monitoring/alerts:/etc/prometheus/alerts:ro
- prometheus-data:/prometheus
networks:
- monitoring
ports:
- "9090:9090"
healthcheck:
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:9090/-/healthy"]
interval: 30s
timeout: 10s
retries: 3
deploy:
resources:
limits:
cpus: '1.0'
memory: 2G
grafana:
image: grafana/grafana:latest
container_name: grafana
restart: unless-stopped
environment:
GF_SECURITY_ADMIN_PASSWORD__FILE: /run/secrets/grafana_password
GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-simple-json-datasource
GF_SERVER_ROOT_URL: https://monitoring.example.com
secrets:
- grafana_password
volumes:
- ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro
- ./monitoring/grafana/dashboards:/var/lib/grafana/dashboards:ro
- grafana-data:/var/lib/grafana
networks:
- monitoring
- frontend
ports:
- "3001:3000"
depends_on:
- prometheus
healthcheck:
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:3000/api/health"]
interval: 30s
timeout: 10s
retries: 3
deploy:
resources:
limits:
cpus: '0.5'
memory: 512M
node-exporter:
image: prom/node-exporter:latest
container_name: node-exporter
restart: unless-stopped
command:
- '--path.rootfs=/host'
- '--path.procfs=/host/proc'
- '--path.sysfs=/host/sys'
- '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
volumes:
- /:/host:ro,rslave
networks:
- monitoring
ports:
- "9100:9100"
deploy:
resources:
limits:
cpus: '0.2'
memory: 128M
cadvisor:
image: gcr.io/cadvisor/cadvisor:latest
container_name: cadvisor
restart: unless-stopped
privileged: true
devices:
- /dev/kmsg
volumes:
- /:/rootfs:ro
- /var/run:/var/run:ro
- /sys:/sys:ro
- /var/lib/docker:/var/lib/docker:ro
- /dev/disk:/dev/disk:ro
networks:
- monitoring
ports:
- "8080:8080"
deploy:
resources:
limits:
cpus: '0.3'
memory: 256M
loki:
image: grafana/loki:latest
container_name: loki
restart: unless-stopped
command: -config.file=/etc/loki/local-config.yaml
volumes:
- ./monitoring/loki-config.yml:/etc/loki/local-config.yaml:ro
- loki-data:/loki
networks:
- monitoring
ports:
- "3100:3100"
deploy:
resources:
limits:
cpus: '1.0'
memory: 1G
promtail:
image: grafana/promtail:latest
container_name: promtail
restart: unless-stopped
command: -config.file=/etc/promtail/config.yml
volumes:
- ./monitoring/promtail-config.yml:/etc/promtail/config.yml:ro
- /var/log:/var/log:ro
- /var/lib/docker/containers:/var/lib/docker/containers:ro
networks:
- monitoring
depends_on:
- loki
deploy:
resources:
limits:
cpus: '0.2'
memory: 256M
networks:
monitoring:
driver: bridge
frontend:
driver: bridge
volumes:
prometheus-data:
grafana-data:
loki-data:
secrets:
grafana_password:
file: ./secrets/grafana_password.txt
Centralized logging setup:
version: '3.8'
services:
app:
image: myapp:latest
restart: unless-stopped
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "5"
labels: "app,environment,version"
tag: "{{.Name}}/{{.ID}}"
labels:
app: "myapp"
environment: "production"
version: "${VERSION}"
nginx:
image: nginx:alpine
restart: unless-stopped
logging:
driver: "syslog"
options:
syslog-address: "tcp://logserver:514"
tag: "nginx"
syslog-format: "rfc5424micro"
api:
image: api:latest
restart: unless-stopped
logging:
driver: "fluentd"
options:
fluentd-address: "localhost:24224"
tag: "docker.{{.Name}}"
fluentd-async-connect: "true"
fluentd-retry-wait: "1s"
fluentd-max-retries: "30"
database:
image: postgres:15-alpine
restart: unless-stopped
logging:
driver: "json-file"
options:
max-size: "50m"
max-file: "10"
compress: "true"
volumes:
- postgres-data:/var/lib/postgresql/data
volumes:
postgres-data:
Multi-environment setup:
version: '3.8'  # NOTE: top-level `version` is obsolete in the Compose spec; kept for older tooling

services:
  # Application: every environment-specific value comes from a variable
  # with a production-safe default, so one compose file serves all
  # environments (selected via ENVIRONMENT).
  app:
    image: myapp:${VERSION:-latest}
    restart: unless-stopped
    environment:
      NODE_ENV: ${NODE_ENV:-production}
      LOG_LEVEL: ${LOG_LEVEL:-info}
      PORT: ${APP_PORT:-3000}
      DATABASE_URL: postgresql://${DB_USER}:${DB_PASSWORD}@database:5432/${DB_NAME}
      REDIS_URL: redis://:${REDIS_PASSWORD}@cache:6379
      JWT_SECRET: ${JWT_SECRET}  # NOTE(review): prefer Docker secrets over env vars for secrets
      API_TIMEOUT: ${API_TIMEOUT:-30000}
      MAX_CONNECTIONS: ${MAX_CONNECTIONS:-100}
    env_file:
      - .env.${ENVIRONMENT:-production}  # per-environment overrides
      - .env.secrets                     # keep this file out of version control
    networks:
      - app-network

  # PostgreSQL: version selectable per environment; data on a named volume.
  database:
    image: postgres:${POSTGRES_VERSION:-15}-alpine
    restart: unless-stopped
    environment:
      POSTGRES_DB: ${DB_NAME}
      POSTGRES_USER: ${DB_USER}
      POSTGRES_PASSWORD: ${DB_PASSWORD}
      # `:-` default applies "-E UTF8" when POSTGRES_INITDB_ARGS is unset.
      POSTGRES_INITDB_ARGS: ${POSTGRES_INITDB_ARGS:--E UTF8}
    volumes:
      - postgres-data:/var/lib/postgresql/data
    networks:
      - app-network

  # Redis: password-protected, with a bounded memory budget.
  cache:
    image: redis:${REDIS_VERSION:-7}-alpine
    restart: unless-stopped
    # NOTE(review): the password appears in the container's process list;
    # consider a mounted redis.conf or a secret file instead.
    command: redis-server --requirepass ${REDIS_PASSWORD} --maxmemory ${REDIS_MAX_MEMORY:-256mb}
    volumes:
      - redis-data:/data
    networks:
      - app-network

networks:
  app-network:
    driver: bridge

volumes:
  postgres-data:
  redis-data:
Comprehensive health monitoring:
version: '3.8'  # NOTE: top-level `version` is obsolete in the Compose spec; kept for older tooling

services:
  # Frontend: probe an HTTP /health endpoint with wget (available in alpine).
  web:
    image: nginx:alpine
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s  # grace period before failed probes count

  # API: custom Node healthcheck script; starts only after its
  # dependencies report healthy (conditions below).
  api:
    image: node:18-alpine
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "node", "healthcheck.js"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s  # generous start window to avoid false negatives during boot
    depends_on:
      database:
        condition: service_healthy
      cache:
        condition: service_healthy

  # PostgreSQL: pg_isready exits non-zero until the server accepts connections.
  database:
    image: postgres:15-alpine
    restart: unless-stopped
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres -d production_db || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s
    volumes:
      - postgres-data:/var/lib/postgresql/data

  # Redis: a successful PING reply means the server is serving commands.
  cache:
    image: redis:7-alpine
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 20s
    volumes:
      - redis-data:/data

  # RabbitMQ: diagnostics ping verifies the broker node responds.
  queue:
    image: rabbitmq:3-management-alpine
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "rabbitmq-diagnostics", "ping"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 60s  # the Erlang VM is slow to start; do not probe too early
    volumes:
      - rabbitmq-data:/var/lib/rabbitmq

volumes:
  postgres-data:
  redis-data:
  rabbitmq-data:
Automated backup configuration:
version: '3.8'  # NOTE: top-level `version` is obsolete in the Compose spec; kept for older tooling

services:
  # PostgreSQL: password delivered via Docker secret, never as a plain env var.
  database:
    image: postgres:15-alpine
    restart: unless-stopped
    environment:
      POSTGRES_PASSWORD_FILE: /run/secrets/db_password  # read from the mounted secret
    secrets:
      - db_password
    # Healthcheck is required: db-backup below depends on this service with
    # `condition: service_healthy`, which Compose rejects when the target
    # service defines no healthcheck.
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s
    volumes:
      - postgres-data:/var/lib/postgresql/data
    networks:
      - backend

  # Scheduled logical (pg_dump) backups with tiered retention.
  db-backup:
    image: prodrigestivill/postgres-backup-local:15-alpine
    restart: unless-stopped
    environment:
      POSTGRES_HOST: database
      POSTGRES_DB: ${DB_NAME}
      POSTGRES_USER: ${DB_USER}
      POSTGRES_PASSWORD_FILE: /run/secrets/db_password
      SCHEDULE: "@daily"
      BACKUP_KEEP_DAYS: "7"    # quoted: environment values are strings
      BACKUP_KEEP_WEEKS: "4"
      BACKUP_KEEP_MONTHS: "6"
      BACKUP_DIR: /backups
      HEALTHCHECK_PORT: "8080"
    secrets:
      - db_password
    volumes:
      - ./backups:/backups
      - ./backup-scripts:/scripts:ro
    networks:
      - backend
    depends_on:
      database:
        condition: service_healthy  # wait until pg_isready succeeds

  # Nightly tar.gz snapshots of named volumes, shipped to S3.
  volume-backup:
    image: futurice/docker-volume-backup:2.6.0
    restart: unless-stopped
    environment:
      BACKUP_CRON_EXPRESSION: "0 2 * * *"  # 02:00 daily
      BACKUP_FILENAME: "backup-%Y-%m-%d_%H-%M-%S.tar.gz"
      BACKUP_RETENTION_DAYS: "30"
      AWS_S3_BUCKET_NAME: ${S3_BACKUP_BUCKET}
      AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID}
      AWS_SECRET_ACCESS_KEY_FILE: /run/secrets/aws_secret  # secret file, not env var
    secrets:
      - aws_secret
    volumes:
      - postgres-data:/backup/postgres-data:ro  # read-only: backups must not mutate data
      - redis-data:/backup/redis-data:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro  # lets the tool pause containers around a backup
      - ./backup-archive:/archive

networks:
  backend:
    driver: bridge

volumes:
  postgres-data:
  redis-data:

secrets:
  db_password:
    file: ./secrets/db_password.txt
  aws_secret:
    file: ./secrets/aws_secret.txt
Use docker-compose-production when you need to:
Always Use Version Pinning : Pin specific image versions instead of using latest to ensure reproducible deployments.
Implement Health Checks : Configure health checks for all services to enable automatic recovery and proper dependency management.
Set Resource Limits : Always define CPU and memory limits to prevent resource exhaustion and ensure predictable performance.
Use Secrets Management : Never store secrets in environment variables or compose files; use Docker secrets or external secret managers.
Configure Restart Policies : Use restart: unless-stopped for production services to ensure automatic recovery from failures.
Implement Proper Logging : Configure structured logging with rotation and retention policies to manage disk space.
Use Read-Only Filesystems : Set read_only: true where possible and use tmpfs for temporary data to improve security.
Drop Unnecessary Capabilities : Use cap_drop: ALL and only add required capabilities to follow the principle of least privilege.
Using Latest Tags : Using latest or unversioned images can cause unexpected behavior when images are updated; always pin versions.
Ignoring Resource Limits : Not setting resource limits can allow one service to consume all available resources and crash others.
Missing Health Checks : Without health checks, Docker cannot determine if services are actually ready or need to be restarted.
Storing Secrets in Plain Text : Committing secrets to version control or storing them in environment variables exposes sensitive data.
Not Testing Backups : Creating backups without regularly testing restoration procedures leads to data loss during actual incidents.
Exposing Unnecessary Ports : Publishing all service ports to the host increases attack surface; only expose what's needed.
Running as Root : Not specifying a non-root user leaves containers vulnerable to privilege escalation attacks.
Ignoring Log Rotation : Without log rotation, logs can fill up disk space and crash services or hosts.
Missing Monitoring : Deploying without monitoring makes it impossible to detect and diagnose issues before they impact users.
Not Using Networks : Running all services on the default network prevents proper segmentation and increases security risk.
Weekly Installs
97
Repository
GitHub Stars
128
First Seen
Jan 22, 2026
Security Audits
Gen Agent Trust Hub: Pass | Socket: Pass | Snyk: Warn
Installed on
opencode: 87
codex: 86
gemini-cli: 85
cursor: 85
github-copilot: 81
cline: 70
Azure 升级评估与自动化工具 - 轻松迁移 Functions 计划、托管层级和 SKU
104,900 周安装
Enable Monitoring : Deploy monitoring and observability tools to track application health and performance metrics.
Implement Automated Backups : Configure regular automated backups with retention policies and test recovery procedures.
Use Internal Networks : Mark backend networks as internal to prevent direct external access to databases and caches.
Configure Update Strategies : Define update and rollback configurations for zero-downtime deployments.
Implement Resource Reservations : Set resource reservations to guarantee minimum resources for critical services.
Use Multi-Stage Dependencies : Configure depends_on with health check conditions to ensure proper startup order.
Document Configuration : Maintain comprehensive documentation of your production configuration and deployment procedures.
Forgetting Readiness Checks : Starting dependent services before dependencies are ready causes connection failures and restarts.
Hardcoding Configuration : Embedding environment-specific values in the compose file makes it difficult to deploy to multiple environments.
Neglecting Security Updates : Not regularly updating base images leaves services vulnerable to known security issues.
Insufficient Start Period : Setting health check start periods too short causes false positives during slow application startup.
Not Planning for Scale : Designing services without considering horizontal scaling makes it difficult to handle increased load.