Files
honey-be/scripts/rolling-update.staged.sh

623 lines
23 KiB
Bash
Raw Permalink Normal View History

2026-03-07 23:10:41 +02:00
#!/bin/bash
# Rolling Update Deployment Script (staged / 8GB VPS)
# Same as rolling-update.sh but uses docker-compose.staged.yml (lower memory limits).
# This script performs zero-downtime deployment by:
# 1. Building new backend image
# 2. Starting new backend container on the standby port (8082 or 8080,
#    whichever one Nginx is not currently routing to)
# 3. Health checking the new container
# 4. Updating Nginx to point to new container
# 5. Reloading Nginx (zero downtime)
# 6. Stopping old container after grace period
#
# Environment variables read by this script:
#   NGINX_CONF            - path of the Nginx config to edit; auto-detected when unset
#   KEEP_FAILED_CONTAINER - "true" keeps a failed new container (stopped, not removed)
#   DB_ROOT_PASSWORD      - when unset, scripts/load-db-password.sh is sourced instead
# The script refuses to run unless the effective user is root.
#
# Strict mode: abort on command failure, on unset variables, and when any
# stage of a pipeline fails.
set -euo pipefail
# Colors (define early for use in config detection)
# ANSI escape sequences kept as literal backslash text; the logging helpers
# print them with `echo -e`, which turns \033 into the ESC character.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
# NC resets the terminal attributes after a colored tag.
NC='\033[0m'
# Logging functions (define early)
# log MESSAGE
#   Writes MESSAGE to stdout behind a green "[YYYY-MM-DD HH:MM:SS]" stamp.
#   Backslash escapes inside MESSAGE are expanded, exactly as `echo -e` does.
log() {
  printf '%b\n' "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $1"
}
# error MESSAGE
#   Writes MESSAGE to stderr behind a red "[ERROR]" tag; backslash escapes in
#   MESSAGE are expanded.
error() {
  printf '%b\n' "${RED}[ERROR]${NC} $1" >&2
}
# warn MESSAGE
#   Writes MESSAGE to stdout behind a yellow "[WARN]" tag; backslash escapes in
#   MESSAGE are expanded.
warn() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $1"
}
# info MESSAGE
#   Writes MESSAGE to stdout behind a blue "[INFO]" tag; backslash escapes in
#   MESSAGE are expanded.
info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
}
# Configuration (staged: use docker-compose.staged.yml)
# SCRIPT_DIR: absolute path of the directory holding this script, resolved
# from BASH_SOURCE so it does not depend on the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# PROJECT_DIR: the parent directory of SCRIPT_DIR.
PROJECT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
# COMPOSE_FILE: compose file handed to every compose invocation via -f.
COMPOSE_FILE="${PROJECT_DIR}/docker-compose.staged.yml"
# Detect Nginx config file (try common locations)
# Priority: sites-enabled (what Nginx actually loads) > conf.d > custom paths
# A non-empty NGINX_CONF from the environment always wins and skips detection.
# Exits with status 1 when no candidate file exists.
NGINX_CONF="${NGINX_CONF:-}"
if [ -z "$NGINX_CONF" ]; then
  if [ -f "/etc/nginx/sites-enabled/testforapp.website" ]; then
    NGINX_CONF="/etc/nginx/sites-enabled/testforapp.website"
    log "Using Nginx config: $NGINX_CONF (sites-enabled - active config)"
  elif [ -f "/etc/nginx/sites-enabled/testforapp.website.conf" ]; then
    NGINX_CONF="/etc/nginx/sites-enabled/testforapp.website.conf"
    log "Using Nginx config: $NGINX_CONF (sites-enabled - active config)"
  elif [ -f "/etc/nginx/conf.d/honey.conf" ]; then
    NGINX_CONF="/etc/nginx/conf.d/honey.conf"
    log "Using Nginx config: $NGINX_CONF (conf.d)"
  elif [ -f "/opt/app/nginx/testforapp.website.conf" ]; then
    warn "Found config at /opt/app/nginx/testforapp.website.conf"
    warn "Checking if it's symlinked to /etc/nginx/sites-enabled/..."
    # Plain variables on purpose: this is top-level code, and `local` is only
    # valid inside a function. Using it here fails and, under `set -e`,
    # terminates the script.
    nginx_symlink_target=""
    for nginx_symlink in \
      "/etc/nginx/sites-enabled/testforapp.website" \
      "/etc/nginx/sites-enabled/testforapp.website.conf"; do
      # Resolve only entries that really are symlinks: `readlink -f` can also
      # succeed for a path whose last component does not exist, which would
      # produce a bogus "target".
      if [ -L "$nginx_symlink" ]; then
        nginx_symlink_target="$(readlink -f "$nginx_symlink" 2>/dev/null || true)"
        if [ -n "$nginx_symlink_target" ]; then
          break
        fi
      fi
    done
    if [ -n "$nginx_symlink_target" ]; then
      NGINX_CONF="$nginx_symlink_target"
      log "Using Nginx config: $NGINX_CONF (symlink target)"
    else
      NGINX_CONF="/opt/app/nginx/testforapp.website.conf"
      warn "Using custom path - will update this file, but you may need to copy to sites-enabled"
    fi
    # Do not leak the scratch variables into the rest of the script.
    unset nginx_symlink nginx_symlink_target
  else
    error "Cannot find Nginx config file."
    error "Searched:"
    error " - /etc/nginx/sites-enabled/testforapp.website"
    error " - /etc/nginx/sites-enabled/testforapp.website.conf"
    error " - /etc/nginx/conf.d/honey.conf"
    error " - /opt/app/nginx/testforapp.website.conf"
    error ""
    error "Please set NGINX_CONF environment variable with the correct path."
    exit 1
  fi
else
  log "Using Nginx config: $NGINX_CONF (from NGINX_CONF environment variable)"
fi
# Create backup in /tmp to avoid nginx including it (sites-enabled/* includes all files)
# Only the path is computed here; the copy itself is made later, right before
# the config is edited. The timestamp suffix keeps successive runs apart.
# NGINX_CONF is quoted so a path containing spaces or glob characters is
# passed to basename as a single operand.
NGINX_CONF_BACKUP="/tmp/nginx-backup-$(basename -- "$NGINX_CONF").$(date +%Y%m%d_%H%M%S)"
# Ports for backends (will be swapped dynamically)
# Initial defaults only: detect_active_backend reassigns both variables after
# it has read the upstream block of the Nginx config.
PRIMARY_PORT=8080
STANDBY_PORT=8082
# Detect which backend is currently active
# Reads the `upstream backend { ... }` block of $NGINX_CONF and treats the
# first non-comment `server 127.0.0.1:<port>` line that is not marked `backup`
# as the live backend. Falls back to port 8080 when no such line is found.
# Globals read:    NGINX_CONF
# Globals written: ACTIVE_PORT, STANDBY_PORT, ACTIVE_CONTAINER,
#                  STANDBY_CONTAINER, PRIMARY_PORT, HEALTH_CHECK_URL
detect_active_backend() {
  local active_port_line
  # The block header is matched with optional leading whitespace and flexible
  # spacing, the same way the config updater in this script locates it, so
  # an indented upstream block is not misread as "8080 active".
  # Commented-out server lines are skipped. `|| true` keeps an empty match
  # from aborting the script under `set -e` / pipefail.
  active_port_line="$(
    sed -n '/^[[:space:]]*upstream[[:space:]]\{1,\}backend[[:space:]]*{/,/^[[:space:]]*}/p' "$NGINX_CONF" \
      | grep -E 'server[[:space:]]+127\.0\.0\.1:' \
      | grep -Ev '^[[:space:]]*#' \
      | grep -v 'backup' \
      | head -1
  )" || true

  if [[ "$active_port_line" == *"127.0.0.1:8082"* ]]; then
    # Port 8082 is active (not backup)
    ACTIVE_PORT=8082
    STANDBY_PORT=8080
    ACTIVE_CONTAINER="honey-backend-new"
    STANDBY_CONTAINER="honey-backend"
    log "Detected: Port 8082 is currently active"
  else
    # Port 8080 is active (default or only one present)
    ACTIVE_PORT=8080
    STANDBY_PORT=8082
    ACTIVE_CONTAINER="honey-backend"
    STANDBY_CONTAINER="honey-backend-new"
    log "Detected: Port 8080 is currently active"
  fi
  PRIMARY_PORT=$ACTIVE_PORT
  # Readiness endpoint of the backend that is about to be (re)deployed.
  HEALTH_CHECK_URL="http://127.0.0.1:${STANDBY_PORT}/actuator/health/readiness"
}
# Readiness polling budget: the health check probes at most
# HEALTH_CHECK_RETRIES times and sleeps HEALTH_CHECK_INTERVAL seconds between
# probes.
HEALTH_CHECK_RETRIES=60 # Increased for Spring Boot startup (60 * 2s = 120s max)
HEALTH_CHECK_INTERVAL=2
# Seconds slept after the Nginx reload before the old container is stopped.
GRACE_PERIOD=10
# Mirror KEEP_FAILED_CONTAINER=true into an exported script-level flag so the
# rollback step can still see the request to keep a failed container.
# NOTE(review): the original comment says this helps when running under sudo;
# that cannot be confirmed from this file alone.
if [[ "${KEEP_FAILED_CONTAINER:-}" == "true" ]]; then
  export SCRIPT_KEEP_FAILED_CONTAINER="true"
  log "KEEP_FAILED_CONTAINER=true - failed containers will be kept for debugging"
fi
# Pick the Compose front end: prefer the `docker compose` plugin and fall back
# to the standalone `docker-compose` binary. DOCKER_COMPOSE_CMD is expanded
# unquoted by callers so that "docker compose" splits into two words.
DOCKER_COMPOSE_CMD=""
if docker compose version > /dev/null 2>&1; then
  DOCKER_COMPOSE_CMD="docker compose"
elif command -v docker-compose > /dev/null 2>&1; then
  DOCKER_COMPOSE_CMD="docker-compose"
fi
# Neither variant responded: nothing below can work, so stop here.
if [[ -z "$DOCKER_COMPOSE_CMD" ]]; then
  error "Neither 'docker compose' nor 'docker-compose' is available"
  exit 1
fi
# Check prerequisites
# Verifies, in order: effective UID is 0, the Nginx config file exists,
# DB_ROOT_PASSWORD is available (sourcing scripts/load-db-password.sh when it
# is not set), and the currently active backend container is running.
# Calls detect_active_backend, which sets the ACTIVE_*/STANDBY_* globals.
# Exits with status 1 on the first failed check.
check_prerequisites() {
  log "Checking prerequisites..."

  # Refuse to continue unless the effective user is root.
  if (( EUID != 0 )); then
    error "This script must be run as root (or with sudo)"
    exit 1
  fi

  # The compose command was resolved at script start; just report it.
  log "Using Docker Compose command: $DOCKER_COMPOSE_CMD"
  log "Using compose file: $COMPOSE_FILE (staged)"

  # The Nginx config must be a regular file we can edit later.
  if [[ ! -f "$NGINX_CONF" ]]; then
    error "Nginx config not found at $NGINX_CONF"
    exit 1
  fi

  # Database password: take it from the environment or from the loader script.
  if [[ -z "${DB_ROOT_PASSWORD:-}" ]]; then
    warn "DB_ROOT_PASSWORD not set, attempting to load from secret file..."
    if [[ ! -f "${SCRIPT_DIR}/load-db-password.sh" ]]; then
      error "Cannot load DB_ROOT_PASSWORD. Please set it or run: source scripts/load-db-password.sh"
      exit 1
    fi
    source "${SCRIPT_DIR}/load-db-password.sh"
  fi

  # Work out which backend Nginx currently routes to.
  detect_active_backend

  # The live backend has to be up, otherwise there is nothing to roll over from.
  if ! docker ps --format '{{.Names}}' | grep -q "^${ACTIVE_CONTAINER}$"; then
    error "Active backend container (${ACTIVE_CONTAINER}) is not running"
    error "Please start it first: docker compose -f ${COMPOSE_FILE} up -d backend"
    exit 1
  fi

  log "✅ Prerequisites check passed"
  log "Active backend: ${ACTIVE_CONTAINER} on port ${ACTIVE_PORT}"
  log "New backend will use: ${STANDBY_CONTAINER} on port ${STANDBY_PORT}"
}
# Build new backend image
# Builds the compose service that will be started on the standby port:
# "backend-new" (with the rolling-update profile) for port 8082, "backend"
# otherwise. Build output is shown and also copied to
# /tmp/rolling-update-build.log. Exits with status 1 if the build fails.
# Globals written: SERVICE_TO_BUILD
build_new_image() {
  log "Building new backend image..."
  cd "$PROJECT_DIR"

  # Assemble the compose invocation once; DOCKER_COMPOSE_CMD is left unquoted
  # so that "docker compose" becomes two words.
  local -a compose_cmd=($DOCKER_COMPOSE_CMD -f "$COMPOSE_FILE")
  case "$STANDBY_PORT" in
    8082)
      SERVICE_TO_BUILD="backend-new"
      compose_cmd+=(--profile rolling-update)
      ;;
    *)
      SERVICE_TO_BUILD="backend"
      ;;
  esac
  log "Building service: ${SERVICE_TO_BUILD} (for port ${STANDBY_PORT})..."
  compose_cmd+=(build "$SERVICE_TO_BUILD")

  # The status tested here is the pipeline's; the script runs with pipefail,
  # so a failing build is not hidden by tee.
  if ! "${compose_cmd[@]}" 2>&1 | tee /tmp/rolling-update-build.log; then
    error "Failed to build new backend image"
    exit 1
  fi
  log "✅ New backend image built successfully"
}
# Start new backend container
# Removes any existing standby container (running or stopped), starts a fresh
# one with `up -d`, waits 10 seconds, then confirms it is still listed by
# `docker ps`. Exits with status 1 if the start fails or the container is gone
# after the wait.
# Globals written: SERVICE_NAME, CONTAINER_NAME
start_new_container() {
  log "Starting new backend container on port ${STANDBY_PORT}..."
  cd "$PROJECT_DIR"

  # Service, container name and compose arguments all follow the standby port.
  local -a compose_cmd=($DOCKER_COMPOSE_CMD -f "$COMPOSE_FILE")
  if [[ "$STANDBY_PORT" == "8082" ]]; then
    SERVICE_NAME="backend-new"
    CONTAINER_NAME="honey-backend-new"
    compose_cmd+=(--profile rolling-update)
  else
    SERVICE_NAME="backend"
    CONTAINER_NAME="honey-backend"
  fi

  # A leftover standby container is stopped and removed so the new one starts
  # from scratch; failures of this cleanup are deliberately ignored.
  if docker ps -a --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then
    if docker ps --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then
      warn "${CONTAINER_NAME} container is already running, stopping it first..."
    else
      warn "${CONTAINER_NAME} container exists but is stopped, removing it for fresh start..."
    fi
    "${compose_cmd[@]}" stop "$SERVICE_NAME" || true
    "${compose_cmd[@]}" rm -f "$SERVICE_NAME" || true
  fi

  # Bring the new container up.
  if ! "${compose_cmd[@]}" up -d "$SERVICE_NAME"; then
    error "Failed to start new backend container"
    exit 1
  fi
  log "✅ New backend container started"

  # Give the process a moment, then make sure it did not die right away.
  log "Waiting for container to initialize (Spring Boot startup can take 60+ seconds)..."
  sleep 10
  if ! docker ps --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then
    error "Container ${CONTAINER_NAME} stopped immediately after start. Check logs:"
    error " docker logs ${CONTAINER_NAME}"
    exit 1
  fi
}
# Health check new container
# Polls $HEALTH_CHECK_URL with curl up to HEALTH_CHECK_RETRIES times, sleeping
# HEALTH_CHECK_INTERVAL seconds between attempts.
# Returns 0 as soon as a probe succeeds. Returns 1 if the standby container is
# not running (before or during polling) or if every attempt fails; in the
# latter case container status and the last 200 log lines are printed.
health_check_new_container() {
  log "Performing health check on new backend container (port ${STANDBY_PORT})..."

  # Which container belongs to the standby port.
  local container_name="honey-backend"
  if [[ "$STANDBY_PORT" == "8082" ]]; then
    container_name="honey-backend-new"
  fi

  # Bail out early when the container is not even listed as running.
  if ! docker ps --format '{{.Names}}' | grep -q "^${container_name}$"; then
    error "Container ${container_name} is not running!"
    error "Check logs: docker logs ${container_name}"
    return 1
  fi

  # Report the Docker health status when inspect yields one; "none" is the
  # fallback when the inspect call fails.
  local health_status
  health_status="$(docker inspect --format='{{.State.Health.Status}}' "${container_name}" 2>/dev/null || echo "none")"
  if [[ "$health_status" != "none" ]]; then
    info "Container health status: $health_status"
  fi

  local attempt
  for (( attempt = 1; attempt <= HEALTH_CHECK_RETRIES; attempt++ )); do
    # The container may have crashed since the previous probe.
    if ! docker ps --format '{{.Names}}' | grep -q "^${container_name}$"; then
      error "Container ${container_name} stopped during health check!"
      error "Check logs: docker logs ${container_name}"
      return 1
    fi

    # Probe the readiness endpoint.
    if curl -sf "$HEALTH_CHECK_URL" > /dev/null 2>&1; then
      log "✅ New backend container is healthy"
      return 0
    fi

    # After the final attempt there is nothing to report or wait for.
    if (( attempt >= HEALTH_CHECK_RETRIES )); then
      continue
    fi

    if (( attempt % 5 == 0 )); then
      # Every fifth failure: include container status and recent logs.
      info "Health check failed (attempt $attempt/$HEALTH_CHECK_RETRIES)"
      info "Container status: $(docker ps --filter "name=${container_name}" --format '{{.Status}}')"
      info "Last 5 log lines:"
      docker logs --tail 5 "${container_name}" 2>&1 | sed 's/^/ /'
    else
      info "Health check failed (attempt $attempt/$HEALTH_CHECK_RETRIES), retrying in ${HEALTH_CHECK_INTERVAL}s..."
    fi
    sleep "$HEALTH_CHECK_INTERVAL"
  done

  # Budget exhausted: dump everything useful for debugging.
  error "Health check failed after $HEALTH_CHECK_RETRIES attempts"
  error "New backend container is not responding at $HEALTH_CHECK_URL"
  error ""
  error "Container status:"
  docker ps --filter "name=${container_name}" --format 'table {{.Names}}\t{{.Status}}\t{{.Ports}}' || true
  error ""
  error "Last 200 log lines:"
  docker logs --tail 200 "${container_name}" 2>&1 | sed 's/^/ /'
  error ""
  error "To debug, keep container running and check:"
  error " docker logs -f ${container_name}"
  error " docker logs --tail 500 ${container_name} # For even more logs"
  error " curl -v $HEALTH_CHECK_URL"
  return 1
}
# Update Nginx configuration
# Rewrites the `upstream backend { ... }` block of $NGINX_CONF so that the
# standby port becomes the primary server and the previously active port
# becomes the `backup` server, then validates the result with `nginx -t`.
# The original file is first copied to $NGINX_CONF_BACKUP and is copied back
# whenever the rewrite or the validation fails; in both cases the script
# exits with status 1.
# Globals read: NGINX_CONF, NGINX_CONF_BACKUP, STANDBY_PORT, ACTIVE_PORT
update_nginx_config() {
  log "Updating Nginx configuration to point to new backend (port ${STANDBY_PORT})..."
  # Backup current config
  cp "$NGINX_CONF" "$NGINX_CONF_BACKUP"
  log "Backed up Nginx config to: $NGINX_CONF_BACKUP"

  # Use Python for reliable config manipulation.
  # - Values travel through the environment and the heredoc is quoted, so a
  #   path containing quotes or backslashes cannot corrupt the Python source.
  # - The interpreter runs as an `if` condition: under `set -e` a separate
  #   `$?` check after the heredoc would never be reached on failure, and the
  #   backup would not be restored.
  if ! NGINX_CONF_PATH="$NGINX_CONF" \
    NEW_ACTIVE_PORT="$STANDBY_PORT" \
    NEW_BACKUP_PORT="$ACTIVE_PORT" \
    python3 - << 'PYTHON_SCRIPT'
import os
import re
import sys
import traceback

config_file = os.environ["NGINX_CONF_PATH"]
standby_port = os.environ["NEW_ACTIVE_PORT"]
active_port = os.environ["NEW_BACKUP_PORT"]

try:
    # Read the entire file
    with open(config_file, 'r') as f:
        lines = f.readlines()

    # First pass: find upstream block boundaries and remember any keepalive line
    in_upstream = False
    upstream_start_idx = -1
    upstream_end_idx = -1
    keepalive_line = None
    for i, line in enumerate(lines):
        if re.match(r'^\s*upstream\s+backend\s*\{', line):
            upstream_start_idx = i
            in_upstream = True
        elif in_upstream and re.match(r'^\s*\}', line):
            upstream_end_idx = i
            break
        elif in_upstream and re.search(r'keepalive', line):
            keepalive_line = line

    if upstream_start_idx == -1 or upstream_end_idx == -1:
        raise Exception("Could not find upstream backend block")

    # Everything up to and including the "upstream backend {" line is kept.
    new_lines = list(lines[:upstream_start_idx + 1])

    # Inside the block: drop old server and keepalive lines, keep the rest
    # (comments and any other directives).
    for line in lines[upstream_start_idx + 1:upstream_end_idx]:
        if re.search(r'server\s+127\.0\.0\.1:808[02]', line):
            continue
        if re.search(r'keepalive', line):
            continue
        new_lines.append(line)

    # New server pair, followed by keepalive, right before the closing brace.
    new_lines.append(f" server 127.0.0.1:{standby_port};\n")
    new_lines.append(f" server 127.0.0.1:{active_port} backup;\n")
    if keepalive_line:
        new_lines.append(keepalive_line)
    else:
        new_lines.append(" keepalive 200;\n")

    # Closing brace and everything after the block are kept as is.
    new_lines.extend(lines[upstream_end_idx:])

    # Write updated config
    with open(config_file, 'w') as f:
        f.writelines(new_lines)
    print("Nginx config updated successfully")
except Exception as e:
    print(f"Error updating Nginx config: {e}", file=sys.stderr)
    traceback.print_exc()
    sys.exit(1)
PYTHON_SCRIPT
  then
    error "Failed to update Nginx config"
    cp "$NGINX_CONF_BACKUP" "$NGINX_CONF"
    exit 1
  fi

  # Test Nginx configuration
  if nginx -t; then
    log "✅ Nginx configuration is valid"
  else
    error "Nginx configuration test failed, restoring backup..."
    error "Error details:"
    # Diagnostics are best effort: `nginx -t` fails again here, and with
    # pipefail + `set -e` that would end the script before the backup below
    # is restored, leaving the broken config in place.
    nginx -t 2>&1 | sed 's/^/ /' || true
    error ""
    error "Current config (first 50 lines):"
    head -50 "$NGINX_CONF" | sed 's/^/ /' || true
    cp "$NGINX_CONF_BACKUP" "$NGINX_CONF"
    exit 1
  fi
}
# Reload Nginx (zero downtime)
# Asks systemd to reload Nginx so the edited upstream block takes effect.
# On failure the backed-up config is copied back, a second reload is
# attempted, and the script exits with status 1.
# Globals read: STANDBY_PORT, NGINX_CONF, NGINX_CONF_BACKUP
reload_nginx() {
  log "Reloading Nginx (zero downtime)..."
  if systemctl reload nginx; then
    log "✅ Nginx reloaded successfully"
    # Report the port that was actually switched to; the new backend is on
    # 8080 whenever 8082 was the active side before this run.
    log "✅ Traffic is now being served by new backend (port ${STANDBY_PORT})"
  else
    error "Failed to reload Nginx, restoring backup config..."
    cp "$NGINX_CONF_BACKUP" "$NGINX_CONF"
    systemctl reload nginx
    exit 1
  fi
}
# Stop old container after grace period
# Sleeps GRACE_PERIOD seconds, then stops the compose service behind
# ACTIVE_CONTAINER ("backend-new" via the rolling-update profile, otherwise
# "backend"). A failed stop only produces a warning.
stop_old_container() {
  log "Waiting ${GRACE_PERIOD}s grace period for active connections to finish..."
  sleep "$GRACE_PERIOD"
  log "Stopping old backend container (${ACTIVE_CONTAINER})..."
  cd "$PROJECT_DIR"

  # Full stop command for whichever side used to be live.
  local -a stop_cmd=($DOCKER_COMPOSE_CMD -f "$COMPOSE_FILE")
  case "$ACTIVE_CONTAINER" in
    honey-backend-new)
      stop_cmd+=(--profile rolling-update stop backend-new)
      ;;
    *)
      stop_cmd+=(stop backend)
      ;;
  esac

  if "${stop_cmd[@]}"; then
    log "✅ Old backend container stopped"
  else
    warn "Failed to stop old backend container gracefully"
  fi
}
# Rollback function
# Undoes a failed deployment and always ends the script with status 1:
#   1. copies $NGINX_CONF_BACKUP back and reloads Nginx, if the backup exists;
#   2. stops the new (standby) container, and removes it unless
#      KEEP_FAILED_CONTAINER / SCRIPT_KEEP_FAILED_CONTAINER is "true";
#   3. starts the previously active container again if it is not running.
rollback() {
  error "Rolling back to previous version..."

  # Either the caller's variable or the script-level copy may request keeping
  # the failed container.
  local keep_container="false"
  if [[ "${KEEP_FAILED_CONTAINER:-false}" == "true" || "${SCRIPT_KEEP_FAILED_CONTAINER:-false}" == "true" ]]; then
    keep_container="true"
  fi

  # Put the old Nginx config back when a backup was taken.
  if [[ -f "$NGINX_CONF_BACKUP" ]]; then
    cp "$NGINX_CONF_BACKUP" "$NGINX_CONF"
    systemctl reload nginx
    log "✅ Nginx config restored"
  fi

  cd "$PROJECT_DIR"

  # Describe the failed (standby) side once; the branches below reuse it.
  local failed_service failed_container profile_hint
  local -a failed_compose=($DOCKER_COMPOSE_CMD -f "$COMPOSE_FILE")
  if [[ "$STANDBY_PORT" == "8082" ]]; then
    failed_service="backend-new"
    failed_container="honey-backend-new"
    profile_hint=" --profile rolling-update"
    failed_compose+=(--profile rolling-update)
  else
    failed_service="backend"
    failed_container="honey-backend"
    profile_hint=""
  fi

  if [[ "$keep_container" == "true" ]]; then
    # Stop only, and tell the operator how to inspect and remove it later.
    warn ""
    warn "═══════════════════════════════════════════════════════════════"
    warn "KEEP_FAILED_CONTAINER=true - Container will be KEPT for debugging"
    warn "═══════════════════════════════════════════════════════════════"
    "${failed_compose[@]}" stop "$failed_service" || true
    warn ""
    warn "Container '${failed_container}' is STOPPED but NOT REMOVED"
    warn ""
    warn "To check logs:"
    warn " docker logs ${failed_container}"
    warn " docker logs --tail 100 ${failed_container}"
    warn ""
    warn "To remove manually:"
    warn " $DOCKER_COMPOSE_CMD -f $COMPOSE_FILE${profile_hint} rm -f ${failed_service}"
    warn "═══════════════════════════════════════════════════════════════"
  else
    # Stop and remove; cleanup failures are deliberately ignored.
    "${failed_compose[@]}" stop "$failed_service" || true
    "${failed_compose[@]}" rm -f "$failed_service" || true
  fi

  # Make sure the previously active backend is serving again: try `start`
  # first and fall back to `up -d`.
  if ! docker ps --format '{{.Names}}' | grep -q "^${ACTIVE_CONTAINER}$"; then
    local old_service="backend"
    local -a old_compose=($DOCKER_COMPOSE_CMD -f "$COMPOSE_FILE")
    if [[ "$ACTIVE_CONTAINER" == "honey-backend-new" ]]; then
      old_service="backend-new"
      old_compose+=(--profile rolling-update)
    fi
    "${old_compose[@]}" start "$old_service" || "${old_compose[@]}" up -d "$old_service"
  fi

  error "Rollback completed"
  exit 1
}
# Main deployment flow
# Runs the deployment steps in order: prerequisites, image build, start of the
# standby container, health check, Nginx config switch, Nginx reload, and
# finally stopping the previously active container. A failed health check
# triggers rollback, which terminates the script with status 1.
main() {
log "Starting rolling update deployment (staged)..."
# Trap errors for rollback
# NOTE(review): the script does not enable `set -E` (errtrace). Without it an
# ERR trap is not inherited by shell functions, so a command failing inside one
# of the step functions below presumably ends the script via `set -e` without
# running rollback - confirm whether that is intended.
trap rollback ERR
check_prerequisites
build_new_image
start_new_container
# Called as an `if` condition so that a non-zero return is handled here
# instead of aborting the script.
if ! health_check_new_container; then
rollback
fi
update_nginx_config
reload_nginx
# Clear error trap after successful switch
# From here on traffic is on the new backend; a failure while stopping the old
# container must not undo the switch.
trap - ERR
stop_old_container
log "✅ Rolling update completed successfully!"
log ""
log "Summary:"
log " - New backend is running on port ${STANDBY_PORT} (${STANDBY_CONTAINER})"
log " - Nginx is serving traffic from new backend"
log " - Old backend (${ACTIVE_CONTAINER}) has been stopped"
log ""
# Manual rollback instructions. The hints spell out `docker compose` literally
# rather than using $DOCKER_COMPOSE_CMD.
log "To rollback (if needed):"
log " 1. Restore Nginx config: cp $NGINX_CONF_BACKUP $NGINX_CONF"
log " 2. Reload Nginx: systemctl reload nginx"
if [ "$ACTIVE_CONTAINER" = "honey-backend-new" ]; then
log " 3. Start old backend: docker compose -f $COMPOSE_FILE --profile rolling-update start backend-new"
log " 4. Stop new backend: docker compose -f $COMPOSE_FILE stop backend"
else
log " 3. Start old backend: docker compose -f $COMPOSE_FILE start backend"
log " 4. Stop new backend: docker compose -f $COMPOSE_FILE --profile rolling-update stop backend-new"
fi
}
# Run main function
# All command-line arguments are forwarded; main itself does not read them.
main "$@"