Deploying MCP Servers in Production
Complete guide to deploying MCP servers in production environments, covering process management, containerization, health checks, zero-downtime deployments, scaling strategies, reverse proxy configuration, and monitoring production MCP server infrastructure.
Deploying MCP Servers in Production
Overview
Building an MCP server is one thing. Keeping it running reliably at 3 AM when a critical workflow depends on it is another problem entirely. Production MCP deployments need process management, health monitoring, graceful shutdowns, and deployment strategies that do not drop active connections. I have deployed MCP servers that handled thousands of tool invocations per hour, and every failure taught me something about what production readiness actually requires.
Prerequisites
- Node.js 16 or later
- `@modelcontextprotocol/sdk` package installed
- Docker and Docker Compose installed
- PM2 process manager (`npm install -g pm2`)
- Nginx or similar reverse proxy
- Basic understanding of MCP transport options (stdio, SSE, WebSocket)
- Familiarity with Linux server administration
Process Management with PM2
Basic PM2 Configuration
Running an MCP server with node server.js in a terminal is fine for development. In production, you need a process manager that restarts on crash, manages logs, and handles signals properly.
// ecosystem.config.js
// PM2 process definition: restart policy, log handling, and graceful-shutdown
// timing for the production MCP server. Loaded with `pm2 start ecosystem.config.js`.
module.exports = {
apps: [{
name: "mcp-server",
script: "./server.js",
// One fork, not cluster mode: SSE transports are stateful, so raising
// `instances` would require sticky sessions in front of the workers first.
instances: 1,
exec_mode: "fork",
// Restart the process if its memory exceeds 512 MB (guards against slow leaks).
max_memory_restart: "512M",
env: {
NODE_ENV: "production",
MCP_PORT: 3100,
LOG_LEVEL: "info"
},
// Selected with `pm2 start ecosystem.config.js --env development`.
env_development: {
NODE_ENV: "development",
MCP_PORT: 3100,
LOG_LEVEL: "debug"
},
log_date_format: "YYYY-MM-DD HH:mm:ss Z",
error_file: "/var/log/mcp-server/error.log",
out_file: "/var/log/mcp-server/out.log",
merge_logs: true,
// Stop retrying after 10 crash-restarts; a run shorter than 10s counts
// as a crash, and each restart waits 5s to avoid a tight crash loop.
max_restarts: 10,
min_uptime: "10s",
restart_delay: 5000,
watch: false,
// Allow 10s between the stop signal and SIGKILL so the connection-drain
// loop in server.js has time to run.
kill_timeout: 10000,
listen_timeout: 8000,
// Send a "shutdown" IPC message before signaling (handled in server.js).
shutdown_with_message: true
}]
};
Graceful Shutdown Handling
MCP servers maintain stateful connections. A hard kill drops active tool invocations mid-execution, leaving clients with dangling requests. Graceful shutdown drains connections first.
// server.js
// MCP server with an SSE transport, a health endpoint, and a graceful
// shutdown path that drains active connections before exiting.
var http = require("http");
var McpServer = require("@modelcontextprotocol/sdk/server/mcp.js").McpServer;
var SSEServerTransport = require("@modelcontextprotocol/sdk/server/sse.js").SSEServerTransport;

var server = new McpServer({
  name: "production-mcp",
  version: "1.0.0"
});

// connectionId -> { transport, connectedAt, lastActivity }
var activeConnections = new Map();
var isShuttingDown = false;
var httpServer = null;

// Log-correlation id. Uniqueness is "good enough" for bookkeeping;
// this is not security-sensitive, so Math.random is acceptable.
function newConnectionId() {
  return Date.now().toString(36) + Math.random().toString(36).slice(2);
}

function startServer(port) {
  var app = require("express")();

  // Health endpoint: reports 503 during drain so load balancers stop
  // routing new traffic here before the process exits.
  app.get("/health", function(req, res) {
    if (isShuttingDown) {
      res.status(503).json({ status: "shutting_down" });
      return;
    }
    res.json({
      status: "healthy",
      connections: activeConnections.size,
      uptime: process.uptime(),
      memory: process.memoryUsage()
    });
  });

  // SSE endpoint: one long-lived connection per client.
  app.get("/sse", function(req, res) {
    if (isShuttingDown) {
      res.status(503).json({ error: "Server is shutting down" });
      return;
    }
    var connectionId = newConnectionId();
    var transport = new SSEServerTransport("/messages", res);
    activeConnections.set(connectionId, {
      transport: transport,
      connectedAt: new Date(),
      lastActivity: new Date()
    });
    // Registered before connect() so a connection that dies during the
    // handshake is still removed from the map.
    transport.onclose = function() {
      activeConnections.delete(connectionId);
      console.log("Connection closed: " + connectionId + ", remaining: " + activeConnections.size);
    };
    // connect() returns a promise. An unhandled rejection here would
    // crash the whole process, so log it and evict the connection instead.
    server.connect(transport).catch(function(err) {
      activeConnections.delete(connectionId);
      console.error("Failed to connect transport " + connectionId + ": " + err.message);
    });
    console.log("New connection: " + connectionId + ", total: " + activeConnections.size);
  });

  app.post("/messages", function(req, res) {
    // Handle incoming messages
  });

  httpServer = http.createServer(app);
  httpServer.listen(port, function() {
    console.log("MCP server listening on port " + port);
  });
}

// Drain-then-exit shutdown: stop accepting connections, poll once a second,
// exit 0 when all connections close, or force-close and exit 1 after 30s.
function gracefulShutdown(signal) {
  // Guard: SIGINT followed by SIGTERM (or PM2's message plus a signal)
  // must not start a second drain loop with its own process.exit calls.
  if (isShuttingDown) {
    return;
  }
  isShuttingDown = true;
  console.log("Received " + signal + ". Starting graceful shutdown...");
  // Stop accepting new connections
  if (httpServer) {
    httpServer.close(function() {
      console.log("HTTP server closed");
    });
  }
  // Give active connections time to finish
  var shutdownTimeout = 30000;
  var checkInterval = 1000;
  var elapsed = 0;
  var shutdownCheck = setInterval(function() {
    elapsed += checkInterval;
    if (activeConnections.size === 0) {
      clearInterval(shutdownCheck);
      console.log("All connections drained. Exiting.");
      process.exit(0);
    }
    if (elapsed >= shutdownTimeout) {
      clearInterval(shutdownCheck);
      console.log("Shutdown timeout reached. " + activeConnections.size + " connections remaining. Force closing.");
      activeConnections.forEach(function(conn, id) {
        try {
          conn.transport.close();
        } catch (e) {
          console.error("Error closing connection " + id + ": " + e.message);
        }
      });
      process.exit(1);
    }
    console.log("Waiting for " + activeConnections.size + " connections to drain... (" + (elapsed / 1000) + "s)");
  }, checkInterval);
}

process.on("SIGTERM", function() { gracefulShutdown("SIGTERM"); });
process.on("SIGINT", function() { gracefulShutdown("SIGINT"); });
// PM2 sends a "shutdown" IPC message before SIGTERM when
// shutdown_with_message is enabled in ecosystem.config.js.
process.on("message", function(msg) {
  if (msg === "shutdown") {
    gracefulShutdown("PM2_SHUTDOWN");
  }
});

startServer(process.env.MCP_PORT || 3100);
PM2 Commands for Operations
# Start the server
pm2 start ecosystem.config.js
# View logs in real-time
pm2 logs mcp-server --lines 100
# Monitor resources
pm2 monit
# Graceful reload (zero-downtime for cluster mode)
pm2 reload mcp-server
# Save current process list for startup persistence
pm2 save
# Generate startup script (runs on boot)
pm2 startup systemd
Containerization with Docker
Production Dockerfile
# Build stage: install production dependencies once, with a clean lockfile-based install.
FROM node:20-alpine AS builder
WORKDIR /app
COPY package*.json ./
# NOTE(review): --only=production is deprecated in npm 8+; --omit=dev is the
# modern equivalent. Behavior is the same: dev dependencies are skipped.
RUN npm ci --only=production
# Production stage: minimal runtime image, no build tooling.
FROM node:20-alpine
# Run as a dedicated non-root user so a container compromise has fewer privileges.
RUN addgroup -g 1001 -S mcpuser && \
adduser -S mcpuser -u 1001 -G mcpuser
WORKDIR /app
# Copy the pruned node_modules from the build stage, then the app source.
COPY --from=builder --chown=mcpuser:mcpuser /app/node_modules ./node_modules
COPY --chown=mcpuser:mcpuser . .
# Remove dev files (a .dockerignore would keep these out of the build context entirely)
RUN rm -rf tests/ .env* .git/ docs/
USER mcpuser
EXPOSE 3100
# Docker-level health probe; wget is provided by Alpine's busybox.
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
CMD wget --no-verbose --tries=1 --spider http://localhost:3100/health || exit 1
ENV NODE_ENV=production
ENV MCP_PORT=3100
CMD ["node", "server.js"]
Docker Compose for Full Stack
# docker-compose.yml
version: "3.8"
services:
mcp-server:
build:
context: .
dockerfile: Dockerfile
ports:
- "3100:3100"
environment:
- NODE_ENV=production
- MCP_PORT=3100
- DATABASE_URL=postgresql://mcpuser:${DB_PASSWORD}@postgres:5432/mcpdata
- LOG_LEVEL=info
depends_on:
postgres:
condition: service_healthy
restart: unless-stopped
deploy:
resources:
limits:
memory: 512M
cpus: "1.0"
reservations:
memory: 128M
cpus: "0.25"
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:3100/health"]
interval: 30s
timeout: 5s
retries: 3
start_period: 15s
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
postgres:
image: postgres:16-alpine
environment:
POSTGRES_USER: mcpuser
POSTGRES_PASSWORD: ${DB_PASSWORD}
POSTGRES_DB: mcpdata
volumes:
- pgdata:/var/lib/postgresql/data
- ./db/init.sql:/docker-entrypoint-initdb.d/init.sql
healthcheck:
test: ["CMD-SHELL", "pg_isready -U mcpuser"]
interval: 10s
timeout: 5s
retries: 5
restart: unless-stopped
nginx:
image: nginx:alpine
ports:
- "443:443"
- "80:80"
volumes:
- ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
- ./nginx/ssl:/etc/nginx/ssl:ro
depends_on:
- mcp-server
restart: unless-stopped
volumes:
pgdata:
Reverse Proxy Configuration
Nginx for SSE and WebSocket
MCP servers using SSE or WebSocket transports need special proxy configuration. Standard proxy settings will buffer SSE events and break the real-time connection.
# nginx/nginx.conf
upstream mcp_backend {
server mcp-server:3100;
keepalive 32;
}
server {
listen 443 ssl http2;
server_name mcp.example.com;
ssl_certificate /etc/nginx/ssl/cert.pem;
ssl_certificate_key /etc/nginx/ssl/key.pem;
ssl_protocols TLSv1.2 TLSv1.3;
# SSE-specific settings
location /sse {
proxy_pass http://mcp_backend;
proxy_http_version 1.1;
# Critical for SSE: disable buffering
proxy_buffering off;
proxy_cache off;
# SSE connections are long-lived
proxy_read_timeout 86400s;
proxy_send_timeout 86400s;
# Pass through connection headers
proxy_set_header Connection "";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# Disable chunked transfer encoding issues
chunked_transfer_encoding off;
}
# WebSocket upgrade
location /ws {
proxy_pass http://mcp_backend;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_read_timeout 86400s;
}
# Regular HTTP endpoints (messages, health)
location / {
proxy_pass http://mcp_backend;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Connection "";
}
}
server {
listen 80;
server_name mcp.example.com;
return 301 https://$host$request_uri;
}
Health Checks and Readiness Probes
Comprehensive Health Check Endpoint
A basic /health that returns 200 OK is insufficient. Production health checks need to verify downstream dependencies and report detailed status.
// health.js
var os = require("os");
// Build a health checker around a map of named check functions.
// A check may return a plain value, return a promise, or throw; every
// outcome is normalized into { name, status, duration, details | error }.
function createHealthChecker(options) {
  var registered = options.checks || {};
  var bootTime = Date.now();

  // Run one named check. Always resolves (never rejects) so Promise.all
  // in check() can collect every result even when some checks fail.
  function execute(label, fn) {
    var began = Date.now();
    function healthy(data) {
      return {
        name: label,
        status: "healthy",
        duration: Date.now() - began,
        details: data || null
      };
    }
    function unhealthy(err) {
      return {
        name: label,
        status: "unhealthy",
        duration: Date.now() - began,
        error: err.message
      };
    }
    return new Promise(function(resolve) {
      var outcome;
      try {
        outcome = fn();
      } catch (err) {
        // Synchronous throw counts as an unhealthy check, not a crash.
        resolve(unhealthy(err));
        return;
      }
      if (outcome && typeof outcome.then === "function") {
        outcome.then(
          function(data) { resolve(healthy(data)); },
          function(err) { resolve(unhealthy(err)); }
        );
      } else {
        resolve(healthy(outcome));
      }
    });
  }

  return {
    // Run all registered checks in parallel and assemble the report.
    // Overall status is "healthy" only when every individual check passed.
    check: function() {
      var labels = Object.keys(registered);
      var pending = labels.map(function(label) {
        return execute(label, registered[label]);
      });
      return Promise.all(pending).then(function(results) {
        var anyFailed = results.some(function(r) {
          return r.status !== "healthy";
        });
        return {
          status: anyFailed ? "degraded" : "healthy",
          version: process.env.APP_VERSION || "unknown",
          uptime: Math.floor((Date.now() - bootTime) / 1000),
          timestamp: new Date().toISOString(),
          system: {
            memory: {
              used: Math.round(process.memoryUsage().heapUsed / 1024 / 1024) + "MB",
              total: Math.round(os.totalmem() / 1024 / 1024) + "MB",
              free: Math.round(os.freemem() / 1024 / 1024) + "MB"
            },
            cpu: os.loadavg(),
            pid: process.pid
          },
          checks: results
        };
      });
    }
  };
}
// Usage
// Wire concrete dependency checks into the health checker. Each check may
// return a value, return a promise, or throw; a throw or rejection marks
// that check unhealthy and the overall report "degraded".
var pool = require("./db");
var healthChecker = createHealthChecker({
checks: {
// Round-trip a trivial query to prove the pool can reach the database.
database: function() {
return pool.query("SELECT 1 AS result").then(function(res) {
return { connected: true, rows: res.rows.length };
});
},
// Verify the temp directory is writable; accessSync throws if it is not.
disk: function() {
var fs = require("fs");
fs.accessSync("/tmp", fs.constants.W_OK);
return { writable: true };
},
// Flag heap pressure: above 90% of the current heap ceiling is unhealthy.
// NOTE(review): heapTotal can still grow toward --max-old-space-size, so
// this measures pressure on the current heap, not the absolute limit.
memory: function() {
var usage = process.memoryUsage();
var heapPercent = (usage.heapUsed / usage.heapTotal) * 100;
if (heapPercent > 90) {
throw new Error("Heap usage at " + heapPercent.toFixed(1) + "%");
}
return { heapPercent: heapPercent.toFixed(1) + "%" };
}
}
});
module.exports = healthChecker;
Liveness vs Readiness
// routes/health-routes.js
var healthChecker = require("./health");
// Register liveness, readiness, and detailed-health routes on an Express app.
// Liveness never touches dependencies; readiness gates traffic on them.
function registerHealthRoutes(app) {
  // Liveness: is the process alive? Only fails if the event loop is wedged.
  app.get("/healthz", function(req, res) {
    res.json({ status: "alive", pid: process.pid });
  });
  // Readiness: can it serve traffic? 200 only when every dependency check
  // passed; 503 tells the load balancer to route elsewhere.
  app.get("/readyz", function(req, res) {
    healthChecker.check().then(function(report) {
      var statusCode = report.status === "healthy" ? 200 : 503;
      res.status(statusCode).json(report);
    }).catch(function(err) {
      // A rejected check() must not leave the probe request hanging
      // until the client times out.
      res.status(503).json({ status: "error", error: err.message });
    });
  });
  // Detailed health for monitoring dashboards.
  app.get("/health/detailed", function(req, res) {
    healthChecker.check().then(function(report) {
      res.json(report);
    }).catch(function(err) {
      res.status(500).json({ status: "error", error: err.message });
    });
  });
}
module.exports = registerHealthRoutes;
Zero-Downtime Deployment
Blue-Green with Docker Compose
#!/bin/bash
# deploy.sh - Blue-green deployment for MCP server
#
# Two color-suffixed copies of the service listen on adjacent host ports:
#   mcp-server-blue  -> 3101    mcp-server-green -> 3102
# The idle color is built and started, health-checked, nginx is flipped to
# it, in-flight connections drain, and the previous color is stopped.

# Detect which color is currently live (an empty result deploys blue).
CURRENT=$(docker compose ps --format json | grep -o '"mcp-server-[a-z]*"' | head -1 | tr -d '"')
if [ "$CURRENT" = "mcp-server-blue" ]; then
NEW="green"
OLD="blue"
else
NEW="blue"
OLD="green"
fi

# Last digit of the host port for the color being deployed
# (blue -> 3101, green -> 3102); used in the health-check URL below.
# Previously this variable was never set, so the curl hit port 310.
if [ "$NEW" = "blue" ]; then
NEW_PORT=1
else
NEW_PORT=2
fi

echo "Current: $OLD, Deploying: $NEW"
# Build and start new version
docker compose -f docker-compose.yml -f docker-compose.$NEW.yml build
docker compose -f docker-compose.yml -f docker-compose.$NEW.yml up -d mcp-server-$NEW
# Wait for new version to be healthy (up to 30 attempts x 2s = 60 seconds)
echo "Waiting for $NEW to become healthy..."
for i in $(seq 1 30); do
STATUS=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:310${NEW_PORT}/health)
if [ "$STATUS" = "200" ]; then
echo "$NEW is healthy"
break
fi
if [ "$i" = "30" ]; then
echo "ERROR: $NEW failed health check. Rolling back."
# Use the same compose file set as `up`, or the service is not resolvable.
docker compose -f docker-compose.yml -f docker-compose.$NEW.yml stop mcp-server-$NEW
exit 1
fi
sleep 2
done
# Switch nginx upstream to the new color
sed -i "s/mcp-server-$OLD/mcp-server-$NEW/g" /etc/nginx/conf.d/mcp-upstream.conf
nginx -s reload
# Drain old version: nginx stops sending new connections immediately, but
# in-flight SSE connections need time to finish.
echo "Draining $OLD connections..."
sleep 30
# Stop old version
docker compose -f docker-compose.yml -f docker-compose.$OLD.yml stop mcp-server-$OLD
echo "Deployment complete: $NEW is live"
Rolling Updates with Docker Swarm
# docker-compose.swarm.yml
version: "3.8"
services:
mcp-server:
image: registry.example.com/mcp-server:${VERSION}
deploy:
replicas: 3
update_config:
parallelism: 1
delay: 30s
failure_action: rollback
monitor: 60s
max_failure_ratio: 0.3
order: start-first
rollback_config:
parallelism: 1
delay: 10s
restart_policy:
condition: on-failure
delay: 5s
max_attempts: 3
window: 120s
resources:
limits:
memory: 512M
cpus: "1.0"
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:3100/readyz"]
interval: 15s
timeout: 5s
retries: 3
start_period: 20s
networks:
- mcp-net
networks:
mcp-net:
driver: overlay
Scaling MCP Servers
Horizontal Scaling Considerations
MCP servers maintain stateful SSE connections. Unlike stateless REST APIs, you cannot simply round-robin requests across instances. Each client connection is bound to a specific server instance.
// sticky-sessions.js
// Cluster-mode scaling for stateful SSE: the master accepts raw TCP
// connections and routes each one to a worker chosen by hashing the client
// IP, so a given client always lands on the same worker process.
var cluster = require("cluster");
var os = require("os");
var net = require("net");
// farmhash is a third-party native module providing a fast, stable 32-bit
// string hash; it must be listed in package.json dependencies.
var farmhash = require("farmhash");
// NOTE(review): MCP_WORKERS arrives as a string; the loop and modulo below
// coerce it, but Number(process.env.MCP_WORKERS) would be safer — confirm.
var numWorkers = process.env.MCP_WORKERS || Math.min(os.cpus().length, 4);
if (cluster.isMaster) {
// NOTE(review): cluster.isMaster is deprecated since Node 16 in favor of
// cluster.isPrimary (both currently work).
console.log("Master process " + process.pid + " starting " + numWorkers + " workers");
var workers = [];
for (var i = 0; i < numWorkers; i++) {
workers.push(cluster.fork());
}
// Sticky session based on client IP. pauseOnConnect keeps the socket from
// emitting data until the chosen worker resumes it (no bytes are lost in
// the handoff).
var server = net.createServer({ pauseOnConnect: true }, function(connection) {
var ip = connection.remoteAddress || "127.0.0.1";
var workerIndex = farmhash.fingerprint32(ip) % numWorkers;
// Hand the raw socket descriptor to the worker over the IPC channel.
workers[workerIndex].send("sticky-session:connection", connection);
});
server.listen(process.env.MCP_PORT || 3100, function() {
console.log("Sticky session proxy listening on port " + (process.env.MCP_PORT || 3100));
});
// Replace a crashed worker in the same array slot so the hash slot keeps
// pointing at a live process; that worker's clients must reconnect.
cluster.on("exit", function(worker, code, signal) {
console.log("Worker " + worker.process.pid + " died. Restarting...");
var idx = workers.indexOf(worker);
if (idx > -1) {
workers[idx] = cluster.fork();
}
});
} else {
var app = require("./app");
var http = require("http");
var httpServer = http.createServer(app);
// Receive sockets handed off by the master and feed them to the HTTP
// server as if it had accepted them itself.
process.on("message", function(message, connection) {
if (message === "sticky-session:connection") {
httpServer.emit("connection", connection);
connection.resume();
}
});
// Worker listens on port 0 (ephemeral); real traffic arrives only via the
// master's socket handoff above.
httpServer.listen(0, function() {
console.log("Worker " + process.pid + " started");
});
}
Connection-Aware Load Balancing
# Least connections strategy for MCP
upstream mcp_cluster {
least_conn;
server mcp-1:3100 max_fails=3 fail_timeout=30s;
server mcp-2:3100 max_fails=3 fail_timeout=30s;
server mcp-3:3100 max_fails=3 fail_timeout=30s;
keepalive 64;
}
# Sticky sessions using IP hash (important for SSE)
upstream mcp_sse {
ip_hash;
server mcp-1:3100;
server mcp-2:3100;
server mcp-3:3100;
}
server {
listen 443 ssl;
location /sse {
proxy_pass http://mcp_sse;
proxy_buffering off;
proxy_read_timeout 86400s;
proxy_http_version 1.1;
proxy_set_header Connection "";
}
location / {
proxy_pass http://mcp_cluster;
proxy_http_version 1.1;
proxy_set_header Connection "";
}
}
Environment Configuration
Production Environment Setup
# .env.production
NODE_ENV=production
MCP_PORT=3100
LOG_LEVEL=info
LOG_FORMAT=json
# Database
DATABASE_URL=postgresql://mcpuser:${DB_PASSWORD}@db-primary:5432/mcpdata
DATABASE_POOL_MIN=2
DATABASE_POOL_MAX=10
# Security
API_KEY_HASH_ROUNDS=12
JWT_SECRET=${JWT_SECRET}
CORS_ORIGINS=https://app.example.com,https://admin.example.com
# Performance
MAX_CONNECTIONS=100
REQUEST_TIMEOUT=30000
KEEP_ALIVE_TIMEOUT=65000
# Monitoring
METRICS_ENABLED=true
METRICS_PORT=9090
Configuration Validation on Startup
// config.js
// Load and validate configuration from environment variables.
// Exits the process (code 1) when a required variable is missing or the
// port is out of range, so misconfiguration fails at boot, not mid-request.
function loadConfig() {
  // Variables the server refuses to boot without.
  var mandatory = ["DATABASE_URL", "JWT_SECRET"];
  var absent = [];
  mandatory.forEach(function(key) {
    if (!process.env[key]) {
      absent.push(key);
    }
  });
  if (absent.length > 0) {
    console.error("Missing required environment variables: " + absent.join(", "));
    process.exit(1);
  }

  // Parse an integer env var, falling back when unset or unparsable.
  function intFrom(raw, fallback) {
    return parseInt(raw, 10) || fallback;
  }

  var config = {
    port: intFrom(process.env.MCP_PORT, 3100),
    nodeEnv: process.env.NODE_ENV || "development",
    logLevel: process.env.LOG_LEVEL || "info",
    logFormat: process.env.LOG_FORMAT || "text",
    database: {
      url: process.env.DATABASE_URL,
      poolMin: intFrom(process.env.DATABASE_POOL_MIN, 2),
      poolMax: intFrom(process.env.DATABASE_POOL_MAX, 10)
    },
    security: {
      jwtSecret: process.env.JWT_SECRET,
      // Empty CORS_ORIGINS yields an empty list, not [""].
      corsOrigins: (process.env.CORS_ORIGINS || "").split(",").filter(Boolean)
    },
    limits: {
      maxConnections: intFrom(process.env.MAX_CONNECTIONS, 100),
      requestTimeout: intFrom(process.env.REQUEST_TIMEOUT, 30000),
      keepAliveTimeout: intFrom(process.env.KEEP_ALIVE_TIMEOUT, 65000)
    },
    metrics: {
      enabled: process.env.METRICS_ENABLED === "true",
      port: intFrom(process.env.METRICS_PORT, 9090)
    }
  };

  // Reject plainly invalid ports before attempting to bind.
  if (config.port < 1 || config.port > 65535) {
    console.error("Invalid MCP_PORT: " + config.port);
    process.exit(1);
  }

  console.log("Configuration loaded for " + config.nodeEnv + " environment");
  return config;
}
module.exports = loadConfig();
Complete Working Example
A production-ready MCP server deployment with Docker, health checks, graceful shutdown, and monitoring.
// server.js - Production MCP server
// Express + MCP SSE transport with health probes, Prometheus metrics,
// connection limits, and a drain-then-exit graceful shutdown path.
var http = require("http");
var express = require("express");
var McpServer = require("@modelcontextprotocol/sdk/server/mcp.js").McpServer;
var SSEServerTransport = require("@modelcontextprotocol/sdk/server/sse.js").SSEServerTransport;
var config = require("./config");
var healthChecker = require("./health");

var app = express();

var mcpServer = new McpServer({
  name: "production-mcp",
  version: process.env.APP_VERSION || "1.0.0"
});

// connectionId -> { transport, connectedAt, remoteAddress }
var activeConnections = new Map();
var isShuttingDown = false;

// Lightweight in-process counters exposed at /metrics.
var metrics = {
  totalConnections: 0,
  totalRequests: 0,
  errors: 0,
  startedAt: new Date()
};

// Log-correlation id only -- not security-sensitive, Math.random is fine.
function newConnectionId() {
  return Date.now().toString(36) + Math.random().toString(36).slice(2);
}

// --- MCP tools -------------------------------------------------------------

// echo: trivial round-trip tool, useful for connectivity smoke tests.
mcpServer.tool("echo", "Echo a message back", { message: { type: "string" } }, function(params) {
  metrics.totalRequests++;
  return { content: [{ type: "text", text: "Echo: " + params.message }] };
});

// server_status: expose the runtime counters through the MCP channel itself.
mcpServer.tool("server_status", "Get server status", {}, function() {
  metrics.totalRequests++;
  return {
    content: [{
      type: "text",
      text: JSON.stringify({
        connections: activeConnections.size,
        totalConnections: metrics.totalConnections,
        totalRequests: metrics.totalRequests,
        errors: metrics.errors,
        uptime: Math.floor((Date.now() - metrics.startedAt.getTime()) / 1000) + "s",
        memory: Math.round(process.memoryUsage().heapUsed / 1024 / 1024) + "MB"
      }, null, 2)
    }]
  };
});

// --- Middleware ------------------------------------------------------------

// Reject every request once draining has begun.
app.use(function(req, res, next) {
  if (isShuttingDown) {
    res.status(503).json({ error: "Server is shutting down" });
    return;
  }
  next();
});

// Connection limit enforcement (runs before the SSE handler).
app.use("/sse", function(req, res, next) {
  if (activeConnections.size >= config.limits.maxConnections) {
    res.status(429).json({
      error: "Connection limit reached",
      limit: config.limits.maxConnections,
      current: activeConnections.size
    });
    return;
  }
  next();
});

// --- Health endpoints ------------------------------------------------------

// Liveness: process is up; no dependency checks.
app.get("/healthz", function(req, res) {
  res.json({ status: "alive" });
});

// Readiness: 200 only when every dependency check passes.
app.get("/readyz", function(req, res) {
  healthChecker.check().then(function(report) {
    res.status(report.status === "healthy" ? 200 : 503).json(report);
  }).catch(function(err) {
    // A rejected check must not leave the probe request hanging.
    res.status(503).json({ status: "error", error: err.message });
  });
});

// Metrics endpoint (Prometheus text exposition format).
app.get("/metrics", function(req, res) {
  if (!config.metrics.enabled) {
    res.status(404).send("Metrics disabled");
    return;
  }
  var memUsage = process.memoryUsage();
  var lines = [
    "# HELP mcp_active_connections Current active SSE connections",
    "# TYPE mcp_active_connections gauge",
    "mcp_active_connections " + activeConnections.size,
    "",
    "# HELP mcp_total_connections Total connections since start",
    "# TYPE mcp_total_connections counter",
    "mcp_total_connections " + metrics.totalConnections,
    "",
    "# HELP mcp_total_requests Total tool invocations",
    "# TYPE mcp_total_requests counter",
    "mcp_total_requests " + metrics.totalRequests,
    "",
    "# HELP mcp_errors_total Total errors",
    "# TYPE mcp_errors_total counter",
    "mcp_errors_total " + metrics.errors,
    "",
    "# HELP mcp_heap_used_bytes Heap memory used",
    "# TYPE mcp_heap_used_bytes gauge",
    "mcp_heap_used_bytes " + memUsage.heapUsed,
    "",
    "# HELP mcp_uptime_seconds Server uptime",
    "# TYPE mcp_uptime_seconds gauge",
    "mcp_uptime_seconds " + Math.floor(process.uptime())
  ];
  res.set("Content-Type", "text/plain; version=0.0.4");
  res.send(lines.join("\n") + "\n");
});

// --- SSE transport ---------------------------------------------------------

app.get("/sse", function(req, res) {
  var connectionId = newConnectionId();
  var transport = new SSEServerTransport("/messages", res);
  activeConnections.set(connectionId, {
    transport: transport,
    connectedAt: new Date(),
    remoteAddress: req.ip
  });
  metrics.totalConnections++;
  console.log(JSON.stringify({
    event: "connection_opened",
    connectionId: connectionId,
    remoteAddress: req.ip,
    activeConnections: activeConnections.size
  }));
  // Registered before connect() so a connection that dies during the
  // handshake is still removed from the map.
  transport.onclose = function() {
    activeConnections.delete(connectionId);
    console.log(JSON.stringify({
      event: "connection_closed",
      connectionId: connectionId,
      activeConnections: activeConnections.size
    }));
  };
  // connect() is async; an unhandled rejection would crash the process,
  // so log it and evict the connection instead.
  mcpServer.connect(transport).catch(function(err) {
    metrics.errors++;
    activeConnections.delete(connectionId);
    console.error(JSON.stringify({
      event: "connect_failed",
      connectionId: connectionId,
      message: err.message
    }));
  });
});

app.post("/messages", express.json(), function(req, res) {
  // Route messages to appropriate transport
});

// Last-resort error handler: count, log structured JSON, return a generic 500.
app.use(function(err, req, res, next) {
  metrics.errors++;
  console.error(JSON.stringify({
    event: "error",
    message: err.message,
    stack: err.stack,
    path: req.path
  }));
  res.status(500).json({ error: "Internal server error" });
});

// --- Start server ----------------------------------------------------------
// headersTimeout must exceed keepAliveTimeout, otherwise Node can kill
// keep-alive sockets while a request is being parsed.
var httpServer = http.createServer(app);
httpServer.keepAliveTimeout = config.limits.keepAliveTimeout;
httpServer.headersTimeout = config.limits.keepAliveTimeout + 5000;
httpServer.listen(config.port, function() {
  console.log(JSON.stringify({
    event: "server_started",
    port: config.port,
    env: config.nodeEnv,
    pid: process.pid,
    version: process.env.APP_VERSION || "1.0.0"
  }));
});

// --- Graceful shutdown -----------------------------------------------------
// Stop accepting work, wait for SSE connections to drain, force-exit after 30s.
function shutdown(signal) {
  // Guard against duplicate triggers (PM2's "shutdown" message is followed
  // by SIGTERM) starting a second drain loop and timeout.
  if (isShuttingDown) {
    return;
  }
  isShuttingDown = true;
  console.log(JSON.stringify({ event: "shutdown_initiated", signal: signal }));
  httpServer.close(function() {
    console.log(JSON.stringify({ event: "http_server_closed" }));
  });
  var timeout = setTimeout(function() {
    console.log(JSON.stringify({
      event: "shutdown_timeout",
      remainingConnections: activeConnections.size
    }));
    process.exit(1);
  }, 30000);
  var drainCheck = setInterval(function() {
    if (activeConnections.size === 0) {
      clearInterval(drainCheck);
      clearTimeout(timeout);
      console.log(JSON.stringify({ event: "shutdown_complete" }));
      process.exit(0);
    }
  }, 1000);
}

process.on("SIGTERM", function() { shutdown("SIGTERM"); });
process.on("SIGINT", function() { shutdown("SIGINT"); });
// PM2 sends a "shutdown" IPC message before SIGTERM (shutdown_with_message).
process.on("message", function(msg) {
  if (msg === "shutdown") { shutdown("PM2"); }
});
# Dockerfile
FROM node:20-alpine AS builder
WORKDIR /app
COPY package*.json ./
RUN npm ci --only=production
FROM node:20-alpine
RUN addgroup -g 1001 -S mcp && adduser -S mcp -u 1001 -G mcp
WORKDIR /app
COPY --from=builder --chown=mcp:mcp /app/node_modules ./node_modules
COPY --chown=mcp:mcp . .
RUN rm -rf tests/ .env* .git/
USER mcp
EXPOSE 3100
HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
CMD wget --spider -q http://localhost:3100/healthz || exit 1
ENV NODE_ENV=production
CMD ["node", "server.js"]
# Deploy
docker build -t mcp-server:v1.2.0 .
docker compose up -d
docker compose ps
curl -s http://localhost:3100/readyz | jq .
Output:
{
"status": "healthy",
"version": "1.2.0",
"uptime": 42,
"timestamp": "2026-02-10T15:30:00.000Z",
"system": {
"memory": {
"used": "45MB",
"total": "512MB",
"free": "234MB"
},
"cpu": [0.12, 0.08, 0.05],
"pid": 1
},
"checks": [
{ "name": "database", "status": "healthy", "duration": 3, "details": { "connected": true } },
{ "name": "disk", "status": "healthy", "duration": 0, "details": { "writable": true } },
{ "name": "memory", "status": "healthy", "duration": 0, "details": { "heapPercent": "34.2%" } }
]
}
Common Issues and Troubleshooting
SSE Connections Dropping Behind Nginx
Error: Client disconnected after 60 seconds
Nginx's default proxy_read_timeout is 60 seconds. SSE connections are long-lived and need extended timeouts.
# Fix: Set long timeout for SSE endpoint
location /sse {
proxy_read_timeout 86400s; # 24 hours
proxy_buffering off;
proxy_cache off;
}
Container Running Out of Memory
FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - JavaScript heap out of memory
Node.js defaults to ~1.5GB heap. Containers with lower memory limits need explicit heap configuration.
# Fix: Set heap size to 75% of container memory limit
ENV NODE_OPTIONS="--max-old-space-size=384"
Health Check Failing During Startup
unhealthy: retries exceeded (3/3)
The database connection pool takes time to initialize. The health check runs before the pool is ready.
# Fix: Increase start_period
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:3100/healthz"]
interval: 15s
timeout: 5s
retries: 5
start_period: 30s # Give the app time to connect to dependencies
PM2 Not Forwarding SIGTERM Properly
Error: Worker process killed without graceful shutdown
PM2 sends a shutdown message before SIGTERM. If you only listen for SIGTERM, you miss the graceful window.
// Fix: Handle both PM2 message and SIGTERM
process.on("message", function(msg) {
if (msg === "shutdown") { gracefulShutdown("PM2"); }
});
process.on("SIGTERM", function() { gracefulShutdown("SIGTERM"); });
Docker Compose Service Ordering Race Condition
Error: connect ECONNREFUSED 172.18.0.2:5432
depends_on only waits for the container to start, not for the service inside to be ready.
# Fix: Use condition with healthcheck
depends_on:
postgres:
condition: service_healthy
Best Practices
- Always implement graceful shutdown — drain active connections before stopping the process. A 30-second drain window covers most in-flight tool invocations.
- Use structured JSON logging in production — text logs are human-friendly but impossible to query at scale. JSON logs integrate with ELK, Datadog, and CloudWatch.
- Set explicit memory limits — both at the container level (Docker `--memory`) and the Node.js level (`--max-old-space-size`). Set the Node heap to 75% of the container memory limit to leave room for native allocations.
- Separate liveness from readiness probes — liveness (`/healthz`) checks if the process is alive. Readiness (`/readyz`) checks if it can serve traffic. Conflating them causes unnecessary restarts when a dependency is temporarily down.
- Pin your base Docker images — use `node:20.11-alpine`, not `node:20-alpine`. A minor Node.js update breaking your server in production is a bad way to start the week.
- Never store secrets in Docker images — use environment variables, Docker secrets, or a vault. Secrets baked into images end up in registries and CI logs.
- Test your shutdown path — send SIGTERM to your running server and verify connections drain properly. Most teams never test this until a deployment drops requests in production.
- Use connection-aware load balancing for SSE — round-robin breaks SSE connections. Use `ip_hash` or sticky sessions to keep clients connected to the same backend instance.