Testing in Production: Strategies and Safety
A practical guide to testing in production safely using feature flags, canary deployments, observability, synthetic monitoring, and traffic shadowing for Node.js applications.
Testing in Production: Strategies and Safety
No staging environment perfectly mirrors production. Different data volumes, different traffic patterns, different network conditions, different third-party service behavior. The bugs that survive your test suite and staging environment are, by definition, bugs that only appear in production.
Testing in production is not "pushing untested code and hoping." It is a set of disciplined practices — feature flags, canary deployments, synthetic monitoring, and traffic shadowing — that let you validate code in the real environment while controlling blast radius. This guide covers each technique with practical implementations for Node.js applications.
Prerequisites
- A deployed Node.js application
- Basic understanding of deployment workflows
- Monitoring or logging in place
Feature Flags
Feature flags let you deploy code to production without activating it for all users. You ship the code, then gradually enable it.
Simple Feature Flag Implementation
// featureFlags.js
// In-memory flag table; replace the loader with a DB/config-service fetch in production.
var flags = {};

// Populate the flag table. In production, load from a database, config
// service, or environment instead of this hard-coded map.
function loadFlags() {
  flags = {
    newSearchAlgorithm: {
      enabled: false,
      percentage: 0,
      allowedUsers: []
    },
    redesignedCheckout: {
      enabled: true,
      percentage: 25,
      allowedUsers: ["user-123", "user-456"]
    },
    betaAnalytics: {
      enabled: true,
      percentage: 100,
      allowedUsers: []
    }
  };
}

// Decide whether `flagName` is active for `userId`.
// Order of precedence: master switch off → false; user on the allow-list →
// true; percentage 100 → true; percentage 0 → false; otherwise a
// deterministic per-(flag, user) hash bucket. Anonymous users (no userId)
// never enter a partial rollout.
function isEnabled(flagName, userId) {
  var flag = flags[flagName];
  if (!flag || !flag.enabled) return false;
  // Flags loaded from an external store may omit allowedUsers — tolerate that.
  var allowed = flag.allowedUsers || [];
  if (userId && allowed.indexOf(userId) !== -1) {
    return true;
  }
  // Treat a missing/non-numeric percentage as 0 (feature off for everyone
  // not on the allow-list) rather than comparing against undefined.
  var pct = typeof flag.percentage === "number" ? flag.percentage : 0;
  if (pct >= 100) return true;
  if (pct <= 0) return false;
  if (userId) {
    // Hash flagName+userId so each flag buckets users independently and a
    // given user gets a stable answer across requests.
    var hash = simpleHash(flagName + userId);
    return (hash % 100) < pct;
  }
  return false;
}

// Cheap deterministic string hash (djb2-style), reduced to a non-negative
// 32-bit integer. Not cryptographic — only used for stable bucketing.
function simpleHash(str) {
  var hash = 0;
  for (var i = 0; i < str.length; i++) {
    var char = str.charCodeAt(i);
    hash = ((hash << 5) - hash) + char;
    hash = hash & hash; // Convert to 32-bit integer
  }
  return Math.abs(hash);
}
// Load flags once at module startup; call loadFlags() again to pick up
// changes from the backing store.
loadFlags();
module.exports = { isEnabled: isEnabled, loadFlags: loadFlags };
Using Feature Flags in Routes
var featureFlags = require("./featureFlags");
// Search endpoint: the flag decides which engine serves the request, and
// every request logs which version ran so v1/v2 metrics can be compared.
app.get("/api/search", function(req, res) {
  // Flag evaluation is per-user so percentage rollouts stay deterministic.
  var userId = req.user ? req.user.id : null;
  var useNewAlgorithm = featureFlags.isEnabled("newSearchAlgorithm", userId);
  var engine = useNewAlgorithm ? newSearch : currentSearch;
  var results = engine.query(req.query.q);
  logger.info("search_algorithm", {
    version: useNewAlgorithm ? "v2" : "v1",
    userId: userId,
    query: req.query.q
  });
  res.json(results);
});
Gradual Rollout Pattern
// Roll out new feature over several days.
// NOTE(review): `featureFlags.update` is not exported by the simple module
// above (it only exposes isEnabled/loadFlags) — this assumes a flag store
// that supports runtime updates. TODO confirm before copying verbatim.
// Day 1: Internal users only
featureFlags.update("newCheckout", { enabled: true, percentage: 0, allowedUsers: ["internal-team"] });
// Day 2: 5% of users
featureFlags.update("newCheckout", { percentage: 5 });
// Day 3: Check metrics — if error rate is normal, increase
featureFlags.update("newCheckout", { percentage: 25 });
// Day 4: Full rollout
featureFlags.update("newCheckout", { percentage: 100 });
// Day 7: Remove the flag and old code path
Canary Deployments
Deploy the new version to a small subset of servers. If metrics are healthy, expand. If not, roll back.
Express Middleware for Canary Routing
// canary.js
// Express middleware that routes a slice of traffic to the canary build.
// Precedence (highest last): sticky cookie / deterministic user bucket →
// random sampling for anonymous traffic → explicit X-Canary header, which
// always wins so engineers can force the canary when testing.
// options:
//   percentage — share of traffic (0-100) sent to the canary (default 10)
//   header     — request header that forces canary routing (default "X-Canary")
function canaryMiddleware(options) {
  // `percentage: 0` must mean "no random canary traffic"; `||` would turn 0 into 10.
  var canaryPercentage = typeof options.percentage === "number" ? options.percentage : 10;
  var canaryHeader = options.header || "X-Canary";
  return function(req, res, next) {
    var isCanary = false;

    // Sticky sessions — a returning user keeps whatever the cookie says.
    if (req.cookies && req.cookies.canary !== undefined) {
      isCanary = req.cookies.canary === "true";
    } else if (req.user) {
      // Deterministic bucket by user id, persisted in a cookie for 1 hour.
      var sticky = simpleHash(req.user.id) % 100 < canaryPercentage;
      res.cookie("canary", String(sticky), { maxAge: 3600000 });
      isCanary = sticky;
    } else if (Math.random() * 100 < canaryPercentage) {
      // Anonymous, cookie-less traffic: random sampling (not sticky).
      isCanary = true;
    }

    // Explicit canary header (for testing) overrides everything — in the
    // original the cookie could silently cancel it.
    if (req.headers[canaryHeader.toLowerCase()] === "true") {
      isCanary = true;
    }

    req.isCanary = isCanary;
    res.setHeader("X-Served-By", isCanary ? "canary" : "stable");
    next();
  };
}

// Deterministic 32-bit string hash for user bucketing. Defined locally —
// the original referenced an undefined `simpleHash`, which threw a
// ReferenceError on the first request with a logged-in user and no cookie.
function simpleHash(str) {
  var hash = 0;
  for (var i = 0; i < str.length; i++) {
    hash = ((hash << 5) - hash) + str.charCodeAt(i);
    hash = hash & hash; // keep within 32 bits
  }
  return Math.abs(hash);
}
module.exports = canaryMiddleware;
Monitoring Canary Health
// canaryMonitor.js
// Per-version request counters, accumulated since the last reset().
var metrics = {
  canary: { requests: 0, errors: 0, totalLatency: 0 },
  stable: { requests: 0, errors: 0, totalLatency: 0 }
};

// Fold one completed request into the bucket for the version that served it.
// Only 5xx responses count as errors.
function recordRequest(isCanary, statusCode, latencyMs) {
  var bucket = isCanary ? metrics.canary : metrics.stable;
  bucket.requests += 1;
  bucket.totalLatency += latencyMs;
  if (statusCode >= 500) {
    bucket.errors += 1;
  }
}

// Error rate for a bucket; 0 when the bucket has seen no traffic.
function errorRateOf(bucket) {
  return bucket.requests > 0 ? bucket.errors / bucket.requests : 0;
}

// Mean latency for a bucket; 0 when the bucket has seen no traffic.
function avgLatencyOf(bucket) {
  return bucket.requests > 0 ? bucket.totalLatency / bucket.requests : 0;
}

// Flatten one bucket into the reported summary shape.
function summarize(bucket) {
  return {
    errorRate: errorRateOf(bucket),
    avgLatency: Math.round(avgLatencyOf(bucket)),
    requests: bucket.requests
  };
}

// Compare canary against stable. "healthy" means the canary is within 1.5x
// of the stable baseline on both error rate and average latency.
function getHealth() {
  var report = {
    canary: summarize(metrics.canary),
    stable: summarize(metrics.stable)
  };
  report.healthy =
    errorRateOf(metrics.canary) <= errorRateOf(metrics.stable) * 1.5 &&
    avgLatencyOf(metrics.canary) <= avgLatencyOf(metrics.stable) * 1.5;
  return report;
}

// Zero out both buckets, e.g. at the start of a new observation window.
function reset() {
  metrics.canary = { requests: 0, errors: 0, totalLatency: 0 };
  metrics.stable = { requests: 0, errors: 0, totalLatency: 0 };
}
module.exports = { recordRequest: recordRequest, getHealth: getHealth, reset: reset };
Synthetic Monitoring
Automated tests that run continuously against production, verifying critical user flows work.
Simple Synthetic Monitor
// synthetic-monitor.js
var http = require("http");
var https = require("https");
// Continuously exercisable probe suite for a live base URL.
function SyntheticMonitor(baseUrl) {
  this.baseUrl = baseUrl;
  this.checks = [];
  this.results = [];
}

// Register a named check.
// options:
//   path           — request path appended to baseUrl
//   method         — HTTP method (default "GET")
//   expectedStatus — status code that counts as a pass (default 200)
//   bodyCheck      — optional predicate over the parsed JSON body
//   timeout        — per-request timeout in ms (default 5000)
SyntheticMonitor.prototype.addCheck = function(name, options) {
  this.checks.push({
    name: name,
    path: options.path,
    method: options.method || "GET",
    expectedStatus: options.expectedStatus || 200,
    bodyCheck: options.bodyCheck || null,
    timeout: options.timeout || 5000
  });
};

// Run a single check and deliver exactly one result via callback(null, result).
SyntheticMonitor.prototype.runCheck = function(check, callback) {
  var startTime = Date.now();
  var isHttps = this.baseUrl.indexOf("https") === 0;
  var client = isHttps ? https : http;
  // The timeout handler destroys the socket, which also emits "error" — in
  // the original that invoked the callback twice. Guard so it fires once.
  var finished = false;
  function report(result) {
    if (finished) return;
    finished = true;
    callback(null, result);
  }
  // Use client.request so check.method is honored (the original stored the
  // method in addCheck but always issued a GET).
  var req = client.request(this.baseUrl + check.path, { method: check.method }, function(res) {
    var data = "";
    res.on("data", function(chunk) { data += chunk; });
    res.on("end", function() {
      var elapsed = Date.now() - startTime;
      var result = {
        name: check.name,
        status: "pass",
        statusCode: res.statusCode,
        latency: elapsed,
        timestamp: new Date().toISOString()
      };
      if (res.statusCode !== check.expectedStatus) {
        result.status = "fail";
        result.error = "Expected status " + check.expectedStatus + ", got " + res.statusCode;
      }
      if (result.status === "pass" && check.bodyCheck) {
        try {
          var body = JSON.parse(data);
          if (!check.bodyCheck(body)) {
            result.status = "fail";
            result.error = "Body check failed";
          }
        } catch (e) {
          result.status = "fail";
          result.error = "Failed to parse response body";
        }
      }
      report(result);
    });
  });
  req.on("error", function(err) {
    report({
      name: check.name,
      status: "fail",
      error: err.message,
      latency: Date.now() - startTime,
      timestamp: new Date().toISOString()
    });
  });
  req.setTimeout(check.timeout, function() {
    req.destroy();
    report({
      name: check.name,
      status: "fail",
      error: "Timeout after " + check.timeout + "ms",
      latency: check.timeout,
      timestamp: new Date().toISOString()
    });
  });
  req.end();
};
// Run every registered check in parallel; deliver all results in one
// callback. The callback always fires — the original never invoked it when
// no checks were registered, silently stalling callers.
SyntheticMonitor.prototype.runAll = function(callback) {
  var self = this;
  var results = [];
  var completed = 0;
  if (self.checks.length === 0) {
    return callback(null, results);
  }
  self.checks.forEach(function(check) {
    self.runCheck(check, function(err, result) {
      results.push(result);
      completed++;
      if (completed === self.checks.length) {
        callback(null, results);
      }
    });
  });
};
module.exports = SyntheticMonitor;
Running Synthetic Checks
var SyntheticMonitor = require("./synthetic-monitor");
var monitor = new SyntheticMonitor("https://myapp.example.com");

// Critical user flows, declared as data and registered in one pass.
var criticalFlows = [
  { name: "Homepage loads", options: { path: "/", expectedStatus: 200 } },
  {
    name: "API health check",
    options: {
      path: "/api/health",
      expectedStatus: 200,
      bodyCheck: function(body) {
        return body.status === "ok" && body.database === "connected";
      }
    }
  },
  {
    name: "Articles API responds",
    options: {
      path: "/api/articles?limit=5",
      expectedStatus: 200,
      bodyCheck: function(body) {
        return Array.isArray(body) && body.length > 0;
      }
    }
  },
  {
    name: "Search works",
    options: { path: "/api/search?q=javascript", expectedStatus: 200, timeout: 10000 }
  }
];
criticalFlows.forEach(function(flow) {
  monitor.addCheck(flow.name, flow.options);
});

// Evaluate every flow once a minute and alert on any failure.
var RUN_INTERVAL_MS = 60000;
setInterval(function() {
  monitor.runAll(function(err, results) {
    var failures = results.filter(function(r) { return r.status === "fail"; });
    if (failures.length === 0) {
      console.log("All " + results.length + " synthetic checks passed");
      return;
    }
    console.error("SYNTHETIC CHECK FAILURES:");
    failures.forEach(function(f) {
      console.error(" " + f.name + ": " + f.error);
    });
    // Alert the team (email, Slack, PagerDuty)
    alertTeam(failures);
  });
}, RUN_INTERVAL_MS);
Traffic Shadowing
Copy production traffic to a test environment without affecting users. The shadow environment receives the same requests as production, but its responses are discarded and never reach users.
// trafficShadow.js
var http = require("http");
// Express middleware that mirrors a sample of live traffic to a shadow
// service. Fire-and-forget: the shadow's responses are discarded and its
// failures never affect the real request.
// options:
//   host, port  — shadow service address
//   sampleRate  — fraction of requests to mirror, 0..1 (default 0.1)
function shadowTraffic(options) {
  var shadowHost = options.host;
  var shadowPort = options.port;
  // `sampleRate: 0` must disable shadowing; `||` would turn 0 into 0.1.
  var sampleRate = typeof options.sampleRate === "number" ? options.sampleRate : 0.1;
  return function(req, res, next) {
    // Only shadow a percentage of traffic
    if (Math.random() > sampleRate) {
      return next();
    }
    // If body-parser already consumed the stream, we re-serialize req.body —
    // so Content-Length must be recomputed. Forwarding the original header
    // with a re-encoded body desynchronizes the shadow's request parser
    // (it waits for bytes that never arrive, or truncates the body).
    var headers = Object.assign({}, req.headers, { "X-Shadow": "true" });
    var body = null;
    if (req.body) {
      body = JSON.stringify(req.body);
      headers["content-length"] = Buffer.byteLength(body);
    }
    var shadowReq = http.request({
      hostname: shadowHost,
      port: shadowPort,
      path: req.originalUrl,
      method: req.method,
      headers: headers
    });
    // Log but do not affect the real request
    shadowReq.on("error", function(err) {
      console.warn("Shadow request failed:", err.message);
    });
    // Drain and discard the shadow response so its socket is released.
    shadowReq.on("response", function(shadowRes) {
      shadowRes.resume();
    });
    if (body) {
      shadowReq.write(body);
    }
    shadowReq.end();
    // Continue with the real request immediately
    next();
  };
}
module.exports = shadowTraffic;
// Usage in Express
var shadowTraffic = require("./trafficShadow");
// Enable shadowing only when a shadow target is configured via environment.
if (process.env.SHADOW_HOST) {
  var shadowOptions = {
    host: process.env.SHADOW_HOST,
    port: process.env.SHADOW_PORT || 3001,
    sampleRate: 0.05 // Shadow 5% of traffic
  };
  app.use(shadowTraffic(shadowOptions));
}
Observability for Production Testing
Structured Logging
// logger.js
// Factory for a minimal structured (JSON-lines) logger bound to one service
// name. info/metric write to stdout; error writes to stderr.
function createLogger(service) {
  // Stamp the payload and write one JSON line to the chosen console stream.
  function emit(stream, payload) {
    payload.timestamp = new Date().toISOString();
    console[stream](JSON.stringify(payload));
  }
  return {
    // Informational event with arbitrary structured data.
    info: function(event, data) {
      emit("log", { level: "info", service: service, event: event, data: data });
    },
    // Error event with arbitrary structured data.
    error: function(event, data) {
      emit("error", { level: "error", service: service, event: event, data: data });
    },
    // Numeric measurement with optional tags.
    metric: function(name, value, tags) {
      emit("log", { level: "metric", name: name, value: value, tags: tags });
    }
  };
}
module.exports = createLogger;
Health Check Endpoint
// healthCheck.js
var db = require("./db");
// Registers GET /api/health on the given Express app. Verifies the database
// connection and reports process memory and uptime. Responds 200 ("ok")
// when healthy, 503 ("degraded") when the database check fails.
function createHealthCheck(app) {
  app.get("/api/health", function(req, res) {
    var checks = {};
    var healthy = true;
    // Check database — the only check that marks the service degraded.
    db.query("SELECT 1")
      .then(function() {
        checks.database = "connected";
      })
      .catch(function(err) {
        checks.database = "error: " + err.message;
        healthy = false;
      })
      .then(function() {
        // Memory and uptime are informational; high memory only adds a warning.
        var mem = process.memoryUsage();
        var heapUsedMB = Math.round(mem.heapUsed / 1024 / 1024);
        checks.memory = heapUsedMB + "MB";
        if (heapUsedMB > 500) {
          checks.memoryWarning = "High memory usage";
        }
        checks.uptime = Math.round(process.uptime()) + "s";
        res.status(healthy ? 200 : 503).json({
          status: healthy ? "ok" : "degraded",
          checks: checks,
          version: process.env.APP_VERSION || "unknown",
          timestamp: new Date().toISOString()
        });
      })
      .catch(function(err) {
        // Terminal catch — the original chain had none, so an exception while
        // building the response left the request hanging with no reply.
        res.status(500).json({ status: "error", error: err.message });
      });
  });
}
module.exports = createHealthCheck;
Rollback Strategies
Instant Rollback with Feature Flags
// If the new feature causes problems, disable it instantly
featureFlags.update("newCheckout", { enabled: false });
// No deployment needed — the flag change takes effect immediately
// NOTE(review): with per-process flag state, each server must re-read the
// shared flag store before the change is visible fleet-wide — confirm your
// flag backend propagates updates.
Automated Rollback Based on Metrics
// autoRollback.js
// Watches rolling error-rate and latency metrics and fires `onRollback`
// when either crosses its threshold, then stops itself.
// options:
//   errorThreshold   — max tolerated error rate (default 0.05 = 5%)
//   latencyThreshold — max tolerated average latency in ms (default 2000)
//   checkInterval    — evaluation period in ms (default 30000)
//   onRollback(reason, value) — called once with "error_rate" or "latency"
function AutoRollback(options) {
  this.errorThreshold = options.errorThreshold || 0.05; // 5% error rate
  this.latencyThreshold = options.latencyThreshold || 2000; // 2 seconds
  this.checkInterval = options.checkInterval || 30000; // 30 seconds
  this.onRollback = options.onRollback;
  this.metrics = { errors: 0, requests: 0, totalLatency: 0 };
}

// Record one completed request (5xx counts as an error).
AutoRollback.prototype.record = function(statusCode, latencyMs) {
  this.metrics.requests++;
  this.metrics.totalLatency += latencyMs;
  if (statusCode >= 500) this.metrics.errors++;
};

// Begin periodic evaluation of the accumulated metrics.
AutoRollback.prototype.start = function() {
  var self = this;
  self.timer = setInterval(function() {
    if (self.metrics.requests < 10) return; // Not enough data
    var errorRate = self.metrics.errors / self.metrics.requests;
    var avgLatency = self.metrics.totalLatency / self.metrics.requests;
    if (errorRate > self.errorThreshold) {
      console.error("ROLLBACK: Error rate " + (errorRate * 100).toFixed(1) + "% exceeds threshold");
      self.onRollback("error_rate", errorRate);
      self.stop();
      return; // Don't also fire the latency rollback in the same tick.
    }
    if (avgLatency > self.latencyThreshold) {
      console.error("ROLLBACK: Avg latency " + avgLatency + "ms exceeds threshold");
      self.onRollback("latency", avgLatency);
      self.stop();
      return;
    }
    // Reset metrics for next interval
    self.metrics = { errors: 0, requests: 0, totalLatency: 0 };
  }, self.checkInterval);
};

// Stop evaluating. The original referenced an undefined `self` here, so
// every call to stop() threw a ReferenceError and left the timer running.
AutoRollback.prototype.stop = function() {
  clearInterval(this.timer);
};
module.exports = AutoRollback;
Common Issues and Troubleshooting
Feature flag state is inconsistent across servers
Each server has a local copy of flags that can be out of sync:
Fix: Store feature flags in a shared data store (Redis, database, or feature flag service). Poll for updates on a short interval (10-30 seconds). Use a feature flag service like LaunchDarkly or Unleash for production systems.
Canary deployment shows good metrics but breaks for specific users
The canary sample is too small or unrepresentative:
Fix: Ensure canary traffic includes a representative mix of user types, geographies, and device types. Use sticky sessions so individual users get a consistent experience. Monitor per-user-segment metrics, not just aggregates.
Synthetic monitors create false alerts
External network issues or rate limiting trigger alerts for healthy services:
Fix: Run synthetic monitors from multiple locations and require failures from at least two before alerting. Add retry logic for transient network errors. Exclude known maintenance windows from alerting.
Traffic shadowing affects production performance
Shadow requests consume production server resources:
Fix: Shadow to a separate service, not back to production. Keep sample rates low (1-5%). Make shadow requests fire-and-forget — do not wait for responses. Monitor production performance before and after enabling shadowing.
Best Practices
- Start with feature flags. They are the simplest production testing technique and provide instant rollback capability. Every new feature should be behind a flag.
- Monitor before, during, and after changes. Establish baseline metrics before deploying. Watch metrics during rollout. Continue monitoring for days after reaching 100%.
- Keep blast radius small. Start with 1% of users, then 5%, then 25%, then 100%. If something breaks at 5%, 95% of your users are unaffected.
- Automate rollback criteria. Define what "unhealthy" means in code: error rate above 5%, latency above 2 seconds, specific error codes appearing. Automate the rollback trigger.
- Run synthetic monitors continuously. They catch issues that individual user reports take hours to surface. A synthetic check that runs every minute finds outages in minutes, not hours.
- Clean up feature flags. Old flags that are 100% enabled or 0% enabled are dead code. Remove them within a sprint of reaching 100% rollout.
- Log everything about production tests. Which flag was active, which version served the request, which canary group the user was in. This data is essential for debugging issues.
- Test rollback before you need it. Practice rolling back a feature flag or reverting a canary during normal operations. The first time you roll back should not be during an incident.