Testing in Production: Strategies and Safety
A practical guide to testing in production safely using feature flags, canary deployments, observability, synthetic monitoring, and traffic shadowing for Node.js applications.
Testing in Production: Strategies and Safety
No staging environment perfectly mirrors production. Different data volumes, different traffic patterns, different network conditions, different third-party service behavior. The bugs that survive your test suite and staging environment are, by definition, bugs that only appear in production.
Testing in production is not "pushing untested code and hoping." It is a set of disciplined practices — feature flags, canary deployments, synthetic monitoring, and traffic shadowing — that let you validate code in the real environment while controlling blast radius. This guide covers each technique with practical implementations for Node.js applications.
Prerequisites
- A deployed Node.js application
- Basic understanding of deployment workflows
- Monitoring or logging in place
Feature Flags
Feature flags let you deploy code to production without activating it for all users. You ship the code, then gradually enable it.
Simple Feature Flag Implementation
// featureFlags.js
// In-memory flag table; replace the loader with a DB/config-service fetch in production.
var flags = {};

// Populate the flag table. In production, load from a database, config
// service, or environment instead of this hard-coded map.
function loadFlags() {
  flags = {
    newSearchAlgorithm: {
      enabled: false,
      percentage: 0,
      allowedUsers: []
    },
    redesignedCheckout: {
      enabled: true,
      percentage: 25,
      allowedUsers: ["user-123", "user-456"]
    },
    betaAnalytics: {
      enabled: true,
      percentage: 100,
      allowedUsers: []
    }
  };
}

// Decide whether `flagName` is active for `userId`.
// Order of precedence: master switch off → false; user on the allow-list →
// true; percentage 100 → true; percentage 0 → false; otherwise a
// deterministic per-(flag, user) hash bucket. Anonymous users (no userId)
// never enter a partial rollout.
function isEnabled(flagName, userId) {
  var flag = flags[flagName];
  if (!flag || !flag.enabled) return false;
  // Flags loaded from an external store may omit allowedUsers — tolerate that.
  var allowed = flag.allowedUsers || [];
  if (userId && allowed.indexOf(userId) !== -1) {
    return true;
  }
  // Treat a missing/non-numeric percentage as 0 (feature off for everyone
  // not on the allow-list) rather than comparing against undefined.
  var pct = typeof flag.percentage === "number" ? flag.percentage : 0;
  if (pct >= 100) return true;
  if (pct <= 0) return false;
  if (userId) {
    // Hash flagName+userId so each flag buckets users independently and a
    // given user gets a stable answer across requests.
    var hash = simpleHash(flagName + userId);
    return (hash % 100) < pct;
  }
  return false;
}

// Cheap deterministic string hash (djb2-style), reduced to a non-negative
// 32-bit integer. Not cryptographic — only used for stable bucketing.
function simpleHash(str) {
  var hash = 0;
  for (var i = 0; i < str.length; i++) {
    var char = str.charCodeAt(i);
    hash = ((hash << 5) - hash) + char;
    hash = hash & hash; // Convert to 32-bit integer
  }
  return Math.abs(hash);
}
// Load flags once at module startup; call loadFlags() again to pick up
// changes from the backing store.
loadFlags();
module.exports = { isEnabled: isEnabled, loadFlags: loadFlags };
Using Feature Flags in Routes
var featureFlags = require("./featureFlags");
// Search endpoint: the flag decides which engine serves the request, and
// every request logs which version ran so v1/v2 metrics can be compared.
app.get("/api/search", function(req, res) {
  // Flag evaluation is per-user so percentage rollouts stay deterministic.
  var userId = req.user ? req.user.id : null;
  var useNewAlgorithm = featureFlags.isEnabled("newSearchAlgorithm", userId);
  var engine = useNewAlgorithm ? newSearch : currentSearch;
  var results = engine.query(req.query.q);
  logger.info("search_algorithm", {
    version: useNewAlgorithm ? "v2" : "v1",
    userId: userId,
    query: req.query.q
  });
  res.json(results);
});
Gradual Rollout Pattern
// Roll out new feature over several days.
// NOTE(review): `featureFlags.update` is not exported by the simple module
// above (it only exposes isEnabled/loadFlags) — this assumes a flag store
// that supports runtime updates. TODO confirm before copying verbatim.
// Day 1: Internal users only
featureFlags.update("newCheckout", { enabled: true, percentage: 0, allowedUsers: ["internal-team"] });
// Day 2: 5% of users
featureFlags.update("newCheckout", { percentage: 5 });
// Day 3: Check metrics — if error rate is normal, increase
featureFlags.update("newCheckout", { percentage: 25 });
// Day 4: Full rollout
featureFlags.update("newCheckout", { percentage: 100 });
// Day 7: Remove the flag and old code path
Canary Deployments
Deploy the new version to a small subset of servers. If metrics are healthy, expand. If not, roll back.
Express Middleware for Canary Routing
// canary.js
// Express middleware that routes a slice of traffic to the canary build.
// Precedence (highest last): sticky cookie / deterministic user bucket →
// random sampling for anonymous traffic → explicit X-Canary header, which
// always wins so engineers can force the canary when testing.
// options:
//   percentage — share of traffic (0-100) sent to the canary (default 10)
//   header     — request header that forces canary routing (default "X-Canary")
function canaryMiddleware(options) {
  // `percentage: 0` must mean "no random canary traffic"; `||` would turn 0 into 10.
  var canaryPercentage = typeof options.percentage === "number" ? options.percentage : 10;
  var canaryHeader = options.header || "X-Canary";
  return function(req, res, next) {
    var isCanary = false;

    // Sticky sessions — a returning user keeps whatever the cookie says.
    if (req.cookies && req.cookies.canary !== undefined) {
      isCanary = req.cookies.canary === "true";
    } else if (req.user) {
      // Deterministic bucket by user id, persisted in a cookie for 1 hour.
      var sticky = simpleHash(req.user.id) % 100 < canaryPercentage;
      res.cookie("canary", String(sticky), { maxAge: 3600000 });
      isCanary = sticky;
    } else if (Math.random() * 100 < canaryPercentage) {
      // Anonymous, cookie-less traffic: random sampling (not sticky).
      isCanary = true;
    }

    // Explicit canary header (for testing) overrides everything — in the
    // original the cookie could silently cancel it.
    if (req.headers[canaryHeader.toLowerCase()] === "true") {
      isCanary = true;
    }

    req.isCanary = isCanary;
    res.setHeader("X-Served-By", isCanary ? "canary" : "stable");
    next();
  };
}

// Deterministic 32-bit string hash for user bucketing. Defined locally —
// the original referenced an undefined `simpleHash`, which threw a
// ReferenceError on the first request with a logged-in user and no cookie.
function simpleHash(str) {
  var hash = 0;
  for (var i = 0; i < str.length; i++) {
    hash = ((hash << 5) - hash) + str.charCodeAt(i);
    hash = hash & hash; // keep within 32 bits
  }
  return Math.abs(hash);
}
module.exports = canaryMiddleware;
Monitoring Canary Health
// canaryMonitor.js
// Per-version request counters, accumulated since the last reset().
var metrics = {
  canary: { requests: 0, errors: 0, totalLatency: 0 },
  stable: { requests: 0, errors: 0, totalLatency: 0 }
};

// Fold one completed request into the bucket for the version that served it.
// Only 5xx responses count as errors.
function recordRequest(isCanary, statusCode, latencyMs) {
  var bucket = isCanary ? metrics.canary : metrics.stable;
  bucket.requests += 1;
  bucket.totalLatency += latencyMs;
  if (statusCode >= 500) {
    bucket.errors += 1;
  }
}

// Error rate for a bucket; 0 when the bucket has seen no traffic.
function errorRateOf(bucket) {
  return bucket.requests > 0 ? bucket.errors / bucket.requests : 0;
}

// Mean latency for a bucket; 0 when the bucket has seen no traffic.
function avgLatencyOf(bucket) {
  return bucket.requests > 0 ? bucket.totalLatency / bucket.requests : 0;
}

// Flatten one bucket into the reported summary shape.
function summarize(bucket) {
  return {
    errorRate: errorRateOf(bucket),
    avgLatency: Math.round(avgLatencyOf(bucket)),
    requests: bucket.requests
  };
}

// Compare canary against stable. "healthy" means the canary is within 1.5x
// of the stable baseline on both error rate and average latency.
function getHealth() {
  var report = {
    canary: summarize(metrics.canary),
    stable: summarize(metrics.stable)
  };
  report.healthy =
    errorRateOf(metrics.canary) <= errorRateOf(metrics.stable) * 1.5 &&
    avgLatencyOf(metrics.canary) <= avgLatencyOf(metrics.stable) * 1.5;
  return report;
}

// Zero out both buckets, e.g. at the start of a new observation window.
function reset() {
  metrics.canary = { requests: 0, errors: 0, totalLatency: 0 };
  metrics.stable = { requests: 0, errors: 0, totalLatency: 0 };
}
module.exports = { recordRequest: recordRequest, getHealth: getHealth, reset: reset };
Synthetic Monitoring
Automated tests that run continuously against production, verifying critical user flows work.
Simple Synthetic Monitor
// synthetic-monitor.js
var http = require("http");
var https = require("https");
// Continuously exercisable probe suite for a live base URL.
function SyntheticMonitor(baseUrl) {
  this.baseUrl = baseUrl;
  this.checks = [];
  this.results = [];
}

// Register a named check.
// options:
//   path           — request path appended to baseUrl
//   method         — HTTP method (default "GET")
//   expectedStatus — status code that counts as a pass (default 200)
//   bodyCheck      — optional predicate over the parsed JSON body
//   timeout        — per-request timeout in ms (default 5000)
SyntheticMonitor.prototype.addCheck = function(name, options) {
  this.checks.push({
    name: name,
    path: options.path,
    method: options.method || "GET",
    expectedStatus: options.expectedStatus || 200,
    bodyCheck: options.bodyCheck || null,
    timeout: options.timeout || 5000
  });
};

// Run a single check and deliver exactly one result via callback(null, result).
SyntheticMonitor.prototype.runCheck = function(check, callback) {
  var startTime = Date.now();
  var isHttps = this.baseUrl.indexOf("https") === 0;
  var client = isHttps ? https : http;
  // The timeout handler destroys the socket, which also emits "error" — in
  // the original that invoked the callback twice. Guard so it fires once.
  var finished = false;
  function report(result) {
    if (finished) return;
    finished = true;
    callback(null, result);
  }
  // Use client.request so check.method is honored (the original stored the
  // method in addCheck but always issued a GET).
  var req = client.request(this.baseUrl + check.path, { method: check.method }, function(res) {
    var data = "";
    res.on("data", function(chunk) { data += chunk; });
    res.on("end", function() {
      var elapsed = Date.now() - startTime;
      var result = {
        name: check.name,
        status: "pass",
        statusCode: res.statusCode,
        latency: elapsed,
        timestamp: new Date().toISOString()
      };
      if (res.statusCode !== check.expectedStatus) {
        result.status = "fail";
        result.error = "Expected status " + check.expectedStatus + ", got " + res.statusCode;
      }
      if (result.status === "pass" && check.bodyCheck) {
        try {
          var body = JSON.parse(data);
          if (!check.bodyCheck(body)) {
            result.status = "fail";
            result.error = "Body check failed";
          }
        } catch (e) {
          result.status = "fail";
          result.error = "Failed to parse response body";
        }
      }
      report(result);
    });
  });
  req.on("error", function(err) {
    report({
      name: check.name,
      status: "fail",
      error: err.message,
      latency: Date.now() - startTime,
      timestamp: new Date().toISOString()
    });
  });
  req.setTimeout(check.timeout, function() {
    req.destroy();
    report({
      name: check.name,
      status: "fail",
      error: "Timeout after " + check.timeout + "ms",
      latency: check.timeout,
      timestamp: new Date().toISOString()
    });
  });
  req.end();
};
// Run every registered check in parallel; deliver all results in one
// callback. The callback always fires — the original never invoked it when
// no checks were registered, silently stalling callers.
SyntheticMonitor.prototype.runAll = function(callback) {
  var self = this;
  var results = [];
  var completed = 0;
  if (self.checks.length === 0) {
    return callback(null, results);
  }
  self.checks.forEach(function(check) {
    self.runCheck(check, function(err, result) {
      results.push(result);
      completed++;
      if (completed === self.checks.length) {
        callback(null, results);
      }
    });
  });
};
module.exports = SyntheticMonitor;
Running Synthetic Checks
var SyntheticMonitor = require("./synthetic-monitor");
var monitor = new SyntheticMonitor("https://myapp.example.com");

// Critical user flows, declared as data and registered in one pass.
var criticalFlows = [
  { name: "Homepage loads", options: { path: "/", expectedStatus: 200 } },
  {
    name: "API health check",
    options: {
      path: "/api/health",
      expectedStatus: 200,
      bodyCheck: function(body) {
        return body.status === "ok" && body.database === "connected";
      }
    }
  },
  {
    name: "Articles API responds",
    options: {
      path: "/api/articles?limit=5",
      expectedStatus: 200,
      bodyCheck: function(body) {
        return Array.isArray(body) && body.length > 0;
      }
    }
  },
  {
    name: "Search works",
    options: { path: "/api/search?q=javascript", expectedStatus: 200, timeout: 10000 }
  }
];
criticalFlows.forEach(function(flow) {
  monitor.addCheck(flow.name, flow.options);
});

// Evaluate every flow once a minute and alert on any failure.
var RUN_INTERVAL_MS = 60000;
setInterval(function() {
  monitor.runAll(function(err, results) {
    var failures = results.filter(function(r) { return r.status === "fail"; });
    if (failures.length === 0) {
      console.log("All " + results.length + " synthetic checks passed");
      return;
    }
    console.error("SYNTHETIC CHECK FAILURES:");
    failures.forEach(function(f) {
      console.error(" " + f.name + ": " + f.error);
    });
    // Alert the team (email, Slack, PagerDuty)
    alertTeam(failures);
  });
}, RUN_INTERVAL_MS);
Traffic Shadowing
Copy production traffic to a test environment without affecting users. The shadow environment receives the same requests as production, but its responses are discarded and never reach users.
// trafficShadow.js
var http = require("http");
// Express middleware that mirrors a sample of live traffic to a shadow
// service. Fire-and-forget: the shadow's responses are discarded and its
// failures never affect the real request.
// options:
//   host, port  — shadow service address
//   sampleRate  — fraction of requests to mirror, 0..1 (default 0.1)
function shadowTraffic(options) {
  var shadowHost = options.host;
  var shadowPort = options.port;
  // `sampleRate: 0` must disable shadowing; `||` would turn 0 into 0.1.
  var sampleRate = typeof options.sampleRate === "number" ? options.sampleRate : 0.1;
  return function(req, res, next) {
    // Only shadow a percentage of traffic
    if (Math.random() > sampleRate) {
      return next();
    }
    // If body-parser already consumed the stream, we re-serialize req.body —
    // so Content-Length must be recomputed. Forwarding the original header
    // with a re-encoded body desynchronizes the shadow's request parser
    // (it waits for bytes that never arrive, or truncates the body).
    var headers = Object.assign({}, req.headers, { "X-Shadow": "true" });
    var body = null;
    if (req.body) {
      body = JSON.stringify(req.body);
      headers["content-length"] = Buffer.byteLength(body);
    }
    var shadowReq = http.request({
      hostname: shadowHost,
      port: shadowPort,
      path: req.originalUrl,
      method: req.method,
      headers: headers
    });
    // Log but do not affect the real request
    shadowReq.on("error", function(err) {
      console.warn("Shadow request failed:", err.message);
    });
    // Drain and discard the shadow response so its socket is released.
    shadowReq.on("response", function(shadowRes) {
      shadowRes.resume();
    });
    if (body) {
      shadowReq.write(body);
    }
    shadowReq.end();
    // Continue with the real request immediately
    next();
  };
}
module.exports = shadowTraffic;
// Usage in Express
var shadowTraffic = require("./trafficShadow");
// Enable shadowing only when a shadow target is configured via environment.
if (process.env.SHADOW_HOST) {
  var shadowOptions = {
    host: process.env.SHADOW_HOST,
    port: process.env.SHADOW_PORT || 3001,
    sampleRate: 0.05 // Shadow 5% of traffic
  };
  app.use(shadowTraffic(shadowOptions));
}
Observability for Production Testing
Structured Logging
// logger.js
// Factory for a minimal structured (JSON-lines) logger bound to one service
// name. info/metric write to stdout; error writes to stderr.
function createLogger(service) {
  // Stamp the payload and write one JSON line to the chosen console stream.
  function emit(stream, payload) {
    payload.timestamp = new Date().toISOString();
    console[stream](JSON.stringify(payload));
  }
  return {
    // Informational event with arbitrary structured data.
    info: function(event, data) {
      emit("log", { level: "info", service: service, event: event, data: data });
    },
    // Error event with arbitrary structured data.
    error: function(event, data) {
      emit("error", { level: "error", service: service, event: event, data: data });
    },
    // Numeric measurement with optional tags.
    metric: function(name, value, tags) {
      emit("log", { level: "metric", name: name, value: value, tags: tags });
    }
  };
}
module.exports = createLogger;
Health Check Endpoint
// healthCheck.js
var db = require("./db");
// Registers GET /api/health on the given Express app. Verifies the database
// connection and reports process memory and uptime. Responds 200 ("ok")
// when healthy, 503 ("degraded") when the database check fails.
function createHealthCheck(app) {
  app.get("/api/health", function(req, res) {
    var checks = {};
    var healthy = true;
    // Check database — the only check that marks the service degraded.
    db.query("SELECT 1")
      .then(function() {
        checks.database = "connected";
      })
      .catch(function(err) {
        checks.database = "error: " + err.message;
        healthy = false;
      })
      .then(function() {
        // Memory and uptime are informational; high memory only adds a warning.
        var mem = process.memoryUsage();
        var heapUsedMB = Math.round(mem.heapUsed / 1024 / 1024);
        checks.memory = heapUsedMB + "MB";
        if (heapUsedMB > 500) {
          checks.memoryWarning = "High memory usage";
        }
        checks.uptime = Math.round(process.uptime()) + "s";
        res.status(healthy ? 200 : 503).json({
          status: healthy ? "ok" : "degraded",
          checks: checks,
          version: process.env.APP_VERSION || "unknown",
          timestamp: new Date().toISOString()
        });
      })
      .catch(function(err) {
        // Terminal catch — the original chain had none, so an exception while
        // building the response left the request hanging with no reply.
        res.status(500).json({ status: "error", error: err.message });
      });
  });
}
module.exports = createHealthCheck;
Rollback Strategies
Instant Rollback with Feature Flags
// If the new feature causes problems, disable it instantly
featureFlags.update("newCheckout", { enabled: false });
// No deployment needed — the flag change takes effect immediately
// NOTE(review): with per-process flag state, each server must re-read the
// shared flag store before the change is visible fleet-wide — confirm your
// flag backend propagates updates.
Automated Rollback Based on Metrics
// autoRollback.js
// Watches rolling error-rate and latency metrics and fires `onRollback`
// when either crosses its threshold, then stops itself.
// options:
//   errorThreshold   — max tolerated error rate (default 0.05 = 5%)
//   latencyThreshold — max tolerated average latency in ms (default 2000)
//   checkInterval    — evaluation period in ms (default 30000)
//   onRollback(reason, value) — called once with "error_rate" or "latency"
function AutoRollback(options) {
  this.errorThreshold = options.errorThreshold || 0.05; // 5% error rate
  this.latencyThreshold = options.latencyThreshold || 2000; // 2 seconds
  this.checkInterval = options.checkInterval || 30000; // 30 seconds
  this.onRollback = options.onRollback;
  this.metrics = { errors: 0, requests: 0, totalLatency: 0 };
}

// Record one completed request (5xx counts as an error).
AutoRollback.prototype.record = function(statusCode, latencyMs) {
  this.metrics.requests++;
  this.metrics.totalLatency += latencyMs;
  if (statusCode >= 500) this.metrics.errors++;
};

// Begin periodic evaluation of the accumulated metrics.
AutoRollback.prototype.start = function() {
  var self = this;
  self.timer = setInterval(function() {
    if (self.metrics.requests < 10) return; // Not enough data
    var errorRate = self.metrics.errors / self.metrics.requests;
    var avgLatency = self.metrics.totalLatency / self.metrics.requests;
    if (errorRate > self.errorThreshold) {
      console.error("ROLLBACK: Error rate " + (errorRate * 100).toFixed(1) + "% exceeds threshold");
      self.onRollback("error_rate", errorRate);
      self.stop();
      return; // Don't also fire the latency rollback in the same tick.
    }
    if (avgLatency > self.latencyThreshold) {
      console.error("ROLLBACK: Avg latency " + avgLatency + "ms exceeds threshold");
      self.onRollback("latency", avgLatency);
      self.stop();
      return;
    }
    // Reset metrics for next interval
    self.metrics = { errors: 0, requests: 0, totalLatency: 0 };
  }, self.checkInterval);
};

// Stop evaluating. The original referenced an undefined `self` here, so
// every call to stop() threw a ReferenceError and left the timer running.
AutoRollback.prototype.stop = function() {
  clearInterval(this.timer);
};
module.exports = AutoRollback;
Common Issues and Troubleshooting
Feature flag state is inconsistent across servers
Each server has a local copy of flags that can be out of sync:
Fix: Store feature flags in a shared data store (Redis, database, or feature flag service). Poll for updates on a short interval (10-30 seconds). Use a feature flag service like LaunchDarkly or Unleash for production systems.
Canary deployment shows good metrics but breaks for specific users
The canary sample is too small or unrepresentative:
Fix: Ensure canary traffic includes a representative mix of user types, geographies, and device types. Use sticky sessions so individual users get a consistent experience. Monitor per-user-segment metrics, not just aggregates.
Synthetic monitors create false alerts
External network issues or rate limiting trigger alerts for healthy services:
Fix: Run synthetic monitors from multiple locations and require failures from at least two before alerting. Add retry logic for transient network errors. Exclude known maintenance windows from alerting.
Traffic shadowing affects production performance
Shadow requests consume production server resources:
Fix: Shadow to a separate service, not back to production. Keep sample rates low (1-5%). Make shadow requests fire-and-forget — do not wait for responses. Monitor production performance before and after enabling shadowing.
Best Practices
- Start with feature flags. They are the simplest production testing technique and provide instant rollback capability. Every new feature should be behind a flag.
- Monitor before, during, and after changes. Establish baseline metrics before deploying. Watch metrics during rollout. Continue monitoring for days after reaching 100%.
- Keep blast radius small. Start with 1% of users, then 5%, then 25%, then 100%. If something breaks at 5%, 95% of your users are unaffected.
- Automate rollback criteria. Define what "unhealthy" means in code: error rate above 5%, latency above 2 seconds, specific error codes appearing. Automate the rollback trigger.
- Run synthetic monitors continuously. They catch issues that individual user reports take hours to surface. A synthetic check that runs every minute finds outages in minutes, not hours.
- Clean up feature flags. Old flags that are 100% enabled or 0% enabled are dead code. Remove them within a sprint of reaching 100% rollout.
- Log everything about production tests. Which flag was active, which version served the request, which canary group the user was in. This data is essential for debugging issues.
- Test rollback before you need it. Practice rolling back a feature flag or reverting a canary during normal operations. The first time you roll back should not be during an incident.