/*
 * XIAO ESP32S3 Sense - Face Detection Web Server
 *
 * This sketch captures camera frames, runs face detection,
 * and serves both the video stream and detection results via web server.
 *
 * Board: XIAO_ESP32S3
 * Required: ESP32 board package 2.0.8+
 *
 * IMPORTANT: In Arduino IDE, go to Tools menu and set:
 *   - PSRAM: "OPI PSRAM"
 */

#include "esp_camera.h"
#include <WiFi.h>
#include "esp_http_server.h"

// Select the detection backend at compile time based on which headers exist.
//   1 -> ESP-DL two-stage detector (HumanFaceDetectMSR01 + HumanFaceDetectMNP01)
//   2 -> legacy esp_face_detect.h is present; the rest of the sketch only
//        special-cases the value 1, so this is handled like the fallback
//   0 -> no detector headers; the skin-tone heuristic is used
#if __has_include("human_face_detect_msr01.hpp")
  #include <list>
  #include "human_face_detect_msr01.hpp"
  #include "human_face_detect_mnp01.hpp"
  #define FACE_DETECTION_AVAILABLE 1
#elif __has_include("esp_face_detect.h")
  #include "esp_face_detect.h"
  #define FACE_DETECTION_AVAILABLE 2
#else
  #define FACE_DETECTION_AVAILABLE 0
  #warning "Face detection headers not found - using skin-tone heuristic fallback"
#endif

// WiFi credentials
// NOTE(review): credentials are compiled into the firmware image; consider
// moving them to a non-committed header or provisioning them at runtime.
const char* ssid = "Police Surveillance Van";
const char* password = "ourpassword";

// ===========================================
// XIAO ESP32S3 Sense Camera Pin Definitions
// ===========================================
#define PWDN_GPIO_NUM     -1
#define RESET_GPIO_NUM    -1
#define XCLK_GPIO_NUM     10
#define SIOD_GPIO_NUM     40
#define SIOC_GPIO_NUM     39

#define Y9_GPIO_NUM       48
#define Y8_GPIO_NUM       11
#define Y7_GPIO_NUM       12
#define Y6_GPIO_NUM       14
#define Y5_GPIO_NUM       16
#define Y4_GPIO_NUM       18
#define Y3_GPIO_NUM       17
#define Y2_GPIO_NUM       15
#define VSYNC_GPIO_NUM    38
#define HREF_GPIO_NUM     47
#define PCLK_GPIO_NUM     13

// LED pin for status (the sketch drives it LOW for "on", HIGH for "off")
#define LED_GPIO_NUM      21

// HTTP server handles: one server for pages/JSON, one dedicated to the stream
httpd_handle_t stream_httpd = NULL;
httpd_handle_t camera_httpd = NULL;

// Detection settings
static bool detectionEnabled = true;          // flipped by the /toggle endpoint
static int detectionCount = 0;                // face count reported to clients
static unsigned long lastDetectionTime = 0;   // millis() of the last positive detection

// For motion/change detection fallback
// NOTE(review): these two look unused by the code that follows - confirm before removing.
static uint8_t* prevFrame = NULL;
static size_t prevFrameLen = 0;

// Part boundary for MJPEG stream
#define PART_BOUNDARY "123456789000000000000987654321"
static const char*
_STREAM_CONTENT_TYPE = "multipart/x-mixed-replace;boundary=" PART_BOUNDARY;
static const char* _STREAM_BOUNDARY = "\r\n--" PART_BOUNDARY "\r\n";
// Per-frame part header: JPEG length (%u) and the current face count (%d)
static const char* _STREAM_PART = "Content-Type: image/jpeg\r\nContent-Length: %u\r\nX-Faces: %d\r\n\r\n";

#if FACE_DETECTION_AVAILABLE == 1
// ESP-DL based face detection (two-stage: MSR01 proposes, MNP01 refines).
// Both detectors are created lazily on first use and kept for the lifetime
// of the program.
HumanFaceDetectMSR01 *s_detector = nullptr;
HumanFaceDetectMNP01 *s_detector2 = nullptr;

/**
 * Run the two-stage ESP-DL face detector on an RGB565 frame and produce an
 * annotated JPEG.
 *
 * @param fb       Camera frame; only PIXFORMAT_RGB565 frames are processed.
 * @param out_buf  Receives a malloc'ed JPEG with green boxes drawn around each
 *                 face, or NULL when no JPEG could be produced. The caller
 *                 owns the buffer and must free() it.
 * @param out_len  Receives the JPEG length in bytes (0 when *out_buf is NULL).
 * @return Number of faces reported by the second detector stage; 0 when the
 *         frame is not RGB565, an argument is NULL, or a buffer could not be
 *         allocated/converted.
 */
static int detect_faces_dl(camera_fb_t *fb, uint8_t **out_buf, size_t *out_len) {
  if (!fb || !out_buf || !out_len) {
    return 0;
  }
  // Always leave the out-params in a defined state, whatever path we take.
  *out_buf = NULL;
  *out_len = 0;

  if (fb->format != PIXFORMAT_RGB565) {
    return 0;
  }

  if (!s_detector) {
    s_detector = new HumanFaceDetectMSR01(0.1F, 0.5F, 10, 0.2F);
  }
  if (!s_detector2) {
    s_detector2 = new HumanFaceDetectMNP01(0.5F, 0.3F, 5);
  }

  const int width = (int)fb->width;
  const int height = (int)fb->height;
  if (width <= 0 || height <= 0) {
    return 0;
  }

  const size_t rgb_len = (size_t)width * (size_t)height * 3;
  uint8_t *rgb_buf = (uint8_t*)ps_malloc(rgb_len);
  if (!rgb_buf) {
    return 0;
  }

  // Use the esp32-camera converter instead of a hand-written loop: it knows
  // the driver's RGB565 byte order and emits the 24-bit channel layout that
  // fmt2jpg(..., PIXFORMAT_RGB888, ...) consumes below, so colours stay
  // consistent end to end.
  if (!fmt2rgb888(fb->buf, fb->len, fb->format, rgb_buf)) {
    free(rgb_buf);
    return 0;
  }

  int faces = 0;

  // Stage 1 proposes candidates; stage 2 refines them. Each infer() returns a
  // reference to a list owned by its detector, so bind references rather than
  // copying one detector's results into the other's storage.
  std::list<dl::detect::result_t> &candidates =
      s_detector->infer(rgb_buf, {height, width, 3});
  if (!candidates.empty()) {
    std::list<dl::detect::result_t> &results =
        s_detector2->infer(rgb_buf, {height, width, 3}, candidates);
    faces = (int)results.size();

    // Draw a box outline per face by saturating the middle (green) byte of
    // each pixel on the rectangle's edges.
    for (const auto &r : results) {
      const int x1 = constrain(r.box[0], 0, width - 1);
      const int y1 = constrain(r.box[1], 0, height - 1);
      const int x2 = constrain(r.box[2], 0, width - 1);
      const int y2 = constrain(r.box[3], 0, height - 1);

      for (int x = x1; x <= x2; x++) {
        rgb_buf[(y1 * width + x) * 3 + 1] = 255;
        rgb_buf[(y2 * width + x) * 3 + 1] = 255;
      }
      for (int y = y1; y <= y2; y++) {
        rgb_buf[(y * width + x1) * 3 + 1] = 255;
        rgb_buf[(y * width + x2) * 3 + 1] = 255;
      }
    }
  }

  // Encode the (possibly annotated) image; on failure report "no JPEG" so the
  // caller can fall back to encoding the original frame.
  if (!fmt2jpg(rgb_buf, rgb_len, width, height, PIXFORMAT_RGB888, 80, out_buf, out_len)) {
    *out_buf = NULL;
    *out_len = 0;
  }

  free(rgb_buf);
  return faces;
}
#endif
// Simple skin-tone based face detection (works without ESP-DL) static int detect_faces_simple(uint8_t *rgb565_buf, int width, int height) { int skinPixels = 0; int totalPixels = width * height; uint16_t *pixels = (uint16_t*)rgb565_buf; // Count skin-tone pixels (simplified detection) for (int i = 0; i < totalPixels; i++) { uint16_t p = pixels[i]; uint8_t r = ((p >> 11) & 0x1F) << 3; uint8_t g = ((p >> 5) & 0x3F) << 2; uint8_t b = (p & 0x1F) << 3; // Simple skin tone detection in RGB // Skin typically has R > 95, G > 40, B > 20 // and R > G > B with R-G > 15 if (r > 95 && g > 40 && b > 20 && r > g && g > b && (r - g) > 15 && (r - b) > 15) { skinPixels++; } } // If more than 5% skin pixels, likely a face is present float skinRatio = (float)skinPixels / totalPixels; if (skinRatio > 0.05 && skinRatio < 0.6) { return 1; // Face likely detected } return 0; } // Stream handler static esp_err_t stream_handler(httpd_req_t *req) { camera_fb_t *fb = NULL; esp_err_t res = ESP_OK; char part_buf[128]; res = httpd_resp_set_type(req, _STREAM_CONTENT_TYPE); if (res != ESP_OK) return res; httpd_resp_set_hdr(req, "Access-Control-Allow-Origin", "*"); httpd_resp_set_hdr(req, "X-Framerate", "15"); while (true) { fb = esp_camera_fb_get(); if (!fb) { Serial.println("Camera capture failed"); res = ESP_FAIL; break; } uint8_t *jpg_buf = NULL; size_t jpg_len = 0; int faces = 0; if (detectionEnabled) { #if FACE_DETECTION_AVAILABLE == 1 if (fb->format == PIXFORMAT_RGB565) { faces = detect_faces_dl(fb, &jpg_buf, &jpg_len); } #else // Fallback: simple skin-tone detection if (fb->format == PIXFORMAT_RGB565) { faces = detect_faces_simple(fb->buf, fb->width, fb->height); } #endif } // Use original frame if detection didn't produce output if (jpg_buf == NULL) { if (fb->format == PIXFORMAT_JPEG) { jpg_buf = fb->buf; jpg_len = fb->len; } else { bool converted = frame2jpg(fb, 80, &jpg_buf, &jpg_len); if (!converted) { esp_camera_fb_return(fb); continue; } } } // Update detection status if (faces > 0) { 
detectionCount = faces; lastDetectionTime = millis(); digitalWrite(LED_GPIO_NUM, LOW); // LED on } else if (millis() - lastDetectionTime > 500) { detectionCount = 0; digitalWrite(LED_GPIO_NUM, HIGH); // LED off } // Send frame if (res == ESP_OK) { res = httpd_resp_send_chunk(req, _STREAM_BOUNDARY, strlen(_STREAM_BOUNDARY)); } if (res == ESP_OK) { size_t hlen = snprintf(part_buf, sizeof(part_buf), _STREAM_PART, jpg_len, detectionCount); res = httpd_resp_send_chunk(req, part_buf, hlen); } if (res == ESP_OK) { res = httpd_resp_send_chunk(req, (const char*)jpg_buf, jpg_len); } // Free JPEG buffer if we allocated it if (jpg_buf != fb->buf) { free(jpg_buf); } esp_camera_fb_return(fb); if (res != ESP_OK) break; } return res; } // Single capture handler static esp_err_t capture_handler(httpd_req_t *req) { camera_fb_t *fb = esp_camera_fb_get(); if (!fb) { httpd_resp_send_500(req); return ESP_FAIL; } httpd_resp_set_type(req, "image/jpeg"); httpd_resp_set_hdr(req, "Content-Disposition", "inline; filename=capture.jpg"); httpd_resp_set_hdr(req, "Access-Control-Allow-Origin", "*"); esp_err_t res; if (fb->format == PIXFORMAT_JPEG) { res = httpd_resp_send(req, (const char*)fb->buf, fb->len); } else { uint8_t *jpg_buf = NULL; size_t jpg_len = 0; if (frame2jpg(fb, 80, &jpg_buf, &jpg_len)) { res = httpd_resp_send(req, (const char*)jpg_buf, jpg_len); free(jpg_buf); } else { res = ESP_FAIL; httpd_resp_send_500(req); } } esp_camera_fb_return(fb); return res; } // Status handler static esp_err_t status_handler(httpd_req_t *req) { char json[256]; snprintf(json, sizeof(json), "{\"detection\":%s,\"count\":%d,\"method\":\"%s\",\"heap\":%lu,\"psram\":%lu}", detectionEnabled ? 
"true" : "false", detectionCount, #if FACE_DETECTION_AVAILABLE == 1 "neural-network", #else "skin-tone", #endif (unsigned long)ESP.getFreeHeap(), (unsigned long)ESP.getFreePsram()); httpd_resp_set_type(req, "application/json"); httpd_resp_set_hdr(req, "Access-Control-Allow-Origin", "*"); return httpd_resp_send(req, json, strlen(json)); } // Toggle detection static esp_err_t toggle_handler(httpd_req_t *req) { detectionEnabled = !detectionEnabled; char json[64]; snprintf(json, sizeof(json), "{\"detection\":%s}", detectionEnabled ? "true" : "false"); httpd_resp_set_type(req, "application/json"); httpd_resp_set_hdr(req, "Access-Control-Allow-Origin", "*"); return httpd_resp_send(req, json, strlen(json)); } // Main page - HTML in static storage to avoid stack overflow static esp_err_t index_handler(httpd_req_t *req) { static const char html[] = R"rawliteral( XIAO ESP32S3 Face Detection

XIAO ESP32S3 Face Detection

Camera Stream
No faces
Detection
ON
Faces
0
Method
-
Free RAM
-
)rawliteral"; httpd_resp_set_type(req, "text/html"); return httpd_resp_send(req, html, strlen(html)); } void startCameraServer() { httpd_config_t config = HTTPD_DEFAULT_CONFIG(); config.server_port = 80; config.ctrl_port = 32768; config.max_open_sockets = 7; httpd_uri_t index_uri = { .uri = "/", .method = HTTP_GET, .handler = index_handler }; httpd_uri_t capture_uri = { .uri = "/capture", .method = HTTP_GET, .handler = capture_handler }; httpd_uri_t status_uri = { .uri = "/status", .method = HTTP_GET, .handler = status_handler }; httpd_uri_t toggle_uri = { .uri = "/toggle", .method = HTTP_GET, .handler = toggle_handler }; httpd_uri_t stream_uri = { .uri = "/stream", .method = HTTP_GET, .handler = stream_handler }; Serial.printf("Starting web server on port %d\n", config.server_port); if (httpd_start(&camera_httpd, &config) == ESP_OK) { httpd_register_uri_handler(camera_httpd, &index_uri); httpd_register_uri_handler(camera_httpd, &capture_uri); httpd_register_uri_handler(camera_httpd, &status_uri); httpd_register_uri_handler(camera_httpd, &toggle_uri); } config.server_port = 81; config.ctrl_port = 32769; Serial.printf("Starting stream server on port %d\n", config.server_port); if (httpd_start(&stream_httpd, &config) == ESP_OK) { httpd_register_uri_handler(stream_httpd, &stream_uri); } } void setup() { Serial.begin(115200); for (int i = 0; i < 10; i++){ Serial.println(i); delay(500); } Serial.setDebugOutput(true); Serial.println(); // Configure LED pinMode(LED_GPIO_NUM, OUTPUT); digitalWrite(LED_GPIO_NUM, HIGH); // Check PSRAM if (psramFound()) { Serial.printf("PSRAM found: %d bytes\n", ESP.getPsramSize()); } else { Serial.println("WARNING: No PSRAM found! 
Face detection may not work."); Serial.println("Make sure PSRAM is set to 'OPI PSRAM' in Tools menu."); } // Camera configuration camera_config_t config; config.ledc_channel = LEDC_CHANNEL_0; config.ledc_timer = LEDC_TIMER_0; config.pin_d0 = Y2_GPIO_NUM; config.pin_d1 = Y3_GPIO_NUM; config.pin_d2 = Y4_GPIO_NUM; config.pin_d3 = Y5_GPIO_NUM; config.pin_d4 = Y6_GPIO_NUM; config.pin_d5 = Y7_GPIO_NUM; config.pin_d6 = Y8_GPIO_NUM; config.pin_d7 = Y9_GPIO_NUM; config.pin_xclk = XCLK_GPIO_NUM; config.pin_pclk = PCLK_GPIO_NUM; config.pin_vsync = VSYNC_GPIO_NUM; config.pin_href = HREF_GPIO_NUM; config.pin_sccb_sda = SIOD_GPIO_NUM; config.pin_sccb_scl = SIOC_GPIO_NUM; config.pin_pwdn = PWDN_GPIO_NUM; config.pin_reset = RESET_GPIO_NUM; config.xclk_freq_hz = 20000000; config.grab_mode = CAMERA_GRAB_LATEST; config.fb_location = CAMERA_FB_IN_PSRAM; #if FACE_DETECTION_AVAILABLE == 1 // For neural network face detection, use RGB565 config.frame_size = FRAMESIZE_QVGA; // 320x240 config.pixel_format = PIXFORMAT_RGB565; config.fb_count = 2; config.jpeg_quality = 12; Serial.println("Face detection: Neural Network (ESP-DL)"); #else // For skin-tone detection, JPEG is fine and faster config.frame_size = FRAMESIZE_VGA; // 640x480 config.pixel_format = PIXFORMAT_JPEG; config.fb_count = 2; config.jpeg_quality = 10; Serial.println("Face detection: Skin-tone heuristic (fallback)"); #endif // Initialize camera esp_err_t err = esp_camera_init(&config); if (err != ESP_OK) { Serial.printf("Camera init failed with error 0x%x\n", err); while (true) { digitalWrite(LED_GPIO_NUM, LOW); delay(100); digitalWrite(LED_GPIO_NUM, HIGH); delay(100); } } Serial.println("Camera initialized successfully"); // Camera settings sensor_t *s = esp_camera_sensor_get(); if (s) { s->set_vflip(s, 0); s->set_hmirror(s, 0); s->set_brightness(s, 1); s->set_contrast(s, 1); } // Connect to WiFi WiFi.begin(ssid, password); WiFi.setSleep(false); Serial.print("Connecting to WiFi"); int attempts = 0; while (WiFi.status() != 
WL_CONNECTED && attempts < 30) { delay(500); Serial.print("."); digitalWrite(LED_GPIO_NUM, (attempts % 2) ? LOW : HIGH); attempts++; } if (WiFi.status() == WL_CONNECTED) { Serial.println("\nWiFi connected!"); digitalWrite(LED_GPIO_NUM, HIGH); startCameraServer(); Serial.println("\n========================================"); Serial.println(" Face Detection Web Server Ready!"); Serial.println("========================================"); Serial.print(" Open: http://"); Serial.println(WiFi.localIP()); Serial.print(" Stream: http://"); Serial.print(WiFi.localIP()); Serial.println(":81/stream"); Serial.println("========================================\n"); } else { Serial.println("\nWiFi connection failed!"); while (true) { digitalWrite(LED_GPIO_NUM, LOW); delay(1000); digitalWrite(LED_GPIO_NUM, HIGH); delay(1000); } } } void loop() { delay(10000); Serial.printf("Status - Heap: %lu, PSRAM: %lu, Faces: %d\n", (unsigned long)ESP.getFreeHeap(), (unsigned long)ESP.getFreePsram(), detectionCount); }