commit efc1c13b50ce4b3b98d3d9a437ec64e9b8a0ecf8 Author: Jake Date: Sat Feb 7 16:31:52 2026 +0800 stream working, no face detect diff --git a/esp32_sense_cam.ino b/esp32_sense_cam.ino new file mode 100644 index 0000000..402187a --- /dev/null +++ b/esp32_sense_cam.ino @@ -0,0 +1,674 @@ +/* + * XIAO ESP32S3 Sense - Face Detection Web Server + * + * This sketch captures camera frames, runs face detection, + * and serves both the video stream and detection results via web server. + * + * Board: XIAO_ESP32S3 + * Required: ESP32 board package 2.0.8+ + * + * IMPORTANT: In Arduino IDE, go to Tools menu and set: + * - PSRAM: "OPI PSRAM" + */ + +#include "esp_camera.h" +#include +#include "esp_http_server.h" + +// Try to include face detection - available in ESP32 Arduino Core with ESP-WHO +#if __has_include("human_face_detect_msr01.hpp") + #include "human_face_detect_msr01.hpp" + #include "human_face_detect_mnp01.hpp" + #define FACE_DETECTION_AVAILABLE 1 +#elif __has_include("esp_face_detect.h") + #include "esp_face_detect.h" + #define FACE_DETECTION_AVAILABLE 2 +#else + #define FACE_DETECTION_AVAILABLE 0 + #warning "Face detection headers not found - using motion detection fallback" +#endif + +// WiFi credentials +const char* ssid = "Police Surveillance Van"; +const char* password = "ourpassword"; + +// =========================================== +// XIAO ESP32S3 Sense Camera Pin Definitions +// =========================================== +#define PWDN_GPIO_NUM -1 +#define RESET_GPIO_NUM -1 +#define XCLK_GPIO_NUM 10 +#define SIOD_GPIO_NUM 40 +#define SIOC_GPIO_NUM 39 + +#define Y9_GPIO_NUM 48 +#define Y8_GPIO_NUM 11 +#define Y7_GPIO_NUM 12 +#define Y6_GPIO_NUM 14 +#define Y5_GPIO_NUM 16 +#define Y4_GPIO_NUM 18 +#define Y3_GPIO_NUM 17 +#define Y2_GPIO_NUM 15 +#define VSYNC_GPIO_NUM 38 +#define HREF_GPIO_NUM 47 +#define PCLK_GPIO_NUM 13 + +// LED pin for status +#define LED_GPIO_NUM 21 + +// Global variables +httpd_handle_t stream_httpd = NULL; +httpd_handle_t camera_httpd = NULL; + +// Detection settings +static bool detectionEnabled = true; +static int detectionCount = 0; +static unsigned long lastDetectionTime = 0; + +// For motion/change detection fallback +static uint8_t* prevFrame = NULL; +static size_t prevFrameLen = 0; + +// Part boundary for MJPEG stream +#define PART_BOUNDARY "123456789000000000000987654321" +static const char* _STREAM_CONTENT_TYPE = "multipart/x-mixed-replace;boundary=" PART_BOUNDARY; +static const char* _STREAM_BOUNDARY = "\r\n--" PART_BOUNDARY "\r\n"; +static const char* _STREAM_PART = "Content-Type: image/jpeg\r\nContent-Length: %u\r\nX-Faces: %d\r\n\r\n"; + +#if FACE_DETECTION_AVAILABLE == 1 +// ESP-DL based face detection +HumanFaceDetectMSR01 *s_detector = nullptr; +HumanFaceDetectMNP01 *s_detector2 = nullptr; + +static int detect_faces_dl(camera_fb_t *fb, uint8_t **out_buf, size_t *out_len) { + if (!s_detector) { + s_detector = new HumanFaceDetectMSR01(0.1F, 0.5F, 10, 0.2F); + s_detector2 = new HumanFaceDetectMNP01(0.5F, 0.3F, 5); + } + + int faces = 0; + + if (fb->format == PIXFORMAT_RGB565) { + // Convert to RGB888 + size_t rgb_len = fb->width * fb->height * 3; + uint8_t *rgb_buf = (uint8_t*)ps_malloc(rgb_len); + + if (rgb_buf) { + // Convert RGB565 to RGB888 + uint16_t *src = (uint16_t*)fb->buf; + for (size_t i = 0; i < fb->width * fb->height; i++) { + uint16_t p = src[i]; + rgb_buf[i*3] = ((p >> 11) & 0x1F) << 3; + rgb_buf[i*3+1] = ((p >> 5) & 0x3F) << 2; + rgb_buf[i*3+2] = (p & 0x1F) << 3; + } + + // Run detection + std::list &results = s_detector->infer(rgb_buf, {(int)fb->height, (int)fb->width, 3}); + + if (results.size() > 0) { + results = s_detector2->infer(rgb_buf, {(int)fb->height, (int)fb->width, 3}, results); + faces = results.size(); + + // Draw boxes + for (auto &r : results) { + int x1 = constrain(r.box[0], 0, fb->width-1); + int y1 = constrain(r.box[1], 0, fb->height-1); + int x2 = constrain(r.box[2], 0, fb->width-1); + int y2 = constrain(r.box[3], 0, fb->height-1); + + // Draw green rectangle + for (int x = x1; x <= x2; x++) { + rgb_buf[(y1 * fb->width + x) * 3 + 1] = 255; + rgb_buf[(y2 * fb->width + x) * 3 + 1] = 255; + } + for (int y = y1; y <= y2; y++) { + rgb_buf[(y * fb->width + x1) * 3 + 1] = 255; + rgb_buf[(y * fb->width + x2) * 3 + 1] = 255; + } + } + } + + // Convert to JPEG + if (!fmt2jpg(rgb_buf, rgb_len, fb->width, fb->height, PIXFORMAT_RGB888, 80, out_buf, out_len)) { + *out_buf = NULL; + *out_len = 0; + } + + free(rgb_buf); + } + } + + return faces; +} +#endif + +// Simple skin-tone based face detection (works without ESP-DL) +static int detect_faces_simple(uint8_t *rgb565_buf, int width, int height) { + int skinPixels = 0; + int totalPixels = width * height; + uint16_t *pixels = (uint16_t*)rgb565_buf; + + // Count skin-tone pixels (simplified detection) + for (int i = 0; i < totalPixels; i++) { + uint16_t p = pixels[i]; + uint8_t r = ((p >> 11) & 0x1F) << 3; + uint8_t g = ((p >> 5) & 0x3F) << 2; + uint8_t b = (p & 0x1F) << 3; + + // Simple skin tone detection in RGB + // Skin typically has R > 95, G > 40, B > 20 + // and R > G > B with R-G > 15 + if (r > 95 && g > 40 && b > 20 && + r > g && g > b && (r - g) > 15 && + (r - b) > 15) { + skinPixels++; + } + } + + // If more than 5% skin pixels, likely a face is present + float skinRatio = (float)skinPixels / totalPixels; + + if (skinRatio > 0.05 && skinRatio < 0.6) { + return 1; // Face likely detected + } + return 0; +} + +// Stream handler +static esp_err_t stream_handler(httpd_req_t *req) { + camera_fb_t *fb = NULL; + esp_err_t res = ESP_OK; + char part_buf[128]; + + res = httpd_resp_set_type(req, _STREAM_CONTENT_TYPE); + if (res != ESP_OK) return res; + + httpd_resp_set_hdr(req, "Access-Control-Allow-Origin", "*"); + httpd_resp_set_hdr(req, "X-Framerate", "15"); + + while (true) { + fb = esp_camera_fb_get(); + if (!fb) { + Serial.println("Camera capture failed"); + res = ESP_FAIL; + break; + } + + uint8_t *jpg_buf = NULL; + size_t jpg_len = 0; + int faces = 0; + + if (detectionEnabled) { +#if FACE_DETECTION_AVAILABLE == 1 + if (fb->format == PIXFORMAT_RGB565) { + faces = detect_faces_dl(fb, &jpg_buf, &jpg_len); + } +#else + // Fallback: simple skin-tone detection + if (fb->format == PIXFORMAT_RGB565) { + faces = detect_faces_simple(fb->buf, fb->width, fb->height); + } +#endif + } + + // Use original frame if detection didn't produce output + if (jpg_buf == NULL) { + if (fb->format == PIXFORMAT_JPEG) { + jpg_buf = fb->buf; + jpg_len = fb->len; + } else { + bool converted = frame2jpg(fb, 80, &jpg_buf, &jpg_len); + if (!converted) { + esp_camera_fb_return(fb); + continue; + } + } + } + + // Update detection status + if (faces > 0) { + detectionCount = faces; + lastDetectionTime = millis(); + digitalWrite(LED_GPIO_NUM, LOW); // LED on + } else if (millis() - lastDetectionTime > 500) { + detectionCount = 0; + digitalWrite(LED_GPIO_NUM, HIGH); // LED off + } + + // Send frame + if (res == ESP_OK) { + res = httpd_resp_send_chunk(req, _STREAM_BOUNDARY, strlen(_STREAM_BOUNDARY)); + } + if (res == ESP_OK) { + size_t hlen = snprintf(part_buf, sizeof(part_buf), _STREAM_PART, jpg_len, detectionCount); + res = httpd_resp_send_chunk(req, part_buf, hlen); + } + if (res == ESP_OK) { + res = httpd_resp_send_chunk(req, (const char*)jpg_buf, jpg_len); + } + + // Free JPEG buffer if we allocated it + if (jpg_buf != fb->buf) { + free(jpg_buf); + } + + esp_camera_fb_return(fb); + + if (res != ESP_OK) break; + } + + return res; +} + +// Single capture handler +static esp_err_t capture_handler(httpd_req_t *req) { + camera_fb_t *fb = esp_camera_fb_get(); + if (!fb) { + httpd_resp_send_500(req); + return ESP_FAIL; + } + + httpd_resp_set_type(req, "image/jpeg"); + httpd_resp_set_hdr(req, "Content-Disposition", "inline; filename=capture.jpg"); + httpd_resp_set_hdr(req, "Access-Control-Allow-Origin", "*"); + + esp_err_t res; + if (fb->format == PIXFORMAT_JPEG) { + res = httpd_resp_send(req, (const char*)fb->buf, fb->len); + } else { + uint8_t *jpg_buf = NULL; + size_t jpg_len = 0; + if (frame2jpg(fb, 80, &jpg_buf, &jpg_len)) { + res = httpd_resp_send(req, (const char*)jpg_buf, jpg_len); + free(jpg_buf); + } else { + res = ESP_FAIL; + httpd_resp_send_500(req); + } + } + + esp_camera_fb_return(fb); + return res; +} + +// Status handler +static esp_err_t status_handler(httpd_req_t *req) { + char json[256]; + snprintf(json, sizeof(json), + "{\"detection\":%s,\"count\":%d,\"method\":\"%s\",\"heap\":%lu,\"psram\":%lu}", + detectionEnabled ? "true" : "false", + detectionCount, +#if FACE_DETECTION_AVAILABLE == 1 + "neural-network", +#else + "skin-tone", +#endif + (unsigned long)ESP.getFreeHeap(), + (unsigned long)ESP.getFreePsram()); + + httpd_resp_set_type(req, "application/json"); + httpd_resp_set_hdr(req, "Access-Control-Allow-Origin", "*"); + return httpd_resp_send(req, json, strlen(json)); +} + +// Toggle detection +static esp_err_t toggle_handler(httpd_req_t *req) { + detectionEnabled = !detectionEnabled; + + char json[64]; + snprintf(json, sizeof(json), "{\"detection\":%s}", detectionEnabled ? "true" : "false"); + + httpd_resp_set_type(req, "application/json"); + httpd_resp_set_hdr(req, "Access-Control-Allow-Origin", "*"); + return httpd_resp_send(req, json, strlen(json)); +} + +// Main page - HTML in static storage to avoid stack overflow +static esp_err_t index_handler(httpd_req_t *req) { + static const char html[] = R"rawliteral( + + + + + + XIAO ESP32S3 Face Detection + + + +
+

XIAO ESP32S3 Face Detection

+
+ Camera Stream +
+ + No faces +
+
+
+ + + +
+
+
+
Detection
+
ON
+
+
+
Faces
+
0
+
+
+
Method
+
-
+
+
+
Free RAM
+
-
+
+
+
+ + + +)rawliteral"; + + httpd_resp_set_type(req, "text/html"); + return httpd_resp_send(req, html, strlen(html)); +} + +void startCameraServer() { + httpd_config_t config = HTTPD_DEFAULT_CONFIG(); + config.server_port = 80; + config.ctrl_port = 32768; + config.max_open_sockets = 7; + + httpd_uri_t index_uri = { .uri = "/", .method = HTTP_GET, .handler = index_handler }; + httpd_uri_t capture_uri = { .uri = "/capture", .method = HTTP_GET, .handler = capture_handler }; + httpd_uri_t status_uri = { .uri = "/status", .method = HTTP_GET, .handler = status_handler }; + httpd_uri_t toggle_uri = { .uri = "/toggle", .method = HTTP_GET, .handler = toggle_handler }; + httpd_uri_t stream_uri = { .uri = "/stream", .method = HTTP_GET, .handler = stream_handler }; + + Serial.printf("Starting web server on port %d\n", config.server_port); + if (httpd_start(&camera_httpd, &config) == ESP_OK) { + httpd_register_uri_handler(camera_httpd, &index_uri); + httpd_register_uri_handler(camera_httpd, &capture_uri); + httpd_register_uri_handler(camera_httpd, &status_uri); + httpd_register_uri_handler(camera_httpd, &toggle_uri); + } + + config.server_port = 81; + config.ctrl_port = 32769; + + Serial.printf("Starting stream server on port %d\n", config.server_port); + if (httpd_start(&stream_httpd, &config) == ESP_OK) { + httpd_register_uri_handler(stream_httpd, &stream_uri); + } +} + +void setup() { + Serial.begin(115200); + for (int i = 0; i < 10; i++){ + Serial.println(i); + delay(500); + } + Serial.setDebugOutput(true); + Serial.println(); + + // Configure LED + pinMode(LED_GPIO_NUM, OUTPUT); + digitalWrite(LED_GPIO_NUM, HIGH); + + // Check PSRAM + if (psramFound()) { + Serial.printf("PSRAM found: %d bytes\n", ESP.getPsramSize()); + } else { + Serial.println("WARNING: No PSRAM found! Face detection may not work."); + Serial.println("Make sure PSRAM is set to 'OPI PSRAM' in Tools menu."); + } + + // Camera configuration + camera_config_t config; + config.ledc_channel = LEDC_CHANNEL_0; + config.ledc_timer = LEDC_TIMER_0; + config.pin_d0 = Y2_GPIO_NUM; + config.pin_d1 = Y3_GPIO_NUM; + config.pin_d2 = Y4_GPIO_NUM; + config.pin_d3 = Y5_GPIO_NUM; + config.pin_d4 = Y6_GPIO_NUM; + config.pin_d5 = Y7_GPIO_NUM; + config.pin_d6 = Y8_GPIO_NUM; + config.pin_d7 = Y9_GPIO_NUM; + config.pin_xclk = XCLK_GPIO_NUM; + config.pin_pclk = PCLK_GPIO_NUM; + config.pin_vsync = VSYNC_GPIO_NUM; + config.pin_href = HREF_GPIO_NUM; + config.pin_sccb_sda = SIOD_GPIO_NUM; + config.pin_sccb_scl = SIOC_GPIO_NUM; + config.pin_pwdn = PWDN_GPIO_NUM; + config.pin_reset = RESET_GPIO_NUM; + config.xclk_freq_hz = 20000000; + config.grab_mode = CAMERA_GRAB_LATEST; + config.fb_location = CAMERA_FB_IN_PSRAM; + +#if FACE_DETECTION_AVAILABLE == 1 + // For neural network face detection, use RGB565 + config.frame_size = FRAMESIZE_QVGA; // 320x240 + config.pixel_format = PIXFORMAT_RGB565; + config.fb_count = 2; + config.jpeg_quality = 12; + Serial.println("Face detection: Neural Network (ESP-DL)"); +#else + // For skin-tone detection, JPEG is fine and faster + config.frame_size = FRAMESIZE_VGA; // 640x480 + config.pixel_format = PIXFORMAT_JPEG; + config.fb_count = 2; + config.jpeg_quality = 10; + Serial.println("Face detection: Skin-tone heuristic (fallback)"); +#endif + + // Initialize camera + esp_err_t err = esp_camera_init(&config); + if (err != ESP_OK) { + Serial.printf("Camera init failed with error 0x%x\n", err); + while (true) { + digitalWrite(LED_GPIO_NUM, LOW); + delay(100); + digitalWrite(LED_GPIO_NUM, HIGH); + delay(100); + } + } + + Serial.println("Camera initialized successfully"); + + // Camera settings + sensor_t *s = esp_camera_sensor_get(); + if (s) { + s->set_vflip(s, 0); + s->set_hmirror(s, 0); + s->set_brightness(s, 1); + s->set_contrast(s, 1); + } + + // Connect to WiFi + WiFi.begin(ssid, password); + WiFi.setSleep(false); + + Serial.print("Connecting to WiFi"); + int attempts = 0; + while (WiFi.status() != WL_CONNECTED && attempts < 30) { + delay(500); + Serial.print("."); + digitalWrite(LED_GPIO_NUM, (attempts % 2) ? LOW : HIGH); + attempts++; + } + + if (WiFi.status() == WL_CONNECTED) { + Serial.println("\nWiFi connected!"); + digitalWrite(LED_GPIO_NUM, HIGH); + + startCameraServer(); + + Serial.println("\n========================================"); + Serial.println(" Face Detection Web Server Ready!"); + Serial.println("========================================"); + Serial.print(" Open: http://"); + Serial.println(WiFi.localIP()); + Serial.print(" Stream: http://"); + Serial.print(WiFi.localIP()); + Serial.println(":81/stream"); + Serial.println("========================================\n"); + } else { + Serial.println("\nWiFi connection failed!"); + while (true) { + digitalWrite(LED_GPIO_NUM, LOW); + delay(1000); + digitalWrite(LED_GPIO_NUM, HIGH); + delay(1000); + } + } +} + +void loop() { + delay(10000); + Serial.printf("Status - Heap: %lu, PSRAM: %lu, Faces: %d\n", + (unsigned long)ESP.getFreeHeap(), + (unsigned long)ESP.getFreePsram(), + detectionCount); +}