基于 DNN 的人脸检测与识别


原作者	Chengrui Wang, Yuantao Feng
兼容性	OpenCV >= 4.5.4

简介

在本节中，我们介绍了用于人脸检测的cv::FaceDetectorYN类和用于人脸识别的cv::FaceRecognizerSF类。

模型

本模块需要以下两个预训练的ONNX格式模型：

人脸检测:
- 大小：338KB
- 在WIDER Face Val数据集上的结果：0.830(简单), 0.824(中等), 0.708(困难)
人脸识别:
- 大小：36.9MB
- 结果

数据库	准确率	阈值 (normL2)	阈值 (cosine)
LFW	99.60%	1.128	0.363
CALFW	93.95%	1.149	0.340
CPLFW	91.05%	1.204	0.275
AgeDB-30	94.90%	1.202	0.277
CFP-FP	94.80%	1.253	0.212

代码

C++

可下载代码：点击此处
代码速览
#include <opencv2/dnn.hpp>

#include <opencv2/imgproc.hpp>

#include <opencv2/highgui.hpp>

#include <opencv2/objdetect.hpp>

#include <iostream>

using namespace cv;

using namespace std;

// Logs every detected face to stdout and draws it, plus an FPS label, onto `input`.
//
// input     - image that is drawn on in place.
// frame     - frame index used as a console prefix; a negative value suppresses the prefix.
// faces     - detection matrix read as float, one row per face:
//             columns 0-3   bounding box (x, y, width, height),
//             columns 4-13  five landmark (x, y) pairs,
//             column  14    detection score.
// fps       - frames-per-second value that is printed and overlaid on the image.
// thickness - line thickness for the bounding box and the landmark circles.
static
void visualize(Mat& input, int frame, Mat& faces, double fps, int thickness = 2)
{
    const std::string fpsString = cv::format("FPS : %.2f", static_cast<float>(fps));

    if (frame >= 0)
        cout << "Frame " << frame << ", ";
    // fpsString already carries its own "FPS" label; prefixing it again printed "FPS: FPS : ...".
    cout << fpsString << endl;

    // One colour per landmark, in the column order of a detection row.
    const int landmarkCount = 5;
    const Scalar landmarkColors[landmarkCount] = {
        Scalar(255, 0, 0),
        Scalar(0, 0, 255),
        Scalar(0, 255, 0),
        Scalar(255, 0, 255),
        Scalar(0, 255, 255)
    };

    for (int i = 0; i < faces.rows; i++)
    {
        const float boxX = faces.at<float>(i, 0);
        const float boxY = faces.at<float>(i, 1);
        const float boxW = faces.at<float>(i, 2);
        const float boxH = faces.at<float>(i, 3);

        // Print the detection
        cout << "Face " << i
             << ", 左上角坐标: (" << boxX << ", " << boxY << "), "
             << "框宽度: " << boxW << ", 框高度: " << boxH << ", "
             << "分数: " << cv::format("%.2f", faces.at<float>(i, 14))
             << endl;

        // Draw the bounding box
        rectangle(input,
                  Rect2i(static_cast<int>(boxX), static_cast<int>(boxY),
                         static_cast<int>(boxW), static_cast<int>(boxH)),
                  Scalar(0, 255, 0), thickness);

        // Draw the landmarks: each one occupies two consecutive columns starting at column 4.
        for (int k = 0; k < landmarkCount; k++)
        {
            const int col = 4 + 2 * k;
            const Point2i center(static_cast<int>(faces.at<float>(i, col)),
                                 static_cast<int>(faces.at<float>(i, col + 1)));
            circle(input, center, 2, landmarkColors[k], thickness);
        }
    }

    putText(input, fpsString, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 2);
}

int main(int argc, char** argv)

{

CommandLineParser parser(argc, argv,

"{help h | | 打印此消息}"

"{image1 i1 | | 输入图像1的路径。省略则通过VideoCapture检测}"

"{image2 i2 | | 输入图像2的路径。当同时给出image1和image2参数时，程序会尝试在两张图像中找到人脸并运行人脸识别算法}"

"{video v | 0 | 输入视频的路径}"

"{scale sc | 1.0 | 用于调整输入视频帧大小的缩放因子}"

"{fd_model fd | face_detection_yunet_2021dec.onnx| 模型的路径。从https://github.com/opencv/opencv_zoo/tree/master/models/face_detection_yunet下载yunet.onnx}"

"{fr_model fr | face_recognition_sface_2021dec.onnx | 人脸识别模型的路径。从https://github.com/opencv/opencv_zoo/tree/master/models/face_recognition_sface下载模型}"

"{score_threshold | 0.9 | 过滤掉分数 < score_threshold 的人脸}"

"{nms_threshold | 0.3 | 抑制iou >= nms_threshold 的边界框}"

"{top_k | 5000 | 在NMS之前保留top_k个边界框}"

"{save s | false | 设置为true以保存结果。使用相机时此标志无效}"

);

if (parser.has("help"))

{

parser.printMessage();

return 0;

}

String fd_modelPath = parser.get<String>("fd_model");

String fr_modelPath = parser.get<String>("fr_model");

float scoreThreshold = parser.get<float>("score_threshold");

float nmsThreshold = parser.get<float>("nms_threshold");

int topK = parser.get<int>("top_k");

bool save = parser.get<bool>("save");

float scale = parser.get<float>("scale");

double cosine_similar_thresh = 0.363;

double l2norm_similar_thresh = 1.128;

// 初始化FaceDetectorYN

Ptr<FaceDetectorYN> detector = FaceDetectorYN::create(fd_modelPath, "", Size(320, 320), scoreThreshold, nmsThreshold, topK);

TickMeter tm;

// 如果输入是图像

if (parser.has("image1"))

{

String input1 = parser.get<String>("image1");

Mat image1 = imread(samples::findFile(input1));

if (image1.empty())

{

std::cerr << "无法读取图像: " << input1 << std::endl;

return 2;

}

int imageWidth = int(image1.cols * scale);

int imageHeight = int(image1.rows * scale);

resize(image1, image1, Size(imageWidth, imageHeight));

tm.start();

// 在推理前设置输入大小

detector->setInputSize(image1.size());

Mat faces1;

detector->detect(image1, faces1);

if (faces1.rows < 1)

{

std::cerr << "在 " << input1 << " 中找不到人脸" << std::endl;

return 1;

}

tm.stop();

// 在输入图像上绘制结果

visualize(image1, -1, faces1, tm.getFPS());

// 如果save为true，则保存结果

if (save)

{

cout << "正在保存 result.jpg...\n";

imwrite("result.jpg", image1);

}

// 可视化结果

imshow("image1", image1);

pollKey(); // 处理UI事件以显示内容

if (parser.has("image2"))

{

String input2 = parser.get<String>("image2");

Mat image2 = imread(samples::findFile(input2));

if (image2.empty())

{

std::cerr << "无法读取图像2: " << input2 << std::endl;

return 2;

}

tm.reset();

tm.start();

detector->setInputSize(image2.size());

Mat faces2;

detector->detect(image2, faces2);

if (faces2.rows < 1)

{

std::cerr << "在 " << input2 << " 中找不到人脸" << std::endl;

return 1;

}

tm.stop();

visualize(image2, -1, faces2, tm.getFPS());

if (save)

{

cout << "正在保存 result2.jpg...\n";

imwrite("result2.jpg", image2);

}

imshow("image2", image2);

pollKey();

// 初始化FaceRecognizerSF

Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(fr_modelPath, "");

// 通过检测到的人脸中的第一个人脸进行对齐和裁剪。

Mat aligned_face1, aligned_face2;

faceRecognizer->alignCrop(image1, faces1.row(0), aligned_face1);

faceRecognizer->alignCrop(image2, faces2.row(0), aligned_face2);

// 使用给定aligned_face进行特征提取

Mat feature1, feature2;

faceRecognizer->feature(aligned_face1, feature1);

feature1 = feature1.clone();

faceRecognizer->feature(aligned_face2, feature2);

feature2 = feature2.clone();

double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_COSINE);

double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_NORM_L2);

if (cos_score >= cosine_similar_thresh)

{

std::cout << "它们是同一个人；";

}

else

{

std::cout << "它们是不同的人；";

}

std::cout << " 余弦相似度: " << cos_score << ", 阈值: " << cosine_similar_thresh << ". (值越高相似度越高，最大1.0)\n";

if (L2_score <= l2norm_similar_thresh)

{

std::cout << "它们是同一个人；";

}

else

{

std::cout << "它们是不同的人。";

}

std::cout << " L2范数距离: " << L2_score << ", 阈值: " << l2norm_similar_thresh << ". (值越低相似度越高，最小0.0)\n";

}

cout << "按任意键退出..." << endl;

waitKey(0);

}

else

{

int frameWidth, frameHeight;

VideoCapture capture;

std::string video = parser.get<string>("video");

if (video.size() == 1 && isdigit(video[0]))

capture.open(parser.get<int>("video"));

else

capture.open(samples::findFileOrKeep(video)); // 保留GStreamer管道

if (capture.isOpened())

{

frameWidth = int(capture.get(CAP_PROP_FRAME_WIDTH) * scale);

frameHeight = int(capture.get(CAP_PROP_FRAME_HEIGHT) * scale);

cout << "视频 " << video

<< ": 宽度=" << frameWidth

<< ", 高度=" << frameHeight

<< endl;

}

else

{

cout << "无法初始化视频捕捉: " << video << "\n";

return 1;

}

detector->setInputSize(Size(frameWidth, frameHeight));

cout << "按 'SPACE' 保存帧，按其他键退出..." << endl;

int nFrame = 0;

for (;;)

{

// 获取帧

Mat frame;

if (!capture.read(frame))

{

cerr << "无法抓取帧! 停止\n";

break;

}

resize(frame, frame, Size(frameWidth, frameHeight));

// 推理

Mat faces;

tm.start();

detector->detect(frame, faces);

tm.stop();

Mat result = frame.clone();

// 在输入图像上绘制结果

visualize(result, nFrame, faces, tm.getFPS());

// 可视化结果

imshow("实时", result);

int key = waitKey(1);

bool saveFrame = save;

if (key == ' ')

{

saveFrame = true;

key = 0; // 已处理

}

if (saveFrame)

{

std::string frame_name = cv::format("frame_%05d.png", nFrame);

std::string result_name = cv::format("result_%05d.jpg", nFrame);

cout << "正在保存 '" << frame_name << "' 和 '" << result_name << "' ...\n";

imwrite(frame_name, frame);

imwrite(result_name, result);

}

++nFrame;

if (key > 0)

break;

}

cout << "已处理 " << nFrame << " 帧" << endl;

}

cout << "完成。" << endl;

return 0;

}

bool empty() const
如果数组没有元素，则返回 true。
int64_t int64

cv::Mat
n 维密集数组类
定义 mat.hpp:830

cv::Mat::clone
CV_NODISCARD_STD Mat clone() const
创建数组及其底层数据的完整副本。

cv::Mat::row
Mat row(int y) const
为指定的矩阵行创建矩阵头。

cv::Mat::at
_Tp & at(int i0=0)
返回指定数组元素的引用。

cv::Mat::rows
int rows
矩阵的行数和列数，或当矩阵有超过2个维度时为(-1, -1)
定义 mat.hpp:2165

cv::Point_< int >

cv::Rect_
2D 矩形的模板类。
定义 types.hpp:444

cv::Scalar_< double >

cv::Size_
用于指定图像或矩形大小的模板类。
定义 types.hpp:335

cv::TickMeter
一个用于测量流逝时间的类。
定义 utility.hpp:326

cv::TickMeter::getFPS
double getFPS() const
返回平均 FPS（帧每秒）值。
定义 utility.hpp:407

cv::TickMeter::start
void start()
开始计时。
定义 utility.hpp:335

cv::TickMeter::stop
void stop()
停止计时。
定义 utility.hpp:341

cv::TickMeter::reset
void reset()
重置内部值。
定义 utility.hpp:430

cv::VideoCapture
用于从视频文件、图像序列或摄像头捕获视频的类。
定义 videoio.hpp:772

cv::VideoCapture::read
virtual bool read(OutputArray image)
抓取、解码并返回下一个视频帧。

cv::VideoCapture::open
virtual bool open(const String &filename, int apiPreference=CAP_ANY)
打开视频文件、捕获设备或IP视频流进行视频捕获。

cv::VideoCapture::isOpened
virtual bool isOpened() const
如果视频捕获已初始化，则返回 true。

cv::VideoCapture::get
virtual double get(int propId) const
返回指定的 VideoCapture 属性。

dnn.hpp

cv::String
std::string String
定义 cvstd.hpp:151

cv::Ptr
std::shared_ptr< _Tp > Ptr
定义 cvstd_wrapper.hpp:23

cv::format
String format(const char *fmt,...)
返回使用printf类表达式格式化的文本字符串。

cv::imshow
void imshow(const String &winname, InputArray mat)
在指定窗口中显示图像。

cv::waitKey
int waitKey(int delay=0)
等待按键按下。

cv::pollKey
int pollKey()
轮询按下的键。

cv::imwrite
CV_EXPORTS_W bool imwrite(const String &filename, InputArray img, const std::vector< int > &params=std::vector< int >())
将图像保存到指定文件。

cv::imread
CV_EXPORTS_W Mat imread(const String &filename, int flags=IMREAD_COLOR_BGR)
从文件加载图像。

cv::resize
void resize(InputArray src, OutputArray dst, Size dsize, double fx=0, double fy=0, int interpolation=INTER_LINEAR)
调整图像大小。

highgui.hpp

main
int main(int argc, char *argv[])
定义 highgui_qt.cpp:3

imgproc.hpp

cv
定义 core.hpp:107

std
STL 命名空间。

objdetect.hpp

Python

可下载代码：点击此处
代码速览
import argparse

import numpy as np

import cv2 as cv

def str2bool(v):
    """Convert a command-line flag string to a bool.

    The comparison is case-insensitive. 'on', 'yes', 'true', 'y' and 't' map to
    True; 'off', 'no', 'false', 'n' and 'f' map to False.

    Raises:
        NotImplementedError: if ``v`` is none of the accepted spellings.
    """
    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
        return True
    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
        return False
    else:
        raise NotImplementedError

# Command-line interface of the demo. Without --image1 the script runs on a video source.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--image1', '-i1', type=str,
    help='输入图像1的路径。省略则在默认相机上检测。')
parser.add_argument(
    '--image2', '-i2', type=str,
    help='输入图像2的路径。当同时给出image1和image2参数时，程序会尝试在两张图像中找到人脸并运行人脸识别算法。')
parser.add_argument(
    '--video', '-v', type=str,
    help='输入视频的路径。')
parser.add_argument(
    '--scale', '-sc', type=float, default=1.0,
    help='用于调整输入视频帧大小的缩放因子。')
parser.add_argument(
    '--face_detection_model', '-fd', type=str,
    default='face_detection_yunet_2021dec.onnx',
    help='人脸检测模型的路径。从https://github.com/opencv/opencv_zoo/tree/master/models/face_detection_yunet下载模型')
parser.add_argument(
    '--face_recognition_model', '-fr', type=str,
    default='face_recognition_sface_2021dec.onnx',
    help='人脸识别模型的路径。从https://github.com/opencv/opencv_zoo/tree/master/models/face_recognition_sface下载模型')
parser.add_argument(
    '--score_threshold', type=float, default=0.9,
    help='过滤掉分数 < score_threshold 的人脸。')
parser.add_argument(
    '--nms_threshold', type=float, default=0.3,
    help='抑制iou >= nms_threshold 的边界框。')
parser.add_argument(
    '--top_k', type=int, default=5000,
    help='在NMS之前保留top_k个边界框。')
parser.add_argument(
    '--save', '-s', type=str2bool, default=False,
    help='设置为true以保存结果。使用相机时此标志无效。')
args = parser.parse_args()

def visualize(input, faces, fps, thickness=2):
    """Print every detected face and draw it, plus an FPS label, onto ``input``.

    Args:
        input: image that is drawn on in place.
        faces: value returned by ``detector.detect``; ``faces[1]`` holds the
            detections (or None when nothing was found). In each row the last
            element is the score, elements 0-3 are the box (x, y, width,
            height) and elements 4-13 are five landmark (x, y) pairs.
        fps: frames-per-second value overlaid on the image.
        thickness: line thickness for the box and the landmark circles.
    """
    if faces[1] is not None:
        for idx, face in enumerate(faces[1]):
            print('人脸 {}, 左上角坐标: ({:.0f}, {:.0f}), 框宽度: {:.0f}, 框高度 {:.0f}, 分数: {:.2f}'.format(idx, face[0], face[1], face[2], face[3], face[-1]))

            # Everything except the trailing score is a pixel coordinate.
            coords = face[:-1].astype(np.int32)
            cv.rectangle(input, (coords[0], coords[1]), (coords[0]+coords[2], coords[1]+coords[3]), (0, 255, 0), thickness)
            cv.circle(input, (coords[4], coords[5]), 2, (255, 0, 0), thickness)
            cv.circle(input, (coords[6], coords[7]), 2, (0, 0, 255), thickness)
            cv.circle(input, (coords[8], coords[9]), 2, (0, 255, 0), thickness)
            cv.circle(input, (coords[10], coords[11]), 2, (255, 0, 255), thickness)
            cv.circle(input, (coords[12], coords[13]), 2, (0, 255, 255), thickness)
    # The FPS label is drawn whether or not any face was found.
    cv.putText(input, 'FPS: {:.2f}'.format(fps), (1, 16), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

if __name__ == '__main__':
    # Script body. Image mode when --image1 is given (plus recognition when --image2
    # is given as well); otherwise detection runs on frames from a video source.
    detector = cv.FaceDetectorYN.create(
        args.face_detection_model,
        "",
        (320, 320),
        args.score_threshold,
        args.nms_threshold,
        args.top_k
    )

    tm = cv.TickMeter()

    # If the input is an image
    if args.image1 is not None:
        img1 = cv.imread(cv.samples.findFile(args.image1))
        img1Width = int(img1.shape[1]*args.scale)
        img1Height = int(img1.shape[0]*args.scale)

        img1 = cv.resize(img1, (img1Width, img1Height))
        tm.start()

        # Set the input size before inference
        detector.setInputSize((img1Width, img1Height))

        faces1 = detector.detect(img1)

        tm.stop()
        assert faces1[1] is not None, '在 {} 中找不到人脸'.format(args.image1)

        # Draw the results on the input image
        visualize(img1, faces1, tm.getFPS())

        # Save the result if save is true
        if args.save:
            print('结果已保存到 result.jpg\n')
            cv.imwrite('result.jpg', img1)

        # Show the result in a new window
        cv.imshow("image1", img1)

        if args.image2 is not None:
            img2 = cv.imread(cv.samples.findFile(args.image2))

            tm.reset()
            tm.start()
            detector.setInputSize((img2.shape[1], img2.shape[0]))
            faces2 = detector.detect(img2)
            tm.stop()
            assert faces2[1] is not None, '在 {} 中找不到人脸'.format(args.image2)
            visualize(img2, faces2, tm.getFPS())
            cv.imshow("image2", img2)

            recognizer = cv.FaceRecognizerSF.create(
                args.face_recognition_model, "")

            # Align and crop using the first detected face of each image
            face1_align = recognizer.alignCrop(img1, faces1[1][0])
            face2_align = recognizer.alignCrop(img2, faces2[1][0])

            # Extract features
            face1_feature = recognizer.feature(face1_align)
            face2_feature = recognizer.feature(face2_align)

            cosine_similarity_threshold = 0.363
            l2_similarity_threshold = 1.128

            cosine_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_COSINE)
            l2_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_NORM_L2)

            # Cosine: a higher score means more similar
            msg = '不同的人'
            if cosine_score >= cosine_similarity_threshold:
                msg = '同一个人'
            print('它们是{}. 余弦相似度: {}, 阈值: {} (值越高相似度越高，最大1.0)。'.format(msg, cosine_score, cosine_similarity_threshold))

            # L2: a lower score means more similar
            msg = '不同的人'
            if l2_score <= l2_similarity_threshold:
                msg = '同一个人'
            print('它们是{}. L2范数距离: {}, 阈值: {} (值越低相似度越高，最小0.0)。'.format(msg, l2_score, l2_similarity_threshold))
        cv.waitKey(0)
    else:  # No image given: read from --video, or device 0 when it is omitted
        if args.video is not None:
            deviceId = args.video
        else:
            deviceId = 0
        cap = cv.VideoCapture(deviceId)
        frameWidth = int(cap.get(cv.CAP_PROP_FRAME_WIDTH)*args.scale)
        frameHeight = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)*args.scale)
        detector.setInputSize([frameWidth, frameHeight])

        # Loop until a key press is reported
        while cv.waitKey(1) < 0:
            hasFrame, frame = cap.read()
            if not hasFrame:
                print('No frames grabbed!')
                break

            frame = cv.resize(frame, (frameWidth, frameHeight))

            # Inference
            tm.start()
            faces = detector.detect(frame)  # faces is a tuple
            tm.stop()

            # Draw the results on the input image
            visualize(frame, faces, tm.getFPS())

            # Show the result
            cv.imshow('实时', frame)
    cv.destroyAllWindows()

cv::FaceDetectorYN::create
static Ptr< FaceDetectorYN > create(CV_WRAP_FILE_PATH const String &model, CV_WRAP_FILE_PATH const String &config, const Size &input_size, float score_threshold=0.9f, float nms_threshold=0.3f, int top_k=5000, int backend_id=0, int target_id=0)
使用给定参数创建人脸检测器类的实例。

cv::FaceRecognizerSF::create
static Ptr< FaceRecognizerSF > create(CV_WRAP_FILE_PATH const String &model, CV_WRAP_FILE_PATH const String &config, int backend_id=0, int target_id=0)
使用给定参数创建此类的实例。

cv::samples::findFile
cv::String findFile(const cv::String &relative_path, bool required=true, bool silentMode=false)
尝试查找请求的数据文件。

cv::destroyAllWindows
void destroyAllWindows()
销毁所有HighGUI窗口。

cv::rectangle
void rectangle(InputOutputArray img, Point pt1, Point pt2, const Scalar &color, int thickness=1, int lineType=LINE_8, int shift=0)
绘制一个简单、粗或填充的矩形。

cv::putText
void putText(InputOutputArray img, const String &text, Point org, int fontFace, double fontScale, Scalar color, int thickness=1, int lineType=LINE_8, bool bottomLeftOrigin=false)
绘制文本字符串。

cv::circle
void circle(InputOutputArray img, Point center, int radius, const Scalar &color, int thickness=1, int lineType=LINE_8, int shift=0)
绘制一个圆。

解释

C++

// 初始化FaceDetectorYN

Ptr<FaceDetectorYN> detector = FaceDetectorYN::create(fd_modelPath, "", Size(320, 320), scoreThreshold, nmsThreshold, topK);

        // 在推理前设置输入大小
detector->setInputSize(image1.size());
 
        Mat faces1;
detector->detect(image1, faces1);
        if (faces1.rows < 1)
        {
std::cerr << "在 " << input1 << " 中找不到人脸" << std::endl;
            return 1;
        }

Python

detector = cv.FaceDetectorYN.create(
args.face_detection_model,
        "",
        (320, 320),
args.score_threshold,
args.nms_threshold,
args.top_k
    )

detector.setInputSize((img1Width, img1Height))
 
faces1 = detector.detect(img1)

检测输出faces是一个CV_32F类型的二维数组，其行是检测到的人脸实例，列依次是人脸的位置、5个面部关键点和检测分数。每行的格式如下：

x1, y1, w, h, x_re, y_re, x_le, y_le, x_nt, y_nt, x_rcm, y_rcm, x_lcm, y_lcm, score

其中x1, y1, w, h是人脸边界框的左上角坐标、宽度和高度，{x, y}_{re, le, nt, rcm, lcm}分别代表右眼、左眼、鼻尖、右嘴角和左嘴角的坐标，score是该人脸的检测分数（上面的示例代码通过索引14，即每行最后一列读取它）。

人脸识别

在人脸检测之后，运行以下代码从面部图像中提取人脸特征。

C++

// 初始化FaceRecognizerSF

Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(fr_modelPath, "");

            // 通过检测到的人脸中的第一个人脸进行对齐和裁剪。
            Mat aligned_face1, aligned_face2;
faceRecognizer->alignCrop(image1, faces1.row(0), aligned_face1);
faceRecognizer->alignCrop(image2, faces2.row(0), aligned_face2);
 
            // 使用给定aligned_face进行特征提取
            Mat feature1, feature2;
faceRecognizer->feature(aligned_face1, feature1);
feature1 = feature1.clone();
faceRecognizer->feature(aligned_face2, feature2);
feature2 = feature2.clone();

Python

recognizer = cv.FaceRecognizerSF.create(

args.face_recognition_model,"")

face1_align = recognizer.alignCrop(img1, faces1[1][0])
face2_align = recognizer.alignCrop(img2, faces2[1][0])
 
            # 提取特征
face1_feature = recognizer.feature(face1_align)
face2_feature = recognizer.feature(face2_align)

获得两张面部图像的人脸特征feature1和feature2后，运行以下代码计算两个人脸之间的身份差异。

C++

double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_COSINE);

double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_NORM_L2);

Python

cosine_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_COSINE)

l2_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_NORM_L2)

例如，如果余弦相似度大于或等于0.363，或者L2范数距离小于或等于1.128，则认为两张人脸属于同一身份。

参考

https://github.com/ShiqiYu/libfacedetection
https://github.com/ShiqiYu/libfacedetection.train
https://github.com/zhongyy/SFace

致谢

感谢虞士琦教授和封元涛训练并提供了人脸检测模型。

感谢邓伟洪教授，钟永逸博士生和王程睿硕士生训练并提供了人脸识别模型。

由 Doxygen 1.12.0 为 OpenCV 生成于 Thu Jul 3 2025 12:14:35

目录

简介

模型

代码

解释

人脸识别

参考

致谢