介绍

在本教程中，我们将比较 AKAZE 和 ORB 两种局部特征：分别使用它们查找视频帧之间的匹配项，并跟踪物体的运动。

算法如下：

在第一帧上检测并描述关键点，手动设置物体边界
对于每一帧
1. 检测并描述关键点
2. 使用暴力匹配器进行匹配
3. 使用 RANSAC 估计单应性变换
4. 从所有匹配项中过滤内点
5. 将单应性变换应用于边界框以找到物体
6. 绘制边界框和内点，计算内点比率作为评估指标

数据

要进行跟踪，我们需要一个视频和第一帧上的物体位置。

您可以从这里下载我们的示例视频和数据。

要运行代码，您必须指定输入（摄像头 ID 或视频文件）。然后，使用鼠标选择一个边界框，并按下任意键开始跟踪。

./planar_tracking blais.mp4

源代码

#include <opencv2/features2d.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/calib3d.hpp>
#include <opencv2/highgui.hpp> //for imshow
#include <vector>
#include <iostream>
#include <iomanip>
 
#include "stats.h" // Stats 结构体定义
#include "utils.h" // 绘制和打印函数
 
using namespace std;
using namespace cv;
 
// Tuning parameters shared by both trackers.
// The double-typed values use double literals so that they hold exactly the
// value written here rather than a float rounded up to double.
const double akaze_thresh = 3e-4; // AKAZE detection threshold, set to locate about 1000 keypoints
const double ransac_thresh = 2.5; // RANSAC inlier threshold
const double nn_match_ratio = 0.8; // Nearest-neighbour matching ratio
const int bb_min_inliers = 100; // Minimal number of inliers to draw the bounding box
const int stats_update_period = 10; // On-screen statistics are updated every 10 frames
 
namespace example {
/**
 * @brief Tracks a planar object by matching features of each new frame
 *        against the features found on a reference (first) frame.
 *
 * The detector/descriptor and the matcher are supplied by the caller, so the
 * same class is used for both the AKAZE and the ORB pipelines.
 */
class Tracker
{
public:
    /**
     * @param _detector Feature detector/descriptor extractor used on every frame.
     * @param _matcher  Descriptor matcher used to match against the first frame.
     */
    Tracker(Ptr<Feature2D> _detector, Ptr<DescriptorMatcher> _matcher) :
        detector(_detector),
        matcher(_matcher)
    {}

    /// Stores the reference frame, its keypoints/descriptors and the object outline.
    void setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats);
    /// Matches @p frame against the reference frame and returns the visualisation image.
    Mat process(const Mat frame, Stats& stats);
    /// Returns the detector this tracker was constructed with.
    Ptr<Feature2D> getDetector() {
        return detector;
    }
protected:
    Ptr<Feature2D> detector;           // feature detector + descriptor extractor
    Ptr<DescriptorMatcher> matcher;    // matcher used for the 2-NN search
    Mat first_frame, first_desc;       // annotated reference frame and its descriptors
    vector<KeyPoint> first_kp;         // keypoints found on the reference frame
    vector<Point2f> object_bb;         // object outline given for the reference frame
};
 
void Tracker::setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats)
{
    cv::Point *ptMask = new cv::Point[bb.size()];
    const Point* ptContain = { &ptMask[0] };
    int iSize = static_cast<int>(bb.size());
    for (size_t i=0; i<bb.size(); i++) {
ptMask[i].x = static_cast<int>(bb[i].x);
ptMask[i].y = static_cast<int>(bb[i].y);
    }
first_frame = frame.clone();
    cv::Mat matMask = cv::Mat::zeros(frame.size(), CV_8UC1);
    cv::fillPoly(matMask, &ptContain, &iSize, 1, cv::Scalar::all(255));
detector->detectAndCompute(first_frame, matMask, first_kp, first_desc);
stats.keypoints = (int)first_kp.size();
drawBoundingBox(first_frame, bb);
    putText(first_frame, title, Point(0, 60), FONT_HERSHEY_PLAIN, 5, Scalar::all(0), 4);
object_bb = bb;
    delete[] ptMask;
}
 
/**
 * @brief Matches @p frame against the reference frame and visualises the result.
 *
 * Steps: detect/describe keypoints, 2-NN match with ratio test, estimate a
 * homography with RANSAC, keep the inliers, and draw the projected object
 * outline when there are at least bb_min_inliers inliers.
 *
 * @param frame Current video frame.
 * @param stats Receives keypoints, matches, inliers, inlier ratio and fps for this frame.
 * @return Image combining the reference frame and the current frame. The
 *         inlier matches are drawn on it when a homography was found;
 *         otherwise it is a plain side-by-side concatenation.
 */
Mat Tracker::process(const Mat frame, Stats& stats)
{
    TickMeter tm;
    vector<KeyPoint> kp;
    Mat desc;

    tm.start();
    detector->detectAndCompute(frame, noArray(), kp, desc);
    stats.keypoints = static_cast<int>(kp.size());

    vector< vector<DMatch> > matches;
    vector<KeyPoint> matched1, matched2;
    matcher->knnMatch(first_desc, desc, matches, 2);
    for(size_t i = 0; i < matches.size(); i++) {
        // knnMatch can return fewer than 2 neighbours for a query (e.g. when
        // the frame has fewer than 2 descriptors); the ratio test needs both.
        if(matches[i].size() < 2) {
            continue;
        }
        if(matches[i][0].distance < nn_match_ratio * matches[i][1].distance) {
            matched1.push_back(first_kp[matches[i][0].queryIdx]);
            matched2.push_back(      kp[matches[i][0].trainIdx]);
        }
    }
    stats.matches = static_cast<int>(matched1.size());

    Mat inlier_mask, homography;
    vector<KeyPoint> inliers1, inliers2;
    vector<DMatch> inlier_matches;
    // A homography needs at least 4 point correspondences.
    if(matched1.size() >= 4) {
        homography = findHomography(Points(matched1), Points(matched2),
                                    RANSAC, ransac_thresh, inlier_mask);
    }
    tm.stop();
    stats.fps = 1. / tm.getTimeSec();

    if(matched1.size() < 4 || homography.empty()) {
        // No transform could be estimated: show both frames without matches.
        Mat res;
        hconcat(first_frame, frame, res);
        stats.inliers = 0;
        stats.ratio = 0;
        return res;
    }
    for(size_t i = 0; i < matched1.size(); i++) {
        if(inlier_mask.at<uchar>(static_cast<int>(i))) {
            // Inliers are stored pairwise, so both indices of the DMatch are
            // the position in inliers1/inliers2.
            int new_i = static_cast<int>(inliers1.size());
            inliers1.push_back(matched1[i]);
            inliers2.push_back(matched2[i]);
            inlier_matches.push_back(DMatch(new_i, new_i, 0));
        }
    }
    stats.inliers = static_cast<int>(inliers1.size());
    stats.ratio = stats.inliers * 1.0 / stats.matches;

    Mat frame_with_bb = frame.clone();
    if(stats.inliers >= bb_min_inliers) {
        // The projected outline is only needed when it is actually drawn.
        vector<Point2f> new_bb;
        perspectiveTransform(object_bb, new_bb, homography);
        drawBoundingBox(frame_with_bb, new_bb);
    }
    Mat res;
    drawMatches(first_frame, inliers1, frame_with_bb, inliers2,
                inlier_matches, res,
                Scalar(255, 0, 0), Scalar(255, 0, 0));
    return res;
}
}
 
/**
 * @brief Runs the AKAZE and ORB trackers side by side on a camera or video stream.
 *
 * The stream is previewed until a key is pressed, the user then selects the
 * object with the mouse, and every following frame is processed by both
 * trackers. Averaged statistics are printed when the stream ends or ESC is pressed.
 *
 * @return 0 on success, 1 if the input cannot be opened, the stream ends
 *         before a selection is made, or no bounding box is selected.
 */
int main(int argc, char **argv)
{
    CommandLineParser parser(argc, argv, "{@input_path |0|input path can be a camera id, like 0,1,2 or a video filename}");
    parser.printMessage();
    string input_path = parser.get<string>(0);
    string video_name = input_path;

    VideoCapture video_in;

    // A single digit is a camera index; anything else is treated as a file name.
    // The cast keeps isdigit() well defined for chars with a negative value.
    if ( input_path.size() == 1 && isdigit(static_cast<unsigned char>(input_path[0])) )
    {
        int camera_no = input_path[0] - '0';
        video_in.open( camera_no );
    }
    else {
        video_in.open(video_name);
    }

    if(!video_in.isOpened()) {
        cerr << "Couldn't open " << video_name << endl;
        return 1;
    }

    Stats stats, akaze_stats, orb_stats;
    Ptr<AKAZE> akaze = AKAZE::create();
    akaze->setThreshold(akaze_thresh);
    Ptr<ORB> orb = ORB::create();
    Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("BruteForce-Hamming");
    example::Tracker akaze_tracker(akaze, matcher);
    example::Tracker orb_tracker(orb, matcher);

    Mat frame;
    namedWindow(video_name, WINDOW_NORMAL);
    cout << "\nPress any key to stop the video and select a bounding box" << endl;

    // Preview the stream until a key is pressed. At least one frame is always
    // read, and an empty frame (end of stream / read failure) aborts instead
    // of being handed to resizeWindow/imshow.
    do
    {
        video_in >> frame;
        if(frame.empty()) {
            cerr << "No frame available from " << video_name << " for selecting a bounding box" << endl;
            return 1;
        }
        cv::resizeWindow(video_name, frame.size());
        imshow(video_name, frame);
    } while ( waitKey(1) < 1 );

    vector<Point2f> bb;
    cv::Rect uBox = cv::selectROI(video_name, frame);
    // selectROI yields an empty rectangle when the selection is cancelled.
    if(uBox.width <= 0 || uBox.height <= 0) {
        cerr << "No bounding box selected" << endl;
        return 1;
    }
    // Corners in order: top-left, top-right, bottom-right, bottom-left.
    bb.push_back(cv::Point2f(static_cast<float>(uBox.x), static_cast<float>(uBox.y)));
    bb.push_back(cv::Point2f(static_cast<float>(uBox.x+uBox.width), static_cast<float>(uBox.y)));
    bb.push_back(cv::Point2f(static_cast<float>(uBox.x+uBox.width), static_cast<float>(uBox.y+uBox.height)));
    bb.push_back(cv::Point2f(static_cast<float>(uBox.x), static_cast<float>(uBox.y+uBox.height)));

    akaze_tracker.setFirstFrame(frame, bb, "AKAZE", stats);
    orb_tracker.setFirstFrame(frame, bb, "ORB", stats);

    Stats akaze_draw_stats, orb_draw_stats;
    Mat akaze_res, orb_res, res_frame;
    int processed_frames = 0; // frames actually run through both trackers
    for(;;) {
        video_in >> frame;
        // stop the program if there are no more images
        if(frame.empty()) break;

        processed_frames++;
        bool update_stats = (processed_frames % stats_update_period == 0);

        akaze_res = akaze_tracker.process(frame, stats);
        akaze_stats += stats;
        if(update_stats) {
            akaze_draw_stats = stats;
        }

        // Give ORB the same keypoint budget AKAZE just used on this frame.
        orb->setMaxFeatures(stats.keypoints);
        orb_res = orb_tracker.process(frame, stats);
        orb_stats += stats;
        if(update_stats) {
            orb_draw_stats = stats;
        }

        drawStatistics(akaze_res, akaze_draw_stats);
        drawStatistics(orb_res, orb_draw_stats);
        vconcat(akaze_res, orb_res, res_frame);
        cv::imshow(video_name, res_frame);
        if(waitKey(1)==27) break; // quit on ESC
    }
    // Average over the frames that were really processed; skip the division
    // entirely when there were none.
    if(processed_frames > 0) {
        akaze_stats /= processed_frames;
        orb_stats /= processed_frames;
    }
    printStatistics("AKAZE", akaze_stats);
    printStatistics("ORB", orb_stats);
    return 0;
}

解释

Tracker 类

此类使用给定的特征检测器和描述符匹配器实现上面描述的算法。

设置第一帧
void Tracker::setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats)

{

first_frame = frame.clone();

detector->detectAndCompute(first_frame, noArray(), first_kp, first_desc);

stats.keypoints = (int)first_kp.size();

drawBoundingBox(first_frame, bb);

putText(first_frame, title, Point(0, 60), FONT_HERSHEY_PLAIN, 5, Scalar::all(0), 4);

object_bb = bb;

}

我们计算并存储第一帧的关键点和描述符，并为输出准备它。

我们需要保存检测到的关键点数，以确保两个检测器都定位了大致相同数量的关键点。
处理帧
1. 定位关键点并计算描述符
  detector->detectAndCompute(frame, noArray(), kp, desc);
  
  为了找到帧之间的匹配，我们首先必须定位关键点。
  
  在本教程中，检测器被设置为在每一帧上找到大约 1000 个关键点。
2. 使用 2-nn 匹配器来查找对应关系
  matcher->knnMatch(first_desc, desc, matches, 2);
  
  for(unsigned i = 0; i < matches.size(); i++) {
  
  if(matches[i][0].distance < nn_match_ratio * matches[i][1].distance) {
  
  matched1.push_back(first_kp[matches[i][0].queryIdx]);
  
  matched2.push_back( kp[matches[i][0].trainIdx]);
  
  }
  
  }
  
  如果最近匹配的距离小于第二近匹配距离的 nn_match_ratio 倍（即明显更近），则认为该匹配是正确的（匹配没有歧义）。
3. 使用 RANSAC 估计单应性变换
  homography = findHomography(Points(matched1), Points(matched2),
  
  RANSAC, ransac_thresh, inlier_mask);
  
  如果至少有 4 个匹配项，我们可以使用随机抽样一致性来估计图像变换。
4. 保存内点
  for(unsigned i = 0; i < matched1.size(); i++) {
  
  if(inlier_mask.at<uchar>(i)) {
  
  int new_i = static_cast<int>(inliers1.size());
  
  inliers1.push_back(matched1[i]);
  
  inliers2.push_back(matched2[i]);
  
  inlier_matches.push_back(DMatch(new_i, new_i, 0));
  
  }
  
  }
  
  由于 findHomography 计算了内点，我们只需要保存选择的点和匹配项。
5. 投影目标边界框
  perspectiveTransform(object_bb, new_bb, homography);
  
  如果有合理数量的内点，我们可以使用估计的变换来定位目标。

结果

您可以在 youtube 上观看最终的视频。

AKAZE 统计数据

匹配项 626
内点 410
内点比率 0.58
关键点 1117

ORB 统计数据

匹配项 504
内点 319
内点比率 0.56
关键点 1112


原始作者	Fedor Morozov
兼容性	OpenCV >= 3.0

目录

介绍

数据

源代码

解释

Tracker 类

结果