Middlebury立体匹配源码总结
优化方法 | 图像可否预处理 | 代价计算可否采用BT方式 | 可选代价计算方法 | 可否代价聚合 | 可否MinFilter优化原始代价 |
WTA-Box | 可以 | 可以 | AD/SD | 可以,聚合尺寸可变,迭代次数1次 | 可以 |
WTA-Binomial | 可以 | 可以 | AD/SD | 可以,聚合尺寸固定,迭代次数可变 | 不可以 |
WTA-Diffusion | 可以 | 可以 | AD/SD | 可以,聚合尺寸固定,迭代次数可变 | 不可以 |
WTA-Membrane | 可以 | 可以 | AD/SD | 可以,聚合尺寸固定,迭代次数可变 | 不可以 |
WTA-Bayesian | 可以 | 可以 | AD/SD | 可以,聚合尺寸固定,迭代次数可变 | 不可以 |
WTA-LASW | 可以 | 可以 | AD/SD | 可以,聚合尺寸可变,迭代次数1次 | 不可以 |
SO | 可以 | 可以 | AD/SD | 不可以 | 不可以 |
DP | 可以 | 可以 | AD/SD | 不可以 | 不可以 |
GC | 可以 | 可以 | AD/SD | 不可以 | 不可以 |
SA | 可以 | 可以 | AD/SD | 不可以 | 不可以 |
BPAccel | 可以 | 可以 | AD/SD | 不可以 | 不可以 |
BPSync | 可以 | 可以 | AD/SD | 不可以 | 不可以 |
1. 主线函数
1.0 ComputeCorrespondence
// Top-level driver: sets up the working images and the cost volume, runs
// PreProcess -> RawCosts -> Aggregate -> Optimize -> Refine, and writes the scaled result
// into the reference frame's depth_image. total_time receives the elapsed CPU time of the
// five pipeline stages in seconds.
void ComputeCorrespondence()
{
CShape sh = m_frame[frame_ref].input_image.Shape();
//1. Compute m_frame_xxx, m_disp_xxx, disp_step, disp_n, m_match_outside
//Only the disp_step==1 case is considered, so the simplifications below are possible
//(the expression each constant replaces is kept in the trailing comment).
//Author's note: the later parts of these notes drop every m_frame_xxx and m_disp_xxx except m_disp_n.
m_frame_diff = 1;// frame_match - frame_ref;
m_frame_diff_sign = 1;// (m_frame_diff > 0) ? 1 : -1;
m_disp_num = 1;// disp_step < 1.0f ? 1 : ROUND(disp_step);
m_disp_den = 1;// disp_step < 1.0f ? ROUND(1.0 / disp_step) : 1;
m_disp_step_inv = 1;// m_disp_den / (float)m_disp_num;
m_disp_step = disp_step;// m_disp_num / (float)m_disp_den;
m_disp_n = disp_n = disp_max-disp_min + 1;// int(m_disp_step_inv * (disp_max - disp_min)) + 1;
//disp_step = m_disp_step;
//disp_n = m_disp_n;
// Special value for border matches
// worst_match is the largest possible per-pixel AD/SD cost summed over all bands
int worst_match = sh.nBands * ((match_fn == eSD) ? 255 * 255 : 255);
int cutoff = (match_fn == eSD) ? match_max * match_max : abs(match_max);
m_match_outside = __min(worst_match, cutoff); // trim to cutoff
//2. Set up the reference and matching images as copies of the input images
m_reference.ReAllocate(sh);
CopyPixels(m_frame[frame_ref].input_image, m_reference);
m_matching.ReAllocate(sh);
CopyPixels(m_frame[frame_match].input_image, m_matching);
//3. Set up the ground-truth disparity image (single band)
sh.nBands = 1;
m_true_disparity.ReAllocate(sh); // ground truth
ScaleAndOffset(m_frame[frame_ref].truth_image, m_true_disparity, 1.0f / disp_scale, disp_min);
//4. Create the floating-point disparity image and clear it
sh.nBands = 1;
m_float_disparity.ReAllocate(sh);
m_float_disparity.ClearPixels();
//5. Create the integer disparity image
sh.nBands = 1;
m_disparity.ReAllocate(sh); // winning disparities
//6. Create the cost volume
sh.nBands = m_disp_n;// number of disparity levels
m_cost.ReAllocate(sh); // raw matching costs (# bands = # disparities)
//if (evaluate_only){omitted for now}
//7. Run the algorithm and time it
clock_t time0 = clock();
PreProcess(); // see StcPreProcess.cpp
RawCosts(); // see StcRawCosts.cpp
Aggregate(); // see StcAggregate.cpp
Optimize(); // see StcOptimize.cpp
Refine(); // see StcRefine.cpp
clock_t time1 = clock(); // record end time
total_time = (float)(time1 - time0) / (float)CLOCKS_PER_SEC;
//8. Create and fill the output depth image
sh.nBands = 1;
m_frame[frame_ref].depth_image.ReAllocate(sh);
m_frame[frame_ref].depth_image.ClearPixels(); // set to 0 if we just reallocated
// NOTE(review): the +0.5 presumably rounds to nearest when the result is stored as integers - confirm
ScaleAndOffset(m_float_disparity, m_frame[frame_ref].depth_image, disp_scale, -disp_min * disp_scale + 0.5);
//9. Overwrite m_reference with the unmodified input image again
CopyPixels(m_frame[frame_ref].input_image, m_reference);
}
1.1 PreProcess
void PreProcess()
2 {
3 for (int iter = 0; iter < preproc_blur_iter; iter++)
4 {
5 ConvolveSeparable(m_reference, m_reference, ConvolveKernel_121, ConvolveKernel_14641, 1.0f, 0.0f, 1, 1);
6 ConvolveSeparable(m_matching, m_matching, ConvolveKernel_121, ConvolveKernel_14641, 1.0f, 0.0f, 1, 1);
7 }
8 //Currently, we only support iterated binomial blur, to clean up the images a little.
9 //This should help sub-pixel fitting work better, by making image shifts closer to a Taylor series expansion,
10 //but will result in worse performance near discontinuity regions and in finely textured regions.
11 //Other potential pre-processing operations (currently not implemented),might include:
12 //(1)bias and gain normalization
13 //(2)histogram equalization (global or local)
14 //(3)rank statistics pre-processing
15 }
1.2 RawCosts
// Fills the cost volume m_cost with per-pixel matching costs.
// Band d of m_cost holds the cost of matching reference pixel (j, i) against matching-image
// pixel (j - (disp_min + d), i). The cost is the absolute difference (AD) or squared
// difference (SD, when match_fn == eSD) summed over all image bands, truncated to `cutoff`.
// Pixels whose match falls outside the matching image receive m_match_outside.
//
// Fix over the original: the match column is now checked against BOTH image borders.
// The original only tested (j + disp) < 0, so a negative disp_min (disp > 0) read past the
// end of the matching row.
void RawCosts()
{
    CShape sh = m_reference.Shape();
    int cols = sh.width;
    int rows = sh.height;
    int cn = sh.nBands;
    fprintf(stderr, match_fn == eAD ? "\nmatch_fn=AD, match_max=%d\n" : (match_fn == eSD ? "\nmatch_fn=SD, match_max=%d\n" : "\nmatch_fn=unknown, match_max=%d\n"), match_max);
    int cutoff = (match_fn == eSD) ? match_max * match_max : abs(match_max);
    for (int d = 0; d < disp_n; d++)
    {
        // Column offset from a reference pixel to its candidate match; one cost band per disparity.
        int disp = -(disp_min + d);
        for (int i = 0; i < rows; i++)
        {
            uchar *ref = &m_reference.Pixel(0, i, 0);
            uchar *match = &m_matching.Pixel(0, i, 0);
            float *cost = &m_cost.Pixel(0, i, d);
            // jj walks band d of consecutive pixels: m_cost has disp_n bands per pixel.
            for (int j = 0, jj = 0; j < cols; j++, jj += disp_n)
            {
                // 1. The match lies outside the image: assign the border cost.
                int jm = j + disp;
                if (jm < 0 || jm >= cols)
                {
                    cost[jj] = m_match_outside;
                    continue;
                }
                // 2. Otherwise accumulate the AD or SD cost over all bands.
                int diff_sum = 0;
                uchar *pixel0 = &ref[j*cn];
                uchar *pixel1 = &match[jm*cn];
                for (int k = 0; k < cn; k++)
                {
                    int diff1 = (int)pixel1[k] - (int)pixel0[k];
                    int diff2 = (match_fn == eSD) ? diff1 * diff1 : abs(diff1);
                    diff_sum = diff_sum + diff2;
                }
                cost[jj] = __min(diff_sum, cutoff);
            }
        }
    }
}
1.2.1 PadCosts
// Resets to m_match_outside every cell of the cost volume whose candidate match lies outside
// the matching image. Aggregate() calls this after filtering, because filtering overwrites
// those cells.
//
// Fix over the original: cells whose match column is >= cols are padded too. The original
// only tested (j + disp) < 0, which misses them when disp_min is negative.
void PadCosts()
{ // fill the outside parts of the DSI
    CShape sh = m_cost.Shape();
    int cols = sh.width;
    int rows = sh.height;
    for (int d = 0; d < m_disp_n; d++)
    {
        int disp = -(disp_min + d);
        for (int i = 0; i < rows; i++)
        {
            float* cost = &m_cost.Pixel(0, i, d);
            // jj walks band d of consecutive pixels: m_cost has disp_n bands per pixel.
            for (int j = 0, jj = 0; j < cols; j++, jj += disp_n)
            {
                int jm = j + disp;
                if (jm < 0 || jm >= cols)
                    cost[jj] = m_match_outside;
            }
        }
    }
}
1.3 Aggregate
// Aggregates the raw matching costs in m_cost in place.
// The unfiltered costs are first saved to m_cost0. Box filtering is applied aggr_iter times;
// the adaptive-support-weight path (LASW) is called once and receives aggr_iter as its own
// iteration count. An optional min-filter and PadCosts() finish the stage.
// Throws CError for any other aggr_fn (only reached when aggr_iter > 0).
void Aggregate()
{
    // Keep an untouched copy of the raw matching costs.
    CopyPixels(m_cost, m_cost0);

    // 1. Run the aggregation passes.
    int pass = 0;
    while (pass < aggr_iter)
    {
        // The summary line is printed on the first pass only.
        const bool report = (verbose == eVerboseSummary && pass < 1);
        if (aggr_fn == eBox)
        {
            if (report) fprintf(stderr, ", box=%d", aggr_window_size);
            // cv::boxFilter() could be used instead (author's note).
            BoxFilter(m_cost, m_cost, aggr_window_size, aggr_window_size, true);
            ++pass;
        }
        else if (aggr_fn == eASWeight)
        {
            if (report) fprintf(stderr, ", AdaptiveWeight (box=%d gamma_p=%g gamma_s=%g color_space=%d )", aggr_window_size, aggr_gamma_proximity, aggr_gamma_similarity, aggr_color_space);
            LASW(m_cost,                  // initial matching cost
                 m_cost,                  // aggregated matching cost
                 m_reference,             // reference image
                 m_matching,              // target image
                 aggr_window_size,        // window size - x
                 aggr_window_size,        // window size - y
                 aggr_gamma_proximity,    // gamma_p
                 aggr_gamma_similarity,   // gamma_c
                 aggr_color_space,        // color space
                 aggr_iter);              // iteration number (aggregation)
            // LASW loops internally, so the outer loop must stop after one call.
            pass = aggr_iter;
        }
        else
        {
            throw CError("CStereoMatcher::Aggregate(): unknown aggregation function");
        }
    }

    // 2. Simulate the effect of shiftable windows.
    if (aggr_minfilter > 1)
    {
        MinFilter(m_cost, m_cost, aggr_minfilter, aggr_minfilter);
    }

    // 3. Pad the outside costs back up to bad values.
    PadCosts();
}
1.3.1 MinFilter
{
//略
}
1.4 Optimize
// Selects a disparity for every pixel with the optimizer chosen by opt_fn.
// For the global methods (DP, SO, GC, SA, BPAccel, BPSync) the smoothness costs are built first
// via SmoothCostAll(). Graph cut and simulated annealing start from a WTA labelling.
// Throws CError for an unknown opt_fn.
// After optimization, if final_energy is negative, it is set to the sum of the data and
// smoothness energies returned by ComputeEnergy.
void Optimize()
{
// Select the best matches using local or global optimization
// set up the smoothness cost function for the methods that need it
if (opt_fn == eDynamicProg || opt_fn == eScanlineOpt || opt_fn == eGraphCut || opt_fn == eSimulAnnl || opt_fn == eBPAccel || opt_fn == eBPSync)
{
if (verbose == eVerboseSummary) fprintf(stderr, ", smooth=%g, grad_thres=%g, penalty=%g", opt_smoothness, opt_grad_thresh, opt_grad_penalty);
SmoothCostAll();
}
switch (opt_fn)
{
case eNoOpt: // no optimization (pass through input depth maps)
if (verbose == eVerboseSummary) fprintf(stderr, ", NO OPT");
break;
case eWTA: // winner-take-all (local minimum)
if (verbose == eVerboseSummary) fprintf(stderr, ", WTA");
OptWTA();
break;
case eGraphCut: // graph-cut global minimization
if (verbose == eVerboseSummary) fprintf(stderr, ", GC");
OptWTA(); // get an initial labelling (or just set to 0???)
OptGraphCut(); // run the optimization
break;
case eDynamicProg: // scanline dynamic programming
if (verbose == eVerboseSummary) fprintf(stderr, ", DP (occl_cost=%d)", opt_occlusion_cost);
OptDP(); // see StcOptDP.cpp
break;
case eScanlineOpt: // scanline optimization
if (verbose == eVerboseSummary) fprintf(stderr, ", SO");
OptSO(); // see StcOptSO.cpp
break;
case eSimulAnnl: // simulated annealing
if (verbose == eVerboseSummary) fprintf(stderr, ", SA");
OptWTA(); // initialize to reasonable starting point (for low-T gradient descent)
OptSimulAnnl(); // see StcSimulAnn.cpp
break;
case eBPAccel:
OptBP(); // run the optimization
break;
case eBPSync:
OptBPSync(); // run the optimization
break;
default:
throw CError("CStereoMatcher::Optimize(): unknown optimization function");
}
// A negative final_energy means the optimizer did not report an energy; compute it here.
if (final_energy < 0.0f)
{
// Build the smoothness image if it does not yet match the cost volume's width/height
// (e.g. for WTA / no-opt, which skip SmoothCostAll above).
if (!m_cost.Shape().SameIgnoringNBands(m_smooth.Shape()))
SmoothCostAll();
float finalEd, finalEn;
// NOTE(review): the ComputeEnergy shown in section 1.4.3 of these notes takes five
// arguments; this call uses a two-argument form - confirm which overload is intended.
CStereoMatcher::ComputeEnergy(finalEd, finalEn);
final_energy = finalEd + finalEn;
}
}
1.4.1 SmoothCostOne
// Smoothness cost between two neighbouring pixels.
// The squared differences of all cn bands are summed, divided by cn (cn == 1) or cn - 1
// (cn > 1), and the square root is compared with opt_grad_thresh:
//   below the threshold -> opt_smoothness * opt_grad_penalty
//   otherwise           -> opt_smoothness
// Author's note: the divisor normalises to one channel; ppm images carry 4 bands.
float SmoothCostOne(uchar *pixel1, uchar *pixel2, int cn)
{
    float squared_sum = 0.0;
    for (int band = 0; band < cn; ++band)
    {
        const float delta = int(pixel1[band]) - int(pixel2[band]);
        squared_sum += delta*delta;
    }

    const int divisor = (cn > 1) ? (cn - 1) : cn;
    float gradient = squared_sum / divisor;
    gradient = sqrt(gradient);

    if (gradient < opt_grad_thresh)
    {
        return opt_smoothness*opt_grad_penalty;
    }
    return opt_smoothness;
}
1.4.2 SmoothCostAll
void SmoothCostAll()
{ //calculate smoothness costs for DP and GC
CShape sh = m_cost.m_shape;
sh.nBands = 2;//分为垂直和水平平滑代价
m_smooth.ReAllocate(sh, false);
int rows = sh.height;
int cols = sh.width;
int cn = m_reference.m_shape.nBands;
char *im_data0_cr = m_reference.m_memStart;
char *im_data0_dw = im_data0_cr + m_reference.m_rowSize;
char *smooth_data0 = m_smooth.m_memStart;
for (int i = 0; i < rows; i++, im_data0_cr += m_reference.m_rowSize, im_data0_dw += m_reference.m_rowSize, smooth_data0 += m_smooth.m_rowSize)
{
uchar *im_data1_cr = (uchar*)im_data0_cr;
uchar *im_data1_dw = (uchar*)((i < rows - 1) ? im_data0_dw : im_data0_cr);
float *smooth_data1 = (float*)smooth_data0;
for (int j = 0; j < cols; j++, im_data1_cr += cn, im_data1_dw += cn, smooth_data1 += 2)
{
smooth_data1[0] = (i < rows - 1) ? SmoothCostOne(im_data1_cr, im_data1_dw, cn) : 0;
smooth_data1[1] = (j < cols - 1) ? SmoothCostOne(im_data1_cr, im_data1_cr + cn, cn) : 0;
}
}
}
1.4.3 ComputeEnergy
// Evaluates the energy of a labelling.
//   dataEnergy   = sum over pixels of m_cost at the pixel's label
//   smoothEnergy = sum of m_smooth band 0 wherever a pixel's label differs from the pixel
//                  below it, plus m_smooth band 1 wherever it differs from the pixel to its right
// Labels in m_disparity are used directly as band indices into m_cost.
static void ComputeEnergy(CFloatImage& m_cost, CFloatImage& m_smooth, CIntImage& m_disparity, float& dataEnergy, float& smoothEnergy)
{
    const int width = m_cost.m_shape.width;
    const int height = m_cost.m_shape.height;
    const int cost_bands = m_cost.m_shape.nBands;
    const int smooth_bands = m_smooth.m_shape.nBands;

    float data_total = 0.0f;
    float smooth_total = 0.0f;

    for (int row = 0; row < height; ++row)
    {
        const bool has_row_below = (row < height - 1);
        char *label_bytes = m_disparity.m_memStart + row * m_disparity.m_rowSize;
        int *labels = (int*)label_bytes;
        // On the last row this aliases the current row; it is never read there.
        int *labels_below = (int*)(has_row_below ? label_bytes + m_disparity.m_rowSize : label_bytes);
        float *data = (float*)(m_cost.m_memStart + row * m_cost.m_rowSize);
        float *smooth = (float*)(m_smooth.m_memStart + row * m_smooth.m_rowSize);

        for (int col = 0; col < width; ++col)
        {
            const int label = labels[col];
            data_total = data_total + data[label];

            // Vertical neighbour (pixel below) uses band 0.
            const bool differs_below = has_row_below && label != labels_below[col];
            smooth_total = smooth_total + (differs_below ? smooth[0] : 0);

            // Horizontal neighbour (pixel to the right) uses band 1.
            const bool differs_right = (col < width - 1) && label != labels[col + 1];
            smooth_total = smooth_total + (differs_right ? smooth[1] : 0);

            data += cost_bands;
            smooth += smooth_bands;
        }
    }

    dataEnergy = data_total;
    smoothEnergy = smooth_total;
    //float GC_scale = (1 << 30) / (256 * 256);
    //GC_scale = (1 << 30) / (sum1 + sum2);
}
1.5 Refine
// Converts the integer labels to floating-point disparities and optionally refines them to
// sub-pixel precision by fitting a parabola through the costs at the winning label and its
// two neighbours.
void Refine()
{
    // Skipped when no optimizer ran.
    if (opt_fn != eNoOpt)
    {
        ScaleAndOffset(m_disparity, m_float_disparity, disp_step, disp_min);
    }

    // Sub-pixel refinement needs to be enabled and needs at least three disparity levels.
    const bool refine_requested = !(refine_subpix == 0 || disp_n < 3);
    if (!refine_requested)
    {
        return;
    }

    for (int row = 0; row < m_cost.m_shape.height; row++)
    {
        float *costs = &m_cost.Pixel(0, row, 0);
        int *labels = &m_disparity.Pixel(0, row, 0);
        float *refined = &m_float_disparity.Pixel(0, row, 0);

        for (int col = 0; col < m_cost.m_shape.width; col++, costs += disp_n)
        {
            // Centre of the fit: the winning label, moved one step inwards at either end
            // so that both neighbours exist.
            int centre = labels[col];
            if (centre == 0)
            {
                centre = centre + 1;
            }
            else if (centre == disp_n - 1)
            {
                centre = centre - 1;
            }

            // Parabola y = a*x^2 + b*x + c through (-1, below), (0, mid), (+1, above):
            //   a = (below - 2*mid + above) / 2,  b = (above - below) / 2,  vertex at x = -b / (2a)
            const float below = costs[centre - 1];
            const float mid = costs[centre];
            const float above = costs[centre + 1];
            const float a = 0.5 * (below - 2.0 * mid + above);
            const float b = 0.5 * (above - below);

            // Keep the integer result when the fit is not convex or the vertex would lie
            // more than one step away from the centre.
            const bool rejected = (a <= 0.0 || a < 0.5 * fabs(b));
            if (rejected)
            {
                continue;
            }

            // Solve for the minimum.
            const float x0 = -0.5 * b / a;
            refined[col] = m_disp_step * (centre + x0) + disp_min;
        }
    }
}
2.代价聚合
2.1 BoxFilter
{
//与cv::boxFilter一致
}
2.2 LASW
// Locally adaptive support-weight (LASW) cost aggregation.
//
//   srcCost    raw cost volume (one band per disparity level)
//   dstCost    receives the aggregated cost volume (may be the same image as srcCost)
//   im0, im1   reference and matching images; their Lab colours drive the weights
//   xWidth     support window width; the window radius is (int)(xWidth / 2.0) and the window
//              is always square with side 2*radius + 1
//   yWidth     unused (kept for interface compatibility)
//   proximity  gamma_p of the spatial weight term
//   similarity gamma_c of the colour weight term
//   color_space unused (kept for interface compatibility)
//   diff_iter  number of aggregation passes; with diff_iter <= 0 the raw costs are copied through
//
// Fixes over the original:
//   * per-pixel weight buffers are sized (2*radius + 1)^2, the number of taps calcASW and
//     aggrASW actually address; xWidth*yWidth was too small for even or non-square windows
//     and caused heap overflows;
//   * arrays allocated with new[] are released with delete[] (plain delete was undefined
//     behaviour) and the six outer pointer tables are no longer leaked;
//   * diff_iter <= 0 no longer copies uninitialised memory into dstCost.
void LASW(CFloatImage &srcCost, CFloatImage &dstCost, CByteImage &im0, CByteImage &im1, int xWidth, int yWidth, float proximity, float similarity, int color_space, int diff_iter)
{
    (void)yWidth;
    (void)color_space;

    const int cols = im0.m_shape.width;
    const int rows = im0.m_shape.height;
    const int frm_total = cols * rows;
    const int cost_bands = srcCost.m_shape.nBands;
    const int win_radius = (int)(xWidth / 2.0);
    const int win_side = 2 * win_radius + 1;
    const int win_total = win_side * win_side;

    // 0. Allocate the per-pixel work buffers.
    double **Lab0 = new double *[frm_total];
    double **Lab1 = new double *[frm_total];
    float **rawCostf = new float *[frm_total];
    float **dstCostf = new float *[frm_total];
    float **sw0f = new float *[frm_total];
    float **sw1f = new float *[frm_total];
    for (int i = 0; i < frm_total; i++)
    {
        Lab0[i] = new double[3];
        Lab1[i] = new double[3];
        rawCostf[i] = new float[cost_bands];
        dstCostf[i] = new float[cost_bands];
        sw0f[i] = new float[win_total];
        sw1f[i] = new float[win_total];
    }

    // 1. Convert both images to Lab. Only 4-band images are read as colour (bands 0, 1, 2);
    //    any other band count uses band 0 for R, G and B, exactly as the original did.
    const int g0 = (im0.m_shape.nBands == 4) ? 1 : 0;
    const int b0 = (im0.m_shape.nBands == 4) ? 2 : 0;
    const int g1 = (im1.m_shape.nBands == 4) ? 1 : 0;
    const int b1 = (im1.m_shape.nBands == 4) ? 2 : 0;
    for (int i = 0, index = 0; i < rows; i++)
        for (int j = 0; j < cols; j++, index++)
        {
            double R, G, B;
            R = im0.Pixel(j, i, 0);
            G = im0.Pixel(j, i, g0);
            B = im0.Pixel(j, i, b0);
            RGB2Lab(R, G, B, Lab0[index][0], Lab0[index][1], Lab0[index][2]);
            R = im1.Pixel(j, i, 0);
            G = im1.Pixel(j, i, g1);
            B = im1.Pixel(j, i, b1);
            RGB2Lab(R, G, B, Lab1[index][0], Lab1[index][1], Lab1[index][2]);
        }

    // 2. Fetch the raw costs.
    for (int i = 0, index = 0; i < srcCost.m_shape.height; i++)
        for (int j = 0; j < srcCost.m_shape.width; j++, index++)
            for (int k = 0; k < cost_bands; k++)
                rawCostf[index][k] = (float)srcCost.Pixel(j, i, k);

    // 3. Compute the adaptive support weights of both images.
    calcASW(Lab0, sw0f, proximity, similarity, win_radius, cols, rows);
    calcASW(Lab1, sw1f, proximity, similarity, win_radius, cols, rows);

    // 4. Aggregate. After every pass the result becomes the input of the next one, so
    //    rawCostf always holds the most recent costs (the raw ones if no pass ran).
    for (int u = 0; u < diff_iter; u++)
    {
        aggrASW(sw0f, sw1f, rawCostf, dstCostf, cost_bands, win_radius, cols, rows);
        for (int k = 0; k < frm_total; k++)
            memcpy(rawCostf[k], dstCostf[k], sizeof(float)*cost_bands);
    }

    // 5. Write the result back.
    for (int i = 0, index = 0; i < dstCost.m_shape.height; i++)
        for (int j = 0; j < dstCost.m_shape.width; j++, index++)
            for (int k = 0; k < dstCost.m_shape.nBands; k++)
                ((float*)dstCost.PixelAddress(j, i, 0))[k] = rawCostf[index][k];

    // 6. Release everything that was allocated in step 0.
    for (int i = 0; i < frm_total; i++)
    {
        delete[] Lab0[i];
        delete[] Lab1[i];
        delete[] rawCostf[i];
        delete[] dstCostf[i];
        delete[] sw0f[i];
        delete[] sw1f[i];
    }
    delete[] Lab0;
    delete[] Lab1;
    delete[] rawCostf;
    delete[] dstCostf;
    delete[] sw0f;
    delete[] sw1f;
}
2.2.1 RGB2Lab
// Converts one RGB triple (0..255 per channel) to CIE L*a*b*.
// R, G, B are inputs; L, a, b receive the result.
// The RGB -> XYZ matrix and the reference white (244.66128, 255.0, 277.63227) are hard-coded.
void RGB2Lab(double &R, double &G, double &B, double &L, double &a, double &b)
{
    // Linear RGB -> XYZ.
    const double xyz[3] = {
        0.412453*R + 0.357580*G + 0.189423*B,
        0.212671*R + 0.715160*G + 0.072169*B,
        0.019334*R + 0.119193*G + 0.950227*B
    };
    const double white[3] = { 244.66128, 255.0, 277.63227 };

    // Lab transfer function: cube root above the threshold, linear segment below it.
    double f[3];
    for (int c = 0; c < 3; ++c)
    {
        const double ratio = xyz[c] / white[c];
        if (ratio > 0.008856)
            f[c] = pow(ratio, 0.333333333);
        else
            f[c] = 7.787*ratio + 0.137931034;
    }

    L = 116 * f[1] - 16;
    a = 500 * (f[0] - f[1]);
    b = 200 * (f[1] - f[2]);
}
2.2.2 calcASW
// Computes the adaptive support weights of every pixel of one image.
//
//   Lab        per-pixel Lab colour, Lab[row*cols + col][0..2]
//   SW         output; SW[p][k] is the weight of the k-th window neighbour of pixel p, with
//              neighbours numbered row-major from the top-left corner of the
//              (2*win_radius+1) x (2*win_radius+1) window
//   proximity  divisor of the spatial distance in the exponent
//   similarity divisor of the Lab colour distance in the exponent
//
// weight = exp(-spatial_distance / proximity) * exp(-colour_distance / similarity).
// Neighbours outside the image get weight 0. The weight of q seen from p equals the weight
// of p seen from q, so each pair is computed once and stored in both slots.
void calcASW(double **Lab, float **SW, double proximity, double similarity, int win_radius, int cols, int rows)
{
    const int pixel_count = cols*rows;
    const int side = 2 * win_radius + 1;
    const int taps = side*side;

    // 0. Clear all weights; "still zero" later means "not computed yet".
    for (int p = 0; p < pixel_count; ++p)
        memset(SW[p], 0, sizeof(float)*taps);

    // 1. Fill in the weights, pixel by pixel and tap by tap (both row-major).
    for (int p = 0; p < pixel_count; ++p)
    {
        const int row = p / cols;
        const int col = p % cols;
        float *weights = SW[p];

        for (int k = 0; k < taps; ++k)
        {
            const int y = k / side - win_radius;
            const int x = k % side - win_radius;
            const int nrow = row + y;
            const int ncol = col + x;

            // Neighbour outside the image: weight 0.
            const bool outside = (nrow < 0 || nrow >= rows || ncol < 0 || ncol >= cols);
            if (outside)
            {
                weights[k] = 0;
                continue;
            }

            // Already filled in through symmetry by an earlier pixel.
            if (weights[k] > 0)
                continue;

            const int q = nrow*cols + ncol; // linear index of the neighbour
            const double L1 = Lab[p][0], a1 = Lab[p][1], b1 = Lab[p][2];
            const double L2 = Lab[q][0], a2 = Lab[q][1], b2 = Lab[q][2];

            const double weight_prox = exp(-sqrt((double)(y*y + x*x)) / proximity);
            const double weight_simi = exp(-sqrt((L1 - L2)*(L1 - L2) + (a1 - a2)*(a1 - a2) + (b1 - b2)*(b1 - b2)) / similarity);

            weights[k] = (float)(weight_prox*weight_simi);
            // The mirrored tap of the neighbour points back at p and has the same weight.
            SW[q][taps - 1 - k] = weights[k];
        }
    }
}
2.2.3 aggrASW
// One pass of adaptive-support-weight aggregation.
//
//   SW0, SW1  per-pixel window weights of the reference / matching image (see calcASW)
//   rawCost   input costs,  rawCost[pixel][d]
//   dstCost   output costs, dstCost[pixel][d]
//   cn        number of cost bands
//
// For pixel (i, j) and band d, the weights of the reference pixel are multiplied tap by tap
// with the weights of the matching pixel at column j - d (wrapped once into [0, cols)).
// The output is the weighted mean of the raw costs in the window. Window coordinates that
// leave the image are wrapped once to the opposite side.
void aggrASW(float **SW0, float **SW1, float **rawCost, float **dstCost, int cn, int win_radius, int cols, int rows)
{
    for (int i = 0; i < rows; ++i)
    {
        for (int j = 0; j < cols; ++j)
        {
            const int centre = i*cols + j;
            for (int d = 0; d < cn; ++d)
            {
                // Column of the corresponding pixel in the matching image.
                const int shifted = j - d;
                const int match_col = (shifted < 0) ? shifted + cols
                                    : (shifted >= cols) ? shifted - cols
                                    : shifted;
                const int match = i*cols + match_col;

                double weight_sum = 0;
                double cost_sum = 0;
                int k = 0; // index of the current window tap
                for (int y = -win_radius; y <= win_radius; ++y)
                {
                    const int raw_row = i + y;
                    const int nrow = (raw_row < 0) ? raw_row + rows
                                   : (raw_row >= rows) ? raw_row - rows
                                   : raw_row;
                    for (int x = -win_radius; x <= win_radius; ++x, ++k)
                    {
                        const int raw_col = j + x;
                        const int ncol = (raw_col < 0) ? cols + raw_col
                                       : (raw_col >= cols) ? raw_col - cols
                                       : raw_col;
                        // Product of the two support weights for this tap.
                        const double weight = SW0[centre][k] * SW1[match][k];
                        weight_sum = weight_sum + weight;
                        const int neighbour = nrow*cols + ncol;
                        cost_sum = cost_sum + rawCost[neighbour][d] * weight;
                    }
                }
                dstCost[centre][d] = (float)(cost_sum / weight_sum);
            }
        }
    }
}
3.视差优化
3.1 OptWTA
void CStereoMatcher::OptWTA()
{
CShape sh = m_cost.Shape();
int cols = sh.width;
int rows = sh.height;
for (int i = 0; i < rows; i++)
{
float* cost = &m_cost.Pixel(0, i, 0);
int* disp = &m_disparity.Pixel(0, i, 0);
for (int j = 0; j < cols; j++, cost += disp_n)//m_cost的通道数为disp_n
{
int best_disp = 0;
float best_cost = cost[0];
for (int d = 1; d < disp_n; d++)
if (cost[d] < best_cost)
{
best_cost = cost[d];
best_disp = d;
}
disp[j] = best_disp;
}
}
}
3.2 OptSO
// Scanline optimization: every image row is solved independently by dynamic programming over
// (column, disparity). Moving from column j-1 to column j costs the data cost at column j plus,
// when the disparity changes, the horizontal smoothness cost stored in band 1 of m_smooth.
// The winning disparity index of every pixel is written to m_disparity.
void OptSO()
{ // scanline optimization
int cols = m_cost.m_shape.width;
int rows = m_cost.m_shape.height;
int endcol = cols - 1;
int rowElem = cols*disp_n;
char *datacost_data0 = m_cost.m_memStart;
char *smoothcost_data0 = m_smooth.m_memStart;
char *disparity_data0 = m_disparity.m_memStart;
float *sumcost_data0 = (float*)malloc(rowElem*sizeof(float));//best accumulated cost for every (column, disparity) of the current row
int *position_data0 = (int*)malloc(rowElem*sizeof(int));//for every (column, disparity): the disparity index in the previous column that gave the best cost
// NOTE(review): the malloc results are not checked for NULL.
for (int i = 0; i < rows; i++, datacost_data0 += m_cost.m_rowSize, smoothcost_data0 += m_smooth.m_rowSize, disparity_data0 += m_disparity.m_rowSize)//for every row
{
float *datacost_data1 = (float*)datacost_data0;
float *smoothcost_data1 = (float*)smoothcost_data0;
int *position_data1 = position_data0;
float *sumcost_data1 = sumcost_data0;
//1. Initialise the first column: no predecessor, accumulated cost = data cost
for (int d = 0; d < disp_n; d++)
{
position_data1[d] = -1;
sumcost_data1[d] = datacost_data1[d];
}
datacost_data1 += disp_n; position_data1 += disp_n; sumcost_data1 += disp_n;//move to the second column
//2. Dynamic programming over the remaining columns
// smoothcost_data1 is not advanced before this loop, so at column j it points at the
// smoothness entry of column j-1.
for (int j = 1; j < cols; j++, datacost_data1 += disp_n, position_data1 += disp_n, sumcost_data1 += disp_n, smoothcost_data1 += 2)//for every column
{
for (int d1 = 0; d1 < disp_n; d1++)//for every disparity (band) of the current column
{
sumcost_data1[d1] = COST_MAX; //best accumulated cost of the current column at this disparity
position_data1[d1] = -1; //disparity index in the previous column that produced it
for (int d0 = 0; d0 < disp_n; d0++)//for every disparity (band) of the previous column
{
float tm = datacost_data1[d1]; //raw cost of the current column at this disparity
tm = tm + sumcost_data1[d0 - disp_n];//plus the best accumulated cost of the previous column at d0 (previous column is disp_n floats back)
tm = (d0 != d1) ? (tm + smoothcost_data1[1]) : tm;//plus the smoothness cost when the disparity changes (band 1 holds the horizontal smoothness cost)
if (tm < sumcost_data1[d1])
{
sumcost_data1[d1] = tm;
position_data1[d1] = d0;
}
}
}
}
//3. Find the best result in the last column (step back one column: the loop left the pointers one past it)
position_data1 -= disp_n;
sumcost_data1 -= disp_n;
float best_cost = COST_MAX;
int best_disp = 0;
for (int d = 0; d < disp_n; d++)
if (sumcost_data1[d] < best_cost)
{
best_cost = sumcost_data1[d];
best_disp = d;
}
//4. Backtrack from the last column to the first
int *disparity_data1 = (int*)disparity_data0;
for (int x = endcol; x >= 0; x--, position_data1 -= disp_n)
{
disparity_data1[x] = best_disp;
best_disp = position_data1[best_disp];
}
}
free(sumcost_data0);
free(position_data0);
}
3.3 OptDP
// Scanline dynamic programming with explicit occlusion states (one independent problem per row).
// Every (column, disparity) cell has three states. A cell entered in state 0 is charged the data
// cost, state 1 is charged ocl and state 2 is charged ocr (both equal opt_occlusion_cost).
// Seven transitions (index t) connect a predecessor in state state0[t] to the current cell in
// state state1[t]. After backtracking, pixels not in state 0 are marked with `occ` and then
// filled from a neighbouring non-occluded pixel.
void OptDP()
{ //dynamic programming stereo (Intille and Bobick, no GCPs)
float ocl = opt_occlusion_cost;
float ocr = opt_occlusion_cost;
int occ = -9999; // marker for occluded pixels (use 0 if you want to leave occluded pixels black)
int cols = m_cost.m_shape.width;
int rows = m_cost.m_shape.height;
int state0[7] = { 0, 0, 1, 1, 0, 2, 2 };//state of the predecessor for each transition
int state1[7] = { 0, 1, 1, 0, 2, 2, 0 };//state of the current cell for each transition
int colElem = disp_n * 3;//entries per column = number of disparities * number of states
// Offsets (in table entries) from the current cell to its predecessor cell:
// left = same disparity in the previous column, diag = previous column and previous disparity,
// up = next disparity in the same column.
int left = -colElem, diag = -colElem - 3, up = 3;
int steps[7] = { left, left, diag, diag, up, up, left };//offset to the predecessor cell for each transition
int dleft = -disp_n, ddiag = -disp_n - 1, dup = 1;
// NOTE(review): this local array has the same name as the disp_step variable used in Refine().
int disp_step[7] = { dleft, dleft, ddiag, ddiag, dup, dup, dleft };//change applied to best_disp for each transition during backtracking
int border0[7] = { 0, 0, 1, 1, 0, 0, 0 }; //transitions skipped at disparity 0: there is no diagonal predecessor
int border1[7] = { 0, 0, 0, 0, 1, 1, 0 }; //transitions skipped at the largest disparity: there is no 'up' predecessor
int rowElem = cols * colElem;
char *datacost_data0 = m_cost.m_memStart;
char *smoothcost_data0 = m_smooth.m_memStart;
char *disparity_data0 = m_disparity.m_memStart + (cols - 1) * m_disparity.m_pixSize;//disparities are written starting from the last column
int *position_data0 = (int*)malloc(rowElem*sizeof(int));//for every (column, disparity, state): index t of the transition that gave the best cost
float *sumcost_data0 = (float*)malloc(rowElem*sizeof(float));//for every (column, disparity, state): best accumulated cost
// NOTE(review): the malloc results are not checked for NULL.
int *position_data1_endlcol = position_data0 + (cols - 1)*colElem;
float *sumcost_data1_endcol = sumcost_data0 + (cols - 1)*colElem;
for (int i = 0; i < rows; i++, datacost_data0 += m_cost.m_rowSize, smoothcost_data0 += m_smooth.m_rowSize, disparity_data0 += m_disparity.m_rowSize)
{
float *datacost_data1 = (float*)datacost_data0;
float *smoothcost_data1 = (float*)smoothcost_data0;
int *position_data1 = (int*)position_data0;
float *sumcost_data1 = (float*)sumcost_data0;
//1. Initialise the first column (each column has disp_n disparities and each disparity has 3 states)
{
float *datacost_data2 = datacost_data1;
int *position_data2 = position_data1;
float *sumcost_data2 = sumcost_data1;
for (int d = 0; d < disp_n; d++, datacost_data2++, position_data2 += 3, sumcost_data2 += 3)
{ //force the first pixel to be non-occluded: only state 0 gets a finite cost
position_data2[0] = 0;
position_data2[1] = -1;
position_data2[2] = -1;
sumcost_data2[0] = datacost_data2[0];
sumcost_data2[1] = COST_MAX;
sumcost_data2[2] = COST_MAX;
}
datacost_data1 += disp_n; position_data1 += colElem; sumcost_data1 += colElem;//move to the second column
}
//2. Dynamic programming over the remaining columns
for (int j = 1; j < cols; j++, datacost_data1 += disp_n, smoothcost_data1 += 2, position_data1 += colElem, sumcost_data1 += colElem)//for every column
{
float *datacost_data2 = datacost_data1 + disp_n - 1;//start at the last disparity of the column; disparities are processed from last to first
float *smoothcost_data2 = smoothcost_data1;//the smoothness cost depends on the column only, not on the disparity
int *position_data2 = position_data1 + colElem - 3;//start at the last disparity of the column
float *sumcost_data2 = sumcost_data1 + colElem - 3;//last-to-first order because the 'up' transitions read the next-higher disparity of the same column, which must already be final
for (int d1 = disp_n - 1; d1 >= 0; d1--, datacost_data2--, position_data2 -= 3, sumcost_data2 -= 3) //for every disparity
{
sumcost_data2[0] = COST_MAX;//best accumulated cost of the current cell in state 0
sumcost_data2[1] = COST_MAX;//best accumulated cost of the current cell in state 1
sumcost_data2[2] = COST_MAX;//best accumulated cost of the current cell in state 2
position_data2[0] = -1; //transition that produced the best cost in state 0
position_data2[1] = -1; //transition that produced the best cost in state 1
position_data2[2] = -1; //transition that produced the best cost in state 2
for (int t = 0; t < 7; t++)
{
if ((d1 == 0 && border0[t]) || (d1 == disp_n - 1 && border1[t])) continue;//the predecessor cell does not exist
int pre_state = state0[t];
int cur_state = state1[t];
int pre_pos = steps[t] + pre_state;
float tm = (cur_state == 1 ? ocl : (cur_state == 2 ? ocr : datacost_data2[0]));//cost of entering the current cell: occlusion cost or raw data cost
tm = tm + sumcost_data2[pre_pos];//plus the best accumulated cost of the predecessor cell in its state
tm = (t == 3 || t == 6) ? (tm + smoothcost_data2[1]) : tm;//smoothness cost when going from an occluded state back to a match (band 1 holds the horizontal smoothness cost)
if (tm < sumcost_data2[cur_state])
{
sumcost_data2[cur_state] = tm;
position_data2[cur_state] = t;
}
}
}
}
//3. Find the best result in the last column (uses the precomputed last-column pointers)
float best_cost = COST_MAX;
int best_disp = 0;
int best_state = 0;//only the state where the pixel is visible in both images is considered
{
float *sumcost_data2 = sumcost_data1_endcol;//named data2 because it walks over disparities
for (int d = 0; d < disp_n; d++, sumcost_data2 += 3)
if (sumcost_data2[best_state] < best_cost)
{
best_cost = sumcost_data2[best_state];
best_disp = d;
}
}
//4. Backtrack from the last column to the first (uses the precomputed last-column pointers)
position_data1 = position_data1_endlcol + best_disp * 3 + best_state;//named data1 because it walks over columns
int *disparity_data1 = (int*)disparity_data0;
while (position_data1 >= position_data0)
{
int pos = *position_data1;
int current_state = state1[pos];
int prev_state = state0[pos];
*disparity_data1 = (current_state == 0) ? best_disp : occ;
// Jump to the predecessor entry: cell offset, then switch from the current state slot to the predecessor's.
int stride = steps[pos] - current_state + prev_state;
position_data1 += stride;
best_disp += disp_step[pos];
// A negative value means the transition moved to the previous column:
// undo the -disp_n and step the output pointer one pixel to the left.
if (best_disp < 0)
{
best_disp += disp_n;
disparity_data1--;
}
}
}
free(sumcost_data0);
free(position_data0);
//Fill in the occluded pixels (could be a separate function)
if (occ != 0)
{
char *disp_data0 = m_disparity.m_memStart;
for (int i = 0; i < rows; i++, disp_data0 += m_disparity.m_rowSize)
{
int *disp_data1 = (int*)disp_data0;
//Find the first non-occluded pixel of the row
// NOTE(review): nonocc stays uninitialised if the row contains no non-occluded pixel.
int nonocc;
for (int j = 0; j < cols; j++)
if (disp_data1[j] != occ)
{
nonocc = disp_data1[j];
break;
}
//Occluded pixels at the left end of the row take the value of the first non-occluded pixel to their right;
//every other occluded pixel takes the value of the nearest non-occluded pixel to its left
for (int j = 0; j < cols; j++)
{
int d = disp_data1[j];
if (d == occ)
disp_data1[j] = nonocc;
else
nonocc = d;
}
}
}
}
8.杂项函数
8.1 BirchfieldTomasiMinMax
// Birchfield-Tomasi interval of every sample of one scanline.
//
//   buffer  cols input intensities
//   min     receives, per sample, the smallest of { sample, midpoint with the left neighbour,
//           midpoint with the right neighbour }
//   max     receives the largest of the same three values
//   cols    number of samples; nothing is written when cols <= 0
//   cn      unused (kept for interface compatibility)
//
// Midpoints are (a + b + 1) / 2. At either end of the line the missing neighbour is replaced
// by the sample itself.
//
// Fixes over the original: cols == 1 no longer reads buffer[1] and buffer[-1], and
// cols <= 0 no longer touches the arrays at all. The MSVC-only __min/__max macros are
// replaced by plain comparisons so the function builds with any compiler.
void BirchfieldTomasiMinMax(int* buffer, int* min, int* max, int cols, int cn)
{
    (void)cn;
    for (int i = 0; i < cols; i++)
    {
        const int cur = buffer[i];
        const int left = buffer[(i > 0) ? i - 1 : i];
        const int right = buffer[(i + 1 < cols) ? i + 1 : i];
        const int pre = (cur + left + 1) / 2;
        const int nex = (cur + right + 1) / 2;

        int lo = cur;
        int hi = cur;
        if (pre < lo) lo = pre;
        if (nex < lo) lo = nex;
        if (pre > hi) hi = pre;
        if (nex > hi) hi = nex;

        min[i] = lo;
        max[i] = hi;
    }
}
9. Image.h添加
(1)将所有private及protected成员变成public
(2)添加如下代码:
#include <opencv2/opencv.hpp>
using namespace cv;//将所有权限改为public
// Copies a CImageOf<T> into a newly allocated cv::Mat with the same width, height and band
// count; the Mat depth is the OpenCV depth that corresponds to T.
//
// Fix over the original: the element type is taken from T at compile time through
// cv::DataType<T>::depth. The original compared std::type_info::name() with strings such as
// "unsigned char"; that text is implementation-defined, so on compilers that return other
// names every comparison failed and an empty Mat was returned. The seven identical copy
// loops collapse into one.
// A T without an OpenCV depth is now reported by cv::DataType<T> (a compile error in OpenCV
// versions that disable the generic DataType) instead of producing an empty Mat at run time.
template <class T> Mat ImgToMat(CImageOf<T> *src)
{
    const int rows = src->m_shape.height;
    const int cols = src->m_shape.width;
    const int bands = src->m_shape.nBands;

    Mat dst(rows, cols, CV_MAKETYPE(cv::DataType<T>::depth, bands));
    for (int i = 0; i < rows; i++)
    {
        const char *src_row = src->m_memStart + i*src->m_rowSize;
        T *dst_row = dst.ptr<T>(i);
        for (int j = 0; j < cols; j++)
            for (int k = 0; k < bands; k++)
                dst_row[j*bands + k] = *((const T*)(src_row + j*src->m_pixSize + k*src->m_bandSize));
    }
    return dst;
}
// Copies a cv::Mat into a newly allocated CImageOf<T> with the same width, height and
// channel count. The Mat's depth must be the OpenCV depth that corresponds to T.
//
// Fixes over the original:
//   * no dependence on std::type_info::name(), whose text is implementation-defined; with
//     compilers that return other names the original copied nothing;
//   * the Mat depth is checked against T (CV_Assert). The original reinterpreted the Mat's
//     bytes as T regardless of its real depth, which reads past the end of each row when T
//     is wider than the stored element type.
template <class T> CImageOf<T> MatToImg(Mat* src)
{
    CV_Assert(src->depth() == cv::DataType<T>::depth);

    CImageOf<T> dst;
    CShape shape(src->cols, src->rows, src->channels());
    dst.ReAllocate(shape);

    const int rows = dst.m_shape.height;
    const int cols = dst.m_shape.width;
    const int bands = dst.m_shape.nBands;
    for (int i = 0; i < rows; i++)
    {
        const T *src_row = src->ptr<T>(i);
        char *dst_row = dst.m_memStart + i*dst.m_rowSize;
        for (int j = 0; j < cols; j++)
            for (int k = 0; k < bands; k++)
                *((T*)(dst_row + j*dst.m_pixSize + k*dst.m_bandSize)) = src_row[j*bands + k];
    }
    return dst;
}
// Writes one image to "./../TestData/<name>" through cv::FileStorage, stored under the
// key "mat". The image is converted with ImgToMat before the file is opened.
template <class T> void saveXML(string name, CImageOf<T>* src)
{
    const Mat converted = ImgToMat<T>(src);

    FileStorage storage;
    storage.open("./../TestData/" + name, FileStorage::WRITE);
    storage << "mat" << converted;
    storage.release();
}
// Writes the first `count` images of the array `src` to "./../TestData/<name>" through
// cv::FileStorage, stored as a sequence under the key "vectorMat".
// All images are converted with ImgToMat before the file is opened.
template <class T> void saveXML(string name, CImageOf<T>* src, int count)
{
    vector<Mat> converted;
    int idx = 0;
    while (idx < count)
    {
        converted.push_back(ImgToMat<T>(src + idx));
        ++idx;
    }

    FileStorage storage;
    storage.open("./../TestData/" + name, FileStorage::WRITE);
    storage << "vectorMat" << converted;
    storage.release();
}