Sift尺度不变特征变换（Scale-invariant feature transform）图像特征

Halcom · 发表于 2017-8-24 23:58:39

Sift图像特征
代码下载：http://pan.baidu.com/s/1nu6nBuD

视频链接：
【1】http://pan.baidu.com/s/1o79AkWE
【2】http://pan.baidu.com/s/1eSNDR70
录制的视频是算法底层原理讲解，底层代码实现，方便大家真正掌握算法实质，开发出更加出色的算法。录制视频的初衷是：避免读者朋友利用大把时间学习已有常见算法，本系列视频旨在让读者朋友快速深入了解这些常见算法原理，有更多的时间去研究更加高大上算法（价值）。

具体链接在halcom.cn论坛，联系人QQ：3283892722
该论坛是一个学习交流平台，我会逐一的和大家分享学习。
欢迎大家录制视频，并提交给我，我来设置视频，你可在论坛进行打赏分享。
视频专用播放器：http://halcom.cn/forum.php?mod=viewthread&tid=258&extra=page%3D1

运行环境：win7+32bit+matlab2014a

Step1：构建金字塔模型-对于各组图像而言，不同层采用不同高斯核函数进行滤波模糊化，各组所用sigma相同，具体看SigmaIJ
% 层数为scales+3

function [PytImages, SigmaIJ] = Pyt_Scale( image1, octaves, scales, initialBlurSigma, kernelSize )
if size(image1,3)>1
image1 = rgb2gray(image1);
end
image1 = im2double(image1); % double(image1)/double(255.0);
initialDoubleSizeImage = imresize(image1, 2, 'bilinear'); % 放大2倍
% octaves = 4; % 4组
% scales = 3; % 每组的层数
totScales = scales + 3; % 总层数
PytImages = cell( octaves, 1 ); % 初始化金字塔图
SigmaIJ = zeros(octaves, totScales); % 初始化
% initialBlurSigma = 0.5; % 高斯方差
% kernelSize = 15; % 基宽
initialSigma = sqrt(2);
for i=1:octaves
currentSigma = initialBlurSigma;
for j=1:totScales
k = (2^(j/scales));
bluredImage = gaussianBlur(initialDoubleSizeImage,currentSigma,kernelSize);
PytImages{i}(:, :, :, j) = bluredImage;
currentSigma = initialSigma*k;
SigmaIJ(i,j) = currentSigma;
end
initialDoubleSizeImage = DownSampling( PytImages{i}(:,:,:,totScales-3) ); % 下采样
end

复制代码

Step2：高斯差分模型，两个高斯模型的差
% 层数为scales+2

function PytImagesDOG = Pyt_DOG( PytImages )
% 高斯差分金字塔
% http://halcom.cn/forum.php?mod=viewthread&tid=855&extra=page%3D1
PytImagesDOG = cell( size(PytImages,1), 1 );
for i=1:size(PytImages,1) % 组数
PytImage = PytImages{i};
for j=2:size( PytImage, 4 )
PytImagesDOG{i}(:,:,:,j-1) = PytImage(:,:,:,j) - PytImage(:,:,:,j-1);
end
end

复制代码

Step3：计算关键节点
% 层数为scales
（1）计算极大值点和极小值点，判断为keyPoints
（2）排除contrastLimit点（可理解为光溜溜的背景噪点）；
（3）排除边缘效应节点（根据二阶差分简化计算得到）；

function [keypointsMap, count] = Key_Points( PytImagesDOG )
contrastLimit = 0.03;
isKeypoint = false;
count = 0;
keypointsMap = cell(size(PytImagesDOG,1), size(PytImagesDOG{1},4)-2); % 层数为scales
for i=1:size(PytImagesDOG,1) % 组数
for j=2:size(PytImagesDOG{1},4)-1 % 尺度层数
image = PytImagesDOG{i}(:,:,:,j); % 高斯滤波模糊图像
keypointsMap{i, j-1} = zeros( size(image,1), size(image,2) );
for row = 2:size(image,1)-1
for column = 2:size(image,2)-1
isKeypoint = false;
% 极大值点比较
if PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row-1,column,1,j) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row-1,column-1,1,j) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row,column-1,1,j) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row+1,column-1,1,j) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row+1,column,1,j) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row+1,column+1,1,j) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row,column+1,1,j) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row-1,column+1,1,j) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row-1,column,1,j-1) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row-1,column-1,1,j-1) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row,column-1,1,j-1) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row+1,column-1,1,j-1) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row+1,column,1,j-1) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row+1,column+1,1,j-1) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row,column+1,1,j-1) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row-1,column+1,1,j-1) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row,column,1,j-1) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row-1,column,1,j+1) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row-1,column-1,1,j+1) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row,column-1,1,j+1) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row+1,column-1,1,j+1) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row+1,column,1,j+1) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row+1,column+1,1,j+1) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row,column+1,1,j+1) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row-1,column+1,1,j+1) && ...
PytImagesDOG{i}(row,column,1,j) > PytImagesDOG{i}(row,column,1,j+1)
if(keypointsMap{i,j-1}(row,column) == 0)
keypointsMap{i,j-1}(row,column) = 1; % 关键特征点
count = count + 1;
isKeypoint = true;
end
end
% 极小值点
if PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row-1,column,1,j) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row-1,column-1,1,j) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row,column-1,1,j) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row+1,column-1,1,j) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row+1,column,1,j) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row+1,column+1,1,j) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row,column+1,1,j) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row-1,column+1,1,j) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row-1,column,1,j-1) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row-1,column-1,1,j-1) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row,column-1,1,j-1) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row+1,column-1,1,j-1) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row+1,column,1,j-1) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row+1,column+1,1,j-1) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row,column+1,1,j-1) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row-1,column+1,1,j-1) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row,column,1,j-1) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row-1,column,1,j+1) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row-1,column-1,1,j+1) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row,column-1,1,j+1) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row+1,column-1,1,j+1) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row+1,column,1,j+1) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row+1,column+1,1,j+1) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row,column+1,1,j+1) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row-1,column+1,1,j+1) && ...
PytImagesDOG{i}(row,column,1,j) < PytImagesDOG{i}(row,column,1,j+1)
if(keypointsMap{i,j-1}(row,column) == 0)
keypointsMap{i,j-1}(row,column) = 1; % 关键特征点
count = count + 1;
isKeypoint = true;
end
end
if isKeypoint==true
if(abs(PytImagesDOG{i}(row,column,1,j))<contrastLimit)
keypointsMap{i,j-1}(row,column) = 0; % 不是关键特征点
isKeypoint = false;
count = count - 1;
end
end
% 排除边缘效应点
if isKeypoint==true
% 差分
DerivativeYY = (PytImagesDOG{i}(row-1,column,1,j) + ...
PytImagesDOG{i}(row+1, column, 1, j) - ...
2.0*PytImagesDOG{i}(row, column, 1, j));
DerivativeXX = (PytImagesDOG{i}(row,column-1,1,j) + ...
PytImagesDOG{i}(row, column+1, 1, j) - ...
2.0*PytImagesDOG{i}(row, column, 1, j));
DerivativeXY = (PytImagesDOG{i}(row-1,column-1,1,j) + ...
PytImagesDOG{i}(row+1,column+1,1,j) - ...
PytImagesDOG{i}(row+1, column-1, 1, j) - ...
PytImagesDOG{i}(row-1, column+1, 1, j))/4;
trTerm = DerivativeXX + DerivativeYY;
DeterminantH = DerivativeXX * DerivativeYY - DerivativeXY*DerivativeXY;
ratio = (trTerm*trTerm)/DeterminantH;
threshold = ((5+1)^2)/5;
if(ratio>=threshold || DeterminantH<0)
keypointsMap{i,j-1}(row,column) = 0; % 不是关键特征点
isKeypoint = false;
count = count -1;
end
end
end
end
end
end

复制代码

Step4：关键点定位---相位信息
（1）求解每一层金字塔图像的幅值和相位；
（2）提取每一层金字塔图像的关键特征点，对每一个关键特征点进行15x15大小的掩膜计算；
（3）对15x15的掩膜，计算HoG统计直方图（可等价为HoG理解）；
（4）求解直方图的极值点，以及对应的角度值；

function [orientationDescriptor, MagPhase] = Phase_Hist(PytImages, keypointDescriptor, SigmaIJ)
% 关键节点特征求解（相位角）
MagPhase = cell( size( keypointDescriptor,1 ), size( keypointDescriptor,2 ), 2 );
for i=1:size( keypointDescriptor,1 ) % 组数
for j=1:size( keypointDescriptor,2 ) % 组内层数
% X方向滤波
filterDiffX = [0 0 0; -1 0 1; 0 0 0];
diffXMat = imfilter(PytImages{i}(:,:,1,j), filterDiffX);
% Y方向滤波
filterDiffY = [0 1 0; 0 0 0; 0 -1 0];
diffYMat = imfilter(PytImages{i}(:,:,1,j), filterDiffY);
% 幅值
magnMat = sqrt(diffXMat.*diffXMat + diffYMat.*diffYMat);
% 相位
orientMat = atan2(diffYMat, diffXMat);
%store magnitude and orientation, so they can be used later on
MagPhase{i}{j}{1} = magnMat;
MagPhase{i}{j}{2} = orientMat;
end
end
orientationDescriptor = cell(size(keypointDescriptor, 1),size(keypointDescriptor,2));
hist = zeros(36,1);
cant = 0;
for i=1:size( keypointDescriptor,1 ) % 组数
for j=1:size( keypointDescriptor,2 ) % 组内层数
[rowKpt, colKpt] = find(keypointDescriptor{i, j} == 1); % 找出全部的关键特征点
% 高斯核函数
accumSigma = SigmaIJ(i, j)*1.5;
weightKernel = fspecial('gaussian',[round(accumSigma*6-1), round(accumSigma*6-1)], accumSigma);
knlHeight = size(weightKernel,1);
knlWidth = size(weightKernel,2);
% % 图像宽、高
% winHeight = size(MagPhase{i}{j}{1}, 1);
% winWidth = size(MagPhase{i}{j}{1}, 2);
for keypoint = 1: length( rowKpt )
xfrom = round(colKpt(keypoint)-knlWidth/2);
xto = round(colKpt(keypoint)+knlWidth/2-1);
yfrom = round(rowKpt(keypoint)-knlHeight/2);
yto = round(rowKpt(keypoint)+knlHeight/2-1);
truncXKnlLeft = 0;
truncXKnlRight = 0;
truncYKnlTop = 0;
truncYKnlBottom = 0;
% 可能此处需要对truncXKnlLeft、truncXKnlRight、truncYKnlTop、truncYKnlBottom、
% xfrom、xto、yfrom、yto进行判断，防止越界
weightKernelEval = weightKernel((1+truncYKnlTop):(size(weightKernel,1)-truncYKnlBottom), ...
(1+truncXKnlLeft):(size(weightKernel,2)-truncXKnlRight));
weightKernelEval = weightKernelEval./max(max( weightKernelEval ));
cant=cant+1;
magnitudes = MagPhase{i}{j}{1}(yfrom:yto,xfrom:xto);
magnitudes = weightKernelEval.*magnitudes; % 幅值给出权重，联合DPM可变部件算法进行理解
orientations = MagPhase{i}{j}{2}(yfrom:yto,xfrom:xto);
orientations = (orientations.*180)./pi; % 转为角度
% 角度区间统计计算（可对比HOG算法进行理解）
for bucket=1:36
bucketRangeFrom = (bucket-19)*10;
bucketRangeTo = (bucket-18)*10;
[rowOr, colOr] = find(orientations<bucketRangeTo & orientations>=bucketRangeFrom);
for rc = 1:length( rowOr )
hist(bucket) = hist(bucket) + magnitudes( rowOr(rc), colOr(rc) );
end
end
% 基于hist，找出hist里面最大的N个值所对应的下标
% 关键点的精确定位，插值算法
posMaxHist = find(hist==max(hist));
posOtherHist = find(hist>(max(hist)-max(hist)*0.2)&hist~=hist(posMaxHist(1)));
posAllHist = zeros(1,1);
if(size(posOtherHist,1)>0)
posAllHist = cat(2,posMaxHist,posOtherHist.'); % 合并操作
% posAllHist = 【posAllHist， posOtherHist】 % 等价操作
else
posAllHist = posMaxHist;
end
interpolatedOrientations = zeros(size(posAllHist,1),1);
for currentBestHist = 1:size(posAllHist, 2)
posHist = posAllHist(currentBestHist);
x1 = posHist-1;
x2 = posHist;
x3 = posHist+1;
y1 = 0;
y2 = hist(x2);
y3 = 0;
%in order not to lose the topology
if(x1<1)
y1 = hist(36);
else
y1 = hist(x1);
end
if(x3>36)
y3 = hist(1);
else
y3 = hist(x3);
end
valsX = [x1-0.5 x2-0.5 x3-0.5];
% valsX = [x1 x2 x3];
valsY = [y1 y2 y3];
pars = polyfit(valsX,valsY,2); % 二次抛物线插值
%result of derivative = 0 to see where is the parabolic maxima
xMax = (pars(2)*(-1))/(2*pars(1));
if(xMax<0)
xMax = 36+xMax;
end
if(xMax>36)
xMax = xMax-36;
end
% convert to degrees, Hist是10度一个区间
xMax = xMax * 10;
interpolatedOrientations(currentBestHist) = xMax;
end
% creates the structure with the data
histDescriptor = struct('octave', i, ...
'layer', j, ...
'position',[rowKpt(keypoint) colKpt(keypoint)], ...
'histogram', hist, ...
'bestHist', posAllHist.', ...
'interpOrien', interpolatedOrientations.', ...
'theBestHist', posMaxHist);
orientationDescriptor{i}{j}(rowKpt(keypoint),colKpt(keypoint)) = histDescriptor;
end
end
end

复制代码

Step5：关键点特征
（1）获取关键特征点、金字塔图像幅值、相位图等信息；
（2）将金字塔图像关键特征点旋转变换，在此利用了旋转矩阵，旋转角为Step4中的特征直方图最大极值对应的角度值，具体看代码注释；
（3）以变换后的关键特征点，进行Block块分析，主要为（幅值图像）权值叠加过程（可结合DPM算法进行理解）；
（4）对关键点特征进行归一化处理，截断归一化处理；

function kptDescriptors = siftDescriptors( keypointDescriptor, keypointsCount, orientationDescriptor, MagPhase)
% 特征求解
BlockSize = 16; % 特征窗口大小
% keypointDescriptor = keypointsMap;
kptDescriptors = repmat(struct('octave',0,'kptLayer',0,'kptDescriptor',zeros(4,4,8), ...
'kptX',0,'kptY',0),keypointsCount,1);
cont = 0;
for i=1:size( keypointDescriptor,1 ) % 组数
for j=1:size( keypointDescriptor,2 ) % 组内层数
[rowKpt, colKpt] = find(keypointDescriptor{i, j} == 1); % 找出全部的关键特征点
if(size(rowKpt,1)==0)
continue;
end
% 取出关键特征点、图像幅值和相位角
keypointData = orientationDescriptor{i}{j};
magnitudes = MagPhase{i}{j}{1};
orientations = MagPhase{i}{j}{2};
for keypoint = 1: length( rowKpt )
keypointDetail = keypointData(rowKpt(keypoint),colKpt(keypoint));
for orient = 1:size(keypointDetail.bestHist,1)
kptDescriptor = zeros(128,1); % 初始化
cont = cont+1;
% 旋转操作，以关键节点为圆心
row = rowKpt(keypoint);
col = colKpt(keypoint);
degrees = keypointDetail.interpOrien(orient);
rotAngle = 360 - degrees; % 旋转角
v=[row, col]'; % 关键节点坐标
c=[size(magnitudes,1)/2, size(magnitudes,2)/2]' ; % 旋转中心
% 旋转图像
rotMagnitudes= imrotate(magnitudes,rotAngle);
rotOrientations= imrotate(orientations,rotAngle);
% 旋转矩阵
RM=[cosd(rotAngle) -sind(rotAngle)
sind(rotAngle) cosd(rotAngle)];
temp_v = RM*(v-c); % 向量旋转
rot_v = temp_v+c; % 迭加到中心点坐标上，旋转后的新的原图（尺寸不变的理论原图）关键坐标点
% 旋转后，rotMagnitudes图像尺寸变化
difmat = [(size(rotMagnitudes,1) - size(magnitudes,1))/2, (size(rotMagnitudes,2) - size(magnitudes,2))/2]';
rot_v2 = rot_v + difmat; % 拉伸后的坐标
% 新的关键点坐标
rotRow = rot_v2(1);
rotCol = rot_v2(2);
gaussWeight = getGaussWeights(BlockSize, BlockSize/2); % 高斯核函数
% 以新的关键点坐标为圆心，构建新的Block块，计算特征（可结合HOG算法进行理解）
for x = 0:BlockSize-1
for y = 0:BlockSize-1
subregAxisX = floor(x/4);
subregAxisY = floor(y/4);
yCoord = rotRow + y - BlockSize/2;
xCoord = rotCol + x - BlockSize/2;
yCoord = round(yCoord);
xCoord = round(xCoord);
% get the magnitude
if(yCoord>0&&xCoord>0 && yCoord<=size(rotMagnitudes,1) && xCoord<=size(rotMagnitudes,2))
magn = rotMagnitudes(yCoord,xCoord);
magn = magn*gaussWeight(y+1,x+1);
orientation = rotOrientations(yCoord,xCoord);
orientation = orientation + pi;
bucket = (orientation)*(180/pi);
bucket = ceil(bucket/45);
kptDescriptor((subregAxisY*4+subregAxisX)*8 + bucket) = ...
kptDescriptor((subregAxisY*4+subregAxisX)*8 + bucket) + magn;
end
end
end
% 归一化操作
sqKptDescriptor = kptDescriptor.^2;
kptDescriptor = kptDescriptor./sqrt( sum(sqKptDescriptor) );
% 截断阈值分析
kptDescriptor(find(kptDescriptor>0.2))=0.2;
% 归一化操作
sqKptDescriptor = kptDescriptor.^2;
kptDescriptor = kptDescriptor./sqrt( sum(sqKptDescriptor) );
kptDescriptors(cont) = struct('octave',i,'kptLayer',j, ...
'kptDescriptor',kptDescriptor, ...
'kptX',colKpt(keypoint),'kptY',rowKpt(keypoint));
end
end
end
end

复制代码

参考：http://www.360doc.com/content/14/1013/15/18306241_416576994.shtml

		自动登录	找回密码
密码			立即注册

Sift尺度不变特征变换（Scale-invariant feature transform）图像特征

本帖子中包含更多资源