Polyora: polyora/patchtagger.cpp Source File

00001 /*  This file is part of Polyora, a multi-target tracking library.
00002     Copyright (C) 2010 Julien Pilet
00003 
00004     This program is free software; you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation; either version 2 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License along
00015     with this program. If not, see <http://www.gnu.org/licenses/>.
00016 
00017     To contact the author of this program, please send an e-mail to:
00018     julien.pilet(at)calodox.org
00019 */
00020 /* Julien Pilet, 2009. */
00021 #include <limits>
00022 #include "patchtagger.h"
00023 #include <highgui.h>
00024 #include <iostream>
00025 #include <math.h>
00026 #include "kmeantree.h"
00027 
00028 #ifndef M_2PI
00029 #define M_2PI 6.283185307179586476925286766559f
00030 #endif
00031 #ifndef M_PI
00032 #define M_PI 3.141592653589793238462643383279f
00033 #endif
00034 #ifdef WIN32
00035 static inline double drand48() {
00036         return (double)rand()/(double)RAND_MAX;
00037 }
00038 // win32 compatibility
00039 #ifdef max
00040 #undef max
00041 #endif
00042 #ifdef min
00043 #undef min
00044 #endif
00045 #endif
00046 
00047 float patch_tagger::tot_weight=0;
00048 float patch_tagger::_weight[patch_tagger::patch_size][patch_tagger::patch_size];
00049 
00050 patch_tagger::descriptor::descriptor()
00051 {
00052         total=0;
00053         cvInitMatHeader(&rotated, patch_size,patch_size, CV_32FC1, _rotated);
00054 }
00055 
00056 patch_tagger::descriptor::descriptor(const patch_tagger::descriptor &a) 
00057 {
00058         *this = a;
00059 }
00060 
00061 const patch_tagger::descriptor &patch_tagger::descriptor::operator=(const patch_tagger::descriptor &a)
00062 {
00063         memcpy(this, &a, sizeof(patch_tagger::descriptor));
00064         cvInitMatHeader(&rotated, patch_size,patch_size, CV_32FC1, _rotated);
00065         return *this;
00066 }
00067 
00068 float patch_tagger::descriptor::correl(const patch_tagger::descriptor &a)
00069 {
00070         const float *pa = &_rotated[0][0];
00071         const float *pb = &a._rotated[0][0];
00072         float r=0;
00073         float norma = 0;
00074         float normb = 0;
00075         for (unsigned n = patch_size*patch_size; n; --n) {
00076                 float va = (*pa++ - .5f);
00077                 float vb = (*pb++ - .5f);
00078 
00079                 norma += va*va;
00080                 normb += vb*vb;
00081                 r += va*vb;
00082         }
00083         //return r / (tot_weight * tot_weight);
00084         //std::cout << "norma: " << norma << ", normb: " << normb << ", totw: " << tot_weight << ", r: " << r << std::endl;
00085         return r / (sqrt(norma)*sqrt(normb));
00086 }
00087 
00088 static unsigned rand_range(unsigned max) {
00089 
00090         return (unsigned)(drand48()*max);
00091 }
00092 
00093 patch_tagger *patch_tagger::singleton() {
00094         static patch_tagger *t=0;
00095         if (!t) {
00096                 t = new patch_tagger();
00097                 t->precalc();
00098         }
00099         return t;
00100 }
00101 
00102 void patch_tagger::precalc() {
00103         for (int a=-255; a<256; a++)
00104                 for (int b=-255;b<256;b++) {
00105                         double angle = atan2((double)b,(double)a)/(2*M_PI);
00106                         if (angle<0) angle+=1;
00107                         if (angle>=1) angle-=1;
00108                         unsigned l = (unsigned)floor(256*sqrt((double)(a*a+b*b)));
00109                         float obinf = nb_orient * angle +.5;
00110                         unsigned o1 = (unsigned)floor(obinf);
00111                         float r = obinf - o1;
00112                         unsigned o2;
00113                         unsigned l1,l2;
00114                         if (r>0.5f) {
00115                                 o2 = (o1+1) % nb_orient;
00116                                 r -= .5f;
00117                                 l1 = (unsigned)(l * (1.0f-r));
00118                                 l2 = (unsigned)(l * r);
00119                         } else {
00120                                 o2 = (o1-1+nb_orient) % nb_orient;
00121                                 l1 = (unsigned)(l * (r+.5));
00122                                 l2 = (unsigned)(l * (.5f-r));
00123                         }
00124                         //if (l1>255) std::cerr << l1 << " should be <255!\n";
00125                         //if (l2>255) std::cerr << l2 << " should be <255!\n";
00126 
00127                         cart2polar_table[255+b][255+a].dir1 = o1;
00128                         cart2polar_table[255+b][255+a].length1 = l1;
00129                         cart2polar_table[255+b][255+a].dir2 = o2;
00130                         cart2polar_table[255+b][255+a].length2 = l2;
00131                 }
00132 
00133         float c= patch_size/2.0f -1 ;
00134 
00135         cvInitMatHeader(&weight, patch_size, patch_size, CV_32FC1, _weight);
00136         cvInitMatHeader(&mask, patch_size, patch_size, CV_8UC1, _mask);
00137 
00138         unsigned char patch_im[patch_size][patch_size][3];
00139 
00140         tot_weight = 0;
00141         for (unsigned y=0; y<patch_size; y++) {
00142 
00143                 for (unsigned x=0; x<patch_size; x++) {
00144                         float dx = x-c;
00145                         float dy = y-c;
00146                         
00147                         float dc2 = 4*(dx*dx+dy*dy)/(patch_size*patch_size);
00148                         float wdc = exp(-dc2*dc2*1.8);
00149                         _weight[y][x] = wdc;
00150                         tot_weight += wdc;
00151                         _mask[y][x] = (wdc>.8 ? 0xff:0);
00152 
00153                         float angle = nb_zone * atan2(dy,dx)/(2*M_PI);
00154                         if (angle<0) angle+=nb_zone;
00155                         if (angle>=nb_zone) angle-=nb_zone;
00156 
00157                         unsigned z = floor(angle);
00158                         assert(z<nb_zone);
00159                         float dz;
00160                         if (angle-z > .5) 
00161                                 dz = (angle-z-0.5)/(0.5 );
00162                         else
00163                                 dz = (z+0.5-angle)/(0.5 );
00164 
00165 
00166                         weight_table[y][x].zone1 = z;
00167                         weight_table[y][x].weight1 = (unsigned)( wdc* exp(-dz*dz*dz*dz*1.5) * 4096.0f);
00168 
00169                         dz = 1-dz;
00170                         weight_table[y][x].weight2 = (unsigned)(wdc * exp(-dz*dz*dz*dz*1.5) * 4096.0f);
00171                         if (angle-z < .5) {
00172                                 weight_table[y][x].zone2 = (z-1+nb_zone)%nb_zone;
00173                         }  else {
00174                                 weight_table[y][x].zone2 = (z+1+nb_zone)%nb_zone;
00175                         }
00176 
00177                         //unsigned w1 = weight_table[y][x].weight1*256/4096;
00178                         unsigned w2 = weight_table[y][x].weight1*256/4096;
00179                         unsigned z2 = weight_table[y][x].zone1;
00180                         patch_im[y][x][0] =  w2 * (z2&1);
00181                         patch_im[y][x][1] =  w2*((z2&1) ==0);
00182                         patch_im[y][x][2] =  w2*((z2&2) ==0);
00183                 }
00184         }
00185         CvMat mat;
00186         cvInitMatHeader(&mat, patch_size, patch_size, CV_8UC3, patch_im);
00187         //cvSaveImage("patch_weight.png", &mat);
00188 
00189         // create random projection tests
00190         for (unsigned i=0; i<nb_test; i++) {
00191                 random_tests[i].zone1 = rand_range(nb_zone);
00192                 random_tests[i].orient1 = rand_range(nb_orient);
00193                 do {
00194                         random_tests[i].zone2 = rand_range(nb_zone);
00195                         random_tests[i].orient2 = rand_range(nb_orient);
00196                 } while (random_tests[i].zone1==random_tests[i].zone2 &&
00197                         random_tests[i].orient2==random_tests[i].orient1);
00198         }
00199 }
00200 
00201 void patch_tagger::cmp_descriptor(CvMat *patch, patch_tagger::descriptor *d, float sbpix_u, float sbpix_v)
00202 {
00203         memset(d,0,sizeof(descriptor));
00204 
00205         int h = patch->rows-1;
00206         int w = patch->cols-1;
00207         for (int y=1; y<h; y++) {
00208                 unsigned char *line = &CV_MAT_ELEM(*patch, unsigned char, y, 0);
00209                 unsigned char *line_up = &CV_MAT_ELEM(*patch, unsigned char, y-1, 0);
00210                 unsigned char *line_down = &CV_MAT_ELEM(*patch, unsigned char, y+1, 0);
00211                 histo_entry *weight = &weight_table[y][0];
00212 
00213                 // striped inner loop
00214                 for (int x=1; x<w; x++) {
00215                         int dx = 255+(line[x+1]-line[x-1])/2;
00216                         int dy = 255+(line_down[x]-line_up[x])/2;
00217 
00218                         grad2polar_entry &polar(cart2polar_table[dy][dx]);
00219 
00220                         unsigned z1 = weight[x].zone1;
00221                         unsigned z2 = weight[x].zone2;
00222                         unsigned w1 = weight[x].weight1;
00223                         unsigned w2 = weight[x].weight2;
00224                         d->histo[z1][polar.dir1] += w1 * polar.length1;
00225                         d->histo[z1][polar.dir2] += w1 * polar.length2;
00226                         d->histo[z2][polar.dir1] += w2 * polar.length1;
00227                         d->histo[z2][polar.dir2] += w2 * polar.length2;
00228                 }
00229         }
00230 
00231         unsigned max_val=0; 
00232         unsigned max_o=0;
00233         for (unsigned o=0; o<nb_orient; o++) {
00234                 for (unsigned z=0; z<nb_zone; z++) {
00235                         d->sum_orient[o] += d->histo[z][o];
00236                 }
00237                 d->total += d->sum_orient[o];
00238 
00239                 if (d->sum_orient[o]>=max_val) {
00240                         max_val = d->sum_orient[o];
00241                         max_o = o;
00242                 }
00243         }
00244         /*
00245         if (d->total==0) {
00246                 cvSaveImage("buggy_patch.bmp", patch);
00247         }
00248         */
00249         //assert(d->total>0);
00250 
00251         float y0 = d->sum_orient[(max_o-1+nb_orient)%nb_orient];
00252         float y1 = d->sum_orient[max_o];
00253         float y2 = d->sum_orient[(max_o+1)%nb_orient];
00254 
00255         /*
00256          y = a*x2 + b*x + c 
00257          with y0 = a -b +c
00258               y1 = c
00259               y2 = a +b +c
00260 
00261            =>
00262               y0+y2 = 2*a + 2*y1
00263               a = (y0+y2)/2-y1
00264               b = (y0-y2)/-2 
00265               c = y1
00266 
00267               y'= 0 = 2*a*x +b;
00268               x = -b/(2*a)
00269         */
00270         float a = (y0+y2)/2.0f-y1;
00271         float b = (y0-y2)/-2.0f;
00272         float x = -b/(2*a);
00273 
00274         d->orientation = (max_o+x)*M_2PI/nb_orient;
00275         if (d->orientation<0) d->orientation+=M_2PI;
00276         
00277 
00278         /*
00279         std::cout << "orient: " << d->orientation << ", x: " << x 
00280                 << " y: " << y0 << "," << y1 << "," << y2
00281                 << std::endl;
00282                 */
00283         //d->projection = project(d);
00284         // fetch the rotated patch
00285         float scale = patch->cols / (float)patch_size ;
00286         float ca = cos(d->orientation) * scale;
00287         float sa = sin(d->orientation) * scale;
00288 
00289         float mat[] = {
00290                 ca, -sa, sbpix_u + patch->cols/2,
00291                 sa, ca, sbpix_v + patch->rows/2};
00292         CvMat rotm;
00293         cvInitMatHeader(&d->rotated,patch_size,patch_size, CV_32FC1, d->_rotated);
00294         cvInitMatHeader(&rotm,2,3,CV_32FC1,mat);
00295         cvGetQuadrangleSubPix(patch, &d->rotated, &rotm);
00296         CvScalar mean, stdev;
00297         cvAvgSdv(&d->rotated, &mean, &stdev, &mask);
00298         if (fabs(stdev.val[0]) < 0.0001) {
00299                 //cvSaveImage("buggy_patch.bmp", &d->rotated);
00300                 //std::cout << "stdev=0 in patch!! saving buggy_patch.bmp\n";
00301                 stdev.val[0]=1;
00302                 d->total=0;
00303                 cvSetZero(&d->rotated);
00304         } else {
00305                 // f = (ai-mu)*(wi/s) + .5 =
00306                 float s = .5/stdev.val[0];
00307                 for (unsigned y=0;y<patch_size; y++)
00308                         for (unsigned x=0;x<patch_size; x++)
00309                                 d->_rotated[y][x] = (d->_rotated[y][x]-mean.val[0])*_weight[y][x]*s +.5;
00310         }
00311         //cvAddS(&d->rotated,&d->rotated, cvScalarAll(-mean.val[0]));
00312         //cvMul(&d->rotated, &weight, &d->rotated, .5/stdev.val[0]);
00313         //cvAddS(&d->rotated,&d->rotated, cvScalarAll(.5));
00314         //cvScale(&d->rotated, &d->rotated, .5/stdev.val[0], .5-.5*mean.val[0]/stdev.val[0]);
00315         //cvMul(&d->rotated, &weight,&d->rotated,
00316 
00317 }
00318 
00319 unsigned patch_tagger::project(patch_tagger::descriptor *d) 
00320 {
00321         unsigned r=0;
00322         unsigned mean = d->total/(nb_zone*nb_orient);
00323         for (unsigned i=0; i<nb_test; i++) {
00324                 int oz = d->orientation*(nb_zone/M_2PI);
00325                 int o = d->orientation*(nb_orient/M_2PI);
00326                 unsigned a = d->histo[(oz + random_tests[i].zone1) %nb_zone]
00327                         [(o+ random_tests[i].orient1) % nb_orient];
00328                 //unsigned b = d->histo[(oz + random_tests[i].zone2)%nb_zone]
00329                 //      [(o+ random_tests[i].orient2) % nb_orient];
00330                 //if (a>b*2) r += 1<<i;
00331                 if (a>mean) r += 1<<i;
00332         }
00333         
00334         return r;
00335 }
00336 
00337 void patch_tagger::descriptor::array(float *array)
00338 {
00339 #ifdef WITH_PATCH_AS_DESCRIPTOR
00340         assert(patch_size*patch_size == kmean_tree::descriptor_size);
00341         memcpy(array, _rotated, patch_size*patch_size*sizeof(float));
00342 #else
00343         assert(kmean_tree::descriptor_size == 128);
00344         cv::Mat _rot(&rotated);
00345         compute_sift_descriptor(array, _rot);
00346 #endif
00347         /*
00348         float *p = array;
00349         for (unsigned z=0;z<nb_zone;z++) {
00350                 for (unsigned i=0; i<nb_orient; i++) {
00351                         int oz = orientation*(nb_zone/M_2PI);
00352                         int o = orientation*(nb_orient/M_2PI);
00353                         unsigned a = histo[(oz + z) %nb_zone] [(o+i) % nb_orient];
00354                         *p++ = ((float)a/total);
00355                 }
00356         }
00357         */
00358 }
00359 
00361 static void normalize_histogram(float* L_begin, float* L_end)
00362 {
00363   float* L_iter ;
00364   float norm = 0.0 ;
00365 
00366   for(L_iter = L_begin; L_iter != L_end ; ++L_iter)
00367     norm += (*L_iter) * (*L_iter) ;
00368 
00369   norm = sqrtf(norm) ;
00370 
00371   for(L_iter = L_begin; L_iter != L_end ; ++L_iter)
00372     *L_iter /= (norm + std::numeric_limits<float>::epsilon() ) ;
00373 }
00374 
00375 
00376 
00394 void patch_tagger::compute_sift_descriptor(float* descr_pt, cv::Mat patch)
00395 {
00396 
00397   /* The SIFT descriptor is a  three dimensional histogram of the position
00398    * and orientation of the gradient.  There are NBP bins for each spatial
00399    * dimesions and NBO  bins for the orientation dimesion,  for a total of
00400    * NBP x NBP x NBO bins.
00401    *
00402    * The support  of each  spatial bin  has an extension  of SBP  = 3sigma
00403    * pixels, where sigma is the scale  of the keypoint.  Thus all the bins
00404    * together have a  support SBP x NBP pixels wide  . Since weighting and
00405    * interpolation of  pixel is used, another  half bin is  needed at both
00406    * ends of  the extension. Therefore, we  need a square window  of SBP x
00407    * (NBP + 1) pixels. Finally, since the patch can be arbitrarly rotated,
00408    * we need to consider  a window 2W += sqrt(2) x SBP  x (NBP + 1) pixels
00409    * wide.
00410    */      
00411 
00412   // octave
00413   int o = 0; //keypoint.o ;
00414   float xperiod = 1; //getOctaveSamplingPeriod(o) ;
00415 
00416   // offsets to move in Gaussian scale space octave
00417   const int ow = patch.cols;
00418   const int oh = patch.rows;
00419   const int xo = 2 ; // siftpp uses a buffer with interlaced gradient norm and angle.
00420 
00421   // keypoint fractional geometry
00422   float x     = patch.cols/2.0f; //keypoint.x / xperiod;
00423   float y     = patch.rows/2.0f; //keypoint.y / xperiod ;
00424 
00425   float st0   = 0; //sinf( angle0 ) ;
00426   float ct0   = 1; //cosf( angle0 ) ;
00427   
00428   // shall we use keypoints.ix,iy,is here?
00429   int xi = ((int) (x+0.5)) ; 
00430   int yi = ((int) (y+0.5)) ;
00431   //int si = keypoint.is ;
00432 
00433   const float magnif = 3.0f ;
00434   const int NBO = 8 ;
00435   const int NBP = 4 ;
00436   const float sigma = 0.2357022604f*(patch.cols-3)/(NBP+1.0f); //keypoint.sigma / xperiod ;
00437   const float SBP = magnif * sigma ;
00438   const int   W = (int) floor (sqrt(2.0) * SBP * (NBP + 1) / 2.0 + 0.5) ;
00439   
00440   /* Offsets to move in the descriptor. */
00441   /* Use Lowe's convention. */
00442   const int binto = 1 ;
00443   const int binyo = NBO * NBP ;
00444   const int binxo = NBO ;
00445   // const int bino  = NBO * NBP * NBP ;
00446   
00447   int bin ;
00448   
00449   std::fill( descr_pt, descr_pt + NBO*NBP*NBP, 0 ) ;
00450 
00451   /* Center the scale space and the descriptor on the current keypoint. 
00452    * Note that dpt is pointing to the bin of center (SBP/2,SBP/2,0).
00453    */
00454   //pixel_t const * pt = temp + xi*xo + yi*yo + (si - smin - 1)*so ; // gradient (?)
00455   float *  dpt = descr_pt + (NBP/2) * binyo + (NBP/2) * binxo ;
00456      
00457 #define atd(dbinx,dbiny,dbint) *(dpt + (dbint)*binto + (dbiny)*binyo + (dbinx)*binxo)
00458       
00459   /*
00460    * Process pixels in the intersection of the image rectangle
00461    * (1,1)-(M-1,N-1) and the keypoint bounding box.
00462    */
00463   for(int dyi = std::max(-W, 1-yi) ; dyi <= std::min(+W, oh-2-yi) ; ++dyi) {
00464           
00465           float *pix_line_up = patch.ptr<float>(yi+dyi-1);
00466           float *pix_line = patch.ptr<float>(yi+dyi);
00467           float *pix_line_down = patch.ptr<float>(yi+dyi+1);
00468 
00469           for(int dxi = std::max(-W, 1-xi) ; dxi <= std::min(+W, ow-2-xi) ; ++dxi) {
00470 
00471                   float pix_dx = (pix_line[xi+dxi+1]-pix_line[xi+dxi-1])/2.0f;
00472                   float pix_dy = (pix_line_up[xi+dxi]-pix_line_down[xi+dxi])/2.0f;
00473 
00474                   // the following should go in a look-up table
00475                   float angle = atan2f(pix_dy,pix_dx);
00476                   if (angle<0) angle+= M_2PI;
00477                   else if (angle>=1) angle-= M_2PI;
00478                   float mod = sqrtf(pix_dx*pix_dx+pix_dy*pix_dy);
00479                   float theta = M_2PI - angle; // lowe compatible ?
00480 
00481                   // fractional displacement
00482                   float dx = xi + dxi - x;
00483                   float dy = yi + dyi - y;
00484 
00485                   // get the displacement normalized w.r.t. the keypoint
00486                   // orientation and extension.
00487                   float nx = dx / SBP ;
00488                   float ny = dy / SBP ; 
00489                   float nt = NBO * theta / (2*M_PI) ;
00490 
00491                   // Get the gaussian weight of the sample. The gaussian window
00492                   // has a standard deviation equal to NBP/2. Note that dx and dy
00493                   // are in the normalized frame, so that -NBP/2 <= dx <= NBP/2.
00494                   float const wsigma = NBP/2.0f ;
00495                   //float win = VL::fast_expn((nx*nx + ny*ny)/(2.0 * wsigma * wsigma)) ;
00496                   float win = expf(-(nx*nx + ny*ny)/(2.0f * wsigma * wsigma)) ;
00497 
00498                   // The sample will be distributed in 8 adjacent bins.
00499                   // We start from the ``lower-left'' bin.
00500                   int binx = (int)floorf( nx - 0.5f ) ;
00501                   int biny = (int)floorf( ny - 0.5f ) ;
00502                   int bint = (int)floorf( nt ) ;
00503                   float rbinx = nx - (binx+0.5f) ;
00504                   float rbiny = ny - (biny+0.5f) ;
00505                   float rbint = nt - bint ;
00506                   int dbinx ;
00507                   int dbiny ;
00508                   int dbint ;
00509 
00510                   // Distribute the current sample into the 8 adjacent bins
00511                   for(dbinx = 0 ; dbinx < 2 ; ++dbinx) {
00512                           for(dbiny = 0 ; dbiny < 2 ; ++dbiny) {
00513                                   for(dbint = 0 ; dbint < 2 ; ++dbint) {
00514 
00515                                           if( binx+dbinx >= -(NBP/2) &&
00516                                                           binx+dbinx <   (NBP/2) &&
00517                                                           biny+dbiny >= -(NBP/2) &&
00518                                                           biny+dbiny <   (NBP/2) ) {
00519                                                   float weight = win 
00520                                                           * mod 
00521                                                           * fabsf (1 - dbinx - rbinx)
00522                                                           * fabsf (1 - dbiny - rbiny)
00523                                                           * fabsf (1 - dbint - rbint) ;
00524 
00525                                                   atd(binx+dbinx, biny+dbiny, (bint+dbint) % NBO) += weight ;
00526                                           }
00527                                   }            
00528                           }
00529                   }
00530           }  
00531   }
00532 
00533   /* Standard SIFT descriptors are normalized, truncated and normalized again */
00534   if( 1 /*normalizeDescriptor*/ ) {
00535 
00536     /* Normalize the histogram to L2 unit length. */        
00537     normalize_histogram(descr_pt, descr_pt + NBO*NBP*NBP) ;
00538     
00539     /* Truncate at 0.2. */
00540     for(bin = 0; bin < NBO*NBP*NBP ; ++bin) {
00541       if (descr_pt[bin] > 0.2) descr_pt[bin] = 0.2;
00542     }
00543     
00544     /* Normalize again. */
00545     normalize_histogram(descr_pt, descr_pt + NBO*NBP*NBP) ;
00546   }
00547 
00548 }
00549 
00550