00001
00019 #ifndef __D_T_TEMPLATED_DATABASE__
00020 #define __D_T_TEMPLATED_DATABASE__
00021
00022 #include <vector>
00023 #include <numeric>
00024 #include <fstream>
00025 #include <string>
00026 #include <list>
00027 #include <set>
00028
00029 #include "TemplatedVocabulary.h"
00030 #include "QueryResults.h"
00031 #include "ScoringObject.h"
00032 #include "BowVector.h"
00033 #include "FeatureVector.h"
00034
00035 #include "DUtils.h"
00036
00037 using namespace std;
00038
00039 namespace DBoW2 {
00040
00041
// Minimum number of words a database entry must share with the query to be
// reported by queryChiSquare() and queryBhattacharyya(); entries with fewer
// common words are dropped from the results.
// NOTE(review): being a non-const `static` in a header, every translation unit
// that includes this file gets its own independent copy of this variable.
00042 static int MIN_COMMON_WORDS = 5;
00043
00046 template<class TDescriptor, class F>
00048 class TemplatedDatabase
00049 {
00050 public:
00051
00058 explicit TemplatedDatabase(bool use_di = true, int di_levels = 0);
00059
00068 template<class T>
00069 explicit TemplatedDatabase(const T &voc, bool use_di = true,
00070 int di_levels = 0);
00071
00076 TemplatedDatabase(const TemplatedDatabase<TDescriptor, F> &db);
00077
00082 TemplatedDatabase(const std::string &filename);
00083
00088 TemplatedDatabase(const char *filename);
00089
00093 virtual ~TemplatedDatabase(void);
00094
00099 TemplatedDatabase<TDescriptor,F>& operator=(
00100 const TemplatedDatabase<TDescriptor,F> &db);
00101
00107 template<class T>
00108 inline void setVocabulary(const T &voc);
00109
00119 template<class T>
00120 void setVocabulary(const T& voc, bool use_di, int di_levels = 0);
00121
00126 inline const TemplatedVocabulary<TDescriptor,F>* getVocabulary() const;
00127
00134 void allocate(int nd = 0, int ni = 0);
00135
00143 EntryId add(const vector<TDescriptor> &features,
00144 BowVector *bowvec = NULL, FeatureVector *fvec = NULL);
00145
00153 EntryId add(const BowVector &vec,
00154 const FeatureVector &fec = FeatureVector() );
00155
00159 inline void clear();
00160
00165 inline unsigned int size() const;
00166
00171 inline bool usingDirectIndex() const;
00172
00177 inline int getDirectIndexLevels() const;
00178
00187 void query(const vector<TDescriptor> &features, QueryResults &ret,
00188 int max_results = 1, int max_id = -1) const;
00189
00198 void query(const BowVector &vec, QueryResults &ret,
00199 int max_results = 1, int max_id = -1) const;
00200
00207 const FeatureVector& retrieveFeatures(EntryId id) const;
00208
00213 void save(const string &filename) const;
00214
00219 void load(const string &filename);
00220
00226 virtual void save(cv::FileStorage &fs,
00227 const std::string &name = "database") const;
00228
00234 virtual void load(const cv::FileStorage &fs,
00235 const std::string &name = "database");
00236
00237 public:
00238
00239
00240 int bcmatching_C;
00241 double bcthresholding_beta;
00242
00243 protected:
00244
00246 void queryL1(const BowVector &vec, QueryResults &ret,
00247 int max_results, int max_id) const;
00248
00250 void queryL2(const BowVector &vec, QueryResults &ret,
00251 int max_results, int max_id) const;
00252
00254 void queryChiSquare(const BowVector &vec, QueryResults &ret,
00255 int max_results, int max_id) const;
00256
00258 void queryBhattacharyya(const BowVector &vec, QueryResults &ret,
00259 int max_results, int max_id) const;
00260
00262 void queryKL(const BowVector &vec, QueryResults &ret,
00263 int max_results, int max_id) const;
00264
00266 void queryDotProduct(const BowVector &vec, QueryResults &ret,
00267 int max_results, int max_id) const;
00268
00270 void __queryNormSqDiff(const BowVector &vec, QueryResults &ret,
00271 int max_results, int max_id) const;
00272
00274 void __queryL2Dist(const BowVector &vec, QueryResults &ret,
00275 int max_results, int max_id) const;
00276
00278 void __queryKLHalf(const BowVector &vec, QueryResults &ret,
00279 int max_results, int max_id) const;
00280
00282 void __queryBhatNorm(const BowVector &vec, QueryResults &ret,
00283 int max_results, int max_id) const;
00284
00286 void __queryBCMatching(const BowVector &vec, QueryResults &ret,
00287 int max_results, int max_id) const;
00288
00289 void __queryBCThresholding(const BowVector &vec, QueryResults &ret,
00290 int max_results, int max_id) const;
00291
00293 void __queryBCKMatching(const BowVector &vec, QueryResults &ret,
00294 int max_results, int max_id) const;
00295
00296 void __queryKLInv(const BowVector &vec, QueryResults &ret,
00297 int max_results, int max_id) const;
00298
00299 void __queryJS(const BowVector &vec, QueryResults &ret,
00300 int max_results, int max_id) const;
00301
00302 void __queryBCKMatching2(const BowVector &vec, QueryResults &ret,
00303 int max_results, int max_id) const;
00304
00305
00306
00307 protected:
00308
00309
00310
00312 struct IFPair
00313 {
00315 EntryId entry_id;
00316
00318 WordValue word_weight;
00319
00323 IFPair(){}
00324
00330 IFPair(EntryId eid, WordValue wv): entry_id(eid), word_weight(wv) {}
00331
00337 inline bool operator==(EntryId eid) const { return entry_id == eid; }
00338 };
00339
00341 typedef std::list<IFPair> IFRow;
00342
00343
00344
00346 typedef std::vector<IFRow> InvertedFile;
00347
00348
00349
00350
00352 typedef std::vector<FeatureVector> DirectFile;
00353
00354
00355 protected:
00356
00358 TemplatedVocabulary<TDescriptor, F> *m_voc;
00359
00361 bool m_use_di;
00362
00365 int m_dilevels;
00366
00368 InvertedFile m_ifile;
00369
00371 DirectFile m_dfile;
00372
00374 int m_nentries;
00375
00376 };
00377
00378
00379
00380 template<class TDescriptor, class F>
00381 TemplatedDatabase<TDescriptor, F>::TemplatedDatabase
00382 (bool use_di, int di_levels)
00383 : m_voc(NULL), m_use_di(use_di), m_dilevels(di_levels)
00384 {
00385 }
00386
00387
00388
00389 template<class TDescriptor, class F>
00390 template<class T>
00391 TemplatedDatabase<TDescriptor, F>::TemplatedDatabase
00392 (const T &voc, bool use_di, int di_levels)
00393 : m_voc(NULL), m_use_di(use_di), m_dilevels(di_levels)
00394 {
00395 setVocabulary(voc);
00396 clear();
00397 }
00398
00399
00400
00401 template<class TDescriptor, class F>
00402 TemplatedDatabase<TDescriptor,F>::TemplatedDatabase
00403 (const TemplatedDatabase<TDescriptor,F> &db)
00404 : m_voc(NULL)
00405 {
00406 *this = db;
00407 }
00408
00409
00410
00411 template<class TDescriptor, class F>
00412 TemplatedDatabase<TDescriptor, F>::TemplatedDatabase
00413 (const std::string &filename)
00414 : m_voc(NULL)
00415 {
00416 load(filename);
00417 }
00418
00419
00420
00421 template<class TDescriptor, class F>
00422 TemplatedDatabase<TDescriptor, F>::TemplatedDatabase
00423 (const char *filename)
00424 : m_voc(NULL)
00425 {
00426 load(filename);
00427 }
00428
00429
00430
00431 template<class TDescriptor, class F>
00432 TemplatedDatabase<TDescriptor, F>::~TemplatedDatabase(void)
00433 {
00434 delete m_voc;
00435 }
00436
00437
00438
00439 template<class TDescriptor, class F>
00440 TemplatedDatabase<TDescriptor,F>& TemplatedDatabase<TDescriptor,F>::operator=
00441 (const TemplatedDatabase<TDescriptor,F> &db)
00442 {
00443 if(this != &db)
00444 {
00445 m_dfile = db.m_dfile;
00446 m_dilevels = db.m_dilevels;
00447 m_ifile = db.m_ifile;
00448 m_nentries = db.m_nentries;
00449 m_use_di = db.m_use_di;
00450 setVocabulary(*db.m_voc);
00451 }
00452 return *this;
00453 }
00454
00455
00456
00457 template<class TDescriptor, class F>
00458 EntryId TemplatedDatabase<TDescriptor, F>::add(
00459 const vector<TDescriptor> &features,
00460 BowVector *bowvec, FeatureVector *fvec)
00461 {
00462 BowVector aux;
00463 BowVector& v = (bowvec ? *bowvec : aux);
00464
00465 if(m_use_di && fvec != NULL)
00466 {
00467 m_voc->transform(features, v, *fvec, m_dilevels);
00468 return add(v, *fvec);
00469 }
00470 else if(m_use_di)
00471 {
00472 FeatureVector fv;
00473 m_voc->transform(features, v, fv, m_dilevels);
00474 return add(v, fv);
00475 }
00476 else if(fvec != NULL)
00477 {
00478 m_voc->transform(features, v, *fvec, m_dilevels);
00479 return add(v);
00480 }
00481 else
00482 {
00483 m_voc->transform(features, v);
00484 return add(v);
00485 }
00486 }
00487
00488
00489
00490 template<class TDescriptor, class F>
00491 EntryId TemplatedDatabase<TDescriptor, F>::add(const BowVector &v,
00492 const FeatureVector &fv)
00493 {
00494 EntryId entry_id = m_nentries++;
00495
00496 BowVector::const_iterator vit;
00497 vector<unsigned int>::const_iterator iit;
00498
00499 if(m_use_di)
00500 {
00501
00502 if(entry_id == m_dfile.size())
00503 {
00504 m_dfile.push_back(fv);
00505 }
00506 else
00507 {
00508 m_dfile[entry_id] = fv;
00509 }
00510 }
00511
00512
00513 for(vit = v.begin(); vit != v.end(); ++vit)
00514 {
00515 const WordId& word_id = vit->first;
00516 const WordValue& word_weight = vit->second;
00517
00518 IFRow& ifrow = m_ifile[word_id];
00519 ifrow.push_back(IFPair(entry_id, word_weight));
00520 }
00521
00522 return entry_id;
00523 }
00524
00525
00526
00527 template<class TDescriptor, class F>
00528 template<class T>
00529 inline void TemplatedDatabase<TDescriptor, F>::setVocabulary
00530 (const T& voc)
00531 {
00532 delete m_voc;
00533 m_voc = new T(voc);
00534 clear();
00535 }
00536
00537
00538
00539 template<class TDescriptor, class F>
00540 template<class T>
00541 inline void TemplatedDatabase<TDescriptor, F>::setVocabulary
00542 (const T& voc, bool use_di, int di_levels)
00543 {
00544 m_use_di = use_di;
00545 m_dilevels = di_levels;
00546 delete m_voc;
00547 m_voc = new T(voc);
00548 clear();
00549 }
00550
00551
00552
00553 template<class TDescriptor, class F>
00554 inline const TemplatedVocabulary<TDescriptor,F>*
00555 TemplatedDatabase<TDescriptor, F>::getVocabulary() const
00556 {
00557 return m_voc;
00558 }
00559
00560
00561
00562 template<class TDescriptor, class F>
00563 inline void TemplatedDatabase<TDescriptor, F>::clear()
00564 {
00565
00566 m_ifile.resize(0);
00567 m_ifile.resize(m_voc->size());
00568 m_dfile.resize(0);
00569 m_nentries = 0;
00570 }
00571
00572
00573
00574 template<class TDescriptor, class F>
00575 void TemplatedDatabase<TDescriptor, F>::allocate(int nd, int ni)
00576 {
00577
00578 if(ni > 0)
00579 {
00580 typename std::vector<IFRow>::iterator rit;
00581 for(rit = m_ifile.begin(); rit != m_ifile.end(); ++rit)
00582 {
00583 int n = (int)rit->size();
00584 if(ni > n)
00585 {
00586 rit->resize(ni);
00587 rit->resize(n);
00588 }
00589 }
00590 }
00591
00592 if(m_use_di && (int)m_dfile.size() < nd)
00593 {
00594 m_dfile.resize(nd);
00595 }
00596 }
00597
00598
00599
00600 template<class TDescriptor, class F>
00601 inline unsigned int TemplatedDatabase<TDescriptor, F>::size() const
00602 {
00603 return m_nentries;
00604 }
00605
00606
00607
00608 template<class TDescriptor, class F>
00609 inline bool TemplatedDatabase<TDescriptor, F>::usingDirectIndex() const
00610 {
00611 return m_use_di;
00612 }
00613
00614
00615
00616 template<class TDescriptor, class F>
00617 inline int TemplatedDatabase<TDescriptor, F>::getDirectIndexLevels() const
00618 {
00619 return m_dilevels;
00620 }
00621
00622
00623
00624 template<class TDescriptor, class F>
00625 void TemplatedDatabase<TDescriptor, F>::query(
00626 const vector<TDescriptor> &features,
00627 QueryResults &ret, int max_results, int max_id) const
00628 {
00629 BowVector vec;
00630 m_voc->transform(features, vec);
00631 query(vec, ret, max_results, max_id);
00632 }
00633
00634
00635
00636 template<class TDescriptor, class F>
00637 void TemplatedDatabase<TDescriptor, F>::query(
00638 const BowVector &vec,
00639 QueryResults &ret, int max_results, int max_id) const
00640 {
00641 ret.resize(0);
00642
00643 switch(m_voc->getScoringType())
00644 {
00645 case L1_NORM:
00646 queryL1(vec, ret, max_results, max_id);
00647 break;
00648
00649 case L2_NORM:
00650 queryL2(vec, ret, max_results, max_id);
00651 break;
00652
00653 case CHI_SQUARE:
00654 queryChiSquare(vec, ret, max_results, max_id);
00655 break;
00656
00657 case KL:
00658 queryKL(vec, ret, max_results, max_id);
00659 break;
00660
00661 case BHATTACHARYYA:
00662 queryBhattacharyya(vec, ret, max_results, max_id);
00663 break;
00664
00665 case DOT_PRODUCT:
00666 queryDotProduct(vec, ret, max_results, max_id);
00667 break;
00668 }
00669 }
00670
00671
00672
00673 template<class TDescriptor, class F>
00674 void TemplatedDatabase<TDescriptor, F>::queryL1(const BowVector &vec,
00675 QueryResults &ret, int max_results, int max_id) const
00676 {
00677 BowVector::const_iterator vit;
00678 typename IFRow::const_iterator rit;
00679
00680 map<EntryId, double> pairs;
00681 map<EntryId, double>::iterator pit;
00682
00683 for(vit = vec.begin(); vit != vec.end(); ++vit)
00684 {
00685 const WordId word_id = vit->first;
00686 const WordValue& qvalue = vit->second;
00687
00688 const IFRow& row = m_ifile[word_id];
00689
00690
00691
00692 for(rit = row.begin(); rit != row.end(); ++rit)
00693 {
00694 const EntryId entry_id = rit->entry_id;
00695 const WordValue& dvalue = rit->word_weight;
00696
00697 if((int)entry_id < max_id || max_id == -1)
00698 {
00699 double value = fabs(qvalue - dvalue) - fabs(qvalue) - fabs(dvalue);
00700
00701 pit = pairs.lower_bound(entry_id);
00702 if(pit != pairs.end() && !(pairs.key_comp()(entry_id, pit->first)))
00703 {
00704 pit->second += value;
00705 }
00706 else
00707 {
00708 pairs.insert(pit,
00709 map<EntryId, double>::value_type(entry_id, value));
00710 }
00711 }
00712
00713 }
00714 }
00715
00716
00717 ret.reserve(pairs.size());
00718 for(pit = pairs.begin(); pit != pairs.end(); ++pit)
00719 {
00720 ret.push_back(Result(pit->first, pit->second));
00721 }
00722
00723
00724
00725
00726 sort(ret.begin(), ret.end());
00727
00728
00729
00730 if(max_results > 0 && (int)ret.size() > max_results)
00731 ret.resize(max_results);
00732
00733
00734
00735
00736
00737
00738 QueryResults::iterator qit;
00739 for(qit = ret.begin(); qit != ret.end(); qit++)
00740 qit->Score = -qit->Score/2.0;
00741 }
00742
00743
00744
00745 template<class TDescriptor, class F>
00746 void TemplatedDatabase<TDescriptor, F>::queryL2(const BowVector &vec,
00747 QueryResults &ret, int max_results, int max_id) const
00748 {
00749 BowVector::const_iterator vit;
00750 typename IFRow::const_iterator rit;
00751
00752 map<EntryId, double> pairs;
00753 map<EntryId, double>::iterator pit;
00754
00755
00756
00757
00758 for(vit = vec.begin(); vit != vec.end(); ++vit)
00759 {
00760 const WordId word_id = vit->first;
00761 const WordValue& qvalue = vit->second;
00762
00763 const IFRow& row = m_ifile[word_id];
00764
00765
00766
00767 for(rit = row.begin(); rit != row.end(); ++rit)
00768 {
00769 const EntryId entry_id = rit->entry_id;
00770 const WordValue& dvalue = rit->word_weight;
00771
00772 if((int)entry_id < max_id || max_id == -1)
00773 {
00774 double value = - qvalue * dvalue;
00775
00776 pit = pairs.lower_bound(entry_id);
00777
00778 if(pit != pairs.end() && !(pairs.key_comp()(entry_id, pit->first)))
00779 {
00780 pit->second += value;
00781
00782 }
00783 else
00784 {
00785 pairs.insert(pit,
00786 map<EntryId, double>::value_type(entry_id, value));
00787
00788
00789
00790 }
00791 }
00792
00793 }
00794 }
00795
00796
00797 ret.reserve(pairs.size());
00798
00799 for(pit = pairs.begin(); pit != pairs.end(); ++pit)
00800 {
00801 ret.push_back(Result(pit->first, pit->second));
00802 }
00803
00804
00805
00806
00807 sort(ret.begin(), ret.end());
00808
00809
00810
00811 if(max_results > 0 && (int)ret.size() > max_results)
00812 ret.resize(max_results);
00813
00814
00815
00816
00817
00818 QueryResults::iterator qit;
00819 for(qit = ret.begin(); qit != ret.end(); qit++)
00820 {
00821 if(qit->Score <= -1.0)
00822 qit->Score = 1.0;
00823 else
00824 qit->Score = 1.0 - sqrt(1.0 + qit->Score);
00825
00826
00827 }
00828
00829 }
00830
00831
00832
00833 template<class TDescriptor, class F>
00834 void TemplatedDatabase<TDescriptor, F>::queryChiSquare(const BowVector &vec,
00835 QueryResults &ret, int max_results, int max_id) const
00836 {
00837 BowVector::const_iterator vit;
00838 typename IFRow::const_iterator rit;
00839
00840 map<EntryId, pair<double, int> > pairs;
00841 map<EntryId, pair<double, int> >::iterator pit;
00842
00843 map<EntryId, pair<double, double> > sums;
00844 map<EntryId, pair<double, double> >::iterator sit;
00845
00846
00847
00848
00849
00850
00851 for(vit = vec.begin(); vit != vec.end(); ++vit)
00852 {
00853 const WordId word_id = vit->first;
00854 const WordValue& qvalue = vit->second;
00855
00856 const IFRow& row = m_ifile[word_id];
00857
00858
00859
00860 for(rit = row.begin(); rit != row.end(); ++rit)
00861 {
00862 const EntryId entry_id = rit->entry_id;
00863 const WordValue& dvalue = rit->word_weight;
00864
00865 if((int)entry_id < max_id || max_id == -1)
00866 {
00867
00868
00869 double value = 0;
00870 if(qvalue + dvalue != 0.0)
00871 value = - qvalue * dvalue / (qvalue + dvalue);
00872
00873 pit = pairs.lower_bound(entry_id);
00874 sit = sums.lower_bound(entry_id);
00875
00876 if(pit != pairs.end() && !(pairs.key_comp()(entry_id, pit->first)))
00877 {
00878 pit->second.first += value;
00879 pit->second.second += 1;
00880
00881 sit->second.first += qvalue;
00882 sit->second.second += dvalue;
00883 }
00884 else
00885 {
00886 pairs.insert(pit,
00887 map<EntryId, pair<double, int> >::value_type(entry_id,
00888 make_pair(value, 1) ));
00889
00890
00891
00892 sums.insert(sit,
00893 map<EntryId, pair<double, double> >::value_type(entry_id,
00894 make_pair(qvalue, dvalue) ));
00895 }
00896 }
00897
00898 }
00899 }
00900
00901
00902 ret.reserve(pairs.size());
00903 sit = sums.begin();
00904 for(pit = pairs.begin(); pit != pairs.end(); ++pit, ++sit)
00905 {
00906 if(pit->second.second >= MIN_COMMON_WORDS)
00907 {
00908 ret.push_back(Result(pit->first, pit->second.first));
00909 ret.back().nWords = pit->second.second;
00910 ret.back().sumCommonVi = sit->second.first;
00911 ret.back().sumCommonWi = sit->second.second;
00912 ret.back().expectedChiScore =
00913 2 * sit->second.second / (1 + sit->second.second);
00914 }
00915
00916
00917 }
00918
00919
00920
00921
00922
00923 sort(ret.begin(), ret.end());
00924
00925
00926
00927 if(max_results > 0 && (int)ret.size() > max_results)
00928 ret.resize(max_results);
00929
00930
00931 QueryResults::iterator qit;
00932 for(qit = ret.begin(); qit != ret.end(); qit++)
00933 {
00934
00935 qit->Score = - 2. * qit->Score;
00936
00937 qit->chiScore = qit->Score;
00938 }
00939
00940
00941
00942
00943
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953
00954
00955
00956
00957
00958
00959
00960
00961
00962
00963 }
00964
00965
00966
00967 template<class TDescriptor, class F>
00968 void TemplatedDatabase<TDescriptor, F>::queryKL(const BowVector &vec,
00969 QueryResults &ret, int max_results, int max_id) const
00970 {
00971 BowVector::const_iterator vit;
00972 typename IFRow::const_iterator rit;
00973
00974 map<EntryId, double> pairs;
00975 map<EntryId, double>::iterator pit;
00976
00977 for(vit = vec.begin(); vit != vec.end(); ++vit)
00978 {
00979 const WordId word_id = vit->first;
00980 const WordValue& vi = vit->second;
00981
00982 const IFRow& row = m_ifile[word_id];
00983
00984
00985
00986 for(rit = row.begin(); rit != row.end(); ++rit)
00987 {
00988 const EntryId entry_id = rit->entry_id;
00989 const WordValue& wi = rit->word_weight;
00990
00991 if((int)entry_id < max_id || max_id == -1)
00992 {
00993 double value = 0;
00994 if(vi != 0 && wi != 0) value = vi * log(vi/wi);
00995
00996 pit = pairs.lower_bound(entry_id);
00997 if(pit != pairs.end() && !(pairs.key_comp()(entry_id, pit->first)))
00998 {
00999 pit->second += value;
01000 }
01001 else
01002 {
01003 pairs.insert(pit,
01004 map<EntryId, double>::value_type(entry_id, value));
01005 }
01006 }
01007
01008 }
01009 }
01010
01011
01012
01013
01014
01015
01016 ret.reserve(pairs.size());
01017 for(pit = pairs.begin(); pit != pairs.end(); ++pit)
01018 {
01019 EntryId eid = pit->first;
01020 double value = 0.0;
01021
01022 for(vit = vec.begin(); vit != vec.end(); ++vit)
01023 {
01024 const WordValue &vi = vit->second;
01025 const IFRow& row = m_ifile[vit->first];
01026
01027 if(vi != 0)
01028 {
01029 if(row.end() == find(row.begin(), row.end(), eid ))
01030 {
01031 value += vi * (log(vi) - GeneralScoring::LOG_EPS);
01032 }
01033 }
01034 }
01035
01036 pit->second += value;
01037
01038
01039 ret.push_back(Result(pit->first, pit->second));
01040 }
01041
01042
01043
01044
01045
01046 sort(ret.begin(), ret.end());
01047
01048
01049 if(max_results > 0 && (int)ret.size() > max_results)
01050 ret.resize(max_results);
01051
01052
01053
01054 }
01055
01056
01057
01058 template<class TDescriptor, class F>
01059 void TemplatedDatabase<TDescriptor, F>::__queryJS(const BowVector &vec,
01060 QueryResults &ret, int max_results, int max_id) const
01061 {
01062 BowVector::const_iterator vit;
01063 typename IFRow::const_iterator rit;
01064
01065 map<EntryId, double> pairs;
01066 map<EntryId, double>::iterator pit;
01067
01068 for(vit = vec.begin(); vit != vec.end(); ++vit)
01069 {
01070 const WordId word_id = vit->first;
01071 const WordValue& qvalue = vit->second;
01072
01073 const IFRow& row = m_ifile[word_id];
01074
01075
01076
01077 for(rit = row.begin(); rit != row.end(); ++rit)
01078 {
01079 const EntryId entry_id = rit->entry_id;
01080 const WordValue& dvalue = rit->word_weight;
01081
01082 if((int)entry_id < max_id || max_id == -1)
01083 {
01084 double value = sqrt(qvalue * dvalue);
01085
01086 pit = pairs.lower_bound(entry_id);
01087 if(pit != pairs.end() && !(pairs.key_comp()(entry_id, pit->first)))
01088 {
01089 pit->second += value;
01090 }
01091 else
01092 {
01093 pairs.insert(pit,
01094 map<EntryId, double>::value_type(entry_id, value));
01095 }
01096 }
01097
01098 }
01099 }
01100
01101
01102 ret.reserve(pairs.size());
01103 for(pit = pairs.begin(); pit != pairs.end(); ++pit)
01104 {
01105 ret.push_back(Result(pit->first, pit->second));
01106 }
01107
01108
01109
01110
01111 sort(ret.begin(), ret.end(), Result::gt);
01112
01113
01114 if(max_results > 0 && (int)ret.size() > max_results)
01115 ret.resize(max_results);
01116
01117 }
01118
01119
01120
01121 template<class TDescriptor, class F>
01122 void TemplatedDatabase<TDescriptor, F>::queryBhattacharyya(
01123 const BowVector &vec, QueryResults &ret, int max_results, int max_id) const
01124 {
01125 BowVector::const_iterator vit;
01126 typename IFRow::const_iterator rit;
01127
01128
01129
01130
01131 map<EntryId, pair<double, int> > pairs;
01132 map<EntryId, pair<double, int> >::iterator pit;
01133
01134 for(vit = vec.begin(); vit != vec.end(); ++vit)
01135 {
01136 const WordId word_id = vit->first;
01137 const WordValue& qvalue = vit->second;
01138
01139 const IFRow& row = m_ifile[word_id];
01140
01141
01142
01143 for(rit = row.begin(); rit != row.end(); ++rit)
01144 {
01145 const EntryId entry_id = rit->entry_id;
01146 const WordValue& dvalue = rit->word_weight;
01147
01148 if((int)entry_id < max_id || max_id == -1)
01149 {
01150 double value = sqrt(qvalue * dvalue);
01151
01152 pit = pairs.lower_bound(entry_id);
01153 if(pit != pairs.end() && !(pairs.key_comp()(entry_id, pit->first)))
01154 {
01155 pit->second.first += value;
01156 pit->second.second += 1;
01157 }
01158 else
01159 {
01160 pairs.insert(pit,
01161 map<EntryId, pair<double, int> >::value_type(entry_id,
01162 make_pair(value, 1)));
01163 }
01164 }
01165
01166 }
01167 }
01168
01169
01170 ret.reserve(pairs.size());
01171 for(pit = pairs.begin(); pit != pairs.end(); ++pit)
01172 {
01173 if(pit->second.second >= MIN_COMMON_WORDS)
01174 {
01175 ret.push_back(Result(pit->first, pit->second.first));
01176 ret.back().nWords = pit->second.second;
01177 ret.back().bhatScore = pit->second.first;
01178 }
01179 }
01180
01181
01182
01183
01184 sort(ret.begin(), ret.end(), Result::gt);
01185
01186
01187 if(max_results > 0 && (int)ret.size() > max_results)
01188 ret.resize(max_results);
01189
01190
01191
01192
01193
01194
01195
01196
01197
01198
01199
01200
01201
01202
01203
01204
01205
01206
01207
01208
01209
01210
01211
01212
01213
01214
01215 }
01216
01217
01218
01219 template<class TDescriptor, class F>
01220 void TemplatedDatabase<TDescriptor, F>::__queryBhatNorm(
01221 const BowVector &vec, QueryResults &ret, int max_results, int max_id) const
01222 {
01223 BowVector::const_iterator vit;
01224 typename IFRow::const_iterator rit;
01225
01226 map<EntryId, double> pairs;
01227 map<EntryId, double>::iterator pit;
01228
01229 map<EntryId, double> sums;
01230 map<EntryId, double>::iterator sit;
01231
01232 for(vit = vec.begin(); vit != vec.end(); ++vit)
01233 {
01234 const WordId word_id = vit->first;
01235 const WordValue& qvalue = vit->second;
01236
01237 const IFRow& row = m_ifile[word_id];
01238
01239
01240
01241 for(rit = row.begin(); rit != row.end(); ++rit)
01242 {
01243 const EntryId entry_id = rit->entry_id;
01244 const WordValue& dvalue = rit->word_weight;
01245
01246 if((int)entry_id < max_id || max_id == -1)
01247 {
01248 double value = sqrt(qvalue * dvalue);
01249
01250 pit = pairs.lower_bound(entry_id);
01251 sit = sums.lower_bound(entry_id);
01252 if(pit != pairs.end() && !(pairs.key_comp()(entry_id, pit->first)))
01253 {
01254 pit->second += value;
01255 sit->second += dvalue;
01256 }
01257 else
01258 {
01259 pairs.insert(pit,
01260 map<EntryId, double>::value_type(entry_id, value));
01261
01262 sums.insert(pit,
01263 map<EntryId, double>::value_type(entry_id, dvalue));
01264 }
01265 }
01266
01267 }
01268 }
01269
01270
01271 ret.reserve(pairs.size());
01272 sit = sums.begin();
01273 for(pit = pairs.begin(); pit != pairs.end(); ++pit, ++sit)
01274 {
01275 cout << "@@ obj: " << pit->first << ", bhat: " << pit->second
01276 << ", sum: " << sit->second
01277 << ", bhat/sum: " << pit->second / sit->second << endl;
01278 ret.push_back(Result(pit->first, pit->second / sit->second));
01279 }
01280
01281
01282
01283
01284 sort(ret.begin(), ret.end(), Result::gt);
01285
01286
01287 if(max_results > 0 && (int)ret.size() > max_results)
01288 ret.resize(max_results);
01289
01290 }
01291
01292
01293
01294 template<class TDescriptor, class F>
01295 void TemplatedDatabase<TDescriptor, F>::queryDotProduct(
01296 const BowVector &vec, QueryResults &ret, int max_results, int max_id) const
01297 {
01298 BowVector::const_iterator vit;
01299 typename IFRow::const_iterator rit;
01300
01301 map<EntryId, double> pairs;
01302 map<EntryId, double>::iterator pit;
01303
01304 for(vit = vec.begin(); vit != vec.end(); ++vit)
01305 {
01306 const WordId word_id = vit->first;
01307 const WordValue& qvalue = vit->second;
01308
01309 const IFRow& row = m_ifile[word_id];
01310
01311
01312
01313 for(rit = row.begin(); rit != row.end(); ++rit)
01314 {
01315 const EntryId entry_id = rit->entry_id;
01316 const WordValue& dvalue = rit->word_weight;
01317
01318 if((int)entry_id < max_id || max_id == -1)
01319 {
01320
01321
01322 double value;
01323 if(this->m_voc->getWeightingType() == BINARY)
01324 value = 1;
01325 else
01326 value = qvalue * dvalue;
01327
01328 pit = pairs.lower_bound(entry_id);
01329 if(pit != pairs.end() && !(pairs.key_comp()(entry_id, pit->first)))
01330 {
01331 pit->second += value;
01332 }
01333 else
01334 {
01335 pairs.insert(pit,
01336 map<EntryId, double>::value_type(entry_id, value));
01337 }
01338 }
01339
01340 }
01341 }
01342
01343
01344 ret.reserve(pairs.size());
01345 for(pit = pairs.begin(); pit != pairs.end(); ++pit)
01346 {
01347 ret.push_back(Result(pit->first, pit->second));
01348 }
01349
01350
01351
01352
01353 sort(ret.begin(), ret.end(), Result::gt);
01354
01355
01356 if(max_results > 0 && (int)ret.size() > max_results)
01357 ret.resize(max_results);
01358
01359
01360 }
01361
01362
01363
01364 template<class TDescriptor, class F>
01365 void TemplatedDatabase<TDescriptor, F>::__queryNormSqDiff
01366 (const BowVector &vec, QueryResults &ret, int max_results, int max_id) const
01367 {
01368 BowVector::const_iterator vit;
01369 typename IFRow::const_iterator rit;
01370
01371 map<EntryId, double> pairs;
01372 map<EntryId, double>::iterator pit;
01373
01374 map<EntryId, int> counters;
01375 map<EntryId, int>::iterator cit;
01376
01377 for(vit = vec.begin(); vit != vec.end(); ++vit)
01378 {
01379 const WordId word_id = vit->first;
01380 const WordValue& qvalue = vit->second;
01381
01382 const IFRow& row = m_ifile[word_id];
01383
01384
01385
01386 for(rit = row.begin(); rit != row.end(); ++rit)
01387 {
01388 const EntryId entry_id = rit->entry_id;
01389 const WordValue& dvalue = rit->word_weight;
01390
01391 if((int)entry_id < max_id || max_id == -1)
01392 {
01393 double value = (qvalue - dvalue)*(qvalue - dvalue);
01394
01395 pit = pairs.lower_bound(entry_id);
01396 cit = counters.lower_bound(entry_id);
01397 if(pit != pairs.end() && !(pairs.key_comp()(entry_id, pit->first)))
01398 {
01399 pit->second += value;
01400 cit->second += 1;
01401 }
01402 else
01403 {
01404 pairs.insert(pit,
01405 map<EntryId, double>::value_type(entry_id, value));
01406
01407 counters.insert(cit,
01408 map<EntryId, int>::value_type(entry_id, 1));
01409 }
01410 }
01411
01412 }
01413 }
01414
01415
01416 ret.reserve(pairs.size());
01417 cit = counters.begin();
01418 for(pit = pairs.begin(); pit != pairs.end(); ++pit, ++cit)
01419 {
01420 ret.push_back(Result(pit->first, pit->second / cit->second));
01421 }
01422
01423
01424 sort(ret.begin(), ret.end());
01425
01426
01427 if(max_results > 0 && (int)ret.size() > max_results)
01428 ret.resize(max_results);
01429
01430
01431 }
01432
01433
01434
01435 template<class TDescriptor, class F>
01436 void TemplatedDatabase<TDescriptor, F>::__queryL2Dist(const BowVector &vec,
01437 QueryResults &ret, int max_results, int max_id) const
01438 {
01439 BowVector::const_iterator vit;
01440 typename IFRow::const_iterator rit;
01441
01442 map<EntryId, double> pairs;
01443 map<EntryId, double>::iterator pit;
01444
01445 for(vit = vec.begin(); vit != vec.end(); ++vit)
01446 {
01447 const WordId word_id = vit->first;
01448 const WordValue& qvalue = vit->second;
01449
01450 const IFRow& row = m_ifile[word_id];
01451
01452
01453
01454 for(rit = row.begin(); rit != row.end(); ++rit)
01455 {
01456 const EntryId entry_id = rit->entry_id;
01457 const WordValue& dvalue = rit->word_weight;
01458
01459 if((int)entry_id < max_id || max_id == -1)
01460 {
01461 double value = (dvalue > qvalue ? qvalue / dvalue : dvalue / qvalue);
01462
01463 pit = pairs.lower_bound(entry_id);
01464 if(pit != pairs.end() && !(pairs.key_comp()(entry_id, pit->first)))
01465 {
01466 pit->second += value;
01467 }
01468 else
01469 {
01470 pairs.insert(pit,
01471 map<EntryId, double>::value_type(entry_id, value));
01472 }
01473 }
01474
01475 }
01476 }
01477
01478
01479 double Vsq = 0;
01480 for(vit = vec.begin(); vit != vec.end(); ++vit)
01481 Vsq += vit->second*vit->second;
01482
01483
01484 ret.reserve(pairs.size());
01485 for(pit = pairs.begin(); pit != pairs.end(); ++pit)
01486 {
01487
01488
01489
01490
01491
01492
01493
01494
01495
01496
01497
01498
01499
01500
01501
01502
01503
01504
01505 double sc = pit->second;
01506 ret.push_back(Result(pit->first, sc));
01507 }
01508
01509
01510 sort(ret.begin(), ret.end(), Result::gt);
01511
01512
01513 if(max_results > 0 && (int)ret.size() > max_results)
01514 ret.resize(max_results);
01515
01516
01517 }
01518
01519
01520
01521 template<class TDescriptor, class F>
01522 void TemplatedDatabase<TDescriptor, F>::__queryBCMatching(const BowVector &vec,
01523 QueryResults &ret, int max_results, int max_id) const
01524 {
01525
01526 QueryResults qbhat;
01527 queryBhattacharyya(vec, qbhat, -1, max_id);
01528
01529
01530 QueryResults qchi;
01531 queryChiSquare(vec, qchi, -1, max_id);
01532
01533
01534
01535
01536
01537
01538
01539
01540
01541
01542
01543
01544
01545
01546
01547
01548
01549
01550
01551
01552
01553
01554
01555
01556
01557
01558
01559
01560
01561
01562
01563
01564
01565 const int NMAX = bcmatching_C;
01566 QueryResults::iterator iend = qbhat.begin() +
01567 (NMAX < (int)qbhat.size() ? NMAX : qbhat.size());
01568
01569 ret.clear();
01570 for(size_t i = 0; i < qchi.size(); ++i)
01571
01572 {
01573 QueryResults::iterator qit =
01574 std::find(qbhat.begin(), iend, qchi[i].Id);
01575
01576 if(qit != iend)
01577
01578 {
01579 ret.push_back(qchi[i]);
01580 ret.back().bhatScore = qit->bhatScore;
01581
01582 }
01583 }
01584
01585 }
01586
01587
01588
/// Per-entry accumulator used by __queryBCThresholding(): partial
/// Bhattacharyya and Chi square sums plus the number of common words.
struct tBCData
{
  double bhat;  ///< accumulated Bhattacharyya term
  double chi;   ///< accumulated Chi square term
  int counter;  ///< number of words accumulated

  /// Creates a zeroed accumulator (members were left indeterminate before).
  tBCData(): bhat(0), chi(0), counter(0) {}

  /**
   * Creates an accumulator with the given initial values.
   * @param _b  Bhattacharyya term
   * @param _c  Chi square term
   * @param _cn word count
   */
  tBCData(double _b, double _c, int _cn): bhat(_b), chi(_c), counter(_cn){}
};
01598
01599 template<class TDescriptor, class F>
01600 void TemplatedDatabase<TDescriptor, F>::__queryBCThresholding
01601 (const BowVector &vec,
01602 QueryResults &ret, int max_results, int max_id) const
01603 {
01604 BowVector::const_iterator vit;
01605 typename IFRow::const_iterator rit;
01606
01607 map<EntryId, tBCData> pairs;
01608 map<EntryId, tBCData>::iterator pit;
01609
01610 for(vit = vec.begin(); vit != vec.end(); ++vit)
01611 {
01612 const WordId word_id = vit->first;
01613 const WordValue& qvalue = vit->second;
01614
01615 const IFRow& row = m_ifile[word_id];
01616
01617
01618
01619 for(rit = row.begin(); rit != row.end(); ++rit)
01620 {
01621 const EntryId entry_id = rit->entry_id;
01622 const WordValue& dvalue = rit->word_weight;
01623
01624 if((int)entry_id < max_id || max_id == -1)
01625 {
01626 double bhatvalue = sqrt(qvalue * dvalue);
01627 double chivalue = 0;
01628 if(qvalue + dvalue != 0.0)
01629 chivalue = - qvalue * dvalue / (qvalue + dvalue);
01630
01631 pit = pairs.lower_bound(entry_id);
01632 if(pit != pairs.end() && !(pairs.key_comp()(entry_id, pit->first)))
01633 {
01634 pit->second.bhat += bhatvalue;
01635 pit->second.chi += chivalue;
01636 pit->second.counter += 1;
01637 }
01638 else
01639 {
01640 pairs.insert(pit,
01641 map<EntryId, tBCData>::value_type(entry_id,
01642 tBCData(bhatvalue, chivalue, 1)));
01643 }
01644 }
01645
01646 }
01647 }
01648
01649
01650 ret.clear();
01651 ret.reserve(pairs.size());
01652 for(pit = pairs.begin(); pit != pairs.end(); ++pit)
01653 {
01654 if(pit->second.counter >= MIN_COMMON_WORDS)
01655 {
01656 ret.push_back(Result(pit->first, pit->second.bhat));
01657 ret.back().nWords = pit->second.counter;
01658 ret.back().bhatScore = pit->second.bhat;
01659 ret.back().chiScore = pit->second.chi;
01660 }
01661 }
01662
01663 if(ret.empty()) return;
01664
01665
01666
01667
01668 sort(ret.begin(), ret.end(), Result::gt);
01669
01670
01671 double min_score = ret[0].Score * bcthresholding_beta;
01672
01673 for(size_t n = 0; n < ret.size(); ++n)
01674 {
01675 if(ret[n].Score >= min_score)
01676 {
01677 ret[n].Score = ret[n].chiScore;
01678 }
01679 else
01680 {
01681 ret.resize(n);
01682 break;
01683 }
01684 }
01685
01686
01687 sort(ret.begin(), ret.end());
01688
01689
01690
01691 if(max_results > 0 && (int)ret.size() > max_results)
01692 ret.resize(max_results);
01693
01694
01695 QueryResults::iterator qit;
01696 for(qit = ret.begin(); qit != ret.end(); qit++)
01697 {
01698
01699 qit->Score = - 2. * qit->Score;
01700
01701 qit->chiScore = qit->Score;
01702 }
01703
01704
01705 #if 0
01706
01707 QueryResults qbhat;
01708 queryBhattacharyya(vec, qbhat, -1, max_id);
01709
01710 if(qbhat.empty())
01711 {
01712 ret.clear();
01713 return;
01714 }
01715
01716
01717 QueryResults qchi;
01718 queryChiSquare(vec, qchi, -1, max_id);
01719
01720
01721 double min_score = qbhat[0].Score * bcthresholding_beta;
01722
01723 QueryResults::iterator iend = qbhat.begin();
01724 while(iend != qbhat.end() && iend->Score >= min_score) ++iend;
01725
01726 if(max_results < 0) max_results = qchi.size();
01727
01728 ret.clear();
01729 for(size_t i = 0; i < qchi.size() && ret.size() < (size_t)max_results; ++i)
01730
01731 {
01732 QueryResults::iterator qit =
01733 std::find(qbhat.begin(), iend, qchi[i].Id);
01734
01735 if(qit != iend)
01736
01737 {
01738 ret.push_back(qchi[i]);
01739 ret.back().bhatScore = qit->bhatScore;
01740
01741 }
01742 }
01743 #endif
01744 }
01745
01746
01747
01748 template<class TDescriptor, class F>
01749 void TemplatedDatabase<TDescriptor, F>::__queryBCKMatching(const BowVector &vec,
01750 QueryResults &ret, int max_results, int max_id) const
01751 {
01752
01753 QueryResults qbhat;
01754 queryBhattacharyya(vec, qbhat, max_results, max_id);
01755
01756
01757
01758
01759
01760
01761
01762
01763
01764
01765 QueryResults qchi;
01766 queryChiSquare(vec, qchi, max_results, max_id);
01767
01768 QueryResults qkl;
01769 queryKL(vec, qkl, max_results, max_id);
01770
01771
01772
01773
01774
01775
01776
01777
01778
01779
01780
01781
01782
01783
01784
01785
01786 const int NMAX = 4;
01787 QueryResults::iterator bend = qbhat.begin() +
01788 (NMAX < (int)qbhat.size() ? NMAX : qbhat.size());
01789 QueryResults::iterator cend = qchi.begin() +
01790 (NMAX < (int)qchi.size() ? NMAX : qchi.size());
01791 QueryResults::iterator kend = qkl.begin() +
01792 (NMAX < (int)qkl.size() ? NMAX : qkl.size());
01793
01794 vector<pair<int, double> > votes;
01795 QueryResults::iterator bit;
01796 for(bit = qbhat.begin(); bit != bend; ++bit)
01797 {
01798 int v = 1;
01799
01800 if(find(qchi.begin(), cend, bit->Id) != cend) ++v;
01801 if(find(qkl.begin(), kend, bit->Id) != kend) ++v;
01802
01803 votes.push_back(make_pair(v, bit->Score));
01804 }
01805
01806 vector<unsigned int> i_sort;
01807 DUtils::STL::indexSort(votes.begin(), votes.end(), i_sort);
01808
01809 ret.clear();
01810 int MIN_VOTES = 1;
01811 for(int i = (int)votes.size()-1; i >= 0; --i)
01812 {
01813 int idx = i_sort[i];
01814
01815
01816 if(votes[idx].first < MIN_VOTES) break;
01817
01818 ret.push_back(qbhat[idx]);
01819 ret.back().Score = votes[idx].first * 10 + votes[idx].second;
01820
01821
01822 }
01823
01824 }
01825
01826
01827
/**
 * Ordering predicate for (first, second) integer pairs: pairs with a larger
 * first value come first; ties are broken by the smaller second value.
 * @param a
 * @param b
 * @return true iff a must be placed before b
 */
static bool fCompareVotes(const std::pair<int,int> &a,
  const std::pair<int,int> &b)
{
  if(a.first != b.first) return a.first > b.first;
  return a.second < b.second;
}
01832
01833 template<class TDescriptor, class F>
01834 void TemplatedDatabase<TDescriptor, F>::__queryBCKMatching2(const BowVector &vec,
01835 QueryResults &ret, int max_results, int max_id) const
01836 {
01837
01838 QueryResults qbhat;
01839 queryBhattacharyya(vec, qbhat, max_results, max_id);
01840
01841
01842
01843
01844
01845
01846
01847
01848
01849
01850 if(qbhat.size() == 1)
01851 {
01852 ret = qbhat;
01853 return;
01854 }
01855
01856
01857 QueryResults qchi;
01858 queryChiSquare(vec, qchi, max_results, max_id);
01859
01860
01861 QueryResults qkl;
01862 queryKL(vec, qkl, max_results, max_id);
01863
01864
01865
01866
01867
01868
01869
01870
01871
01872
01873
01874
01875 const int NMAX = 4;
01876 const typename QueryResults::iterator bbegin = qbhat.begin();
01877 const typename QueryResults::iterator bend = qbhat.begin() +
01878 (NMAX < (int)qbhat.size() ? NMAX : qbhat.size());
01879
01880
01881 vector<pair<int, int> > votes(bend-bbegin, make_pair(0,0));
01882
01883
01884 typename QueryResults::iterator qit, qf;
01885 for(qit = qchi.begin(); qit != qchi.end(); ++qit)
01886 {
01887 qf = find(bbegin, bend, qit->Id);
01888 if(qf != bend)
01889 {
01890 votes[qf - bbegin].first += 1;
01891 votes[qf - bbegin].second += qit - qchi.begin();
01892 }
01893 }
01894
01895
01896 for(qit = qkl.begin(); qit != qkl.end(); ++qit)
01897 {
01898 qf = find(bbegin, bend, qit->Id);
01899 if(qf != bend)
01900 {
01901 votes[qf - bbegin].first += 1;
01902 votes[qf - bbegin].second += qit - qkl.begin();
01903 }
01904 }
01905
01906 vector<unsigned int> i_sort;
01907 DUtils::STL::indexSort(votes.begin(), votes.end(), i_sort,
01908 fCompareVotes);
01909
01910 ret.clear();
01911 ret.reserve(i_sort.size());
01912 for(size_t i = 0; i < i_sort.size(); ++i)
01913 {
01914 cout << "@@ times: " << votes[i_sort[i]].first << ", pos: "
01915 << votes[i_sort[i]].second
01916 << ", id: " << (bbegin + i_sort[i])->Id << endl;
01917
01918 ret.push_back(*(bbegin + i_sort[i]));
01919 ret.back().Score = votes[i_sort[i]].first * 100 + votes[i_sort[i]].second;
01920
01921 }
01922
01923 }
01924
01925
01926
01927 template<class TDescriptor, class F>
01928 void TemplatedDatabase<TDescriptor, F>::__queryKLInv(const BowVector &vec,
01929 QueryResults &ret, int max_results, int max_id) const
01930 {
01931 BowVector::const_iterator vit;
01932 typename IFRow::const_iterator rit;
01933
01934 map<EntryId, double> pairs;
01935 map<EntryId, double>::iterator pit;
01936
01937 for(vit = vec.begin(); vit != vec.end(); ++vit)
01938 {
01939 const WordId word_id = vit->first;
01940 const WordValue& vi = vit->second;
01941
01942 const IFRow& row = m_ifile[word_id];
01943
01944
01945
01946 for(rit = row.begin(); rit != row.end(); ++rit)
01947 {
01948 const EntryId entry_id = rit->entry_id;
01949 const WordValue& wi = rit->word_weight;
01950
01951 if((int)entry_id < max_id || max_id == -1)
01952 {
01953 double value = 0;
01954 if(vi != 0 && wi != 0) value = wi * log(wi/vi);
01955
01956 pit = pairs.lower_bound(entry_id);
01957 if(pit != pairs.end() && !(pairs.key_comp()(entry_id, pit->first)))
01958 {
01959 pit->second += value;
01960 }
01961 else
01962 {
01963 pairs.insert(pit,
01964 map<EntryId, double>::value_type(entry_id, value));
01965 }
01966 }
01967
01968 }
01969 }
01970
01971
01972
01973
01974
01975
01976
01977
01978 ret.reserve(pairs.size());
01979 for(pit = pairs.begin(); pit != pairs.end(); ++pit)
01980 {
01981 EntryId eid = pit->first;
01982 double value = 0.0;
01983
01984 for(vit = vec.begin(); vit != vec.end(); ++vit)
01985 {
01986 const WordValue &vi = vit->second;
01987 const IFRow& row = m_ifile[vit->first];
01988
01989 if(vi != 0)
01990 {
01991 if(row.end() == find(row.begin(), row.end(), eid ))
01992 {
01993 value += vi * (log(vi) - GeneralScoring::LOG_EPS);
01994 }
01995 }
01996 }
01997
01998 pit->second += value;
01999
02000
02001 ret.push_back(Result(pit->first, pit->second));
02002 }
02003
02004
02005
02006
02007
02008 sort(ret.begin(), ret.end());
02009
02010
02011 if(max_results > 0 && (int)ret.size() > max_results)
02012 ret.resize(max_results);
02013
02014
02015 }
02016
02017
02018
02019 template<class TDescriptor, class F>
02020 void TemplatedDatabase<TDescriptor, F>::__queryKLHalf(const BowVector &vec,
02021 QueryResults &ret, int max_results, int max_id) const
02022 {
02023 BowVector::const_iterator vit;
02024 typename IFRow::const_iterator rit;
02025
02026 map<EntryId, int> counters;
02027 map<EntryId, int>::iterator cit;
02028
02029 map<EntryId, double> pairs;
02030 map<EntryId, double>::iterator pit;
02031
02032 for(vit = vec.begin(); vit != vec.end(); ++vit)
02033 {
02034 const WordId word_id = vit->first;
02035 const WordValue& vi = vit->second;
02036
02037 const IFRow& row = m_ifile[word_id];
02038
02039
02040
02041 for(rit = row.begin(); rit != row.end(); ++rit)
02042 {
02043 const EntryId entry_id = rit->entry_id;
02044 const WordValue& wi = rit->word_weight;
02045
02046 if((int)entry_id < max_id || max_id == -1)
02047 {
02048 double value = 0;
02049 if(vi != 0 && wi != 0) value = vi * log(vi/wi);
02050
02051
02052 cit = counters.lower_bound(entry_id);
02053 pit = pairs.lower_bound(entry_id);
02054 if(pit != pairs.end() && !(pairs.key_comp()(entry_id, pit->first)))
02055 {
02056 pit->second += value;
02057 cit->second += 1;
02058 }
02059 else
02060 {
02061 pairs.insert(pit,
02062 map<EntryId, double>::value_type(entry_id, value));
02063 counters.insert(cit,
02064 map<EntryId, int>::value_type(entry_id, 1));
02065 }
02066 }
02067
02068 }
02069 }
02070
02071
02072
02073
02074
02075
02076 ret.reserve(pairs.size());
02077 for(pit = pairs.begin(); pit != pairs.end(); ++pit)
02078 {
02079
02080
02081
02082 ret.push_back(Result(pit->first, 2 * pit->second));
02083 }
02084
02085
02086
02087
02088
02089 sort(ret.begin(), ret.end());
02090
02091
02092 if(max_results > 0 && (int)ret.size() > max_results)
02093 ret.resize(max_results);
02094
02095
02096 for(size_t i = 0; i < ret.size(); ++i)
02097 {
02098 cit = counters.find(ret[i].Id);
02099
02100
02101 }
02102
02103
02104
02105 }
02106
02107
02108
02109 template<class TDescriptor, class F>
02110 const FeatureVector& TemplatedDatabase<TDescriptor, F>::retrieveFeatures
02111 (EntryId id) const
02112 {
02113 assert(id < size());
02114 return m_dfile[id];
02115 }
02116
02117
02118
02119 template<class TDescriptor, class F>
02120 void TemplatedDatabase<TDescriptor, F>::save(const string &filename) const
02121 {
02122 cv::FileStorage fs(filename.c_str(), cv::FileStorage::WRITE);
02123 if(!fs.isOpened()) throw string("Could not open file ") + filename;
02124
02125 save(fs);
02126 }
02127
02128
02129
02130 template<class TDescriptor, class F>
02131 void TemplatedDatabase<TDescriptor, F>::save(cv::FileStorage &fs,
02132 const std::string &name) const
02133 {
02134
02135
02136
02137
02138
02139
02140
02141
02142
02143
02144
02145
02146
02147
02148
02149
02150
02151
02152
02153
02154
02155
02156
02157
02158
02159
02160
02161
02162
02163
02164
02165
02166
02167 m_voc->save(fs);
02168
02169 fs << name << "{";
02170
02171 fs << "nEntries" << m_nentries;
02172 fs << "usingDI" << (m_use_di ? 1 : 0);
02173 fs << "diLevels" << m_dilevels;
02174
02175 fs << "invertedIndex" << "[";
02176
02177 typename InvertedFile::const_iterator iit;
02178 typename IFRow::const_iterator irit;
02179 for(iit = m_ifile.begin(); iit != m_ifile.end(); ++iit)
02180 {
02181 fs << "[";
02182 for(irit = iit->begin(); irit != iit->end(); ++irit)
02183 {
02184 fs << "{:"
02185 << "imageId" << (int)irit->entry_id
02186 << "weight" << irit->word_weight
02187 << "}";
02188 }
02189 fs << "]";
02190 }
02191
02192 fs << "]";
02193
02194 fs << "directIndex" << "[";
02195
02196 typename DirectFile::const_iterator dit;
02197 typename FeatureVector::const_iterator drit;
02198 for(dit = m_dfile.begin(); dit != m_dfile.end(); ++dit)
02199 {
02200 fs << "[";
02201
02202 for(drit = dit->begin(); drit != dit->end(); ++drit)
02203 {
02204 NodeId nid = drit->first;
02205 const vector<unsigned int>& features = drit->second;
02206
02207
02208 fs << "{";
02209 fs << "nodeId" << (int)nid;
02210
02211
02212 fs << "features" << "["
02213 << *(const vector<int>*)(&features) << "]";
02214 fs << "}";
02215 }
02216
02217 fs << "]";
02218 }
02219
02220 fs << "]";
02221
02222 fs << "}";
02223 }
02224
02225
02226
02227 template<class TDescriptor, class F>
02228 void TemplatedDatabase<TDescriptor, F>::load(const string &filename)
02229 {
02230 cv::FileStorage fs(filename.c_str(), cv::FileStorage::READ);
02231 if(!fs.isOpened()) throw string("Could not open file ") + filename;
02232
02233 load(fs);
02234 }
02235
02236
02237
02238 template<class TDescriptor, class F>
02239 void TemplatedDatabase<TDescriptor, F>::load(const cv::FileStorage &fs,
02240 const std::string &name)
02241 {
02242
02243
02244 if(!m_voc) m_voc = new TemplatedVocabulary<TDescriptor, F>;
02245
02246 m_voc->load(fs);
02247
02248
02249 clear();
02250
02251 cv::FileNode fdb = fs[name];
02252
02253 m_nentries = (int)fdb["nEntries"];
02254 m_use_di = (int)fdb["usingDI"] != 0;
02255 m_dilevels = (int)fdb["diLevels"];
02256
02257 cv::FileNode fn = fdb["invertedIndex"];
02258 for(WordId wid = 0; wid < fn.size(); ++wid)
02259 {
02260 cv::FileNode fw = fn[wid];
02261
02262 for(unsigned int i = 0; i < fw.size(); ++i)
02263 {
02264 EntryId eid = (int)fw[i]["imageId"];
02265 WordValue v = fw[i]["weight"];
02266
02267 m_ifile[wid].push_back(IFPair(eid, v));
02268 }
02269 }
02270
02271 if(m_use_di)
02272 {
02273 fn = fdb["directIndex"];
02274
02275 m_dfile.resize(fn.size());
02276 assert(m_nentries == (int)fn.size());
02277
02278 FeatureVector::iterator dit;
02279 for(EntryId eid = 0; eid < fn.size(); ++eid)
02280 {
02281 cv::FileNode fe = fn[eid];
02282
02283 m_dfile[eid].clear();
02284 for(unsigned int i = 0; i < fe.size(); ++i)
02285 {
02286 NodeId nid = (int)fe[i]["nodeId"];
02287
02288 dit = m_dfile[eid].insert(m_dfile[eid].end(),
02289 make_pair(nid, vector<unsigned int>() ));
02290
02291
02292
02293
02294
02295
02296
02297
02298
02299
02300 cv::FileNode ff = fe[i]["features"][0];
02301 dit->second.reserve(ff.size());
02302
02303 cv::FileNodeIterator ffit;
02304 for(ffit = ff.begin(); ffit != ff.end(); ++ffit)
02305 {
02306 dit->second.push_back((int)*ffit);
02307 }
02308 }
02309 }
02310 }
02311
02312 }
02313
02314
02315
02321 template<class TDescriptor, class F>
02322 std::ostream& operator<<(std::ostream &os,
02323 const TemplatedDatabase<TDescriptor,F> &db)
02324 {
02325 os << "Database: Entries = " << db.size() << ", "
02326 "Using direct index = " << (db.usingDirectIndex() ? "yes" : "no");
02327
02328 if(db.usingDirectIndex())
02329 os << ", Direct index levels = " << db.getDirectIndexLevels();
02330
02331 os << ". " << *db.getVocabulary();
02332 return os;
02333 }
02334
02335
02336
02337 }
02338
02339 #endif