simplesearch.h

Go to the documentation of this file.
00001 #ifndef _simplesearch_h
00002 
00003 #define _simplesearch_h
#include <cstring>
#include <fstream>
#include <iostream>
#include <queue>
#include <string>
#include <vector>
#include "aligner.h"
#include "golem.h"
#include "math.h"
00010 
00011 
00012 class simplesearch{
00013 
00014 private:
00015 
00016   class comparator{
00017     public:
00018       bool operator()(const aligner<int>::alin* a, const aligner<int>::alin* b) const{
00019         return a->score < b->score;
00020       }
00021   };
00022 
00023   aligner<int>* al;
00024   golem* g;
00025   string corpus;
00026   priority_queue<aligner<int>::alin*,vector<aligner<int>::alin*>, comparator> heap;
00027   static inline int max(const int& a, const int& b){ return a > b ? a : b; }
00028   static inline int min(const int& a, const int& b){ return a < b ? a : b; }
00029 
00030 
00031 public:
00032 
00033     simplesearch(string const _scores, string const _corpus){
00034       al = new aligner<int>(_scores);
00035       corpus = _corpus;
00036     }
00037 
00038 
00039     /*
00040      * Finds the bests alignements of _s searching substrings of length _length
00041      */
00042     void search(char* const _s, const int _length, const int _debug){
00043 
00044       int const sizeq = strlen(_s);
00045       int const size = 4*sizeq;
00046       int const padding = (size - _length) / 2;
00047 
00048       char buf[size];
00049       char c;
00050       int b,j,k;
00051       int i=0,r=0;
00052       int hook = 0;
00053       int previous = 0;
00054 
00055       while( !heap.empty() ){heap.pop();}
00056 
00057       g = new golem(_s,sizeq,_length,1);
00058       //g->show();
00059 
00060       ifstream is;
00061       cout << "Searching file " << corpus << " for substring \"" << _s << "\"" << endl;
00062 
00063       is.open(corpus.c_str());
00064       while(!is.eof()){
00065         if(i<=r){
00066           c = buf[ i%size ];
00067         } else {
00068           is.read(&c,1);
00069           buf[ i%size ] = c;
00070         }
00071 
00072         if( (b=g->read(c))!=0 && (i-previous)>=sizeq){
00073           hook++;
00074           previous = i;
00075 
00076           // 1) Fill the buffer up to size
00077           r = i>=r ? i+1 : r;
00078           while( r<=i+padding && !is.eof() ){
00079             is.read(&c,1);
00080             buf[ r%size ] = c;
00081             r++;
00082           }
00083 
00084           // 2) put the characters in the arrays
00085           int left=0, right=0;
00086           left = max(0,i-_length-padding+1);
00087           right= min(r-1,i+padding);
00088           char* target = new char[right-left+1];
00089           cout << "Searching in i="<<i<<" (" << left << "," << right << "): ";
00090           for(k=0,j=left;j<=right; j++,k++){
00091             target[k] = buf[j%size];
00092             cout << target[k];
00093           }
00094           cout << endl;
00095           // 3) Align it!
00096           aligner<int>::alin* result = al->align(_s,sizeq,target,right-left+1,SEMILOCAL);
00097           result->begin += left;
00098           result->end   += left;
00099           result->seg = new char[result->end - result->begin+1];
00100           for(k=0,j=result->begin;j<=result->end; j++,k++){
00101             result->seg[k] = buf[j%size];
00102           }
00103 
00104           // 4) Save the result in a heap of n best solutions
00105           heap.push(result);
00106 
00107           delete[](target);
00108         } else {
00109           //Nothing to do
00110         }
00111         i++;
00112       }
00113 
00114       is.close();
00115       delete(g);
00116 
00117 
00118       /* PRINT SOME RESULTS */
00119       cout << "Found " << hook << " hooks."<<endl;
00120       aligner<int>::alin* dummy;
00121 
00122       while( !heap.empty() ){
00123         dummy = heap.top();
00124 
00125         char* segment = new char[dummy->end - dummy->begin+1];
00126         cout << "MATCH:  ";
00127           
00128         for(int i=0;i<=dummy->end-dummy->begin; i++){
00129           cout << dummy->seg[i];
00130         }
00131         cout << endl << "   a:" << dummy->a << endl << "   b:" << dummy->b << endl;
00132         cout << "   Score = " << dummy->score << "   Begin = " << dummy->begin << " End = " << dummy->end << endl;
00133 
00134         heap.pop();
00135         delete[](dummy->seg);
00136         delete(dummy);
00137         delete[](segment);
00138       }
00139 
00140     } // search() function
00141 
00142 
00143     ~simplesearch(){
00144       delete(al);
00145     }
00146 
00147 };
00148 
00149 #endif
00150 
Generated on Tue Jul 27 16:29:25 2010 for FreeLing by  doxygen 1.6.3