simplesearch.h
Go to the documentation of this file.
00001 #ifndef _simplesearch_h
00002
00003 #define _simplesearch_h
00004 #include <iostream>
00005 #include <string>
00006 #include <queue>
00007 #include "aligner.h"
00008 #include "golem.h"
00009 #include "math.h"
00010
00011
00012 class simplesearch{
00013
00014 private:
00015
00016 class comparator{
00017 public:
00018 bool operator()(const aligner<int>::alin* a, const aligner<int>::alin* b) const{
00019 return a->score < b->score;
00020 }
00021 };
00022
00023 aligner<int>* al;
00024 golem* g;
00025 string corpus;
00026 priority_queue<aligner<int>::alin*,vector<aligner<int>::alin*>, comparator> heap;
00027 static inline int max(const int& a, const int& b){ return a > b ? a : b; }
00028 static inline int min(const int& a, const int& b){ return a < b ? a : b; }
00029
00030
00031 public:
00032
00033 simplesearch(string const _scores, string const _corpus){
00034 al = new aligner<int>(_scores);
00035 corpus = _corpus;
00036 }
00037
00038
00039
00040
00041
00042 void search(char* const _s, const int _length, const int _debug){
00043
00044 int const sizeq = strlen(_s);
00045 int const size = 4*sizeq;
00046 int const padding = (size - _length) / 2;
00047
00048 char buf[size];
00049 char c;
00050 int b,j,k;
00051 int i=0,r=0;
00052 int hook = 0;
00053 int previous = 0;
00054
00055 while( !heap.empty() ){heap.pop();}
00056
00057 g = new golem(_s,sizeq,_length,1);
00058
00059
00060 ifstream is;
00061 cout << "Searching file " << corpus << " for substring \"" << _s << "\"" << endl;
00062
00063 is.open(corpus.c_str());
00064 while(!is.eof()){
00065 if(i<=r){
00066 c = buf[ i%size ];
00067 } else {
00068 is.read(&c,1);
00069 buf[ i%size ] = c;
00070 }
00071
00072 if( (b=g->read(c))!=0 && (i-previous)>=sizeq){
00073 hook++;
00074 previous = i;
00075
00076
00077 r = i>=r ? i+1 : r;
00078 while( r<=i+padding && !is.eof() ){
00079 is.read(&c,1);
00080 buf[ r%size ] = c;
00081 r++;
00082 }
00083
00084
00085 int left=0, right=0;
00086 left = max(0,i-_length-padding+1);
00087 right= min(r-1,i+padding);
00088 char* target = new char[right-left+1];
00089 cout << "Searching in i="<<i<<" (" << left << "," << right << "): ";
00090 for(k=0,j=left;j<=right; j++,k++){
00091 target[k] = buf[j%size];
00092 cout << target[k];
00093 }
00094 cout << endl;
00095
00096 aligner<int>::alin* result = al->align(_s,sizeq,target,right-left+1,SEMILOCAL);
00097 result->begin += left;
00098 result->end += left;
00099 result->seg = new char[result->end - result->begin+1];
00100 for(k=0,j=result->begin;j<=result->end; j++,k++){
00101 result->seg[k] = buf[j%size];
00102 }
00103
00104
00105 heap.push(result);
00106
00107 delete[](target);
00108 } else {
00109
00110 }
00111 i++;
00112 }
00113
00114 is.close();
00115 delete(g);
00116
00117
00118
00119 cout << "Found " << hook << " hooks."<<endl;
00120 aligner<int>::alin* dummy;
00121
00122 while( !heap.empty() ){
00123 dummy = heap.top();
00124
00125 char* segment = new char[dummy->end - dummy->begin+1];
00126 cout << "MATCH: ";
00127
00128 for(int i=0;i<=dummy->end-dummy->begin; i++){
00129 cout << dummy->seg[i];
00130 }
00131 cout << endl << " a:" << dummy->a << endl << " b:" << dummy->b << endl;
00132 cout << " Score = " << dummy->score << " Begin = " << dummy->begin << " End = " << dummy->end << endl;
00133
00134 heap.pop();
00135 delete[](dummy->seg);
00136 delete(dummy);
00137 delete[](segment);
00138 }
00139
00140 }
00141
00142
00143 ~simplesearch(){
00144 delete(al);
00145 }
00146
00147 };
00148
00149 #endif
00150