phd.h
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007 #ifndef _phd_h
00008 #define _phd_h
00009
00010 #include <cstdlib>
00011 #include <iostream>
00012 #include <sstream>
00013 #include <fstream>
00014 #include <map>
00015 #include <set>
00016 #include <string>
00017 #include <math.h>
00018 using namespace std;
00019
// Size of the symbol tables: symbols are indexed by their character code,
// so only codes in [0, MAX) can take part in the distance table.
#define MAX 128

/**
 * Phoneme distance table loaded from a rules file.
 *
 * The constructor reads a whitespace-separated rules file made of entries
 * introduced by a keyword:
 *
 *   #...                     comment, rest of the line is ignored
 *   FEATURES:  f1 f2 ...     feature names, in column order
 *   FVOWELS:   f ...         features used for vowel/vowel distances
 *   FOTHER:    f ...         features used for every pair involving a consonant
 *   VALUE:     name v        named feature value usable in FON: lines
 *   WEIGHT:    feature w     weight of a feature in the distance sum
 *   CONSTANT:  name v        one of Cskip, Csub, Cexp, Cvowel, Cspace
 *   VOWELS:    c c ...       symbols treated as vowels
 *   CONSONANTS: c c ...      symbols treated as consonants
 *   FON:       c v1 v2 ...   feature values of symbol c ("+" = 100, "-" = 0,
 *                            anything else is looked up among the VALUE: names,
 *                            unknown names count as 0)
 *
 * Entries are processed in file order, so VALUE: names must appear before the
 * FON: lines that use them.
 *
 * The distance between two listed symbols is the weighted sum of the absolute
 * differences of their feature values. Pairs that are never computed keep the
 * default distance (0 for a symbol with itself, 8000 otherwise).
 *
 * @tparam T arithmetic type used for costs and distances.
 */
template <typename T = int> class phd {

private:
  T csub, cexp, cvowel, cskip, cspace;
  T distance[MAX][MAX];
  std::set<char> svowels;
  std::set<char> sconsonants;
  int debug;

  typedef std::map<std::string, int> FeatureIndex;
  typedef std::map<std::string, T> NamedValues;
  typedef std::set<std::string> NameSet;
  typedef std::set<char> SymbolSet;

  /// Distance assigned to any pair of distinct symbols with no computed value.
  static T unknownDistance() { return static_cast<T>(8000); }

  /// True when `symbol` can be used as an index into the MAX x MAX tables.
  static bool inTable(int symbol) { return symbol >= 0 && symbol < MAX; }

  /// |a - b| without relying on an abs() overload for T (also safe for unsigned T).
  static T absDiff(const T& a, const T& b) { return a > b ? a - b : b - a; }

  /// Larger of two values.
  static T max(const T& a, const T& b) { return a > b ? a : b; }

  /// Vowel penalty: cvowel when `a` is a listed vowel, 0 otherwise.
  T V(char a) const { return svowels.find(a) != svowels.end() ? cvowel : T(0); }

  /// Bounds-checked table access; symbols outside the table behave like
  /// symbols that were never listed in the rules file.
  T lookup(int a, int b) const {
    if (inTable(a) && inTable(b)) return distance[a][b];
    return a == b ? T(0) : unknownDistance();
  }

  /// Reads every non-blank character of `line` into `out`, skipping characters
  /// whose code cannot index the tables.
  static void readSymbols(const std::string& line, SymbolSet& out) {
    std::istringstream ss(line);
    char c;
    while (ss >> c) {
      if (inTable(static_cast<int>(c))) {
        out.insert(c);
      } else {
        std::cerr << "phd: ignoring symbol with code " << static_cast<int>(c)
                  << " (outside table range)" << std::endl;
      }
    }
  }

  /// Reads every whitespace-separated word of `line` into `out`.
  static void readNames(const std::string& line, NameSet& out) {
    std::istringstream ss(line);
    std::string name;
    while (ss >> name) out.insert(name);
  }

  /// Weighted feature distance between symbols `a` and `b` over `names`.
  /// Features that were not declared in FEATURES: or have no WEIGHT: are skipped.
  static T featureDistance(const T (&features)[MAX][MAX], int a, int b,
                           const NameSet& names, const FeatureIndex& flist,
                           const NamedValues& fweight) {
    T d = T(0);
    for (NameSet::const_iterator n = names.begin(); n != names.end(); ++n) {
      const FeatureIndex::const_iterator col = flist.find(*n);
      const typename NamedValues::const_iterator w = fweight.find(*n);
      if (col == flist.end() || w == fweight.end()) continue;
      d += absDiff(features[a][col->second], features[b][col->second]) * w->second;
    }
    return d;
  }

  /// Stores the symmetric feature distance for every pair (outer, inner).
  /// When both arguments are the same set only pairs of distinct symbols are
  /// visited, each once.
  void linkSets(const SymbolSet& outer, const SymbolSet& inner,
                const T (&features)[MAX][MAX], const NameSet& names,
                const FeatureIndex& flist, const NamedValues& fweight) {
    const bool sameSet = (&outer == &inner);
    for (SymbolSet::const_iterator a = outer.begin(); a != outer.end(); ++a) {
      for (SymbolSet::const_iterator b = inner.begin(); b != inner.end(); ++b) {
        // Iterators are only compared when they belong to the same container.
        if (sameSet && b == a) break;
        const int ia = static_cast<int>(*a);
        const int ib = static_cast<int>(*b);
        const T d = featureDistance(features, ia, ib, names, flist, fweight);
        distance[ia][ib] = d;
        distance[ib][ia] = d;
      }
    }
  }

public:

  /**
   * Loads the rules file `fname` and builds the distance table.
   *
   * If the file cannot be opened an error is written to std::cerr and the
   * process exits with status -1. Parsing stops at the first entry whose
   * fields cannot be read (for instance a non-numeric value); a warning is
   * written to std::cerr and everything read so far is kept.
   */
  phd(const std::string& fname)
      : csub(0), cexp(0), cvowel(0), cskip(0), cspace(0), debug(0) {

    for (int i = 0; i < MAX; i++) {
      for (int j = 0; j < MAX; j++) {
        distance[i][j] = (i == j) ? T(0) : unknownDistance();
      }
    }

    // ifstream reports failure through its state, not through exceptions.
    std::ifstream is(fname.c_str());
    if (!is) {
      std::cerr << "Some problem opening " << fname << std::endl;
      std::cerr << "Exiting..." << std::endl;
      std::exit(-1);
    }

    FeatureIndex flist;
    NamedValues fweight;
    NamedValues values;
    NameSet svfeatures;
    NameSet scfeatures;

    // Feature values per symbol; zeroed so symbols without a (complete) FON:
    // line have well-defined values.
    T features[MAX][MAX];
    for (int i = 0; i < MAX; i++) {
      for (int j = 0; j < MAX; j++) features[i][j] = T(0);
    }

    std::string key;
    std::string rest;
    // Looping on the extraction itself ends cleanly at end of file and after
    // any failed read.
    while (is >> key) {

      if (key[0] == '#') {
        std::getline(is, rest);

      } else if (key == "FON:") {
        char c;
        if (is >> c) {
          std::getline(is, rest);
          const int row = static_cast<int>(c);
          if (!inTable(row)) {
            std::cerr << "phd: ignoring FON: entry for symbol with code " << row
                      << " (outside table range)" << std::endl;
          } else {
            std::istringstream ss(rest);
            std::string tok;
            int col = 0;
            while (col < MAX && ss >> tok) {
              if (tok == "+") {
                features[row][col] = T(100);
              } else if (tok == "-") {
                features[row][col] = T(0);
              } else {
                const typename NamedValues::const_iterator v = values.find(tok);
                features[row][col] = (v != values.end()) ? v->second : T(0);
              }
              ++col;
            }
          }
        }

      } else if (key == "VALUE:") {
        std::string name;
        T t = T();
        if (is >> name >> t) values[name] = t;

      } else if (key == "WEIGHT:") {
        std::string name;
        T t = T();
        if (is >> name >> t) fweight[name] = t;

      } else if (key == "CONSTANT:") {
        std::string name;
        T t = T();
        if (is >> name >> t) {
          if (name == "Cskip") { cskip = t; }
          else if (name == "Csub") { csub = t; }
          else if (name == "Cexp") { cexp = t; }
          else if (name == "Cvowel") { cvowel = t; }
          else if (name == "Cspace") { cspace = t; }
          else { std::cerr << "UNEXPECTED CONSTANT DEFINITION" << name << std::endl; }
        }

      } else if (key == "VOWELS:") {
        std::getline(is, rest);
        readSymbols(rest, svowels);

      } else if (key == "CONSONANTS:") {
        std::getline(is, rest);
        readSymbols(rest, sconsonants);

      } else if (key == "FEATURES:") {
        std::getline(is, rest);
        std::istringstream ss(rest);
        std::string name;
        int col = 0;
        // Only MAX feature columns exist; further names are ignored.
        while (col < MAX && ss >> name) { flist[name] = col; ++col; }

      } else if (key == "FVOWELS:") {
        std::getline(is, rest);
        readNames(rest, svfeatures);

      } else if (key == "FOTHER:") {
        std::getline(is, rest);
        readNames(rest, scfeatures);
      }
      // Any other word is ignored.
    }

    if (!is.eof()) {
      std::cerr << "phd: malformed entry in " << fname
                << ", remaining input ignored" << std::endl;
    }
    is.close();

    // Vowel/vowel pairs use the vowel features; every pair involving a
    // consonant uses the "other" features. Later stages overwrite earlier ones.
    linkSets(svowels, svowels, features, svfeatures, flist, fweight);
    linkSets(sconsonants, svowels, features, scfeatures, flist, fweight);
    linkSets(sconsonants, sconsonants, features, scfeatures, flist, fweight);

    // Developer dump of part of the table; disabled because debug is 0.
    if (debug > 2) {
      std::cerr << "\t";
      for (int i = 85; i < MAX; i++) {
        std::cerr << static_cast<char>(i) << "\t";
      }
      std::cerr << std::endl;

      for (int i = 85; i < MAX; i++) {
        std::cerr << static_cast<char>(i) << "\t";
        for (int j = 85; j < MAX; j++) {
          std::cerr << distance[i][j] << "\t";
        }
        std::cerr << std::endl;
      }
    }
  }

  /**
   * Writes the loaded constants and three tab-separated tables to `o`:
   * vowel/vowel (lower triangle), consonant/vowel, and consonant/consonant
   * (lower triangle).
   */
  void show(std::ostream& o) const {

    SymbolSet::const_iterator it1;
    SymbolSet::const_iterator it2;

    o << "Distances between phonemes" << '\n' << "==========================" << '\n' << '\n';

    o << "Read values: cskip:" << cskip << ", csub:" << csub << ", cexp:" << cexp
      << ", cvowel:" << cvowel << '\n';

    o << "\t";
    for (it1 = svowels.begin(); it1 != svowels.end(); ++it1) o << (*it1) << "\t";
    o << '\n';

    for (it1 = svowels.begin(); it1 != svowels.end(); ++it1) {
      o << (*it1) << "\t";
      for (it2 = svowels.begin(); it2 != it1; ++it2) {
        o << distance[static_cast<int>(*it1)][static_cast<int>(*it2)] << "\t";
      }
      o << '\n';
    }

    o << '\n' << "\t";
    for (it1 = svowels.begin(); it1 != svowels.end(); ++it1) o << (*it1) << "\t";
    o << '\n';

    for (it2 = sconsonants.begin(); it2 != sconsonants.end(); ++it2) {
      o << (*it2) << "\t";
      for (it1 = svowels.begin(); it1 != svowels.end(); ++it1) {
        o << distance[static_cast<int>(*it1)][static_cast<int>(*it2)] << "\t";
      }
      o << '\n';
    }

    o << '\n' << "\t";
    for (it1 = sconsonants.begin(); it1 != sconsonants.end(); ++it1) o << (*it1) << "\t";
    o << '\n';

    for (it1 = sconsonants.begin(); it1 != sconsonants.end(); ++it1) {
      o << (*it1) << "\t";
      for (it2 = sconsonants.begin(); it2 != it1; ++it2) {
        o << distance[static_cast<int>(*it1)][static_cast<int>(*it2)] << "\t";
      }
      o << '\n';
    }
  }

  /// Returns the Cskip constant.
  T getCskip() const {
    return cskip;
  }

  /// Cost of skipping symbol `c`: Cskip, plus Cspace when `c` is ' ' or '_'.
  T dSkip(int c) const {
    return (c == ' ' || c == '_') ? cskip + cspace : cskip;
  }

  /**
   * Score of substituting `a` by `b`.
   * Two separators (' ' or '_') score Cspace; a separator against anything
   * else scores -Cspace/2; otherwise Csub minus the table distance minus the
   * vowel penalty of each symbol. Symbols outside [0, MAX) are treated as
   * symbols that are not in the table.
   */
  T dSub(int const a, int const b) const {
    const char ca = static_cast<char>(a);
    const char cb = static_cast<char>(b);
    const bool sepA = (ca == ' ' || ca == '_');
    const bool sepB = (cb == ' ' || cb == '_');
    if (sepA && sepB) { return cspace; }
    if (sepA || sepB) { return -cspace / 2; }
    return csub - lookup(a, b) - V(ca) - V(cb);
  }

  /**
   * Score of expanding `a` into the pair `b`,`c`: Cexp minus the table
   * distances a-b and a-c, minus the vowel penalty of `a` and the larger of
   * the vowel penalties of `b` and `c`. Symbols outside [0, MAX) are treated
   * as symbols that are not in the table.
   */
  T dExp(int const a, int const b, int const c) const {
    return cexp - lookup(a, b) - lookup(a, c) - V(static_cast<char>(a))
           - max(V(static_cast<char>(b)), V(static_cast<char>(c)));
  }

};
00311
00312
00313
00314 #endif