00001 00002 // Class for the feature extractor. 00004 00005 #ifndef CORE_FEX_H 00006 #define CORE_FEX_H 00007 00008 #include <list> 00009 #include <fstream> 00010 #include <iostream> 00011 #include <string> 00012 #include <vector> 00013 #include <algorithm> 00014 #include <cctype> 00015 00016 #include "fries.h" 00017 #include "freeling/semdb.h" 00018 00022 00023 class SAMPLE { 00024 public: 00025 int sent; 00026 int numde; 00027 int posbegin; 00028 int posend; 00029 node *node1; 00030 std::string text; 00031 std::vector<std::string> texttok; 00032 std::vector<std::string> tags; 00033 }; 00034 00038 00039 class EXAMPLE { 00040 public: 00041 int sent; 00042 int numde; 00043 SAMPLE sample1; 00044 SAMPLE sample2; 00045 }; 00046 00047 #define COREFEX_TYPE_TWO 0 00048 #define COREFEX_TYPE_THREE 1 00049 00050 #define COREFEX_DIST 0x00000001 00051 #define COREFEX_IPRON 0x00000002 00052 #define COREFEX_JPRON 0x00000004 00053 #define COREFEX_IPRONM 0x00000008 00054 #define COREFEX_JPRONM 0x00000010 00055 #define COREFEX_STRMATCH 0x00000020 00056 #define COREFEX_DEFNP 0x00000040 00057 #define COREFEX_DEMNP 0x00000080 00058 #define COREFEX_NUMBER 0x00000100 00059 #define COREFEX_GENDER 0x00000200 00060 #define COREFEX_SEMCLASS 0x00000400 00061 #define COREFEX_PROPNAME 0x00000800 00062 #define COREFEX_ALIAS 0x00001000 00063 #define COREFEX_APPOS 0x00002000 00064 00068 00069 class coref_fex{ 00070 private: 00071 // semantic database to check for semantic properties 00072 semanticDB * semdb; 00073 // active features 00074 int vectors; 00075 00076 int jump(const std::vector<std::string> &); 00077 int get_dist(const EXAMPLE &); 00078 int get_numdedist(const EXAMPLE &); 00079 int get_dedist(const EXAMPLE &); 00080 int get_i_pronoum(const EXAMPLE &); 00081 int get_j_pronoum(const EXAMPLE &); 00082 int get_i_pronoum_p(const EXAMPLE &); 00083 int get_j_pronoum_p(const EXAMPLE &); 00084 int get_i_pronoum_d(const EXAMPLE &); 00085 int get_j_pronoum_d(const EXAMPLE &); 00086 int get_i_pronoum_x(const EXAMPLE &); 00087 int get_j_pronoum_x(const EXAMPLE &); 00088 int get_i_pronoum_i(const EXAMPLE &); 00089 int get_j_pronoum_i(const EXAMPLE &); 00090 int get_i_pronoum_t(const EXAMPLE &); 00091 int get_j_pronoum_t(const EXAMPLE &); 00092 int get_i_pronoum_r(const EXAMPLE &); 00093 int get_j_pronoum_r(const EXAMPLE &); 00094 int get_i_pronoum_e(const EXAMPLE &); 00095 int get_j_pronoum_e(const EXAMPLE &); 00096 int get_str_match(const EXAMPLE &); 00097 int get_def_np(const EXAMPLE &); 00098 int get_dem_np(const EXAMPLE &); 00099 int get_number(const EXAMPLE &); 00100 int get_semclass(const EXAMPLE &); 00101 int get_gender(const EXAMPLE &); 00102 int get_proper_name(const EXAMPLE &); 00103 bool check_word(string, string); 00104 int check_acronim(const EXAMPLE &); 00105 int check_fixesleft(const EXAMPLE &); 00106 int check_fixesright(const EXAMPLE &); 00107 int check_order(const EXAMPLE &); 00108 int get_alias_acro(const EXAMPLE &); 00109 int get_alias_fixleft(const EXAMPLE &); 00110 int get_alias_fixright(const EXAMPLE &); 00111 int get_alias_order(const EXAMPLE &); 00112 int get_appositive(const EXAMPLE &); 00113 int get_i_inquotes(const EXAMPLE &); 00114 int get_j_inquotes(const EXAMPLE &); 00115 int get_i_inparenthesis(const EXAMPLE &); 00116 int get_j_inparenthesis(const EXAMPLE &); 00117 int get_i_thirtperson(const EXAMPLE &); 00118 int get_j_thirtperson(const EXAMPLE &); 00119 00120 void put_feature(int, std::vector<int> &); 00121 public: 00122 int typeVector; 00123 00124 coref_fex(const int, const int, const std::string&, const std::string&); 00125 coref_fex(); 00126 ~coref_fex(); 00127 void setVectors(int); 00128 std::vector<std::string> tokenize(const std::string &, const std::string &); 00129 void extract(EXAMPLE &, std::vector<int> &); 00130 }; 00131 #endif