coref_fex.h

Go to the documentation of this file.
00001 
00002 //    Class for the feature extractor.
00004 
00005 #ifndef CORE_FEX_H
00006 #define CORE_FEX_H
00007 
00008 #include <list>
00009 #include <fstream>
00010 #include <iostream>
00011 #include <string>
00012 #include <vector>
00013 #include <algorithm>
00014 #include <cctype>
00015 
00016 #include "fries.h"
00017 #include "freeling/semdb.h"
00018 
00022 
00023 class SAMPLE {
      // Record describing one sample (presumably a single mention) handled by the
      // coreference feature extractor; an EXAMPLE holds two of these by value.
      // All members are public and no constructor is declared, so the int members
      // and node1 hold indeterminate values after default-initialization until
      // the caller assigns them.
00024  public:
00025         int sent;       // presumably the index of the sentence containing the sample - TODO confirm
00026         int numde;      // presumably the ordinal of this sample within the document - TODO confirm
00027         int posbegin;   // presumably the position where the sample starts - TODO confirm units (word vs. char)
00028         int posend;     // presumably the position where the sample ends - TODO confirm inclusive/exclusive
00029         node *node1;    // raw pointer to a `node` (type declared outside this header); ownership is not visible here
00030         std::string text;                   // text of the sample as one string
00031         std::vector<std::string> texttok;   // presumably `text` split into tokens (cf. coref_fex::tokenize) - TODO confirm
00032         std::vector<std::string> tags;      // presumably one tag per entry of texttok - TODO confirm
00033 };
00034 
00038 
00039 class EXAMPLE {
      // A pair of SAMPLE records evaluated together; this is the argument type of
      // coref_fex::extract() and of the private get_*/check_* functions of coref_fex.
      // No constructor is declared, so `sent` and `numde` are indeterminate after
      // default-initialization.
00040  public:
00041         int sent;          // NOTE(review): relation to sample1.sent / sample2.sent is not visible here - presumably a sentence distance; confirm
00042         int numde;         // NOTE(review): presumably a distance or count between the two samples; confirm
00043         SAMPLE sample1;    // first sample, stored by value (presumably the "i" element of the get_i_* functions)
00044         SAMPLE sample2;    // second sample, stored by value (presumably the "j" element of the get_j_* functions)
00045 };
00046 
00047 #define COREFEX_TYPE_TWO        0   // vector-type selector; presumably a value for coref_fex::typeVector - TODO confirm
00048 #define COREFEX_TYPE_THREE      1   // alternative vector-type selector - TODO confirm
00049 
      // Feature flags. Each constant below is a distinct single bit (0x1 .. 0x2000),
      // so they can be combined with bitwise OR. Presumably the combined mask is what
      // coref_fex::setVectors() receives, and the names appear to mirror the private
      // get_* functions of coref_fex (e.g. COREFEX_DIST / get_dist) - TODO confirm
      // the exact flag-to-function mapping against the implementation.
00050 #define COREFEX_DIST            0x00000001
00051 #define COREFEX_IPRON           0x00000002
00052 #define COREFEX_JPRON           0x00000004
00053 #define COREFEX_IPRONM          0x00000008
00054 #define COREFEX_JPRONM          0x00000010
00055 #define COREFEX_STRMATCH        0x00000020
00056 #define COREFEX_DEFNP           0x00000040
00057 #define COREFEX_DEMNP           0x00000080
00058 #define COREFEX_NUMBER          0x00000100
00059 #define COREFEX_GENDER          0x00000200
00060 #define COREFEX_SEMCLASS        0x00000400
00061 #define COREFEX_PROPNAME        0x00000800
00062 #define COREFEX_ALIAS           0x00001000
00063 #define COREFEX_APPOS           0x00002000
00064 
00068 
00069 class coref_fex{
      // Feature extractor for coreference examples; the public entry point is
      // extract(). The private get_*/check_* members each take one EXAMPLE and
      // return an int; their definitions are not part of this header.
      // Identifier spellings such as "pronoum", "acronim" and "thirtperson" are
      // kept as-is because the out-of-line definitions must match these
      // declarations.
00070         private:
00071                 // semantic database to check for semantic properties
                      // NOTE(review): raw pointer, and the class declares a destructor but no
                      // copy constructor / copy assignment. If ~coref_fex() frees semdb,
                      // copying a coref_fex would double-free - confirm in the implementation.
00072                 semanticDB * semdb;
00073                 // active features
                      // (presumably a bitwise OR of the COREFEX_* flags, set through
                      // setVectors() - TODO confirm)
00074                 int vectors;
00075 
                      // Private helpers. The "i"/"j" infixes presumably select
                      // sample1 / sample2 of the EXAMPLE - TODO confirm.
00076                 int jump(const std::vector<std::string> &);
                      // Distance-style functions.
00077                 int get_dist(const EXAMPLE &);
00078                 int get_numdedist(const EXAMPLE &);
00079                 int get_dedist(const EXAMPLE &);
                      // Pronoun-related functions; the one-letter suffixes (p, d, x, i, t, r, e)
                      // presumably denote pronoun subtypes - TODO confirm.
00080                 int get_i_pronoum(const EXAMPLE &);
00081                 int get_j_pronoum(const EXAMPLE &);
00082                 int get_i_pronoum_p(const EXAMPLE &);
00083                 int get_j_pronoum_p(const EXAMPLE &);
00084                 int get_i_pronoum_d(const EXAMPLE &);
00085                 int get_j_pronoum_d(const EXAMPLE &);
00086                 int get_i_pronoum_x(const EXAMPLE &);
00087                 int get_j_pronoum_x(const EXAMPLE &);
00088                 int get_i_pronoum_i(const EXAMPLE &);
00089                 int get_j_pronoum_i(const EXAMPLE &);
00090                 int get_i_pronoum_t(const EXAMPLE &);
00091                 int get_j_pronoum_t(const EXAMPLE &);
00092                 int get_i_pronoum_r(const EXAMPLE &);
00093                 int get_j_pronoum_r(const EXAMPLE &);
00094                 int get_i_pronoum_e(const EXAMPLE &);
00095                 int get_j_pronoum_e(const EXAMPLE &);
                      // String-match, noun-phrase type, agreement and proper-name functions
                      // (grouping inferred from the names only).
00096                 int get_str_match(const EXAMPLE &);
00097                 int get_def_np(const EXAMPLE &);
00098                 int get_dem_np(const EXAMPLE &);
00099                 int get_number(const EXAMPLE &);
00100                 int get_semclass(const EXAMPLE &);
00101                 int get_gender(const EXAMPLE &);
00102                 int get_proper_name(const EXAMPLE &);
                      // Qualified as std::string (like every other declaration in this header)
                      // so the header does not depend on a using-directive/declaration leaking
                      // in from an included header.
00103                 bool check_word(std::string, std::string);
                      // Alias-related checks and the get_alias_* functions built around them
                      // (relationship inferred from the names - TODO confirm).
00104                 int check_acronim(const EXAMPLE &);
00105                 int check_fixesleft(const EXAMPLE &);
00106                 int check_fixesright(const EXAMPLE &);
00107                 int check_order(const EXAMPLE &);
00108                 int get_alias_acro(const EXAMPLE &);
00109                 int get_alias_fixleft(const EXAMPLE &);
00110                 int get_alias_fixright(const EXAMPLE &);
00111                 int get_alias_order(const EXAMPLE &);
00112                 int get_appositive(const EXAMPLE &);
00113                 int get_i_inquotes(const EXAMPLE &);
00114                 int get_j_inquotes(const EXAMPLE &);
00115                 int get_i_inparenthesis(const EXAMPLE &);
00116                 int get_j_inparenthesis(const EXAMPLE &);
00117                 int get_i_thirtperson(const EXAMPLE &);
00118                 int get_j_thirtperson(const EXAMPLE &);
00119 
                      // NOTE(review): presumably stores one feature value into the given
                      // vector - confirm in the implementation.
00120                 void put_feature(int, std::vector<int> &);
00121         public:
                      // presumably one of COREFEX_TYPE_TWO / COREFEX_TYPE_THREE - TODO confirm
00122                 int typeVector;
00123 
                      // Parameters are unnamed in this declaration; see the definition for
                      // their meaning.
00124                 coref_fex(const int, const int, const std::string&, const std::string&);
00125                 coref_fex();
00126                 ~coref_fex();
                      // Takes an int; presumably the new active-feature mask stored in
                      // `vectors` - TODO confirm.
00127                 void setVectors(int);
                      // Returns a vector of strings built from two string arguments; presumably
                      // (text, delimiters) - TODO confirm argument order.
00128                 std::vector<std::string> tokenize(const std::string &, const std::string &);
                      // Presumably computes the active features of the EXAMPLE and returns them
                      // through the int vector. Both arguments are non-const references, so
                      // either may be modified.
00129                 void extract(EXAMPLE &, std::vector<int> &);
00130 };
00131 #endif
Generated on Tue Jul 27 16:29:25 2010 for FreeLing by  doxygen 1.6.3