dates_modules.h
Go to the documentation of this file.
00001
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00029
00030 #ifndef _DATES_MOD
00031 #define _DATES_MOD
00032
00033 #include <map>
00034
00035 #include "fries/language.h"
00036 #include "freeling/automat.h"
00037 #include "regexp-pcre++.h"
00038
00039
00040
/// Pattern matching a whole string made only of one or more of the uppercase
/// letters I, V, X, L, C, D, M; the letter run is captured as group 1.
/// It checks the alphabet only, not that the letters form a well-formed
/// Roman numeral.
00041 #define RE_ROMAN "^([IVXLCDM]+)$"
00042
00043
/// "DF" pattern set: numeric-only date plus two time forms.
/// NOTE(review): "DF" presumably stands for "default" (cf. class
/// dates_default) -- confirm where the macros are used.
///
/// RE_DATE_DF: whole-string match of <day>/<month>/<year>.
///   group 1 = day   : one digit, optionally preceded by a digit 0-3
///   group 2 = month : one digit, optionally preceded by 0 or 1
///   group 3 = year  : 1 to 4 digits
/// Only the digit shape is checked; values such as day 39 or month 19 match.
00044 #define RE_DATE_DF "^(?:(?:((?:[0-3])?(?:\\d))/)(?:((?:(?:[0-1])?(?:\\d)))/)(\\d{1,4}))$"
/// RE_TIME1_DF: whole-string match of <hour> followed by 'h' or ':' and an
/// optional minutes part.
///   group 1 = hour    : one digit optionally preceded by 0 or 1, or 20-24
///   group 2 = minutes : one digit optionally preceded by 0-5 (group is
///                       optional), optionally followed by "min" or "m"
00045 #define RE_TIME1_DF "^(?:((?:(?:[0-1])?(?:\\d))|(?:2(?:[01234])))(?:h|:)(?:((?:[0-5])?(?:\\d))(?:min|m)?)?)$"
/// RE_TIME2_DF: whole-string match of a minutes value (group 1, same shape as
/// above) followed by a mandatory suffix: "min.", "min", "m." or "m".
00046 #define RE_TIME2_DF "^(?:((?:[0-5])?(?:\\d))(?:min\\.|min|m\\.|m))$"
00047
00048
/// "ES" pattern set: same structure as the DF set, extended with Spanish words.
///
/// RE_DATE_ES: whole-string match of <day>/<month>/<year>.
///   group 1 = day   : one digit, optionally preceded by a digit 0-3
///   group 2 = month : 1-2 digits (optional leading 0 or 1), or a Spanish
///                     month name (enero..diciembre), or its three-letter
///                     abbreviation (ene..dic)
///   group 3 = year  : 1 to 4 digits
/// Month words are written in lowercase in the pattern.
/// NOTE(review): whether matching is case-insensitive depends on the flags
/// used when the RegEx is built, which this file does not show.
00049 #define RE_DATE_ES "^(?:(?:((?:[0-3])?(?:\\d))/)(?:((?:(?:[0-1])?(?:\\d))|enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|octubre|noviembre|diciembre|ene|feb|mar|abr|may|jun|jul|ago|sep|oct|nov|dic)/)(\\d{1,4}))$"
/// RE_TIME1_ES: <hour> then 'h' or ':' then optional minutes.
///   group 1 = hour    : one digit optionally preceded by 0 or 1, or 20-24
///   group 2 = minutes : one digit optionally preceded by 0-5 (optional),
///                       optionally followed by "minutos", "min" or "m"
00050 #define RE_TIME1_ES "^(?:((?:(?:[0-1])?(?:\\d))|(?:2(?:[01234])))(?:h|:)(?:((?:[0-5])?(?:\\d))(?:minutos|min|m)?)?)$"
/// RE_TIME2_ES: minutes value (group 1) followed by a mandatory suffix:
/// "minutos", "min.", "min", "m." or "m".
00051 #define RE_TIME2_ES "^(?:((?:[0-5])?(?:\\d))(?:minutos|min\\.|min|m\\.|m))$"
00052
00053
/// "CA" pattern set: same structure as the DF set, extended with Catalan words.
///
/// RE_DATE_CA: whole-string match of <day>/<month>/<year>.
///   group 1 = day   : one digit, optionally preceded by a digit 0-3
///   group 2 = month : 1-2 digits (optional leading 0 or 1), or a Catalan
///                     month name (gener..desembre), or its three-letter
///                     abbreviation (gen..des)
///   group 3 = year  : 1 to 4 digits
/// NOTE(review): the month list contains a non-ASCII character (c-cedilla),
/// so the bytes actually matched depend on this file's source encoding --
/// confirm it agrees with the encoding of the analyzed text.
00054 #define RE_DATE_CA "^(?:(?:((?:[0-3])?(?:\\d))/)(?:((?:(?:[0-1])?(?:\\d))|gener|febrer|març|abril|maig|juny|juliol|agost|setembre|octubre|novembre|desembre|gen|feb|mar|abr|mai|jun|jul|ago|set|oct|nov|des)/)(\\d{1,4}))$"
/// RE_TIME1_CA: <hour> then 'h' or ':' then optional minutes.
///   group 1 = hour    : one digit optionally preceded by 0 or 1, or 20-24
///   group 2 = minutes : one digit optionally preceded by 0-5 (optional),
///                       optionally followed by "minuts", "min" or "m"
00055 #define RE_TIME1_CA "^(?:((?:(?:[0-1])?(?:\\d))|(?:2(?:[01234])))(?:h|:)(?:((?:[0-5])?(?:\\d))(?:minuts|min|m)?)?)$"
/// RE_TIME2_CA: minutes value (group 1) followed by a mandatory suffix:
/// "minuts", "min.", "min", "m." or "m".
00056 #define RE_TIME2_CA "^(?:((?:[0-5])?(?:\\d))(?:minuts|min\\.|min|m\\.|m))$"
00057
00058
/// "EN" pattern set: same structure as the DF set, extended with English words.
///
/// RE_DATE_EN: whole-string match of <day>/<month>/<year>.
///   group 1 = day   : one digit, optionally preceded by a digit 0-3
///   group 2 = month : 1-2 digits (optional leading 0 or 1), or an English
///                     month name (january..december), or its three-letter
///                     abbreviation (jan..dec)
///   group 3 = year  : 1 to 4 digits
/// The day field comes first, i.e. the pattern is day/month/year ordered.
/// "may" appears twice in the alternation (full name and abbreviation); the
/// duplicate is redundant but harmless.
00059 #define RE_DATE_EN "^(?:(?:((?:[0-3])?(?:\\d))/)(?:((?:(?:[0-1])?(?:\\d))|january|february|march|april|may|june|july|august|september|october|november|december|jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/)(\\d{1,4}))$"
/// RE_TIME1_EN: <hour> then 'h' or ':' then optional minutes.
///   group 1 = hour    : one digit optionally preceded by 0 or 1, or 20-24
///   group 2 = minutes : one digit optionally preceded by 0-5 (optional),
///                       optionally followed by "minutes", "min" or "m"
00060 #define RE_TIME1_EN "^(?:((?:(?:[0-1])?(?:\\d))|(?:2(?:[01234])))(?:h|:)(?:((?:[0-5])?(?:\\d))(?:minutes|min|m)?)?)$"
/// RE_TIME2_EN: minutes value (group 1) followed by a mandatory suffix:
/// "minutes", "min.", "min", "m." or "m".
00061 #define RE_TIME2_EN "^(?:((?:[0-5])?(?:\\d))(?:minutes|min\\.|min|m\\.|m))$"
00062
00063
00068
/// Common base of the language-specific dates_* classes in this header.
/// Derives publicly from automat (declared in "freeling/automat.h", not
/// visible here) and holds the state those subclasses share: lookup maps,
/// the textual fields of the date/time being assembled, and four RegEx
/// objects.
/// NOTE(review): this file is a line-numbered listing with the original doc
/// comments stripped; every description below marked "presumably" is inferred
/// from the member's name and type and must be confirmed in the implementation.
00069 class dates_module: public automat {
00070
00071 protected:
// string -> int map; presumably month name -> month number ("mes") -- TODO confirm
00073 std::map<std::string,int> nMes;
// string -> string map; presumably weekday name -> weekday code ("dia") -- TODO confirm
00075 std::map<std::string,std::string> nDia;
// Date/time components kept as strings; presumably filled in while a
// date expression is being recognized -- TODO confirm
00077 std::string century,year,month,day,weekday,hour,minute,meridian;
// Integer scratch values; their exact roles are not visible in this header.
00079 int temp;
00080 int sign;
00081
00082 int daytemp;
// NOTE(review): meaning of this flag is not derivable from the header -- confirm
00083 bool inGbb;
// string -> int map; presumably word form -> token code used by the automaton -- TODO confirm
00085 std::map<std::string,int> tok;
00086
00087
// Compiled regular expressions; presumably built from the RE_DATE_*,
// RE_TIME1_*, RE_TIME2_* and RE_ROMAN patterns defined above -- TODO confirm
00088 RegEx RE_Date;
00089 RegEx RE_Time1;
00090 RegEx RE_Time2;
00091 RegEx RE_Roman;
00092
00093 public:
// Constructor taking four strings; presumably the patterns for RE_Date,
// RE_Time1, RE_Time2 and RE_Roman, in that order -- TODO confirm in the .cc file
00095 dates_module(const std::string &, const std::string &, const std::string &, const std::string &);
00096 };
00097
00098
00103
/// Date module variant with no language-specific data members; derives
/// publicly from dates_module and is default-constructible.
/// NOTE(review): presumably uses the RE_*_DF (numeric-only) patterns -- confirm
/// in the constructor definition, which is not in this header.
00104 class dates_default : public dates_module {
00105
00106 private:
// NOTE(review): these four private members presumably implement hooks
// declared by the automat base class (not visible here) -- confirm.
00107 int ComputeToken(int, sentence::iterator &, sentence &);
00108 void ResetActions();
00109 void StateActions(int, int, int, sentence::const_iterator);
00110 void SetMultiwordAnalysis(sentence::iterator, int);
00111
00112 public:
// Default constructor (definition not in this header).
00114 dates_default();
00115 };
00116
00121
/// "es" date module variant; derives publicly from dates_module and is
/// default-constructible.
/// NOTE(review): presumably the Spanish recognizer using the RE_*_ES
/// patterns -- confirm in the constructor definition, not in this header.
00122 class dates_es : public dates_module {
00123
00124 private:
// NOTE(review): these four private members presumably implement hooks
// declared by the automat base class (not visible here) -- confirm.
00125 int ComputeToken(int, sentence::iterator &, sentence &);
00126 void ResetActions();
00127 void StateActions(int, int, int, sentence::const_iterator);
00128 void SetMultiwordAnalysis(sentence::iterator, int);
00129
00130 public:
// Default constructor (definition not in this header).
00132 dates_es();
00133 };
00134
00135
00140
/// "ca" date module variant; derives publicly from dates_module and is
/// default-constructible.
/// NOTE(review): presumably the Catalan recognizer using the RE_*_CA
/// patterns -- confirm in the constructor definition, not in this header.
00141 class dates_ca : public dates_module {
00142
00143 private:
// NOTE(review): these four private members presumably implement hooks
// declared by the automat base class (not visible here) -- confirm.
00144 int ComputeToken(int, sentence::iterator &, sentence &);
00145 void ResetActions();
00146 void StateActions(int, int, int, sentence::const_iterator);
00147 void SetMultiwordAnalysis(sentence::iterator, int);
00148
00149 public:
// Default constructor (definition not in this header).
00151 dates_ca();
00152 };
00153
00154
00159
/// "en" date module variant; derives publicly from dates_module and is
/// default-constructible. Unlike the other variants in this header it adds
/// one data member of its own, numDay.
/// NOTE(review): presumably the English recognizer using the RE_*_EN
/// patterns -- confirm in the constructor definition, not in this header.
00160 class dates_en : public dates_module {
00161
00162 private:
// NOTE(review): these four private members presumably implement hooks
// declared by the automat base class (not visible here) -- confirm.
00163 int ComputeToken(int, sentence::iterator &, sentence &);
00164 void ResetActions();
00165 void StateActions(int, int, int, sentence::const_iterator);
00166 void SetMultiwordAnalysis(sentence::iterator, int);
00167
// string -> int map; presumably day-of-month word (e.g. an ordinal) -> its
// numeric value -- TODO confirm
00169 std::map<std::string,int> numDay;
00170
00171 public:
// Default constructor (definition not in this header).
00173 dates_en();
00174 };
00175
00176
00177 #endif
00178