dates_modules.h

Go to the documentation of this file.
00001 
00003 //
00004 //    FreeLing - Open Source Language Analyzers
00005 //
00006 //    Copyright (C) 2004   TALP Research Center
00007 //                         Universitat Politecnica de Catalunya
00008 //
00009 //    This library is free software; you can redistribute it and/or
00010 //    modify it under the terms of the GNU General Public
00011 //    License as published by the Free Software Foundation; either
00012 //    version 2.1 of the License, or (at your option) any later version.
00013 //
00014 //    This library is distributed in the hope that it will be useful,
00015 //    but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 //    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 //    General Public License for more details.
00018 //
00019 //    You should have received a copy of the GNU General Public
00020 //    License along with this library; if not, write to the Free Software
00021 //    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00022 //
00023 //    contact: Lluis Padro (padro@lsi.upc.es)
00024 //             TALP Research Center
00025 //             despatx C6.212 - Campus Nord UPC
00026 //             08034 Barcelona.  SPAIN
00027 //
00029 
00030 #ifndef _DATES_MOD
00031 #define _DATES_MOD
00032 
00033 #include <map>
00034 
00035 #include "fries/language.h"
00036 #include "freeling/automat.h"
00037 #include "regexp-pcre++.h"
00038 
00039 // Date/time regular expression definitions (pattern strings; presumably compiled into the RegEx members of dates_module - confirm in the .cc)
00040 // RE_ROMAN: the whole token must consist of one or more of the uppercase letters I, V, X, L, C, D, M (captured as group 1); numeral well-formedness is not checked.
00041 #define RE_ROMAN "^([IVXLCDM]+)$"
00042 
00043 // Default: numeric-only patterns, anchored to the whole token. DATE: day/month/year with 1-2 digit day, 1-2 digit month, 1-4 digit year (3 captures); ranges are only loosely checked (e.g. day 39 or month 19 still match). TIME1: hour 0-24, then 'h' or ':', then optional minutes 0-59 with optional "min"/"m" suffix (2 captures). TIME2: minutes 0-59 followed by a mandatory "min."/"min"/"m."/"m" suffix (1 capture).
00044 #define RE_DATE_DF "^(?:(?:((?:[0-3])?(?:\\d))/)(?:((?:(?:[0-1])?(?:\\d)))/)(\\d{1,4}))$"
00045 #define RE_TIME1_DF "^(?:((?:(?:[0-1])?(?:\\d))|(?:2(?:[01234])))(?:h|:)(?:((?:[0-5])?(?:\\d))(?:min|m)?)?)$"
00046 #define RE_TIME2_DF "^(?:((?:[0-5])?(?:\\d))(?:min\\.|min|m\\.|m))$"
00047 
00048 // Spanish: same day/month/year and hour/minute structure as the RE_*_DF patterns, but the month may also be a Spanish month name or three-letter abbreviation, and "minutos" is accepted as a minutes suffix. Names are lowercase here; case handling depends on how the RegEx is constructed (not visible in this header).
00049 #define RE_DATE_ES "^(?:(?:((?:[0-3])?(?:\\d))/)(?:((?:(?:[0-1])?(?:\\d))|enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|octubre|noviembre|diciembre|ene|feb|mar|abr|may|jun|jul|ago|sep|oct|nov|dic)/)(\\d{1,4}))$"
00050 #define RE_TIME1_ES "^(?:((?:(?:[0-1])?(?:\\d))|(?:2(?:[01234])))(?:h|:)(?:((?:[0-5])?(?:\\d))(?:minutos|min|m)?)?)$"
00051 #define RE_TIME2_ES "^(?:((?:[0-5])?(?:\\d))(?:minutos|min\\.|min|m\\.|m))$"
00052 
00053 // Catalan: same day/month/year and hour/minute structure as the RE_*_DF patterns, but the month may also be a Catalan month name or three-letter abbreviation, and "minuts" is accepted as a minutes suffix. NOTE(review): the month name with a cedilla is non-ASCII, so it only matches if the source and input encodings agree - confirm.
00054 #define RE_DATE_CA "^(?:(?:((?:[0-3])?(?:\\d))/)(?:((?:(?:[0-1])?(?:\\d))|gener|febrer|març|abril|maig|juny|juliol|agost|setembre|octubre|novembre|desembre|gen|feb|mar|abr|mai|jun|jul|ago|set|oct|nov|des)/)(\\d{1,4}))$"
00055 #define RE_TIME1_CA "^(?:((?:(?:[0-1])?(?:\\d))|(?:2(?:[01234])))(?:h|:)(?:((?:[0-5])?(?:\\d))(?:minuts|min|m)?)?)$"
00056 #define RE_TIME2_CA "^(?:((?:[0-5])?(?:\\d))(?:minuts|min\\.|min|m\\.|m))$"
00057 
00058 // English: same day/month/year and hour/minute structure as the RE_*_DF patterns (day still comes before month), but the month may also be an English month name or three-letter abbreviation, and "minutes" is accepted as a minutes suffix. "may" is listed twice (full name and abbreviation coincide); the duplicate alternative is redundant but harmless.
00059 #define RE_DATE_EN "^(?:(?:((?:[0-3])?(?:\\d))/)(?:((?:(?:[0-1])?(?:\\d))|january|february|march|april|may|june|july|august|september|october|november|december|jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/)(\\d{1,4}))$"
00060 #define RE_TIME1_EN "^(?:((?:(?:[0-1])?(?:\\d))|(?:2(?:[01234])))(?:h|:)(?:((?:[0-5])?(?:\\d))(?:minutes|min|m)?)?)$"
00061 #define RE_TIME2_EN "^(?:((?:[0-5])?(?:\\d))(?:minutes|min\\.|min|m\\.|m))$"
00062 
00063 
00068 
00069 class dates_module: public automat {
00070    // Base class of the language-specific classes declared in this header (dates_default, dates_es, dates_ca, dates_en); derives from automat and holds the protected state they share.
00071    protected:
00073       std::map<std::string,int> nMes;  // string -> int table; presumably month name -> month number ("mes" = month) - confirm in the .cc
00075       std::map<std::string,std::string> nDia;  // string -> string table; presumably weekday name -> weekday code ("dia" = day) - confirm in the .cc
00077       std::string century,year,month,day,weekday,hour,minute,meridian;  // presumably the date/time fields collected while a date expression is being recognized - confirm
00079       int temp;  // integer scratch value; its exact use is not visible in this header
00080       int sign;  // for the Catalan "un quart menys(-1)/i(1) cinc de sis" or
00081                  //  for English: a quarter to(-1)/past(1) five.
00082       int daytemp; // for special state Gbb in English
00083       bool inGbb;  // presumably true while the automaton is in the special English state Gbb - confirm
00085       std::map<std::string,int> tok;  // string -> int table; presumably word form -> token code - confirm in the .cc
00086 
00087       // required regular expressions objects
00088       RegEx RE_Date;
00089       RegEx RE_Time1;
00090       RegEx RE_Time2;
00091       RegEx RE_Roman;
00092 
00093    public:
00095       dates_module(const std::string &, const std::string &, const std::string &, const std::string &);  // four string arguments; presumably the patterns for the four RegEx members above - order to be confirmed in the .cc
00096 };
00097 
00098 
00103 
00104 class dates_default : public dates_module {
00105    // Default variant of dates_module; presumably constructed with the RE_*_DF patterns - confirm in the .cc.
00106    private:
00107       int ComputeToken(int, sentence::iterator &, sentence &);  // the four private methods below are presumably implementations of hooks declared by automat (not visible here) - TODO confirm
00108       void ResetActions();
00109       void StateActions(int, int, int, sentence::const_iterator);
00110       void SetMultiwordAnalysis(sentence::iterator, int);
00111 
00112    public:
00114       dates_default();  // takes no arguments; any patterns passed to dates_module are chosen in the definition (not visible here)
00115 };
00116 
00121 
00122 class dates_es : public dates_module {
00123    // Spanish variant of dates_module; presumably constructed with the RE_*_ES patterns - confirm in the .cc.
00124    private:
00125       int ComputeToken(int, sentence::iterator &, sentence &);  // the four private methods below are presumably implementations of hooks declared by automat (not visible here) - TODO confirm
00126       void ResetActions();
00127       void StateActions(int, int, int, sentence::const_iterator);
00128       void SetMultiwordAnalysis(sentence::iterator, int);
00129 
00130    public:
00132       dates_es();  // takes no arguments; any patterns passed to dates_module are chosen in the definition (not visible here)
00133 };
00134 
00135 
00140 
00141 class dates_ca : public dates_module {
00142    // Catalan variant of dates_module; presumably constructed with the RE_*_CA patterns - confirm in the .cc.
00143    private:
00144       int ComputeToken(int, sentence::iterator &, sentence &);  // the four private methods below are presumably implementations of hooks declared by automat (not visible here) - TODO confirm
00145       void ResetActions();
00146       void StateActions(int, int, int, sentence::const_iterator);
00147       void SetMultiwordAnalysis(sentence::iterator, int);
00148 
00149    public:
00151       dates_ca();  // takes no arguments; any patterns passed to dates_module are chosen in the definition (not visible here)
00152 };
00153 
00154 
00159 
00160 class dates_en : public dates_module {
00161  // English variant of dates_module; presumably constructed with the RE_*_EN patterns - confirm in the .cc.
00162  private:
00163   int ComputeToken(int, sentence::iterator &, sentence &);  // the four private methods below are presumably implementations of hooks declared by automat (not visible here) - TODO confirm
00164   void ResetActions();
00165   void StateActions(int, int, int, sentence::const_iterator);
00166   void SetMultiwordAnalysis(sentence::iterator, int);
00167 
00169   std::map<std::string,int> numDay;  // string -> int table present only in the English class; presumably day-number words (e.g. ordinals) -> numeric value - confirm in the .cc
00170 
00171  public:
00173   dates_en();  // takes no arguments; any patterns passed to dates_module are chosen in the definition (not visible here)
00174 };
00175 
00176 
00177 #endif
00178 
Generated on Tue Jul 27 16:29:25 2010 for FreeLing by  doxygen 1.6.3