Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members  

pcre++.cc

Go to the documentation of this file.
00001 /*
00002  *
00003  *  $Id: pcre++.cc,v 1.2 2002/01/02 01:25:30 zarahg Exp $
00004  * 
00005  *  This file  is part of the  NABOU  Intrusion Detection System.
00006  *
00007  *  By  accessing  this software,  NABOU, you  are  duly informed
00008  *  of and agree to be  bound by the  conditions  described below
00009  *  in this notice:
00010  *
00011  *  This software product,  NABOU,  is developed by Thomas Linden
00012  *  and   copyrighted (C) 1999-2002   by  Thomas Linden, with all
00013  *  rights reserved.
00014  *
00015  *  There  is no charge for NABOU software.  You can redistribute
00016  *  it and/or modify it under the terms of the GNU General Public
00017  *  License, which is incorporated by reference herein.
00018  *
00019  *  NABOU is distributed WITHOUT ANY WARRANTY, IMPLIED OR EXPRESS,
00020  *  OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE or that
00021  *  the use of it will not infringe on any third party's intellec-
00022  *  tual property rights.
00023  *
00024  *  You should have received a copy of the GNU General Public
00025  *  License along with NABOU.  Copies can also be obtained from:
00026  *
00027  *    http://www.gnu.org/copyleft/gpl.html
00028  *
00029  *  or by writing to:
00030  *
00031  *  Free Software Foundation, Inc.
00032  *  59 Temple Place, Suite 330
00033  *  Boston, MA 02111-1307
00034  *  USA
00035  *
00036  *  Or contact:
00037  *
00038  *   "Thomas Linden" <tom@nabou.org>
00039  *
00040  *
00041  */
00042 
00043 #include "pcre++.h"
00044 
00045 Pcre::Pcre(const string& expression) {
00046   _expression   = expression;
00047   Compile(0);
00048 }
00049 
00050 Pcre::Pcre(const string& expression, const string& flags) {
00051   _expression   = expression;
00052   unsigned int FLAG = 0;
00053 
00054   for(unsigned int flag=0; flag<flags.length(); flag++) {
00055     switch(flags[flag]) {
00056     case 'i': FLAG |= PCRE_CASELESS;  break;
00057     case 'm': FLAG |= PCRE_MULTILINE; break;
00058     case 's': FLAG |= PCRE_DOTALL;    break;
00059     case 'x': FLAG |= PCRE_EXTENDED;  break;
00060     }
00061   }
00062 
00063   _flags = FLAG;
00064 
00065   Compile(FLAG);
00066 }
00067 
00068 Pcre::Pcre(Pcre &P) {
00069   _expression = P._expression;
00070   _flags      = P._flags;
00071   Compile(_flags);
00072 }
00073 
00074 void Pcre::Compile(int flags) {
00075   p_pcre_extra = NULL;
00076   p_pcre       = NULL;
00077   p_pcre       = pcre_compile((char *)_expression.c_str(), flags,
00078                               (const char **)(&err_str), &erroffset, NULL);
00079 
00080   if(p_pcre == NULL) {
00081     /* umh, that's odd, the parser should not fail at all */
00082     string Error = err_str;
00083     throw PcreException("pcre_compile(..) failed: " + Error);
00084   }
00085 
00086   /* calculate the number of substrings we are willing to catch */
00087   int where;
00088   int info = pcre_fullinfo( p_pcre, p_pcre_extra, PCRE_INFO_CAPTURECOUNT, &where);
00089   if(info == 0) {
00090     sub_len = (where +2) * 3; /* see "man pcre" for the exact formula */
00091   }
00092   else {
00093     throw PcreException(info);
00094   }
00095   did_match = false;
00096   num_matches = -1;
00097 }
00098 
00099 const Pcre& Pcre::operator = (const string& expression) {
00100   reset();
00101   Pcre *pcre = new Pcre(expression);
00102   return *pcre;
00103 }
00104 
00105 Pcre::~Pcre() {
00106   pcre_free(p_pcre);
00107   delete sub_vec;
00108   if(num_matches > 0) /* avoid deleting of uninitialized pointer */
00109     delete resultset;
00110 }
00111 
00112 void Pcre::reset() {
00113   did_match   = false;
00114   num_matches = -1;
00115 }
00116 
00117 bool Pcre::search(const string& stuff, int OffSet) {
00118   return dosearch(stuff, OffSet);
00119 }
00120 
00121 bool Pcre::search(const string& stuff) {
00122   return dosearch(stuff, 0);
00123 }
00124 
00125 bool Pcre::dosearch(const string& stuff, int OffSet) {
00126   reset();
00127   sub_vec = new int[sub_len];
00128   int num = pcre_exec(p_pcre, p_pcre_extra, (char *)stuff.c_str(),
00129                         (int)stuff.length(), OffSet, 0, (int *)sub_vec, sub_len);
00130 
00131   if(num < 0) {
00132     /* no match at all */
00133     return false;
00134   }
00135   else if(num == 0) {
00136     /* vector too small, there were too many substrings in stuff */
00137     return false;
00138   }
00139   else if(num == 1) {
00140     /* we had a match, but without substrings */
00141     did_match = true;
00142     num_matches = 0;
00143     return true;
00144   }
00145   else if(num > 1) {
00146     /* we had matching substrings */
00147     resultset = new ResultSet;
00148     const char **stringlist;
00149     did_match = true;
00150     num_matches = num - 1;
00151 
00152     int res = pcre_get_substring_list((char *)stuff.c_str(), sub_vec, num, &stringlist);
00153     if(res == 0) {
00154       for(int i=1; i<num; i++) {
00155         resultset->push_back(stringlist[i]);
00156       }
00157       pcre_free_substring_list(stringlist);
00158     }
00159     else {
00160       throw PcreException(res);
00161     }
00162     return true;
00163   }
00164   else {
00165     /* some other uncommon error occured */
00166     return false;
00167   }
00168 }
00169 
00170 ResultSet* Pcre::get_sub_strings() {
00171   if(resultset != NULL)
00172     return resultset;
00173   else
00174     return NULL;
00175 }
00176 
00177 string Pcre::get_match(int pos) {
00178   if(pos >= 0 && pos <= num_matches) {
00179     ResultIterator P = resultset->begin() + pos;
00180     return *P;
00181   }
00182   else {
00183     throw PcreException("out of range");
00184   }
00185 }
00186 
00187 int Pcre::get_match_start(int pos) {
00188   if(pos >= 0 && pos <= num_matches) {
00189     /*
00190      * sub_vec[0] and [1] is the start/end of the entire string.
00191      */
00192     return sub_vec[ (++pos) * 2 ];
00193   }
00194   else {
00195     throw PcreException("out of range");
00196   }  
00197 }
00198 
00199 int Pcre::get_match_end(int pos) {
00200   if(pos >= 0 && pos <= num_matches) {
00201     /*
00202      * the end offset of a subpattern points to
00203      * the first offset of the next substring,
00204      * therefore -1
00205      */
00206     return sub_vec[ ((++pos) * 2) + 1 ] - 1;
00207   }
00208   else {
00209     throw PcreException("out of range");
00210   }
00211 }
00212 
00213 /*
00214  * Exception class
00215  */
00216 PcreException::PcreException(int num) {
00217   errnum  = num;
00218   string msg;
00219   switch(num) {
00220   case -1: msg = "PCRE_ERROR_NOMATCH";      break;
00221   case -2: msg = "PCRE_ERROR_NULL";         break;
00222   case -3: msg = "PCRE_ERROR_BADOPTION";    break;
00223   case -4: msg = "PCRE_ERROR_BADMAGIC";     break;
00224   case -5: msg = "PCRE_ERROR_UNKNOWN_NODE"; break;
00225   case -6: msg = "PCRE_ERROR_NOMEMORY";     break;
00226   case -7: msg = "PCRE_ERROR_NOSUBSTRING";  break;
00227   }
00228   message = "PCRE error: " + msg;
00229   _name = "PcreException";
00230 }
00231 
00232 PcreException::PcreException(const string& msg) {
00233   message = msg;
00234   _name = "PcreException";
00235 }

Generated at Fri Jan 4 03:57:03 2002 for PCRE++ by doxygen1.2.6 written by Dimitri van Heesch, © 1997-2001