src/bregex.c
/* [<][>][^][v][top][bottom][index][help] */
DEFINITIONS
This source file includes following functions.
- error
- hdigit
- tok
- token
- skiptok
- classtok
- skipclass
- puteps
- putneps
- bseti
- skiplen
- initbitm
- initsimp
- psimp
- dec
- skipend
- pnorm
- pnorm
- por
- findandrep
- cstack
- cstack
- fillout
- initjtable
- regcomp
- cleantail
- regexec
- copyclass
- regcopy
- regfree
- catlim
- regerror
/************************************************************************
* bregex -- The `Better/BuGless/Berg' Regular Expression library *
* *
* Copyright (c) 1991-1996, by S.R. van den Berg, The Netherlands *
* *
* See bregex.h for more info. *
* *
* You are not required to understand the sources in order to *
* use them (I suspect not many people could use them in that *
* case). You can, however, admire them as a work of art :-). *
************************************************************************/
#ifdef RCS
static const char rcsid[]=
"$Id: bregex.c,v 1.2 2002/03/24 13:39:12 proff Exp $";
#endif
#include <sys/types.h> /* size_t */
#include <stddef.h> /* offsetof() */
#ifndef RE_nCHAR_CLASS
#include <string.h> /* strncmp() */
#ifdef RE_nCTYPE
#undef RE_nCTYPE
#endif
#else
#ifndef RE_nERROR_REPORT
#include <string.h> /* strlen() */
#else
#ifdef RE_MEMCHR
#include <string.h> /* memchr() */
#endif
#endif
#endif
#ifndef RE_nCTYPE
#include <ctype.h>
#endif
#define BREGEX_C_
#include "bregex.h"
#ifndef offsetof
#define offsetof(s,m) ((char*)&((s*)sizeof(s))->m-(char*)sizeof(s))
#endif
#define ioffsetof(struc,memb) ((int)offsetof(struc,memb))
#define maxindex(a) (sizeof(a)/sizeof((a)[0])-1)
#define STRLEN(a) (sizeof(a)-1)
#define mx(a,b) ((a)>(b)?(a):(b))
typedef unsigned char uschar;
#define uchar uschar
#ifdef RE_nCTYPE
#define islower(c) ((unsigned)(c)-'a'<='z'-'a')
#define isupper(c) ((unsigned)(c)-'A'<='Z'-'A')
#define isprint(c) ((unsigned)(c)-' '<='~'-' ') /* ASCII dependent */
#define tolower(c) ((c)-'A'+'a')
#define toupper(c) ((c)+'A'-'a')
#define TO_LOWER(i) (isupper(i)&&((i)+='a'-'A'))
#else
#ifndef const
#define TO_LOWER(i) ((i)=Tolower(i))
#else
#define TO_LOWER(i) (isupper(i)&&((i)=Tolower(i)))
#endif /* const */
#endif /* RE_nCTYPE */
#define STARTSTACK 16 /* needed stacksize will solely depend */
#define INCSTACK 16 /* on the length of the match */
#define BITS_P_CHAR 8 /* if you change this, everything goes with it */
#define OPB (1<<BITS_P_CHAR) /* except the \x00 and */
/* \000 encodings */
#define C_BEG_GROUP '(' /* if you prefer different symbols */
#define C_OR '|' /* here is your chance */
#define C_END_GROUP ')'
#define C_0_OR_MORE '*'
#define C_0_OR_1 '?'
#define C_1_OR_MORE '+'
#define C_DOT '.'
#define C_BOL '^'
#define C_EOL '$'
#define C_BEG_NUMBER '{'
#define C_SEP_NUMBER ','
#define C_END_NUMBER '}'
#define C_BEG_CLASS '['
#define C_BEG_CCLASS '['
#define C_DEL_CCLASS ':'
#define C_END_CCLASS ']'
#define C_NOT_CLASS '^'
#define C_RANGE '-'
#define C_END_CLASS ']'
#define C_ESCAPE '\\'
#ifndef RE_nCHAR_CLASS /* The order has to correspond with R_CC_* */
static const char*const cclass[]={"alnum","alpha","blank","cntrl","digit",
"graph","lower","print","punct","space","upper","xdigit",0};
#endif /* the opcodes of the non-deterministic automaton */
/* below OPB are the regular characters */
#define OPC_CLASS OPB /* a bitmapped character class */
#define OPC_DOT (OPB+1) /* any character */
#define OPC_BOL (OPB+2) /* beginning of line */
#define OPC_EOL (OPB+3) /* end of line */
#define OPC_EPS (OPB+4) /* epsilon-transition (fork) */
#define OPC_FIN (OPB+5) /* finish */
#define OPC_GOP (OPB+6) /* group open */
#define OPC_GCL (OPB+7) /* group close */
/* Don't change any opcode here without checking skplen[] */
#define R_BEG_GROUP (OPB|C_BEG_GROUP) /* token values */
#define R_OR (OPB|C_OR)
#define R_END_GROUP (OPB|C_END_GROUP)
#define R_0_OR_MORE (OPB|C_0_OR_MORE)
#define R_0_OR_1 (OPB|C_0_OR_1)
#define R_1_OR_MORE (OPB|C_1_OR_MORE)
#define R_DOT (OPB|C_DOT)
#define R_BOL (OPB|C_BOL)
#define R_EOL (OPB|C_EOL)
#define R_BEG_NUMBER (OPB|C_BEG_NUMBER)
#define R_END_NUMBER (OPB|C_END_NUMBER)
#define R_BEG_CLASS (OPB|C_BEG_CLASS)
#define R_NOT_CLASS (OPB|C_NOT_CLASS)
#define R_RANGE (OPB|C_RANGE)
#define R_END_CLASS (OPB|C_END_CLASS)
#define R_CC_BASE (2*OPB)
#define R_CC_alnum (R_CC_BASE)
#define R_CC_alpha (R_CC_BASE+1)
#define R_CC_blank (R_CC_BASE+2)
#define R_CC_cntrl (R_CC_BASE+3)
#define R_CC_digit (R_CC_BASE+4)
#define R_CC_graph (R_CC_BASE+5)
#define R_CC_lower (R_CC_BASE+6)
#define R_CC_print (R_CC_BASE+7)
#define R_CC_punct (R_CC_BASE+8)
#define R_CC_space (R_CC_BASE+9)
#define R_CC_upper (R_CC_BASE+10)
#define R_CC_xdigit (R_CC_BASE+11)
#define R_CC_nothing (R_CC_BASE+12) /* has to be the last R_CC_* */
#define bit_type unsigned
#define bit_bits (sizeof(bit_type)*BITS_P_CHAR)
#define bit_index(which) ((unsigned)(which)/bit_bits)
#define bit_mask(which) ((unsigned)1<<(unsigned)(which)%bit_bits)
#define bit_toggle(name,which) (name[bit_index(which)]^=bit_mask(which))
#define bit_test(name,which) (!!(name[bit_index(which)]&bit_mask(which)))
#define bit_set(name,which,value) \
(value?(name[bit_index(which)]|=bit_mask(which)):\
(name[bit_index(which)]&=~bit_mask(which)))
#define bit_mindex(size) (((size)+bit_bits-1)/bit_bits)
#define bit_field(name,size) bit_type name[bit_mindex(size)]
#ifdef RE_nEXTENDED
#ifdef RE_WILDCARDS
#undef RE_nEXTENDED
#endif
#endif
#ifdef RE_EMPTY_OR
#ifdef RE_SURE_MINIMAL
#undef RE_SURE_MINIMAL
#endif
#endif
#ifndef RE_nERROR_DETECT
#ifdef RE_SURE_MINIMAL
#undef RE_SURE_MINIMAL
#endif
#endif
#ifdef RE_HEX_CHAR
#define HDIGIT
#define BACKSL_CLASS
#else
#ifdef RE_OCT_CHAR
#define HDIGIT
#define BACKSL_CLASS
#else
#ifndef RE_nBRACES
#define HDIGIT
#endif
#ifdef RE_SPEC_CHAR
#define BACKSL_CLASS
#else
#ifdef RE_NULL_CHAR
#define BACKSL_CLASS
#endif
#endif
#endif
#endif
#ifdef HDIGIT
#define HEX2NUM
#else
#ifndef RE_nERROR_REPORT
#define HEX2NUM
#endif
#endif
#ifdef RE_nICASE
#ifndef RE_nCASE_TABLE
#define RE_nCASE_TABLE
#endif
#undef REG_ICASE
#define REG_ICASE 0
#undef TO_LOWER
#define TO_LOWER(i) (i)
#endif /* RE_nICASE */
#ifndef RE_nCASE_TABLE
#define Tolower(c) (igncase[c])
#undef TO_LOWER
#define TO_LOWER(i) ((i)=Tolower(i))
#else /* RE_nCASE_TABLE */
#define Tolower(c) tolower(c)
#endif /* RE_nCASE_TABLE */
#ifndef isblank
#define isblank(c) ((c)==' '||(c)=='\t')
#endif
#define SZ(x) (sizeof(struct x))
#define Ceps (struct eps*)
#define geno(to,add) ((char*)(to)+(add))
#define epso(to,add) (Ceps geno(to,add))
#define dif(high,low) ((char*)(high)-(char*)(low))
#define Jtable(p) (((struct aligar*)(p))->table)
#define SKIP_TOK (1<<0)
#define CLASS_TOK (1<<1)
#define ii (aleps.topc)
#define kk (aleps.tnext)
#define jj (aleps.ua.jjua)
#define jjp (aleps.ua.tspawn)
#define spawn sp.awn
struct eps{unsigned opc;struct eps*stack;
union evu{struct eps*awn;void*oid;}sp;struct eps*next;};
struct ALEPS{unsigned topc;struct eps*tnext;
union everyt{struct eps*tspawn;unsigned jjua;void*Irrelevoid;}ua;};
#ifdef RE_nCONCUR_COMPILE /* some compilation temporaries */
static struct eps*r; /* target object pointer */
static uchar*p; /* regexp source pointer */
static regex_t*gpreg; /* regex_t reference */
static struct ALEPS aleps; /* general purpose aligner and scratch variables */
static uchar*cachea,*cachep; /* cache at, cache resulting p */
static size_t cacher; /* cache resulting delta r */
static errorno; /* last error found */
#ifdef RE_NSUBMAX
static mnmatch; /* REG_NSUBMAX count cache */
#endif
#define VSO
#define VS
#define VSA
#define VSD
#else /* RE_nCONCUR_COMPILE */
#define VSO vs /* to avoid static variables, use a */
#define VS VSO, /* fake struct that is passed all the */
#define VSS struct Vs*const
#define VSA VSS VSO,
#define VSD VSS VSO; /* way down the compilation functions */
struct Vs{struct eps*r_;uchar*p_;regex_t*gpreg_;struct ALEPS aleps_;
uchar*cachea_,*cachep_;size_t cacher_;int errorno_;
#ifdef RE_NSUBMAX
int mnmatch_;
#endif
};
#define r (vs->r_)
#define p (vs->p_)
#define gpreg (vs->gpreg_)
#define aleps (vs->aleps_)
#define cachea (vs->cachea_)
#define cachep (vs->cachep_)
#define cacher (vs->cacher_)
#define errorno (vs->errorno_)
#define mnmatch (vs->mnmatch_)
#endif /* RE_nCONCUR_COMPILE */
#ifndef RE_nCASE_TABLE
static uchar bothcase[OPB],igncase[OPB];
#endif
struct mchar{unsigned opcc_;struct eps*next1_;
struct evoi{struct eps*st_;const void*wh_;}p1_,p2_;};
struct chclass{unsigned opcc;struct eps*next1;struct evoi pos1,pos2;
bit_field(c,OPB);};
struct gop{unsigned opcgop;struct eps*gopnext;
union{unsigned nr;void*IRrelevoid;}gopp;};
#define gcl gop
#ifdef RE_nJUMP_TABLE
struct aligar{unsigned aopc;bit_field(table,OPB);};
#else /* RE_nJUMP_TABLE */
#ifdef RE_SMALL_JUMP_TABLE
typedef uchar jt_type;
#else
typedef unsigned jt_type;
#endif
struct aligar{unsigned aopc;jt_type table[OPB];};
#endif /* RE_nJUMP_TABLE */
/* lenght array, used by skiplen() */
static const char skplen[]={SZ(chclass),SZ(mchar),SZ(mchar),SZ(mchar),SZ(eps),
0
#ifndef RE_pNOSUB
,SZ(gop),SZ(gcl)
#endif
};
#ifndef RE_nERROR_DETECT
static void error(VS nr)VSD const unsigned nr;
/* [<][>][^][v][top][bottom][index][help] */
{ if(!errorno||(uchar*)gpreg->re_es.re_stack>p) /* no error yet? earlier? */
errorno=nr,gpreg->re_es.re_stack=p; /* ok, save it for posterity */
}
#else
#define error(VS nr)
#endif
#ifdef HEX2NUM
static const char hex2num[]="0123456789abcdef"; /* to get rid of ASCII */
#endif
#ifdef HDIGIT
static int hdigit(d) /* returns 16 if non-xdigit */
/* [<][>][^][v][top][bottom][index][help] */
{ register const char*chp;
TO_LOWER(d);
for(chp=hex2num-1;*++chp&&*chp!=d;);
return(int)(chp-hex2num);
}
#endif
static unsigned tok(VS type)VSD const int type; /* get token */
/* [<][>][^][v][top][bottom][index][help] */
{ register size_t i= *p;
#ifndef RE_nEXTENDED
#ifndef RE_pEXTENDED
register unsigned extended=gpreg->re_flags®_EXTENDED; /* cache it */
#endif
#endif
#ifdef RE_WILDCARDS
register unsigned wildcards=gpreg->re_flags®_mWILDCARDS; /* cache it */
#endif
if(type&CLASS_TOK) /* inside a class? */
{ switch(i)
{ case '\0':goto case_0;
case C_NOT_CLASS:case C_RANGE:case C_END_CLASS:goto o_case;
#ifdef BACKSL_CLASS
case C_ESCAPE:goto e_case;
#endif
#ifndef RE_nCHAR_CLASS
case C_BEG_CCLASS:
if(p[1]==C_DEL_CCLASS)
{ ;{ const uchar*cc=p+1;
do /* find the other delimiter */
if(!*++cc)
{ error(VS REG_ECTYPE);break; /* oops, end of string */
}
while(*cc!=C_DEL_CCLASS||cc[1]!=C_END_CCLASS);
i=cc-p-2; /* how long was it now? */
}
;{ const char*const*ccp; /* do we have that name here? */
for(ccp=cclass;strncmp(*ccp,(char*)p+2,i)||(*ccp)[i];)
if(!*++ccp)
{ error(VS REG_ECTYPE);break; /* oops, past last name */
}
if(p[i+=2]&&p[++i]) /* carefully sense the end */
i++;
if(type&SKIP_TOK)
p+=i; /* actually skip it now */
return R_CC_BASE+ccp-cclass;
}
}
#endif
}
}
switch(i)
{ case '\0':
case_0: return R_END_GROUP;
case C_ESCAPE:
#ifdef BACKSL_CLASS
e_case:
#endif
switch(i=p[1])
{ case '\0':i=C_ESCAPE;error(VS REG_EESCAPE);goto once;
#ifdef RE_HEX_CHAR
case 'x': /* \x00 */
if(hdigit(i)<16)
{ i=hdigit(i);
if(hdigit(p[3])<16)
{ i=(i<<4)+hdigit(p[3]);
#ifdef RE_OCT_CHAR
goto skip2;
}
else
goto skip1;
#else
if(type&SKIP_TOK)
p+=2;
}
else if(type&SKIP_TOK)
p++;
#endif
}
break;
#endif
#ifdef RE_OCT_CHAR
default: /* \000 */
if(hdigit(i)<8)
{ i=hdigit(i);
if(hdigit(p[2])<8)
{ i=(i<<3)+hdigit(p[2]);
if(hdigit(p[3])<8)
{ i=(i<<3)+hdigit(p[3]);
skip2: if(type&SKIP_TOK)
p+=2;
}
else
skip1: if(type&SKIP_TOK)
p++;
}
}
break;
#else /* RE_OCT_CHAR */
#ifdef RE_NULL_CHAR
case '0':i='\0';break;
#endif
#endif /* RE_OCT_CHAR */
#ifdef RE_SPEC_CHAR
case 'a':i='\a';break;
case 'b':i='\b';break;
case 'f':i='\f';break;
case 'n':i='\n';break;
case 'r':i='\r';break;
case 't':i='\t';break;
case 'v':i='\v';break;
#endif
#ifndef RE_pEXTENDED
case C_0_OR_1:case C_1_OR_MORE:case C_BEG_GROUP:case C_OR:
case C_END_GROUP:
#ifndef RE_nBRACES
case C_BEG_NUMBER:case C_END_NUMBER:
#endif
#ifndef RE_nEXTENDED
if(!extended)
#endif
i|=OPB;
#endif /* RE_pEXTENDED */
}
if(type&SKIP_TOK)
p++;
break;
#ifndef RE_nEXTENDED
case C_BEG_GROUP:case C_OR:case C_END_GROUP:
#ifdef RE_WILDCARDS
if(!extended)
{ break;
#endif
case C_0_OR_1:
#ifdef RE_WILDCARDS
if(wildcards&&!(type&CLASS_TOK))
{ i=R_DOT;break;
}
#endif
case C_1_OR_MORE:
#ifndef RE_nBRACES
case C_BEG_NUMBER:case C_END_NUMBER:
#endif
#ifndef RE_pEXTENDED
if(!extended)
break;
#endif
#endif /* RE_nEXTENDED */
case C_DOT:
#ifdef RE_WILDCARDS
if(wildcards)
break;
#endif
#ifdef RE_WILDCARDS
}
#endif
case C_0_OR_MORE:case C_BOL:case C_EOL:case C_BEG_CLASS:
if(!(type&CLASS_TOK))
o_case: i|=OPB;
}
once:
if(type&SKIP_TOK) /* skip it as well? */
p++;
return i; /* return token found */
}
static unsigned token(VSO)VSD /* normal token */
/* [<][>][^][v][top][bottom][index][help] */
{ return tok(VS 0);
}
static unsigned skiptok(VSO)VSD /* skip normal token */
/* [<][>][^][v][top][bottom][index][help] */
{ return tok(VS SKIP_TOK);
}
static unsigned classtok(VSO)VSD /* token in class */
/* [<][>][^][v][top][bottom][index][help] */
{ return tok(VS CLASS_TOK);
}
static unsigned skipclass(VSO)VSD /* skip token in class */
/* [<][>][^][v][top][bottom][index][help] */
{ return tok(VS CLASS_TOK|SKIP_TOK);
}
/* epsilon transition */
static void puteps(spot,to,aswell)struct eps*const spot;
/* [<][>][^][v][top][bottom][index][help] */
const struct eps*const to,*const aswell;
{ spot->opc=OPC_EPS;spot->next=to!=spot?Ceps to:Ceps aswell;
spot->spawn=aswell!=spot?Ceps aswell:Ceps to;
#ifndef RE_SURE_MINIMAL
spot->stack=0; /* zero stack; cstack(), the optimiser, needs this! */
#endif
}
static void putneps(spot,to)struct eps*const spot;const struct eps*const to;
/* [<][>][^][v][top][bottom][index][help] */
{ puteps(spot,to,spot+1); /* epsilon transition, one branch to the next */
}
#define Cc(p,memb) (((struct chclass*)(p))->memb)
#define Go(p,memb) (((struct gop*)(p))->memb)
#define Gc(p,memb) (((struct gcl*)(p))->memb)
#define rAc Cc(r,c)
static void bseti(VS fld,i,j)VSD bit_field(fld,OPB);unsigned i;
/* [<][>][^][v][top][bottom][index][help] */
const unsigned j;
{ bit_set(fld,i,j); /* mark 'i' as being in the class */
if(gpreg->re_flags®_ICASE) /* mark the other case too */
{ if(islower(i)) /* lowercase */
i=toupper(i);
else
TO_LOWER(i); /* uppercase */
bit_set(fld,i,j);
}
}
/* general purpose length routine */
static struct eps*skiplen(ep)const struct eps*const ep;
/* [<][>][^][v][top][bottom][index][help] */
{ return epso(ep,ep->opc<OPB?SZ(mchar):skplen[ep->opc-OPB]);
}
static void initbitm(fld,reset)bit_type*fld;const unsigned reset;
/* [<][>][^][v][top][bottom][index][help] */
{ register unsigned i=bit_mindex(OPB);
do fld[--i]=reset?0:~(bit_type)0; /* preset the bit field */
while(i);
}
static void initsimp(rp,e)struct eps*rp,*e;
/* [<][>][^][v][top][bottom][index][help] */
{ Cc(rp,next1)=e;Cc(rp,pos1.st_)=Cc(rp,pos2.st_)=0; /* init thiss & other */
#ifndef RE_nJUMP_TABLE /* stacks to 0 (regexec() & initjtable() need this) */
Cc(rp,pos2.wh_)=0;
#endif
}
/* we need one forward declaration to allow recursion */
static int por P((VSA const struct eps*const e));
#define IN_CLASS(CLASS,class) \
case CLASS:\
if(class(cc))\
break;\
continue
static void psimp(VS e)VSD const struct eps*e; /* put a simple element */
/* [<][>][^][v][top][bottom][index][help] */
{ register union{unsigned uns;struct eps*epp;}u; /* save stack space */
register union{unsigned i;void*pold;}v; /* for the recursion */
switch(token(VSO))
{ case R_BEG_GROUP:skiptok(VSO); /* not so simple after all */
#ifndef RE_pNOSUB /* are we exceeding REG_NSUBMAX? */
if(!e||gpreg->re_flags®_NOSUB
#ifdef RE_NSUBMAX
||gpreg->re_flags®_NSUBMAX&&gpreg->re_nsub>=mnmatch
#endif
)
{ if(por(VS e))
error(VS REG_EPAREN);
#ifndef RE_nBRACES /* we need these fillers, identical subexpressions */
if(e) /* must have the same lengths (pnorm() depends on it) */
r->opc=OPC_GOP; /* also, bogus opcodes are not allowed */
r=epso(r,SZ(gop)); /* e.g. findandrep() depends on it */
if(!e)
goto no_e;
#else
r=epso(r,SZ(gop)+SZ(gcl));return;
#endif
}
else /* insert the numbered paren pairs */
{ (u.epp=r)->opc=OPC_GOP;r=epso(u.epp,SZ(gop));
u.epp->stack=epso(u.epp,SZ(gop)); /* remember the spot of the ( */
Go(u.epp,gopp.nr)= ++gpreg->re_nsub;v.pold=p;por(VS Ceps 0);
p=v.pold;r->stack=Ceps e;Gc(e=r,gopp.nr)=Go(u.epp,gopp.nr);
r=epso(u.epp,SZ(gop)); /* match up the ) with the ( */
if(por(VS e)) /* insert the meat */
error(VS REG_EPAREN);
}
r->opc=OPC_GCL;
no_e: r=epso(r,SZ(gcl));return;
#else /* RE_pNOSUB */
if(por(VS e))
error(VS REG_EPAREN);
return;
#endif /* RE_pNOSUB */
case R_BEG_CLASS: /* a simple class */
skiptok(VSO);u.uns=C_NOT_CLASS!=*p; /* `not' modifier? */
if(e)
r->opc=OPC_CLASS,initsimp(r,Ceps e),initbitm(rAc,u.uns);
if(!u.uns) /* skip the `not' modifier */
{ p++;
#ifndef RE_nNEWLINE
#ifdef RE_pNEWLINE
if(e)
#else
if(e&&gpreg->re_flags®_NEWLINE) /* newlines are special */
#endif
bit_toggle(rAc,'\n');
#endif
}
if(*p==C_END_CLASS) /* right at the start, cannot mean the end */
{ p++;
if(e)
v.i=C_END_CLASS,bit_toggle(rAc,C_END_CLASS);
}
else if(*p==C_RANGE) /* take it literally */
{ p++;
if(e)
v.i=C_RANGE,bit_toggle(rAc,C_RANGE);
}
for(;;skipclass(VSO)) /* skip through the class */
{ switch(classtok(VSO))
{ case R_END_GROUP:error(VS REG_EBRACK);
case R_END_CLASS:skipclass(VSO);r=epso(r,SZ(chclass));
return;
#ifndef RE_nCHAR_CLASS
case R_CC_alnum:case R_CC_alpha:case R_CC_blank:case R_CC_cntrl:
case R_CC_digit:case R_CC_graph:case R_CC_lower:case R_CC_print:
case R_CC_punct:case R_CC_space:case R_CC_upper:case R_CC_xdigit:
if(e)
{ unsigned cc;
v.i=classtok(VSO);cc=OPB-1;
do
{ switch(v.i)
{ IN_CLASS(R_CC_alnum,isalnum);
IN_CLASS(R_CC_alpha,isalpha);
IN_CLASS(R_CC_blank,isblank);
IN_CLASS(R_CC_cntrl,iscntrl);
IN_CLASS(R_CC_digit,isdigit);
IN_CLASS(R_CC_graph,isgraph);
IN_CLASS(R_CC_lower,islower);
IN_CLASS(R_CC_print,isprint);
IN_CLASS(R_CC_punct,ispunct);
IN_CLASS(R_CC_space,isspace);
IN_CLASS(R_CC_upper,isupper);
default:
if(!isxdigit(cc))
continue;
} /* it's in the character class */
bseti(VS rAc,cc,u.uns); /* so mark it */
}
while(cc--); /* walk through the whole alphabet */
}
case R_CC_nothing:v.i=OPB;continue; /* erroneous char class */
#endif
case R_RANGE:p++;
switch(classtok(VSO))
{ default:
if(e) /* mark all in range */
{ while(++v.i<(classtok(VSO)&(OPB-1))) /* careful */
bseti(VS rAc,v.i&(OPB-1),u.uns); /* with mask */
#ifndef RE_nERROR_DETECT /* to stay within bounds */
if(v.i!=(classtok(VSO)&(OPB-1))
#ifndef RE_nCHAR_CLASS
||classtok(VSO)>=R_CC_BASE
#endif
)
error(VS REG_ERANGE);
#endif
}
break;
case R_END_CLASS:p--; /* literally */
case R_END_GROUP:;
}
}
if(e) /* regular character, mark it */
bseti(VS rAc,v.i=classtok(VSO),u.uns);
}
case R_END_GROUP:return;
#ifdef RE_WILDCARDS
case R_0_OR_MORE:
if(!(gpreg->re_flags®_mWILDCARDS))
break;
if(e) /* first an epsilon, then the dot */
putneps(r,e),e=r;
r++;
#endif
case R_DOT: /* matches everything but a newline */
if(e) /* if REG_NEWLINE is set */
{ u.uns=OPC_DOT;goto fine;
}
goto fine2;
case R_BOL: /* beginning of line */
if(e)
{ u.uns=OPC_BOL;goto finep;
}
goto fine2;
case R_EOL: /* end of line */
if(e)
{ u.uns=OPC_EOL;
finep:
#ifndef RE_DUMB_ANCHOR
Cc(r,pos1.wh_)=p;
#endif
goto fine;
}
goto fine2;
}
if(e) /* a regular character */
#ifndef RE_nERROR_DETECT
{ if((u.uns=token(VSO))>=OPB)
error(VS REG_BADRPT),u.uns&=OPB-1;
#else
{ u.uns=token(VSO)&OPB-1;
#endif
fine:
r->opc=u.uns<OPB&&gpreg->re_flags®_ICASE&&isupper(u.uns)?
Tolower(u.uns):u.uns;
initsimp(r,Ceps e);
}
fine2:
skiptok(VSO);r=epso(r,SZ(mchar)); /* advance source and target pointers */
}
#define PROGID const char progid[]=\
"Bregex library, copyright (c) 1991-1993, S.R. van den Berg, The Netherlands"
#ifndef RE_nBRACES
static int dec(VSO)VSD /* return a decimal for {.. , ..} */
/* [<][>][^][v][top][bottom][index][help] */
{ unsigned i=0;
while(hdigit(*p)<10)
i=i*10+hdigit(*p++);
return i;
}
static int skipend(VSO)VSD /* check if we are close before the end */
/* [<][>][^][v][top][bottom][index][help] */
{ switch(token(VSO)) /* anything special? */
{ case R_OR:case R_END_GROUP: /* the end in person */
goto ret1;
case R_BEG_NUMBER: /* something complicated */
for(;;) /* start searching for its complement */
switch(skiptok(VSO))
{ case R_END_GROUP:goto ret1; /* how unexpected */
case R_END_NUMBER:goto skippend; /* how predictable */
}
case R_0_OR_MORE:
#ifdef RE_WILDCARDS
if(gpreg->re_flags®_mWILDCARDS)
break;
#endif
case R_1_OR_MORE:case R_0_OR_1:case R_END_NUMBER:
skiptok(VSO); /* how casual */
skippend:
switch(token(VSO)) /* look one step ahead */
{ case R_OR:case R_END_GROUP: /* so it ends here afterall */
ret1: return 1;
}
}
return 0; /* then again, it doesn't have to */
}
#define EOS(x) (kk?kk:(x)) /* real end, or continue with the next? */
static void pnorm(VS e)VSD const struct eps*const e; /* put normal text */
/* [<][>][^][v][top][bottom][index][help] */
{ void*pold;int i;union{struct eps*old;unsigned len;}q;
unsigned aj,bi;
for(;;) /* get the length */
{ i=0;pold=p;q.old=r;psimp(VS Ceps 0);q.len=dif(r,q.old); /* store it */
if(e) /* we really want to generate code */
r=epso(r,-(int)q.len); /* backup before we start */
switch(token(VSO)) /* anything special trailing it? */
{
#ifndef RE_pNOSUB
unsigned pin;
#endif
case R_1_OR_MORE:ii=1;goto procj0; /* ii indicates the minimum */
case R_0_OR_1:ii=0;goto procj1; /* no. of occurrences */
case R_0_OR_MORE:ii=0; /* jj indicates the maximum */
procj0: jj=0; /* no. of occurrences */
#ifdef RE_WILDCARDS
if(!(gpreg->re_flags®_mWILDCARDS))
#endif
goto proc;
default:ii=1;
procj1: jj=1;goto proc;
case R_BEG_NUMBER:
#ifndef RE_pNOSUB
for(;;gpreg->re_nsub=pin) /* restore it from our excursion */
#else
for(;;)
#endif
{ skiptok(VSO);ii=dec(VSO);jj=C_SEP_NUMBER==*p?(p++,dec(VSO)):ii;
#ifndef RE_nERROR_DETECT
if(token(VSO)!=R_END_NUMBER)
error(VS REG_EBRACE);
#endif
proc: if(jj)
{ aj=jj;bi=ii;
#ifndef RE_SURE_MINIMAL
if(ii>jj)
{ error(VS REG_BADBR);jj=ii;
}
#endif
if(!e) /* only calculating the length? */
{ r=epso(r+aj-bi,q.len*(aj-1));goto eit; /* use a shortcut */
}
}
else if(!e)
{ r++;
if(ii)
r=epso(r,q.len*(ii-1)); /* different shortcut */
goto eit;
}
kk=skipend(VSO)?Ceps e:Ceps 0;p=pold;
if(++i<ii) /* while we're still below the minimum */
#ifdef RE_pNOSUB
psimp(VS epso(r,q.len));
else if(i==ii)
#else /* keep the paren groups out of our hair */
{ pin=gpreg->re_flags;gpreg->re_flags|=REG_NOSUB;
psimp(VS epso(r,q.len));gpreg->re_flags=pin;
} /* keep the paren group number the same */
else if(pin=gpreg->re_nsub,i==ii) /* while copying */
#endif
{ if(i==jj) /* last copy */
{ psimp(VS EOS(epso(r,q.len)));goto eit;
}
if(!jj) /* maximum is infinit */
{ puteps(epso(r,q.len),r,EOS(epso(r,q.len)+1)); /* loop */
psimp(VS epso(r,q.len));r++;goto eit;
}
psimp(VS epso(r,q.len)); /* plain */
}
else if(!jj) /* maximum is infinit */
{ putneps(r,EOS(epso(r+1,q.len)));psimp(VS r++);goto eit;
}
else if(i==jj) /* maximum reached */
{ putneps(r,EOS(epso(r+1,q.len)));r++;
psimp(VS EOS(epso(r,q.len)));
eit: if(skipend(VSO)) /* real end? */
return;
break;
}
else /* plain */
{ putneps(r,EOS(epso(r,(q.len+SZ(eps))*(1+jj-i))));
psimp(VS epso(++r,q.len));
}
}
}
}
}
#else /* RE_nBRACES */
#define EOS(x) (jjp?jjp:(x))
static void pnorm(VS e)VSD const struct eps*const e; /* put normal text */
/* [<][>][^][v][top][bottom][index][help] */
{ void*pold;struct eps*rold;
for(;;) /* skip it first */
{ pold=p;rold=r;psimp(VS Ceps 0);
if(!e) /* if we're just counting */
pold=p; /* remember this spot */
ii=token(VSO);skiptok(VSO);
if(e)
{ p=pold;pold=r;
jjp=token(VSO)==R_OR||token(VSO)==R_END_GROUP?Ceps e:Ceps 0;
}
switch(ii) /* check for any of the postfix operators */
{ case R_0_OR_MORE:
#ifdef RE_WILDCARDS
if(gpreg->re_flags®_mWILDCARDS)
break;
#endif
r++;
if(e) /* first an epsilon, then the rest */
putneps(rold,EOS(r)),r=rold+1,psimp(VS rold);
goto incagoon;
case R_1_OR_MORE: /* first the rest */
if(e) /* and then an epsilon */
puteps(r,rold,EOS(r+1)),r=rold,psimp(VS Ceps pold);
r++;goto incagoon;
case R_0_OR_1:r++;
if(e) /* first an epsilon, then the rest */
putneps(rold,r=EOS(r)),pold=r,r=rold+1,psimp(VS Ceps pold);
incagoon: switch(skiptok(VSO)) /* at the end of this group already? */
{ case R_OR:case R_END_GROUP:return;
}
continue; /* regular end of the group */
case R_OR:case R_END_GROUP:case '\0':
if(e)
r=rold,psimp(VS e);
return;
}
if(e) /* no fancy postfix operators, plain vanilla */
r=rold,psimp(VS Ceps pold);
else
p=pold;
}
}
#endif /* RE_nBRACES */
static PROGID;
static int por(VS e)VSD const struct eps*const e; /* put an or-group */
/* [<][>][^][v][top][bottom][index][help] */
{ uchar*pvold;struct eps*rvold;
if(!e) /* if we're only counting */
{ rvold=r;
if(cachea==(pvold=p)) /* check the cache */
{ p=cachep;r=epso(rvold,cacher);goto ret0; /* yep, hit */
} /* if the cache is taken out, compile time rises exponentially */
} /* with the number of nested parens */
for(;;)
{ uchar*pold;struct eps*rold;
for(pold=p,rold=r;;)
{ switch(token(VSO))
{ default:pnorm(VS Ceps 0);r=rold;continue; /* still in this group */
case R_END_GROUP: /* found the end of the group */
#ifdef RE_EMPTY_OR
if(p==pold) /* empty `or' group */
{ if(e)
puteps(r,e,e); /* misused epsilon as branch */
r++;
}
else
#endif
p=pold,pnorm(VS e); /* normal last group */
if(!e) /* only counting? refresh the cache */
{ skiptok(VSO);cachea=pvold;cachep=p;cacher=dif(r,rvold);
goto ret0;
}
if(*p)
{ skiptok(VSO);
ret0: return 0;
}
return 1;
case R_OR:r++;
#ifdef RE_EMPTY_OR
if(p==pold) /* empty `or' group */
{ if(e)
putneps(rold,e); /* special epsilon */
}
else
#endif
{ p=pold;pnorm(VS e); /* normal `or' group, first an */
if(e) /* epsilon, then the rest */
putneps(rold,r);
}
skiptok(VSO);
}
break;
}
}
}
#ifndef RE_SURE_MINIMAL
static void findandrep(VS old,newv)VSD register struct eps**const old;
/* [<][>][^][v][top][bottom][index][help] */
struct eps*const newv;
{ register struct eps*i,*t= *old;
for(i=r;;i=skiplen(i)) /* change all pointers from *old to newv */
{ switch(i->opc)
{ case OPC_FIN:*old=t;return; /* last one, finished */
case OPC_EPS:
if(i->next==t)
i->next=newv;
if(i->spawn==t)
i->spawn=newv;
continue;
}
if(i->stack==t)
i->stack=newv;
}
}
#define drs(m) (*(struct eps**)geno(*stack,ioffsetof(struct eps,m)^ofs))
#define RETyes 1
#define RETno 0
#ifndef RE_pNOSUB
#undef RETyes
#undef RETno
#define RETyes ((struct eps**)0)
#define RETno nst
static struct eps**cstack(VS stack,ofs)VSD struct eps**const stack;
/* [<][>][^][v][top][bottom][index][help] */
{ struct eps**nst;
for(nst= &drs(next);;)
{ switch((*nst)->opc)
{ case OPC_GOP:case OPC_GCL:nst= &(*nst)->stack;continue;
}
break;
}
#else
#define nst (&drs(next))
static int cstack(VS stack,ofs)VSD struct eps**const stack;
/* [<][>][^][v][top][bottom][index][help] */
{
#endif /* RE_pNOSUB */
if((*nst)->stack==Ceps p)
{ findandrep(VS *(struct eps***)stack,drs(next));*stack=drs(spawn);
return RETyes;
}
return RETno;
}
/* break up any closed epsilon circles, otherwise they can't be executed */
static int fillout(VS stack)VSD struct eps**const stack;
/* [<][>][^][v][top][bottom][index][help] */
{
#ifndef RE_pNOSUB
struct eps**nst;
#endif
if((*stack)->opc!=OPC_EPS||(*stack)->stack)
return 0;
(*stack)->stack=Ceps p; /* mark this one as used */
#ifndef RE_pNOSUB
#define RECURS(nxt) \
do\
{ if(!(nst=cstack(VS (struct eps**)stack,\
ioffsetof(struct eps,nxt)^ioffsetof(struct eps,next))))\
return 1;\
}\
while(fillout(VS nst))
#else /* RE_pNOSUB */
#define RECURS(nxt) \
do\
if(cstack(VS (struct eps**)stack,\
ioffsetof(struct eps,nxt)^ioffsetof(struct eps,next)))\
return 1;\
while(fillout(VS &(*stack)->nxt))
#endif /* RE_pNOSUB */
RECURS(next);RECURS(spawn);return 0; /* recurse */
}
#endif /* RE_SURE_MINIMAL */
#define XOR1 \
(ioffsetof(struct chclass,pos1)^ioffsetof(struct chclass,pos2))
#define PC(thiss,t) (((struct evoi*)geno(thiss,t))->st_)
#define PCp(thiss,t) (((struct evoi*)geno(thiss,t))->wh_)
#define PCs(s,thiss,t) (*(struct eps**)((s)=(void*)&PC(thiss,t)))
#define PCps(s,thiss,t) ((uchar*)*(const void**)\
geno(&PCs(s,thiss,t),ioffsetof(struct evoi,wh_)))
#define Pci(thiss,t) PCs(c,thiss,t)
#define Pc() (*(struct eps**)c)
#define Pcp() (*(const void**)(c+=ioffsetof(struct evoi,wh_)))
#define PcP() (*(const void**)c)
static void initjtable(VSO)VSD
/* [<][>][^][v][top][bottom][index][help] */
#ifndef RE_nJUMP_TABLE
{ typedef int goodness;
goodness bestval;jt_type*bt=Jtable(gpreg->re_finpoint);
unsigned depth,maxdepth,th1,ot1;
register struct eps*stack,*thiss,*other,*code,*s;jt_type jt[OPB];
;{ register int i=OPB-1;
while(jt[i]=0,i--); /* pre-init our jump table */
}
th1=ioffsetof(struct chclass,pos1);ot1=ioffsetof(struct chclass,pos2);
maxdepth=(jt_type)~(jt_type)0;depth=1;thiss=other=code=(s=r)-1;bestval=0;
stack=0;goto nostack;
do /* now flood the tree from the left, counting the distance */
{ th1^=XOR1;ot1^=XOR1;thiss=other;other=code;stack=0;depth++;
do /* pop next entry off this pc-stack */
{ thiss=PC(s=thiss,th1);PC(s,th1)=0;s=s->stack;goto nostack;
do
for(s=stack->spawn,stack=stack->stack;;) /* empty epsilon stack */
nostack: { switch(s->opc-OPB)
{ case OPC_EPS-OPB:s->stack=stack;s=(stack=s)->next;
continue;
#ifndef RE_pNOSUB
case OPC_GOP-OPB:case OPC_GCL-OPB:s=s->stack;continue;
#endif /* maxjump determined */
case OPC_FIN-OPB:maxdepth=depth;break;
case OPC_DOT-OPB:case OPC_CLASS-OPB:
{ register unsigned c;
c=OPB-1;
do
{ if(s->opc==OPC_CLASS)
{ if(!bit_test(Cc(s,c),c))
continue;
}
#ifndef RE_nNEWLINE
#ifdef RE_pNEWLINE
else if(c=='\n')
#else
else if(c=='\n'&&gpreg->re_flags®_NEWLINE)
#endif
continue;
#endif
jt[c]=depth;
}
while(c--); /* loop through the whole alphabet */
goto acc_st;
}
case OPC_BOL-OPB:case OPC_EOL-OPB:jt['\n']=depth;goto acc_st;
default:jt[s->opc]=depth; /* if state not yet pushed and */
acc_st: if(!PC(s,ot1)&&!Cc(s,pos2.wh_)) /* still in the running */
{ PC(s,ot1)=other;other=s;
if(depth==maxdepth) /* last rounds? */
Cc(s,pos2.wh_)=s; /* knock him out */
}
}
break;
}
while(stack);
}
while(thiss!=code); /* still not done with this pass? */
if(gpreg->re_flags®_ICASE)
{ unsigned i=OPB;
do
if(isupper(--i)) /* fill the uppercase values with the */
jt[i]=jt[Tolower(i)]; /* lowercase ones */
while(i);
}
if(depth==maxdepth) /* already at the limit? */
depth--; /* keep our depth within bounds */
;{ jt_type*jp;unsigned i;goodness better;
jp=jt+(i=OPB);better=depth;
do
if(i--,*--jp>depth) /* out of bounds? */
*jp=depth;
else if(isprint(i)) /* statistically relevant? */
better+=depth-*jp; /* add its weight to the rating */
while(i);
if(better-bestval>0) /* is this one better than our old one? */
{ jt_type*jpb; /* it's better, so copy it over */
bestval=better;gpreg->re_maxjump=depth-1;i=OPB;jpb=bt;
while(*jpb++=depth-*jp++,--i);
}
}
}
while(other!=code); /* done with every thread? */
#ifndef RE_STRLEN
*bt=0; /* make sure that \0 always matches */
#endif
#else /* RE_nJUMP_TABLE */
{ bit_type*jt=Jtable(gpreg->re_finpoint);
;{ register struct eps*stack,*s;
initbitm(jt,1);stack=0;s=r;goto nxt3; /* preset our bitmask */
do /* now examine all the candidates that could be first */
for(s=stack->spawn,stack=stack->stack;;)
nxt3: { switch(s->opc-OPB)
{ case OPC_EPS-OPB:s->stack=stack;s=(stack=s)->next;continue;
#ifndef RE_pNOSUB
case OPC_GOP-OPB:case OPC_GCL-OPB:s=s->stack;continue;
#endif /* pity, mark everything as eligible */
case OPC_FIN-OPB:case OPC_DOT-OPB:initbitm(jt,0);goto bailout;
case OPC_CLASS-OPB:
;{ size_t i;bit_type*from;
i=maxindex(Cc(s,c))+1;from=Cc(s,c);
while(*jt++|=*from++,--i);
}
jt-=maxindex(Cc(s,c))+1;break;
case OPC_BOL-OPB:case OPC_EOL-OPB:bseti(VS jt,'\n',1);break;
default:bseti(VS jt,s->opc,1);
}
break;
}
while(stack);
}
bailout:;
#ifndef RE_STRLEN
bseti(VS jt,'\0',1); /* make sure that \0 always matches */
#endif
#endif /* RE_nJUMP_TABLE */
}
#ifndef reg_malloc
#include <stdlib.h> /* malloc() realloc() free() */
void
*(*reg_malloc)P((size_t size))=malloc,
*(*reg_realloc)P((void*ptr,size_t size))=realloc,
(*reg_free)P((void*ptr))=free;
#endif
int regcomp(preg,pattern,cflags)regex_t*const preg;const char*const pattern;
/* [<][>][^][v][top][bottom][index][help] */
{ struct eps*s;
#ifndef RE_nCONCUR_COMPILE
struct Vs vss; /* virtual static struct :-) */
#define vs (&vss)
#endif
#ifndef RE_nCASE_TABLE
if(!bothcase[OPB-1]) /* already initialised case tables? */
{ register unsigned c=OPB-1; /* no, so go through the entire alphabet */
while(igncase[bothcase[c]=c]=isupper(c)?tolower(c):c,c--);
}
#endif
#ifdef RE_NSUBMAX
if(((gpreg=preg)->re_flags=cflags)®_NSUBMAX) /* max. re_nsub given? */
mnmatch=preg->re_nsub;
#else
(gpreg=preg)->re_flags=cflags; /* store the flags, for later reference */
#endif
preg->re_nsub=errorno=(char*)progid-(char*)progid;preg->re_es.re_stack=0;
p=(uchar*)pattern;r=Ceps&aleps;cachea=0;por(VS Ceps 0); /* 1st a trial run */
/*
* Possible ANSI violation here. The assumption is that:
* (&aleps+a)+b==&aleps+c for any positive integers a,b,c
* where a+b==c
*/
;{ size_t i;
if(!(r=preg->re_compiled=s=reg_malloc((i=dif(r,&aleps))+ /* allocate */
mx(ioffsetof(struct eps,stack)+SZ(eps*),SZ(aligar))))) /* the memory */
return REG_ESPACE;
p=(uchar*)pattern;preg->re_nsub=0; /* back to the start of the source */
if(!por(VS epso(s,i))) /* now compile it for real */
error(VS REG_EPAREN);
}
(Ceps(preg->re_finpoint=r))->opc=OPC_FIN; /* add end */
#ifndef RE_SURE_MINIMAL
r->stack=0;
for(r=s;;s=skiplen(s)) /* simplify the compiled code (i.e. */
{ switch(s->opc) /* take out cyclic epsilon references) */
{ case OPC_EPS:p=(uchar*)s;fillout(VS &s); /* check tree */
default:continue;
case OPC_FIN:; /* finished */
}
break;
}
#endif
cflags|=REG_SIMPLE;
#ifndef RE_DUMB_ANCHOR
;{ register struct eps*stack;
stack=0;s=r;goto nxt;
do
for(s=stack->spawn,stack=stack->stack;;)
nxt: { switch(s->opc)
{ case OPC_EPS:s->stack=stack;s=(stack=s)->next;continue;
#ifndef RE_pNOSUB
case OPC_GOP:case OPC_GCL:s=s->stack;continue;
#endif
case OPC_BOL:Cc(s,pos1.wh_)=0; /* found a real OPC_BOL */
} /* anything else is irrelevant (prune the search to the top) */
break;
}
while(stack);
}
for(s=r;;s=skiplen(s)) /* search through it linearly */
{ switch(s->opc)
{ case OPC_EOL:
{ register struct eps*t,*stack;
stack=0;t=s->stack;goto nxt1;
do
for(t=stack->spawn,stack=stack->stack;;)
nxt1: { switch(t->opc)
{ case OPC_EPS:t->stack=stack;t=(stack=t)->next;continue;
#ifndef RE_pNOSUB
case OPC_GOP:case OPC_GCL:t=t->stack;continue;
#endif
case OPC_FIN:goto eol_ok; /* ok, reached the end */
} /* not the end, look elsewhere */
break;
}
while(stack);
s->opc=C_EOL; /* so it turned out to be a fake OPC_EOL */
err_anch:
#ifndef RE_nERROR_DETECT
p=(void*)Cc(s,pos1.wh_); /* adjust the source pointer */
#ifndef RE_pEXTENDED
if(cflags®_EXTENDED)
#endif
#endif
error(VS REG_BADRPT); /* so that the error is at the right */
continue; /* offset */
}
case OPC_BOL:
if(Cc(s,pos1.wh_)) /* fake OPC_BOL? */
{ s->opc=C_BOL;goto err_anch; /* yep, convert it */
}
#ifndef RE_pNOSUB
case OPC_GOP:case OPC_GCL:
#endif
eol_ok: cflags&=~REG_SIMPLE;
default:continue;
case OPC_FIN:; /* end of the line, *stop* */
}
break;
}
#else /* RE_DUMB_ANCHOR */
for(s=r;;s=skiplen(s)) /* search through it linearly */
{ switch(s->opc)
{ case OPC_FIN:break;
case OPC_EOL:case OPC_BOL:case OPC_GOP:case OPC_GCL:
cflags&=~REG_SIMPLE;continue;
}
break;
}
#endif /* RE_DUMB_ANCHOR */
if(errorno) /* did we detect any error? */
{ preg->re_erroffset=(const char*)preg->re_es.re_stack-pattern;
preg->re_flags=cflags|REG_ERROR;
}
#ifndef RE_EXEC_ERROR
else
#endif
initjtable(VSO);
return errorno; /* phew, compilation finished */
}
#ifdef RE_nCASE_TABLE
#ifdef RE_nICASE
#define CHECKCASE(i) 0
#else
#define CHECKCASE(i) (ign_case&&TO_LOWER(i))
#endif
#else
#define CHECKCASE(i) ((i)=transcase[i])
#endif
static struct eps*cleantail(start,code,thiss,th1)const uchar*const start;
/* [<][>][^][v][top][bottom][index][help] */
const struct eps*const code;struct eps*thiss;const unsigned th1;
{ register struct eps**t;
while(thiss!=code) /* not at the end of this pc-stack */
if(PCps(t,thiss,th1)>start) /* did it start later? */
thiss= *t,*t=0; /* yep, so throw it out */
else /* no, proceed carefully */
{ register struct eps**s;
while(*t!=code) /* not at the end of this pc-stack */
if(PCps(s,*t,th1)>start) /* did it start later? */
*t= *s,*s=0; /* yep, so throw it out */
else
t=s; /* no, proceed */
break;
}
return thiss;
}
#ifdef RE_STRLEN
int regexec(preg,string,length,nmatch,pmatch,eflags)/*const*/regex_t*preg;
/* [<][>][^][v][top][bottom][index][help] */
const char*const string;size_t length;size_t nmatch;regmatch_t pmatch[];
#else
int regexec(preg,string,nmatch,pmatch,eflags)/*const*/regex_t*preg;
const char*const string;size_t nmatch;regmatch_t pmatch[];
#endif /* RE_STRLEN */
{ register struct eps*code;const uchar*founds,*founde=0;
#ifndef RE_nICASE
#ifdef RE_nCASE_TABLE
unsigned ign_case;
#else
const uchar*transcase;
#endif
#endif
#ifdef RE_STRLEN
size_t len;
#endif
;{ register struct eps*other,*thiss;unsigned th1,ot1;const uchar*str;
struct eps*initstack,*initcode;struct mchar virteol;
virteol.next1_=preg->re_finpoint;virteol.p1_.st_=virteol.p2_.st_=0;
#ifndef RE_nICASE
#ifdef RE_nCASE_TABLE
ign_case=(eflags|=preg->re_flags)®_ICASE;
#else
transcase=(eflags|=preg->re_flags)®_ICASE?igncase:bothcase;
#endif
#endif
if((code=preg->re_compiled)->opc==OPC_EPS)
initcode=(initstack=code)->next; /* epsilon at the start */
else
initcode=code,initstack=0; /* regular at the start */
th1=ioffsetof(struct chclass,pos1);ot1=ioffsetof(struct chclass,pos2);
/*
* Possible ANSI violation here. The assumption is that:
* (struct eps*)malloc(sizeof(struct eps))-1!=0
*/
thiss=other= --code;str=(uchar*)string-1; /* init pc-stack pointers */
#ifdef RE_STRLEN
founds=(uchar*)string+2+(len=length); /* saves us a compare later */
#else
founds=0; /* there is no other way in this case */
#endif
;{ register struct eps*s,*stack; /* proper pre-conditioning */
stack=initstack;s=initcode;goto nxt;
do /* make sure \n does not match BOL */
for(s=stack->spawn,stack=stack->stack;;)
nxt: { switch(s->opc)
{ case OPC_EPS:s->stack=stack;s=(stack=s)->next;continue;
#ifndef RE_pNOSUB
case OPC_GOP:case OPC_GCL:s=s->stack;continue;
#endif
case OPC_FIN: /* special case */
#ifndef RE_COMMON_SUBXP
{ register char*c;
if(!Pci(&virteol,ot1)) /* state not yet pushed */
Pc()=other,other=Ceps&virteol,Pcp()=string;
}
#else /* RE_COMMON_SUBXP */
if(!PC(&virteol,ot1)) /* state not yet pushed */
{ PC(&virteol,ot1)=other;
PCp(other=Ceps&virteol,ot1)=string;
}
#endif /* RE_COMMON_SUBXP */
break;
case OPC_BOL:
if(!(eflags®_NOTBOL))
#ifndef RE_COMMON_SUBXP
{ register char*c;
if(!Pci(s,ot1)) /* state not yet pushed */
Pc()=other,other=s,Pcp()=str;
}
#else /* RE_COMMON_SUBXP */
if(!PC(s,ot1)) /* state not yet pushed */
PC(s,ot1)=other,PCp(other=s,ot1)=str;
#endif /* RE_COMMON_SUBXP */
}
break;
}
while(stack);
}
#ifdef RE_STRLEN
if(len)
#endif
{ register struct eps*s,*stack;const uchar*start;unsigned i;
#ifdef RE_nJUMP_TABLE
bit_type*jt;
#else
jt_type*jt;unsigned maxjump=preg->re_maxjump;
#endif
jt=Jtable(preg->re_finpoint);
#ifndef RE_STRLEN
while((i= *++str))
{
#else
do
{ i= *++str; /* get the next real-text character */
#endif /* switch this & other pc-stack */
CHECKCASE(i);th1^=XOR1;ot1^=XOR1;thiss=other;other=code;
stack=initstack;
if((s=initcode))
{ if(thiss==code) /* automaton idle? */
#ifndef RE_nJUMP_TABLE
if(maxjump) /* can we jump at all? */
{ register unsigned jump=maxjump;
register const uchar*jstr=str;
shortcut: do
#ifdef RE_STRLEN
{ if(jump>=len) /* want to jump over the edge? */
{ if(jump!=len||jt['\n']) /* virtual edge? */
goto ret; /* no, the real one */
jstr+=jump;jump=0;break; /* might be virtual */
}
len-=jump; /* make the jump */
#ifdef RE_ABORT_EXPR
if(RE_ABORT_EXPR)
goto ret;
#endif
}
while((jump=(jt[*(jstr+=jump)]))); /* determine new jump */
do /* go back on our steps */
if(len++,jt[*--jstr]>++jump) /* impossible match? */
#else /* RE_STRLEN */
#ifdef RE_MEMCHR
{
switch(jump)
{ case 2:
if(!jstr[1])
goto ret;
break;
default:
if(memchr(jstr+1,'\0',(size_t)jump-1))
goto ret;
case 1:;
}
#else
{ while(--jump) /* jump in little steps */
if(!*++jstr) /* so as not to overlook the \0 */
goto ret;
#endif /* RE_nMEMCHR */
#ifdef RE_ABORT_EXPR
if(RE_ABORT_EXPR)
goto ret;
#endif
}
#ifdef RE_MEMCHR
while((jump=jt[*(jstr+=jump)]));
#else
while((jump=jt[*++jstr]));
#endif
if(!*jstr&&jt['\n']) /* virtual \0 ? */
goto ret;
do /* go back on our steps */
if(jt[*--jstr]>++jump) /* impossible match? */
#endif /* RE_STRLEN */
{ jump=jt[*jstr];goto shortcut; /* jump forward */
}
while(jump!=maxjump); /* not at target position */
i= *(str=jstr);CHECKCASE(i); /* load and run */
}
else
#define JT_TEST(jt,i) (jt[i]) /* !maxjump, so misuse jt as a bitmap */
#else /* RE_nJUMP_TABLE */
#define JT_TEST(jt,i) (!bit_test(jt,i))
#endif /* RE_nJUMP_TABLE */
if(JT_TEST(jt,i))
{ register const uchar*jstr=str;
do
{
#ifdef RE_STRLEN
if(!--len)
{ len++;break; /* give `$' a chance */
}
#endif
#ifdef RE_ABORT_EXPR
if(RE_ABORT_EXPR)
goto ret;
#endif
}
while(jstr++,JT_TEST(jt,*jstr));
#ifndef RE_STRLEN
if(!(i= *(str=jstr)))
i= *--str; /* give `$' a chance */
#else
i= *(str=jstr); /* load and run */
#endif
CHECKCASE(i);
}
start=str;goto nostack;
}
else if(thiss==code)
goto foundit;
do /* pop next entry off this pc-stack */
#ifndef RE_COMMON_SUBXP
{ s=thiss->stack;
;{ register char*c;
thiss=Pci(thiss,th1);Pc()=0;start=Pcp();
}
#else /* RE_COMMON_SUBXP */
{ thiss=PC(s=thiss,th1);PC(s,th1)=0;start=PCp(s,th1);s=s->stack;
#endif /* RE_COMMON_SUBXP */
goto nostack;
do /* pop next entry off the epsilon-stack */
for(s=stack->spawn,stack=stack->stack;;)
nostack: { switch(s->opc-OPB)
{ default:
if(i==s->opc) /* regular character match */
goto yep;
break;
case OPC_BOL-OPB:
if(i=='\n') /* reset the start pointer */
#ifndef RE_COMMON_SUBXP
{ register char*c;
if(!Pci(s,ot1)) /* state not yet pushed */
Pc()=other,other=s;
Pcp()=str;
#else /* RE_COMMON_SUBXP */
{ if(!PC(s,ot1)) /* state not yet pushed */
PC(s,ot1)=other,other=s;
PCp(s,ot1)=str;
#endif /* RE_COMMON_SUBXP */
}
break;
case OPC_EOL-OPB:
if(i=='\n') /* only allow OPC_FIN to follow */
{ s=Ceps&virteol;goto yep; /* this one */
}
break; /* push spawned branch on the work-stack */
case OPC_EPS-OPB:s->stack=stack;s=(stack=s)->next;
continue;
#ifndef RE_pNOSUB
case OPC_GOP-OPB:case OPC_GCL-OPB:s=s->stack;continue;
#endif
case OPC_FIN-OPB: /* hurray! complete match */
#ifdef RE_STRLEN
if(start<=founds) /* will always match once */
#else
if(start<=founds||!founds) /* extra compare */
#endif
{ founde=str; /* one past the start */
if(!nmatch)
goto ret; /* only report success */
thiss=cleantail(founds=start,code,thiss,th1);
other=cleantail(start,code,other,ot1);
initcode=initstack=0; /* don't start anything */
} /* new */
break;
case OPC_CLASS-OPB:
if(bit_test(Cc(s,c),i))
goto yep; /* character in class */
break;
case OPC_DOT-OPB: /* dot-wildcard */
#ifndef RE_nNEWLINE
#ifdef RE_pNEWLINE
if(i!='\n')
#else
if(i!='\n'||!(eflags®_NEWLINE))
#endif
#endif
#ifndef RE_COMMON_SUBXP
{ register char*c;
yep: if(!Pci(s,ot1)) /* state not yet pushed */
{ Pc()=other;other=s;Pcp();
earlier: PcP()=start;
}
else if((uchar*)Pcp()>start)
goto earlier;
}
#else /* RE_COMMON_SUBXP */
yep: if(!PC(s,ot1)) /* state not yet pushed */
{ PC(s,ot1)=other;other=s;
earlier: PCp(s,ot1)=start;
} /* is it not the earliest match? */
else if((uchar*)PCp(s,ot1)>start)
goto earlier;
#endif /* RE_COMMON_SUBXP */
} /* push location onto other pc-stack */
break;
}
while(stack); /* the work-stack is not empty */
}
while(thiss!=code); /* this pc-stack is not empty */
#ifdef RE_ABORT_EXPR
if(RE_ABORT_EXPR)
goto ret;
#endif
}
#ifdef RE_STRLEN
while(--len); /* still text to search */
}
str++; /* one past the end */
#else
} /* out of text */
#endif /* proper post-conditioning */
;{ register struct eps*t,*s,*stack;unsigned i,no_eol;
for(t=other,no_eol=eflags®_NOTEOL;t!=code;t=PC(t,ot1))
{ s=t->stack;i=0;stack=0;goto nxt2;
do
for(s=stack->spawn,stack=stack->stack;;)
nxt2: { switch(s->opc)
{ case OPC_EPS:s->stack=stack;s=(stack=s)->next;continue;
case OPC_EOL:
if(no_eol) /* can we match this EOL? */
break;
i=1; /* skip one further in that case */
#ifndef RE_pNOSUB
case OPC_GOP:case OPC_GCL:
#endif
s=s->stack;continue;
case OPC_FIN:
{ const uchar*start;
#ifdef RE_STRLEN
if((start=PCp(t,ot1))<=founds)
#else
if((start=PCp(t,ot1))<=founds||!founds)
#endif /* assign match + offset i */
founds=start,founde=str+i;
/*
* Possible ANSI violation here.
* The assumption is that `founde' can be
* assigned a value that is two elements
* past the last element of `string'.
*/
}
}
break;
}
while(stack);
}
}
ret:;
#ifndef RE_nREUSABLE
do
{ register struct eps*t;
while(thiss!=code) /* cleanup this pc-stack */
thiss=PC(t=thiss,th1),PC(t,th1)=0;
thiss=other; /* and the other one */
}
while((th1^=XOR1)==ot1);
#endif
}
#ifdef RE_ABORT_EXPR
if(RE_ABORT_EXPR)
return REG_ABORT;
#endif
foundit:
if(!nmatch||(!founde&&eflags®_NOSUB))
goto retmatch;
;{ unsigned lsub=nmatch;
while(--lsub) /* clear pmatch array first */
pmatch[lsub].rm_ep=0;
}
#define for_CLEAN_PMATCH(nmatch,lsub) \
for(;;)\
if(nmatch<=lsub||!pmatch[lsub].rm_ep)\
lsub--;\
else if(lsub&&pmatch[lsub].rm_sp>=(char*)str)\
pmatch[lsub--].rm_ep=0;\
else
#define PMATCH_so(str) \
if(nmatch>(lsub=Go(s,gopp.nr)))\
pmatch[lsub].rm_sp=(char*)str
#define PMATCH_eo(str) \
if(nmatch>Gc(s,gopp.nr))\
pmatch[Gc(s,gopp.nr)].rm_ep=(char*)str
/* any match at all? spare us the details? */
if((pmatch->rm_ep=(char*)founde)&&
(pmatch->rm_sp=(char*)founds,!(eflags®_SIMPLE)))
{ unsigned sol=0,eol; /* second automaton, for filling pmatch */
;{ struct evoi*cstp,*stp,*estp;register struct eps*s;const uchar*str,*end;
unsigned i,no_eol;
#ifndef RE_pNOSUB
unsigned lsub=0;
#endif
#ifdef RE_EXEC_ERROR
if(eflags®_ERROR)
{ preg->re_flags=eflags&~REG_ERROR;goto newstack;
}
#endif
if((stp=preg->re_es.re_stack))
estp=(struct evoi*)stp->wh_;
else
#ifdef RE_EXEC_ERROR
newstack:
#endif
estp=STARTSTACK+(stp=reg_malloc(STARTSTACK*SZ(evoi)));
cstp=stp;s=code+1;end=founde--;
#ifdef RE_STRLEN
if((char*)end>string+length)
end--; /* limit end to the non-virtual area */
no_eol=(char*)end<string+length&&*end!='\n';
#else
if((char*)end>string&&!end[-1])
end--; /* limit end to the non-virtual area */
no_eol= *end!='\n'&&*end;
#endif /* upcoming preincrement, so backup */
str=founds-1;eol=0;
if((char*)founds<string) /* match before the start of string? */
{ str++;goto virtual_newline;
}
for(sol=0;;s=s->stack) /* update the pc to the next */
{ if(++str<end) /* inside our match? */
goon: { i= *str; /* read character from stream */
redo: switch(s->opc-OPB) /* switch to the current opcode */
{ default:case OPC_FIN-OPB:
CHECKCASE(i);
if(i==s->opc) /* compare a regular character */
continue; /* a match, simply go to the next */
break;
case OPC_DOT-OPB:
#ifndef RE_nNEWLINE
#ifdef RE_pNEWLINE
if(i!='\n')
#else
if(i!='\n'||!(eflags®_NEWLINE))
#endif
#endif
continue;
break;
case OPC_CLASS-OPB:
if(bit_test(Cc(s,c),i))
continue;
break;
case OPC_EPS-OPB: /* push(str,s->spawn); */
if(estp==cstp)
{ size_t t;
if(!(cstp=reg_realloc(stp,
t=dif(cstp,stp)+INCSTACK*SZ(evoi))))
{ eflags|=REG_LOWMEM|REG_OUTOFMEM;goto complete_match;
}
stp=cstp;cstp=(estp=(struct evoi*)geno(stp,t))-INCSTACK;
} /* save the junction on the stack, pick one branch */
cstp->wh_=str;cstp++->st_=s->spawn;s=s->next;goto redo;
#ifndef RE_pNOSUB
case OPC_GOP-OPB:PMATCH_so(str);s=s->stack;goto redo;
case OPC_GCL-OPB:PMATCH_eo(str);s=s->stack;goto redo;
#endif
case OPC_EOL-OPB:
if(i=='\n'&&str==founde)
{ eol=1;continue;
}
break;
case OPC_BOL-OPB:
if(i=='\n'&&str==founds)
{ sol=1;continue;
}
}
}
else
{ register struct eps*stack=0;
goto nxt3; /* proper post-conditioning again */
do
for(s=stack->spawn,stack=stack->stack;;s=s->stack)
nxt3: { switch(s->opc)
{ case OPC_EPS:s->stack=stack;s=(stack=s)->next;
goto nxt3;
case OPC_EOL:
if(no_eol)
break;
continue;
#ifndef RE_pNOSUB
case OPC_GOP:PMATCH_so(str);continue;
case OPC_GCL:PMATCH_eo(str);continue;
#endif
case OPC_FIN:goto complete_match; /* yahoe! */
}
#ifndef RE_pNOSUB
for_CLEAN_PMATCH(nmatch,lsub)
break;
#endif
break;
}
while(stack);
}
str=(--cstp)->wh_;s=cstp->st_; /* pop(str,s); */
#ifndef RE_pNOSUB
for_CLEAN_PMATCH(nmatch,lsub)
#endif
if((char*)str<string)
virtual_newline:
{ i='\n';goto redo;
}
else
{ if(str<founde)
eol=0;
if(str==founds)
sol=0;
goto goon;
}
}
complete_match: /* free the runtime stack or prepare it for later */
preg->re_es.re_stack=eflags®_LOWMEM?
(reg_free(stp),(struct evoi*)0):(stp->wh_=estp,stp);
pmatch->rm_ep=(char*)str; /* mark the end of the match */
}
if(sol) /* adjust the starting point? (due to a `^') */
{ unsigned lsub=nmatch;
do
if(pmatch[--lsub].rm_sp==(char*)founds)
pmatch[lsub].rm_sp=(char*)founds+1; /* shift it right */
while(lsub);
}
if(eol) /* adjust the ending point? (due to a `$') */
{ unsigned lsub=nmatch;
do
if(pmatch[--lsub].rm_ep>(char*)founde)
pmatch[lsub].rm_ep=(char*)founde; /* shift it left */
while(lsub);
}
}
;{ unsigned lsub=nmatch;
do
if(!pmatch[--lsub].rm_ep) /* no match on this group? */
#ifdef RE_SUBSTR_PTR
pmatch[lsub].rm_sp=0; /* then wipe out the start as well */
#else
pmatch[lsub].rm_so=pmatch[lsub].rm_eo= -1; /* same, with offsets */
else
{ pmatch[lsub].rm_so=(const char*)pmatch[lsub].rm_sp-string;
pmatch[lsub].rm_eo=(const char*)pmatch[lsub].rm_ep-string;
}
#endif
while(lsub);
}
retmatch:
return eflags®_OUTOFMEM?REG_ESPACE:founde?0:REG_NOMATCH;
}
#ifdef RE_COPY
#define cp_s(memb) (dest->memb=src->memb)
#define cp_e(memb) (d->memb=epso(d,dif(s->memb,s)))
static void copyclass(to,from)bit_type*to;const bit_type*from;
/* [<][>][^][v][top][bottom][index][help] */
{ register unsigned i=bit_mindex(OPB);
while(*to++= *from++,--i);
}
regcopy(dest,src)regex_t*const dest;const regex_t*const src;
/* [<][>][^][v][top][bottom][index][help] */
{ register struct eps*s,*d;
cp_s(re_nsub);cp_s(re_flags); /* clone the easy parts */
#ifndef RE_nJUMP_TABLE
cp_s(re_maxjump);
#endif
dest->re_compiled=0;
if(src->re_flags®_ERROR)
cp_s(re_erroffset);
else
dest->re_es.re_stack=0;
if(!(s=src->re_compiled)) /* hmmm..., missing source regexp */
return REG_BADPAT; /* allocate some space */
if(!(d=dest->re_compiled=reg_malloc(dif(src->re_finpoint,s)+
mx(ioffsetof(struct eps,stack)+SZ(eps*),SZ(aligar)))))
return REG_ESPACE; /* oops */
for(;;d=skiplen(d),s=skiplen(s)) /* step through them */
{ switch((d->opc=s->opc)-OPB) /* copying opcodes */
{ case OPC_EPS-OPB:cp_e(spawn);cp_e(next);continue;
case OPC_FIN-OPB:
#ifdef RE_nJUMP_TABLE
copyclass(Jtable(dest->re_finpoint=d),Jtable(s));
#else
;{ register jt_type*to;register const jt_type*from;
register unsigned i;
i=OPB;to=Jtable(dest->re_finpoint=d);from=Jtable(s);
while(*to++= *from++,--i);
}
#endif
return 0;
#ifndef RE_nNOSUB
case OPC_GOP-OPB:
#ifndef gcl
Go(d,gopp.nr)=Go(s,gopp.nr);break;
#endif
case OPC_GCL-OPB:
Gc(d,gopp.nr)=Gc(s,gopp.nr);break;
#endif /* RE_nNOSUB */
case OPC_CLASS-OPB:copyclass(Cc(d,c),Cc(s,c));
default:case OPC_DOT-OPB:case OPC_BOL-OPB:case OPC_EOL-OPB:
initsimp(d,d);
}
cp_e(stack); /* simple next pointers */
}
}
#endif /* RE_COPY */
void regfree(rg)regex_t*rg;
/* [<][>][^][v][top][bottom][index][help] */
{ if(rg->re_compiled)
{ reg_free(rg->re_compiled);rg->re_compiled=0;
if(!(rg->re_flags®_ERROR)&&rg->re_es.re_stack)
reg_free(rg->re_es.re_stack),rg->re_es.re_stack=0;
}
}
#ifndef RE_nERROR_REPORT
static size_t catlim(dest,src,lim)register char*dest;register const char*src;
/* [<][>][^][v][top][bottom][index][help] */
register size_t lim;
{ size_t len=strlen(src);
while(lim&&*dest) /* search for the end of dest */
dest++,lim--;
if(lim) /* still space left in buffer */
{ while(--lim&&(*dest++= *src++)); /* append src */
*dest='\0'; /* make sure it is properly terminated */
}
return len; /* merely a counting service */
}
size_t regerror(errcode,preg,errbuf,errbuf_size)const regex_t*const preg;
/* [<][>][^][v][top][bottom][index][help] */
char*const errbuf;size_t errbuf_size;
{ size_t needed,i;
static const char*const error_msg[]=
{"Failed to match","Invalid regular expression",
"Invalid collating element","Invalid character class type","Trailing \\",
"Number in \\digit invalid","[ ] imbalance","( ) imbalance",
"{ } imbalance","Content of { } invalid",
"Invalid endpoint in range expression","Out of memory",
"Inappropriate use of magic character",
#ifdef RE_ABORT_EXPR
"Search aborted"
#endif
},atoffset[]=" at offset: ";
if(errbuf_size) /* any target at all? */
*errbuf='\0'; /* clear out the target */
needed=1; /* for the omnipresent terminating \0 */
if(--errcode>=0&&errcode<=maxindex(error_msg))
{ needed+=catlim(errbuf,error_msg[errcode],errbuf_size); /* regular msg */
if(errcode&&preg&&(preg->re_flags&(REG_NOOFFSET|REG_ERROR))==REG_ERROR)
{ needed+=catlim(errbuf,atoffset,errbuf_size); /* at offset */
for(errcode=0,i=preg->re_erroffset;errcode++,i/=10;);
needed+=errcode;errcode=1;i=preg->re_erroffset;
do /* fill in the digits */
if(needed-++errcode<errbuf_size)
errbuf[needed-errcode]=hex2num[i%10]; /* coding it like this */
while(i/=10); /* avoids linking in sprintf() */
errbuf[(needed<=errbuf_size?needed:errbuf_size)-1]='\0';
}
}
return needed;
}
#endif /* RE_nERROR_REPORT */
/*
* It might be hard to believe, but I generally dislike conditionally
* compiled code (especially when it is done to get the code working
* on vastly distinct architectures).
*
* However, you might have noticed (I guess you could hardly have missed
* it :-) that I made an exception in this library. I did it because:
* 1. The conditional compilation constructs in this library have
* nothing to do with portability issues.
* 2. This seemed to be the only way to have a relatively compact
* library, yet easily customisable to your personal taste.
*
* But, I hope I never have to program/debug code like this in the
* future. It comes close to a programmer's worst nightmare :-).
*/