libproff/pattern.c

/* [<][>]
[^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following functions.
  1. domatch
  2. match
  3. ispattern
  4. matchExp

/* $Id: pattern.c,v 1.2 2002/03/24 13:18:18 proff Exp $

 *  Do shell-style pattern matching for ?, \, [], and * characters.
 *  Might not be robust in face of malformed patterns; e.g., "foo[a-"
 *  could cause a segmentation violation.  It is 8bit clean.
 *
 *  Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
 *  Rich $alz is now <rsalz@osf.org>.
 *  April, 1991:  Replaced mutually-recursive calls with in-line code
 *  for the star character.
 *  March, 1996: Added some minor general case optimisations, and those
 *  necessary for 1996 arch/compilers and specified terminator's
 *  - Julian Assange <proff@suburbia.net>
 *  Jan 1998: More optimisations, groupMatch() - Proff
 *  
 *  Special thanks to Lars Mathiesen <thorinn@diku.dk> for the ABORT code.
 *  This can greatly speed up failing wildcard patterns.  For example:
 *      pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
 *      text 1:  -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
 *      text 2:  -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1
 *  Text 1 matches with 51 calls, while text 2 fails with 54 calls.  Without
 *  the ABORT code, it takes 22310 calls to fail.  Ugh.  The following
 *  explanation is from Lars:
 *  The precondition that must be fulfilled is that DoMatch will consume
 *  at least one character in text.  This is true if *p is neither '*' nor
 *  '\0'.)  The last return has ABORT instead of FALSE to avoid quadratic
 *  behaviour in cases like pattern "*a*b*c*d" with text "abcxxxxx".  With
 *  FALSE, each star-loop has to run to the end of the text; with ABORT
 *  only the last one does.
 *
 *  Once the control of one instance of DoMatch enters the star-loop, that
 *  instance will return either TRUE or ABORT, and any calling instance
 *  will therefore return immediately after (without calling recursively
 *  again).  In effect, only one star-loop is ever active.  It would be
 *  possible to modify the code to maintain this context explicitly,
 *  eliminating all recursive calls at the cost of some complication and
 *  loss of clarity (and the ABORT stuff seems to be unclear enough by
 *  itself).  I think it would be unwise to try to get this into a
 *  released version unless you have a good test data base to try it out
 *  on.
 */

#include "libproff.h"

#include <string.h>
#include <ctype.h>

/* What character marks an inverted character class? */
#define NEGATE_CLASS            '^'
/* Is "*" a common pattern? */
/* Do tar(1) matching rules, which ignore a trailing slash? */
#undef MATCH_TAR_PATTERN

/*
 *  Match text and p, return TRUE, FALSE, or ABORT.
 */
static int domatch (unsigned char *text, unsigned char *p, bool f_case, unsigned char eol)
/* [<][>][^][v][top][bottom][index][help] */
{
        unsigned char last;
        bool matched;
        bool reverse;

        for (; *p; text++, p++)
        {
                if ((!*text || *text == eol) && *p != '*')
                        return ABORT;
                switch (*p)
                {
                case '\\':
                        /* Literal match with following character. */
                        p++;
                        /* FALLTHROUGH */
                default:
                        if ((f_case && (tolower (*text) != tolower(*p))) ||
                            *text != *p)
                                return FALSE;
                        continue;
                case '?':
                        /* Match anything. */
                        continue;
                case '*':
                        while (*++p == '*')
                                /* Consecutive stars act just like one. */
                                continue;
                        if (*p == '\0')
                                /* Trailing star matches everything. */
                                return TRUE;
                        while (*text)
                                if ((matched = domatch (text++, p, f_case, eol)) != FALSE)
                                        return matched;
                        return ABORT;
                case '[':
                        reverse = p[1] == NEGATE_CLASS ? TRUE : FALSE;
                        if (reverse)
                                /* Inverted character class. */
                                p++;
                        matched = FALSE;
                        if (p[1] == ']' || p[1] == '-')
                                if (*++p == *text)
                                        matched = TRUE;
                        for (last = *p; *++p && *p != ']'; last = *p)
                                /* This next line requires a good C compiler. */
                                if (*p == '-' && p[1] != ']'
                                    ? *text <= *++p && *text >= last : *text == *p)
                                        matched = TRUE;
                        if (matched == reverse)
                                return FALSE;
                }
        }

#ifdef  MATCH_TAR_PATTERN
        if (*text == '/')
                return TRUE;
#endif /* MATCH_TAR_ATTERN */
        return *text == '\0' || *text == eol;
}

EXPORT bool match (char *p, char *text, bool f_case, char eol)
/* [<][>][^][v][top][bottom][index][help] */
{
        if (*p == '*')
        {
                if (!p[1])
                        return TRUE;
        } else
        {
                if (*p != *text && *p != '?')
                        return FALSE;
        }
        return domatch ((unsigned char *) text, (unsigned char *) p, f_case, (unsigned char )eol) == TRUE;
}

EXPORT bool ispattern (char *p)
/* [<][>][^][v][top][bottom][index][help] */
{
        if (strcspn(p, "*[]?")==strlen(p))
                return FALSE;
        else
                return TRUE;
}

EXPORT bool matchExp (char *p, char *text, bool f_case, char eol)
/* [<][>][^][v][top][bottom][index][help] */
{
        for (;*p;)
            {
              char c; /* IGNORE warning */
                bool not;
                char *p2;
                if (*p == '!')
                    {
                        not = TRUE;
                        p++;
                    }
                else
                    not = FALSE;
                p2 = strchr(p, ',');
                if (p2)
                    {
                        c = *p2;
                        *p2 = '\0';
                    }
                if (match(p, text, f_case, eol))
                    {
                        if (p2)
                            *p2 = c;
                        return !not;
                    }
                if (p2)
                    {
                        *p2 = c;
                        p = p2+1;
                    }
                else
                    break;
            }
        return FALSE;
}

/* [<][>][^][v][top][bottom][index][help] */