LCOV - trace.lcov_info_final - fuzz/pcre/pcre.c

LCOV - code coverage report

Current view:	top level - fuzz/pcre - pcre.c (source / functions)		Hit	Total	Coverage
Test:	trace.lcov_info_final	Lines:	0	1751	0.0 %
Date:	2021-02-22 04:51:02	Functions:	0	19	0.0 %

          Line data    Source code

       1             : /*************************************************
       2             : *      Perl-Compatible Regular Expressions       *
       3             : *************************************************/
       4             : 
       5             : /*
       6             : This is a library of functions to support regular expressions whose syntax
       7             : and semantics are as close as possible to those of the Perl 5 language. See
       8             : the file Tech.Notes for some information on the internals.
       9             : 
      10             : Written by: Philip Hazel <ph10@cam.ac.uk>
      11             : 
      12             :            Copyright (c) 1997-2000 University of Cambridge
      13             : 
      14             : -----------------------------------------------------------------------------
      15             : Permission is granted to anyone to use this software for any purpose on any
      16             : computer system, and to redistribute it freely, subject to the following
      17             : restrictions:
      18             : 
      19             : 1. This software is distributed in the hope that it will be useful,
      20             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      21             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
      22             : 
      23             : 2. The origin of this software must not be misrepresented, either by
      24             :    explicit claim or by omission.
      25             : 
      26             : 3. Altered versions must be plainly marked as such, and must not be
      27             :    misrepresented as being the original software.
      28             : 
      29             : 4. If PCRE is embedded in any software that is released under the GNU
      30             :    General Purpose Licence (GPL), then the terms of that licence shall
      31             :    supersede any condition above with which it is incompatible.
      32             : -----------------------------------------------------------------------------
      33             : */
      34             : 
      35             : 
      36             : /* Define DEBUG to get debugging output on stdout. */
      37             : 
      38             : /* #define DEBUG */
      39             : 
      40             : /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
      41             : inline, and there are *still* stupid compilers about that don't like indented
      42             : pre-processor statements. I suppose it's only been 10 years... */
      43             : 
      44             : #ifdef DEBUG
      45             : #define DPRINTF(p) printf p
      46             : #else
      47             : #define DPRINTF(p) /*nothing*/
      48             : #endif
      49             : 
      50             : /* Include the internals header, which itself includes Standard C headers plus
      51             : the external pcre header. */
      52             : 
      53             : #include "internal.h"
      54             : 
      55             : 
      56             : /* Allow compilation as C++ source code, should anybody want to do that. */
      57             : 
      58             : #ifdef __cplusplus
      59             : #define class pcre_class
      60             : #endif
      61             : 
      62             : 
      63             : /* Number of items on the nested bracket stacks at compile time. This should
      64             : not be set greater than 200. */
      65             : 
      66             : #define BRASTACK_SIZE 200
      67             : 
      68             : 
      69             : /* The number of bytes in a literal character string above which we can't add
      70             : any more is different when UTF-8 characters may be encountered. */
      71             : 
      72             : #ifdef SUPPORT_UTF8
      73             : #define MAXLIT 250
      74             : #else
      75             : #define MAXLIT 255
      76             : #endif
      77             : 
      78             : 
      79             : /* Min and max values for the common repeats; for the maxima, 0 => infinity */
      80             : 
      81             : static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
      82             : static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
      83             : 
      84             : /* Text forms of OP_ values and things, for debugging (not all used) */
      85             : 
      86             : #ifdef DEBUG
      87             : static const char *OP_names[] = {
      88             :   "End", "\\A", "\\B", "\\b", "\\D", "\\d",
      89             :   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
      90             :   "Opt", "^", "$", "Any", "chars", "not",
      91             :   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
      92             :   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
      93             :   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
      94             :   "*", "*?", "+", "+?", "?", "??", "{", "{",
      95             :   "class", "Ref", "Recurse",
      96             :   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
      97             :   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
      98             :   "Brazero", "Braminzero", "Bra"
      99             : };
     100             : #endif
     101             : 
     102             : /* Table for handling escaped characters in the range '0'-'z'. Positive returns
     103             : are simple data values; negative values are for special things like \d and so
     104             : on. Zero means further processing is needed (for things like \x), or the escape
     105             : is invalid. */
     106             : 
     107             : static const short int escapes[] = {
     108             :     0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
     109             :     0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
     110             :   '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */
     111             :     0,      0,      0,      0,      0,      0,      0,      0,   /* H - O */
     112             :     0,      0,      0, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */
     113             :     0,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */
     114             :   '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */
     115             :     0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */
     116             :     0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */
     117             :     0,      0, -ESC_z                                            /* x - z */
     118             : };
     119             : 
     120             : /* Tables of names of POSIX character classes and their lengths. The list is
     121             : terminated by a zero length entry. The first three must be alpha, lower, upper,
     122             : as this is assumed for handling case independence. */
     123             : 
     124             : static const char *posix_names[] = {
     125             :   "alpha", "lower", "upper",
     126             :   "alnum", "ascii", "cntrl", "digit", "graph",
     127             :   "print", "punct", "space", "word",  "xdigit" };
     128             : 
     129             : static const uschar posix_name_lengths[] = {
     130             :   5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
     131             : 
     132             : /* Table of class bit maps for each POSIX class; up to three may be combined
     133             : to form the class. */
     134             : 
     135             : static const int posix_class_maps[] = {
     136             :   cbit_lower, cbit_upper, -1,             /* alpha */
     137             :   cbit_lower, -1,         -1,             /* lower */
     138             :   cbit_upper, -1,         -1,             /* upper */
     139             :   cbit_digit, cbit_lower, cbit_upper,     /* alnum */
     140             :   cbit_print, cbit_cntrl, -1,             /* ascii */
     141             :   cbit_cntrl, -1,         -1,             /* cntrl */
     142             :   cbit_digit, -1,         -1,             /* digit */
     143             :   cbit_graph, -1,         -1,             /* graph */
     144             :   cbit_print, -1,         -1,             /* print */
     145             :   cbit_punct, -1,         -1,             /* punct */
     146             :   cbit_space, -1,         -1,             /* space */
     147             :   cbit_word,  -1,         -1,             /* word */
     148             :   cbit_xdigit,-1,         -1              /* xdigit */
     149             : };
     150             : 
     151             : 
     152             : /* Definition to allow mutual recursion */
     153             : 
     154             : static BOOL
     155             :   compile_regex(int, int, int *, uschar **, const uschar **, const char **,
     156             :     BOOL, int, int *, int *, compile_data *);
     157             : 
     158             : /* Structure for building a chain of data that actually lives on the
     159             : stack, for holding the values of the subject pointer at the start of each
     160             : subpattern, so as to detect when an empty string has been matched by a
     161             : subpattern - to break infinite loops. */
     162             : 
     163             : typedef struct eptrblock {
     164             :   struct eptrblock *prev;
     165             :   const uschar *saved_eptr;
     166             : } eptrblock;
     167             : 
     168             : /* Flag bits for the match() function */
     169             : 
     170             : #define match_condassert   0x01    /* Called to check a condition assertion */
     171             : #define match_isgroup      0x02    /* Set if start of bracketed group */
     172             : 
     173             : 
     174             : 
     175             : /*************************************************
     176             : *               Global variables                 *
     177             : *************************************************/
     178             : 
     179             : /* PCRE is thread-clean and doesn't use any global variables in the normal
     180             : sense. However, it calls memory allocation and free functions via the two
     181             : indirections below, which can be changed by the caller, but are shared
     182             : between all threads. */
     183             : 
     184             : void *(*pcre_malloc)(size_t) = malloc;
     185             : void  (*pcre_free)(void *) = free;
     186             : 
     187             : 
     188             : 
     189             : /*************************************************
     190             : *    Macros and tables for character handling    *
     191             : *************************************************/
     192             : 
     193             : /* When UTF-8 encoding is being used, a character is no longer just a single
     194             : byte. The macros for character handling generate simple sequences when used in
     195             : byte-mode, and more complicated ones for UTF-8 characters. */
     196             : 
     197             : #ifndef SUPPORT_UTF8
     198             : #define GETCHARINC(c, eptr) c = *eptr++;
     199             : #define GETCHARLEN(c, eptr, len) c = *eptr;
     200             : #define BACKCHAR(eptr)
     201             : 
     202             : #else   /* SUPPORT_UTF8 */
     203             : 
     204             : /* Get the next UTF-8 character, advancing the pointer */
     205             : 
     206             : #define GETCHARINC(c, eptr) \
     207             :   c = *eptr++; \
     208             :   if (md->utf8 && (c & 0xc0) == 0xc0) \
     209             :     { \
     210             :     int a = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
     211             :     int s = 6 - a;                  /* Amount to shift next byte */  \
     212             :     c &= utf8_table3[a];            /* Low order bits from first byte */ \
     213             :     while (a-- > 0) \
     214             :       { \
     215             :       c |= (*eptr++ & 0x3f) << s; \
     216             :       s += 6; \
     217             :       } \
     218             :     }
     219             : 
     220             : /* Get the next UTF-8 character, not advancing the pointer, setting length */
     221             : 
     222             : #define GETCHARLEN(c, eptr, len) \
     223             :   c = *eptr; \
     224             :   len = 1; \
     225             :   if (md->utf8 && (c & 0xc0) == 0xc0) \
     226             :     { \
     227             :     int i; \
     228             :     int a = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
     229             :     int s = 6 - a;                  /* Amount to shift next byte */  \
     230             :     c &= utf8_table3[a];            /* Low order bits from first byte */ \
     231             :     for (i = 1; i <= a; i++) \
     232             :       { \
     233             :       c |= (eptr[i] & 0x3f) << s; \
     234             :       s += 6; \
     235             :       } \
     236             :     len += a; \
     237             :     }
     238             : 
     239             : /* If the pointer is not at the start of a character, move it back until
     240             : it is. */
     241             : 
     242             : #define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--;
     243             : 
     244             : #endif
     245             : 
     246             : 
     247             : 
     248             : /*************************************************
     249             : *             Default character tables           *
     250             : *************************************************/
     251             : 
     252             : /* A default set of character tables is included in the PCRE binary. Its source
     253             : is built by the maketables auxiliary program, which uses the default C ctypes
     254             : functions, and put in the file chartables.c. These tables are used by PCRE
     255             : whenever the caller of pcre_compile() does not provide an alternate set of
     256             : tables. */
     257             : 
     258             : #include "chartables.c"
     259             : 
     260             : 
     261             : 
     262             : #ifdef SUPPORT_UTF8
     263             : /*************************************************
     264             : *           Tables for UTF-8 support             *
     265             : *************************************************/
     266             : 
     267             : /* These are the breakpoints for different numbers of bytes in a UTF-8
     268             : character. */
     269             : 
     270             : static int utf8_table1[] = { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
     271             : 
     272             : /* These are the indicator bits and the mask for the data bits to set in the
     273             : first byte of a character, indexed by the number of additional bytes. */
     274             : 
     275             : static int utf8_table2[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
     276             : static int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
     277             : 
     278             : /* Table of the number of extra characters, indexed by the first character
     279             : masked with 0x3f. The highest number for a valid UTF-8 character is in fact
     280             : 0x3d. */
     281             : 
     282             : static uschar utf8_table4[] = {
     283             :   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
     284             :   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
     285             :   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
     286             :   3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
     287             : 
     288             : 
     289             : /*************************************************
     290             : *       Convert character value to UTF-8         *
     291             : *************************************************/
     292             : 
     293             : /* This function takes an integer value in the range 0 - 0x7fffffff
     294             : and encodes it as a UTF-8 character in 1 to 6 bytes.
     295             : 
     296             : Arguments:
     297             :   cvalue     the character value
     298             :   buffer     pointer to buffer for result - at least 6 bytes long
     299             : 
     300             : Returns:     number of bytes placed in the buffer
     301             : */
     302             : 
     303             : static int
     304             : ord2utf8(int cvalue, uschar *buffer)
     305             : {
     306             : register int i, j;
     307             : for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
     308             :   if (cvalue <= utf8_table1[i]) break;
     309             : *buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]);
     310             : cvalue >>= 6 - i;
     311             : for (j = 0; j < i; j++)
     312             :   {
     313             :   *buffer++ = 0x80 | (cvalue & 0x3f);
     314             :   cvalue >>= 6;
     315             :   }
     316             : return i + 1;
     317             : }
     318             : #endif
     319             : 
     320             : 
     321             : 
     322             : /*************************************************
     323             : *          Return version string                 *
     324             : *************************************************/
     325             : 
     326             : #define STRING(a)  # a
     327             : #define XSTRING(s) STRING(s)
     328             : 
     329             : const char *
     330           0 : pcre_version(void)
     331             : {
     332           0 : return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);
     333             : }
     334             : 
     335             : 
     336             : 
     337             : 
     338             : /*************************************************
     339             : * (Obsolete) Return info about compiled pattern  *
     340             : *************************************************/
     341             : 
     342             : /* This is the original "info" function. It picks potentially useful data out
     343             : of the private structure, but its interface was too rigid. It remains for
     344             : backwards compatibility. The public options are passed back in an int - though
     345             : the re->options field has been expanded to a long int, all the public options
     346             : are at the low end of it, and so even on 16-bit systems this will still be OK.
     347             : Therefore, I haven't changed the API for pcre_info().
     348             : 
     349             : Arguments:
     350             :   external_re   points to compiled code
     351             :   optptr        where to pass back the options
     352             :   first_char    where to pass back the first character,
     353             :                 or -1 if multiline and all branches start ^,
     354             :                 or -2 otherwise
     355             : 
     356             : Returns:        number of capturing subpatterns
     357             :                 or negative values on error
     358             : */
     359             : 
     360             : int
     361           0 : pcre_info(const pcre *external_re, int *optptr, int *first_char)
     362             : {
     363           0 : const real_pcre *re = (const real_pcre *)external_re;
     364           0 : if (re == NULL) return PCRE_ERROR_NULL;
     365           0 : if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
     366           0 : if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
     367           0 : if (first_char != NULL)
     368           0 :   *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
     369           0 :      ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
     370           0 : return re->top_bracket;
     371             : }
     372             : 
     373             : 
     374             : 
     375             : /*************************************************
     376             : *        Return info about compiled pattern      *
     377             : *************************************************/
     378             : 
     379             : /* This is a newer "info" function which has an extensible interface so
     380             : that additional items can be added compatibly.
     381             : 
     382             : Arguments:
     383             :   external_re      points to compiled code
     384             :   external_study   points to study data, or NULL
     385             :   what             what information is required
     386             :   where            where to put the information
     387             : 
     388             : Returns:           0 if data returned, negative on error
     389             : */
     390             : 
     391             : int
     392           0 : pcre_fullinfo(const pcre *external_re, const pcre_extra *study_data, int what,
     393             :   void *where)
     394             : {
     395           0 : const real_pcre *re = (const real_pcre *)external_re;
     396           0 : const real_pcre_extra *study = (const real_pcre_extra *)study_data;
     397             : 
     398           0 : if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
     399           0 : if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
     400             : 
     401           0 : switch (what)
     402             :   {
     403           0 :   case PCRE_INFO_OPTIONS:
     404           0 :   *((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
     405           0 :   break;
     406             : 
     407           0 :   case PCRE_INFO_SIZE:
     408           0 :   *((size_t *)where) = re->size;
     409           0 :   break;
     410             : 
     411           0 :   case PCRE_INFO_CAPTURECOUNT:
     412           0 :   *((int *)where) = re->top_bracket;
     413           0 :   break;
     414             : 
     415           0 :   case PCRE_INFO_BACKREFMAX:
     416           0 :   *((int *)where) = re->top_backref;
     417           0 :   break;
     418             : 
     419           0 :   case PCRE_INFO_FIRSTCHAR:
     420           0 :   *((int *)where) =
     421           0 :     ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
     422           0 :     ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
     423           0 :   break;
     424             : 
     425           0 :   case PCRE_INFO_FIRSTTABLE:
     426           0 :   *((const uschar **)where) =
     427           0 :     (study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?
     428           0 :       study->start_bits : NULL;
     429           0 :   break;
     430             : 
     431           0 :   case PCRE_INFO_LASTLITERAL:
     432           0 :   *((int *)where) =
     433           0 :     ((re->options & PCRE_REQCHSET) != 0)? re->req_char : -1;
     434           0 :   break;
     435             : 
     436           0 :   default: return PCRE_ERROR_BADOPTION;
     437             :   }
     438             : 
     439           0 : return 0;
     440             : }
     441             : 
     442             : 
     443             : 
     444             : #ifdef DEBUG
     445             : /*************************************************
     446             : *        Debugging function to print chars       *
     447             : *************************************************/
     448             : 
     449             : /* Print a sequence of chars in printable format, stopping at the end of the
     450             : subject if requested.
     451             : 
     452             : Arguments:
     453             :   p           points to characters
     454             :   length      number to print
     455             :   is_subject  TRUE if printing from within md->start_subject
     456             :   md          pointer to matching data block, if is_subject is TRUE
     457             : 
     458             : Returns:     nothing
     459             : */
     460             : 
     461             : static void
     462             : pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
     463             : {
     464             : int c;
     465             : if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
     466             : while (length-- > 0)
     467             :   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
     468             : }
     469             : #endif
     470             : 
     471             : 
     472             : 
     473             : 
     474             : /*************************************************
     475             : *            Handle escapes                      *
     476             : *************************************************/
     477             : 
     478             : /* This function is called when a \ has been encountered. It either returns a
     479             : positive value for a simple escape such as \n, or a negative value which
     480             : encodes one of the more complicated things such as \d. When UTF-8 is enabled,
     481             : a positive value greater than 255 may be returned. On entry, ptr is pointing at
     482             : the \. On exit, it is on the final character of the escape sequence.
     483             : 
     484             : Arguments:
     485             :   ptrptr     points to the pattern position pointer
     486             :   errorptr   points to the pointer to the error message
     487             :   bracount   number of previous extracting brackets
     488             :   options    the options bits
     489             :   isclass    TRUE if inside a character class
     490             :   cd         pointer to char tables block
     491             : 
     492             : Returns:     zero or positive => a data character
     493             :              negative => a special escape sequence
     494             :              on error, errorptr is set
     495             : */
     496             : 
     497             : static int
     498           0 : check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
     499             :   int options, BOOL isclass, compile_data *cd)
     500             : {
     501           0 : const uschar *ptr = *ptrptr;
     502             : int c, i;
     503             : 
     504             : /* If backslash is at the end of the pattern, it's an error. */
     505             : 
     506           0 : c = *(++ptr);
     507           0 : if (c == 0) *errorptr = ERR1;
     508             : 
     509             : /* Digits or letters may have special meaning; all others are literals. */
     510             : 
     511           0 : else if (c < '0' || c > 'z') {}
     512             : 
     513             : /* Do an initial lookup in a table. A non-zero result is something that can be
     514             : returned immediately. Otherwise further processing may be required. */
     515             : 
     516           0 : else if ((i = escapes[c - '0']) != 0) c = i;
     517             : 
     518             : /* Escapes that need further processing, or are illegal. */
     519             : 
     520             : else
     521             :   {
     522             :   const uschar *oldptr;
     523           0 :   switch (c)
     524             :     {
     525             :     /* The handling of escape sequences consisting of a string of digits
     526             :     starting with one that is not zero is not straightforward. By experiment,
     527             :     the way Perl works seems to be as follows:
     528             : 
     529             :     Outside a character class, the digits are read as a decimal number. If the
     530             :     number is less than 10, or if there are that many previous extracting
     531             :     left brackets, then it is a back reference. Otherwise, up to three octal
     532             :     digits are read to form an escaped byte. Thus \123 is likely to be octal
     533             :     123 (cf \0123, which is octal 012 followed by the literal 3). If the octal
     534             :     value is greater than 377, the least significant 8 bits are taken. Inside a
     535             :     character class, \ followed by a digit is always an octal number. */
     536             : 
     537           0 :     case '1': case '2': case '3': case '4': case '5':
     538             :     case '6': case '7': case '8': case '9':
     539             : 
     540           0 :     if (!isclass)
     541             :       {
     542           0 :       oldptr = ptr;
     543           0 :       c -= '0';
     544           0 :       while ((cd->ctypes[ptr[1]] & ctype_digit) != 0)
     545           0 :         c = c * 10 + *(++ptr) - '0';
     546           0 :       if (c < 10 || c <= bracount)
     547             :         {
     548           0 :         c = -(ESC_REF + c);
     549           0 :         break;
     550             :         }
     551           0 :       ptr = oldptr;      /* Put the pointer back and fall through */
     552             :       }
     553             : 
     554             :     /* Handle an octal number following \. If the first digit is 8 or 9, Perl
     555             :     generates a binary zero byte and treats the digit as a following literal.
     556             :     Thus we have to pull back the pointer by one. */
     557             : 
     558           0 :     if ((c = *ptr) >= '8')
     559             :       {
     560           0 :       ptr--;
     561           0 :       c = 0;
     562           0 :       break;
     563             :       }
     564             : 
     565             :     /* \0 always starts an octal number, but we may drop through to here with a
     566             :     larger first octal digit. */
     567             : 
     568             :     case '0':
     569           0 :     c -= '0';
     570           0 :     while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 &&
     571           0 :       ptr[1] != '8' && ptr[1] != '9')
     572           0 :         c = c * 8 + *(++ptr) - '0';
     573           0 :     c &= 255;     /* Take least significant 8 bits */
     574           0 :     break;
     575             : 
     576             :     /* \x is complicated when UTF-8 is enabled. \x{ddd} is a character number
     577             :     which can be greater than 0xff, but only if the ddd are hex digits. */
     578             : 
     579           0 :     case 'x':
     580             : #ifdef SUPPORT_UTF8
     581             :     if (ptr[1] == '{' && (options & PCRE_UTF8) != 0)
     582             :       {
     583             :       const uschar *pt = ptr + 2;
     584             :       register int count = 0;
     585             :       c = 0;
     586             :       while ((cd->ctypes[*pt] & ctype_xdigit) != 0)
     587             :         {
     588             :         count++;
     589             :         c = c * 16 + cd->lcc[*pt] -
     590             :           (((cd->ctypes[*pt] & ctype_digit) != 0)? '0' : 'W');
     591             :         pt++;
     592             :         }
     593             :       if (*pt == '}')
     594             :         {
     595             :         if (c < 0 || count > 8) *errorptr = ERR34;
     596             :         ptr = pt;
     597             :         break;
     598             :         }
     599             :       /* If the sequence of hex digits does not end with '}', then we don't
     600             :       recognize this construct; fall through to the normal \x handling. */
     601             :       }
     602             : #endif
     603             : 
     604             :     /* Read just a single hex char */
     605             : 
     606           0 :     c = 0;
     607           0 :     while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0)
     608             :       {
     609           0 :       ptr++;
     610           0 :       c = c * 16 + cd->lcc[*ptr] -
     611           0 :         (((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');
     612             :       }
     613           0 :     break;
     614             : 
     615             :     /* Other special escapes not starting with a digit are straightforward */
     616             : 
     617           0 :     case 'c':
     618           0 :     c = *(++ptr);
     619           0 :     if (c == 0)
     620             :       {
     621           0 :       *errorptr = ERR2;
     622           0 :       return 0;
     623             :       }
     624             : 
     625             :     /* A letter is upper-cased; then the 0x40 bit is flipped */
     626             : 
     627           0 :     if (c >= 'a' && c <= 'z') c = cd->fcc[c];
     628           0 :     c ^= 0x40;
     629           0 :     break;
     630             : 
     631             :     /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
     632             :     other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
     633             :     for Perl compatibility, it is a literal. This code looks a bit odd, but
     634             :     there used to be some cases other than the default, and there may be again
     635             :     in future, so I haven't "optimized" it. */
     636             : 
     637           0 :     default:
     638           0 :     if ((options & PCRE_EXTRA) != 0) switch(c)
     639             :       {
     640           0 :       default:
     641           0 :       *errorptr = ERR3;
     642           0 :       break;
     643             :       }
     644           0 :     break;
     645             :     }
     646           0 :   }
     647             : 
     648           0 : *ptrptr = ptr;
     649           0 : return c;
     650             : }
     651             : 
     652             : 
     653             : 
     654             : /*************************************************
     655             : *            Check for counted repeat            *
     656             : *************************************************/
     657             : 
     658             : /* This function is called when a '{' is encountered in a place where it might
     659             : start a quantifier. It looks ahead to see if it really is a quantifier or not.
     660             : It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}
     661             : where the ddds are digits. Nothing is consumed: p is a local copy.
     662             : 
     663             : Arguments:
     664             :   p         pointer to the first char after '{'
     665             :   cd        pointer to char tables block; only cd->ctypes is consulted here
     666             : 
     667             : Returns:    TRUE if the text at p is ddd} or ddd,} or ddd,ddd}; FALSE otherwise
     668             : */
     669             : 
     670             : static BOOL
     671           0 : is_counted_repeat(const uschar *p, compile_data *cd)
     672             : {
     673           0 : if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;   /* need at least one digit */
     674           0 : while ((cd->ctypes[*p] & ctype_digit) != 0) p++;           /* skip the rest of the first number */
     675           0 : if (*p == '}') return TRUE;                                /* the {ddd} form */
     676             : 
     677           0 : if (*p++ != ',') return FALSE;                             /* only a comma may follow the first number */
     678           0 : if (*p == '}') return TRUE;                                /* the {ddd,} form */
     679             : 
     680           0 : if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;   /* second number needs at least one digit */
     681           0 : while ((cd->ctypes[*p] & ctype_digit) != 0) p++;           /* skip the rest of the second number */
     682           0 : return (*p == '}');                                        /* the {ddd,ddd} form */
     683             : }
     684             : 
     685             : 
     686             : 
     687             : /*************************************************
     688             : *         Read repeat counts                     *
     689             : *************************************************/
     690             : 
     691             : /* Read an item of the form {n,m} and return the values. This is called only
     692             : after is_counted_repeat() has confirmed that a repeat-count quantifier exists,
     693             : so the syntax is guaranteed to be correct, but we need to check the values.
     694             : 
     695             : Arguments:
     696             :   p          pointer to first char after '{'
     697             :   minp       pointer to int for min; written only when no error is raised
     698             :   maxp       pointer to int for max; written only when no error is raised,
     699             :              returned as -1 if no max
     700             :   errorptr   points to pointer to error message; set to ERR4 or ERR5 on error
     701             :   cd         pointer to character tables block
     702             : 
     703             : Returns:     pointer to '}' on success;
     704             :              current ptr on error, with errorptr set
     705             : */
     706             : 
     707             : static const uschar *
     708           0 : read_repeat_counts(const uschar *p, int *minp, int *maxp,
     709             :   const char **errorptr, compile_data *cd)
     710             : {
     711           0 : int min = 0;
     712           0 : int max = -1;   /* stays -1 for the {n,} form, meaning no upper limit */
     713             : 
     714           0 : while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
     715             : 
     716           0 : if (*p == '}') max = min; else   /* {n}: max is the same as min */
     717             :   {
     718           0 :   if (*(++p) != '}')             /* step over the comma; '}' here means {n,} */
     719             :     {
     720           0 :     max = 0;
     721           0 :     while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
     722           0 :     if (max < min)               /* {n,m} with m < n is rejected */
     723             :       {
     724           0 :       *errorptr = ERR4;
     725           0 :       return p;
     726             :       }
     727             :     }
     728             :   }
     729             : 
     730             : /* Do paranoid checks, then fill in the required variables, and pass back the
     731             : pointer to the terminating '}'. NOTE(review): the digit loops above have no bound of their own, so a very long number could overflow int before this check runs - confirm. */
     732             : 
     733           0 : if (min < 0 || min > 65535 || max < -1 || max > 65535)
     734           0 :   *errorptr = ERR5;
     735             : else
     736             :   {
     737           0 :   *minp = min;
     738           0 :   *maxp = max;
     739             :   }
     740           0 : return p;
     741             : }
     742             : 
     743             : 
     744             : 
     745             : /*************************************************
     746             : *        Find the fixed length of a pattern      *
     747             : *************************************************/
     748             : 
     749             : /* Scan a pattern and compute the fixed length of subject that will match it,
     750             : if the length is fixed. This is needed for dealing with backward assertions.
     751             : 
     752             : Arguments:
     753             :   code     points to the start of the pattern (the bracket)
     754             :   options  the compiling options; only passed on to the recursive calls here
     755             : 
     756             : Returns:   the fixed length, or -1 if there is no fixed length
     757             : */
     758             : 
     759             : static int
     760           0 : find_fixedlength(uschar *code, int options)
     761             : {
     762           0 : int length = -1;   /* length agreed by the branches seen so far; -1 = none yet */
     763             : 
     764           0 : register int branchlength = 0;
     765           0 : register uschar *cc = code + 3;   /* step over the opening opcode and the two bytes after it */
     766             : 
     767             : /* Scan along the opcodes for this branch. If we get to the end of the
     768             : branch, check the length against that of the other branches. */
     769             : 
     770             : for (;;)
     771           0 :   {
     772             :   int d;
     773           0 :   register int op = *cc;
     774           0 :   if (op >= OP_BRA) op = OP_BRA;   /* every opcode value from OP_BRA upwards is handled as OP_BRA */
     775             : 
     776           0 :   switch (op)
     777             :     {
     778           0 :     case OP_BRA:
     779             :     case OP_ONCE:
     780             :     case OP_COND:
     781           0 :     d = find_fixedlength(cc, options);
     782           0 :     if (d < 0) return -1;
     783           0 :     branchlength += d;
     784           0 :     do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);   /* hop over each alternative via the 16-bit offset in cc[1..2] */
     785           0 :     cc += 3;
     786           0 :     break;
     787             : 
     788             :     /* Reached end of a branch; if it's a ket it is the end of a nested
     789             :     call. If it's ALT it is an alternation in a nested call. If it is
     790             :     END it's the end of the outer call. All can be handled by the same code. */
     791             : 
     792           0 :     case OP_ALT:
     793             :     case OP_KET:
     794             :     case OP_KETRMAX:
     795             :     case OP_KETRMIN:
     796             :     case OP_END:
     797           0 :     if (length < 0) length = branchlength;                  /* first branch sets the reference length */
     798           0 :       else if (length != branchlength) return -1;           /* branches of different length: not fixed */
     799           0 :     if (*cc != OP_ALT) return length;
     800           0 :     cc += 3;
     801           0 :     branchlength = 0;
     802           0 :     break;
     803             : 
     804             :     /* Skip over assertive subpatterns */
     805             : 
     806           0 :     case OP_ASSERT:
     807             :     case OP_ASSERT_NOT:
     808             :     case OP_ASSERTBACK:
     809             :     case OP_ASSERTBACK_NOT:
     810           0 :     do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);   /* same hop as for brackets, but nothing is added to the length */
     811           0 :     cc += 3;
     812           0 :     break;
     813             : 
     814             :     /* Skip over things that don't match chars */
     815             : 
     816           0 :     case OP_REVERSE:
     817           0 :     cc++;
     818             :     /* Fall through */
     819             : 
     820           0 :     case OP_CREF:
     821             :     case OP_OPT:
     822           0 :     cc++;
     823             :     /* Fall through */
     824             : 
     825           0 :     case OP_SOD:
     826             :     case OP_EOD:
     827             :     case OP_EODN:
     828             :     case OP_CIRC:
     829             :     case OP_DOLL:
     830             :     case OP_NOT_WORD_BOUNDARY:
     831             :     case OP_WORD_BOUNDARY:
     832           0 :     cc++;
     833           0 :     break;
     834             : 
     835             :     /* Handle char strings. In UTF-8 mode we must count characters, not bytes.
     836             :     This requires a scan of the string, unfortunately. We assume valid UTF-8
     837             :     strings, so all we do is reduce the length by one for each byte whose bits
     838             :     are 10xxxxxx. */
     839             : 
     840           0 :     case OP_CHARS:
     841           0 :     branchlength += *(++cc);   /* the byte after the opcode is the string length */
     842             : #ifdef SUPPORT_UTF8
     843             :     for (d = 1; d <= *cc; d++)
     844             :       if ((cc[d] & 0xc0) == 0x80) branchlength--;
     845             : #endif
     846           0 :     cc += *cc + 1;             /* step over the length byte and the string itself */
     847           0 :     break;
     848             : 
     849             :     /* Handle exact repetitions */
     850             : 
     851           0 :     case OP_EXACT:
     852             :     case OP_TYPEEXACT:
     853           0 :     branchlength += (cc[1] << 8) + cc[2];   /* 16-bit repeat count, high byte first */
     854           0 :     cc += 4;
     855           0 :     break;
     856             : 
     857             :     /* Handle single-char matchers */
     858             : 
     859           0 :     case OP_NOT_DIGIT:
     860             :     case OP_DIGIT:
     861             :     case OP_NOT_WHITESPACE:
     862             :     case OP_WHITESPACE:
     863             :     case OP_NOT_WORDCHAR:
     864             :     case OP_WORDCHAR:
     865             :     case OP_ANY:
     866           0 :     branchlength++;
     867           0 :     cc++;
     868           0 :     break;
     869             : 
     870             : 
     871             :     /* Check a class for variable quantification */
     872             : 
     873           0 :     case OP_CLASS:
     874           0 :     cc += (*cc == OP_REF)? 2 : 33;   /* NOTE(review): cc still points at the opcode that selected this case, so the OP_REF test looks always false - confirm */
     875             : 
     876           0 :     switch (*cc)
     877             :       {
     878           0 :       case OP_CRSTAR:
     879             :       case OP_CRMINSTAR:
     880             :       case OP_CRQUERY:
     881             :       case OP_CRMINQUERY:
     882           0 :       return -1;
     883             : 
     884           0 :       case OP_CRRANGE:
     885             :       case OP_CRMINRANGE:
     886           0 :       if ((cc[1] << 8) + cc[2] != (cc[3] << 8) + cc[4]) return -1;   /* fixed only when the two 16-bit counts are equal */
     887           0 :       branchlength += (cc[1] << 8) + cc[2];
     888           0 :       cc += 5;
     889           0 :       break;
     890             : 
     891           0 :       default:
     892           0 :       branchlength++;   /* none of the repeat opcodes above follows: the class counts as one character */
     893             :       }
     894           0 :     break;
     895             : 
     896             :     /* Anything else is variable length */
     897             : 
     898           0 :     default:
     899           0 :     return -1;
     900             :     }
     901             :   }
     902             : /* Control never gets here */
     903             : }
     904             : 
     905             : 
     906             : 
     907             : 
     908             : /*************************************************
     909             : *           Check for POSIX class syntax         *
     910             : *************************************************/
     911             : 
     912             : /* This function is called when the sequence "[:" or "[." or "[=" is
     913             : encountered in a character class. It checks whether this is followed by an
     914             : optional ^ and then a sequence of letters, terminated by a matching ":]" or
     915             : ".]" or "=]".
     916             : 
     917             : Arguments:
     918             :   ptr      pointer to the initial [
     919             :   endptr   on success, receives a pointer to the closing terminator before ]
     920             :   cd       pointer to compile data; only cd->ctypes is consulted here
     921             : 
     922             : Returns:   TRUE if the syntax matches (and *endptr is set), otherwise FALSE
     923             : */
     924             : 
     925             : static BOOL
     926           0 : check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)
     927             : {
     928             : int terminator;          /* Don't combine these lines; the Solaris cc */
     929           0 : terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
     930           0 : if (*(++ptr) == '^') ptr++;                                  /* optional negation marker */
     931           0 : while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;        /* skip the letters of the name */
     932           0 : if (*ptr == terminator && ptr[1] == ']')                     /* same char as after the [ and then ] */
     933             :   {
     934           0 :   *endptr = ptr;                                             /* points at the terminator, not at the ] */
     935           0 :   return TRUE;
     936             :   }
     937           0 : return FALSE;
     938             : }
     939             : 
     940             : 
     941             : 
     942             : 
     943             : /*************************************************
     944             : *          Check POSIX class name                *
     945             : *************************************************/
     946             : 
     947             : /* This function is called to check the name given in a POSIX-style class entry
     948             : such as [:alnum:]. The name is compared with each entry of posix_names in turn.
     949             : 
     950             : Arguments:
     951             :   ptr        points to the first letter
     952             :   len        the length of the name
     953             : 
     954             : Returns:     the index of the matching posix_names entry, or -1 if unknown
     955             : */
     956             : 
     957             : static int
     958           0 : check_posix_name(const uschar *ptr, int len)
     959             : {
     960           0 : register int yield = 0;
     961           0 : while (posix_name_lengths[yield] != 0)   /* a zero length marks the end of the table */
     962             :   {
     963           0 :   if (len == posix_name_lengths[yield] &&   /* lengths must agree before the text is compared */
     964           0 :     strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield;
     965           0 :   yield++;
     966             :   }
     967           0 : return -1;
     968             : }
     969             : 
     970             : 
     971             : 
     972             : 
     973             : /*************************************************
     974             : *           Compile one branch                   *
     975             : *************************************************/
     976             : 
     977             : /* Scan the pattern, compiling it into the code vector.
     978             : 
     979             : Arguments:
     980             :   options      the option bits
     981             :   brackets     points to number of brackets used
     982             :   code         points to the pointer to the current code point
     983             :   ptrptr       points to the current pattern pointer
     984             :   errorptr     points to pointer to error message
     985             :   optchanged   set to the value of the last OP_OPT item compiled
     986             :   reqchar      set to the last literal character required, else -1
     987             :   countlits    set to count of mandatory literal characters
     988             :   cd           contains pointers to tables
     989             : 
     990             : Returns:       TRUE on success
     991             :                FALSE, with *errorptr set on error
     992             : */
     993             : 
     994             : static BOOL
     995           0 : compile_branch(int options, int *brackets, uschar **codeptr,
     996             :   const uschar **ptrptr, const char **errorptr, int *optchanged,
     997             :   int *reqchar, int *countlits, compile_data *cd)
     998             : {
     999             : int repeat_type, op_type;
    1000             : int repeat_min, repeat_max;
    1001             : int bravalue, length;
    1002             : int greedy_default, greedy_non_default;
    1003             : int prevreqchar;
    1004           0 : int condcount = 0;
    1005           0 : int subcountlits = 0;
    1006             : register int c;
    1007           0 : register uschar *code = *codeptr;
    1008             : uschar *tempcode;
    1009           0 : const uschar *ptr = *ptrptr;
    1010             : const uschar *tempptr;
    1011           0 : uschar *previous = NULL;
    1012             : uschar class[32];
    1013             : 
    1014             : /* Set up the default and non-default settings for greediness */
    1015             : 
    1016           0 : greedy_default = ((options & PCRE_UNGREEDY) != 0);
    1017           0 : greedy_non_default = greedy_default ^ 1;
    1018             : 
    1019             : /* Initialize no required char, and count of literals */
    1020             : 
    1021           0 : *reqchar = prevreqchar = -1;
    1022           0 : *countlits = 0;
    1023             : 
    1024             : /* Switch on next character until the end of the branch */
    1025             : 
    1026           0 : for (;; ptr++)
    1027           0 :   {
    1028             :   BOOL negate_class;
    1029             :   int class_charcount;
    1030             :   int class_lastchar;
    1031             :   int newoptions;
    1032             :   int condref;
    1033             :   int subreqchar;
    1034             : 
    1035           0 :   c = *ptr;
    1036           0 :   if ((options & PCRE_EXTENDED) != 0)
    1037             :     {
    1038           0 :     if ((cd->ctypes[c] & ctype_space) != 0) continue;
    1039           0 :     if (c == '#')
    1040             :       {
    1041             :       /* The space before the ; is to avoid a warning on a silly compiler
    1042             :       on the Macintosh. */
    1043           0 :       while ((c = *(++ptr)) != 0 && c != '\n') ;
    1044           0 :       continue;
    1045             :       }
    1046             :     }
    1047             : 
    1048           0 :   switch(c)
    1049             :     {
    1050             :     /* The branch terminates at end of string, |, or ). */
    1051             : 
    1052           0 :     case 0:
    1053             :     case '|':
    1054             :     case ')':
    1055           0 :     *codeptr = code;
    1056           0 :     *ptrptr = ptr;
    1057           0 :     return TRUE;
    1058             : 
    1059             :     /* Handle single-character metacharacters */
    1060             : 
    1061           0 :     case '^':
    1062           0 :     previous = NULL;
    1063           0 :     *code++ = OP_CIRC;
    1064           0 :     break;
    1065             : 
    1066           0 :     case '$':
    1067           0 :     previous = NULL;
    1068           0 :     *code++ = OP_DOLL;
    1069           0 :     break;
    1070             : 
    1071           0 :     case '.':
    1072           0 :     previous = code;
    1073           0 :     *code++ = OP_ANY;
    1074           0 :     break;
    1075             : 
    1076             :     /* Character classes. These always build a 32-byte bitmap of the permitted
    1077             :     characters, except in the special case where there is only one character.
    1078             :     For negated classes, we build the map as usual, then invert it at the end.
    1079             :     */
    1080             : 
    1081           0 :     case '[':
    1082           0 :     previous = code;
    1083           0 :     *code++ = OP_CLASS;
    1084             : 
    1085             :     /* If the first character is '^', set the negation flag and skip it. */
    1086             : 
    1087           0 :     if ((c = *(++ptr)) == '^')
    1088             :       {
    1089           0 :       negate_class = TRUE;
    1090           0 :       c = *(++ptr);
    1091             :       }
    1092           0 :     else negate_class = FALSE;
    1093             : 
    1094             :     /* Keep a count of chars so that we can optimize the case of just a single
    1095             :     character. */
    1096             : 
    1097           0 :     class_charcount = 0;
    1098           0 :     class_lastchar = -1;
    1099             : 
    1100             :     /* Initialize the 32-char bit map to all zeros. We have to build the
    1101             :     map in a temporary bit of store, in case the class contains only 1
    1102             :     character, because in that case the compiled code doesn't use the
    1103             :     bit map. */
    1104             : 
    1105           0 :     memset(class, 0, 32 * sizeof(uschar));
    1106             : 
    1107             :     /* Process characters until ] is reached. By writing this as a "do" it
    1108             :     means that an initial ] is taken as a data character. */
    1109             : 
    1110             :     do
    1111             :       {
    1112           0 :       if (c == 0)
    1113             :         {
    1114           0 :         *errorptr = ERR6;
    1115           0 :         goto FAILED;
    1116             :         }
    1117             : 
    1118             :       /* Handle POSIX class names. Perl allows a negation extension of the
    1119             :       form [:^name]. A square bracket that doesn't match the syntax is
    1120             :       treated as a literal. We also recognize the POSIX constructions
    1121             :       [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
    1122             :       5.6 does. */
    1123             : 
    1124           0 :       if (c == '[' &&
    1125           0 :           (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
    1126           0 :           check_posix_syntax(ptr, &tempptr, cd))
    1127             :         {
    1128           0 :         BOOL local_negate = FALSE;
    1129             :         int posix_class, i;
    1130           0 :         register const uschar *cbits = cd->cbits;
    1131             : 
    1132           0 :         if (ptr[1] != ':')
    1133             :           {
    1134           0 :           *errorptr = ERR31;
    1135           0 :           goto FAILED;
    1136             :           }
    1137             : 
    1138           0 :         ptr += 2;
    1139           0 :         if (*ptr == '^')
    1140             :           {
    1141           0 :           local_negate = TRUE;
    1142           0 :           ptr++;
    1143             :           }
    1144             : 
    1145           0 :         posix_class = check_posix_name(ptr, tempptr - ptr);
    1146           0 :         if (posix_class < 0)
    1147             :           {
    1148           0 :           *errorptr = ERR30;
    1149           0 :           goto FAILED;
    1150             :           }
    1151             : 
    1152             :         /* If matching is caseless, upper and lower are converted to
    1153             :         alpha. This relies on the fact that the class table starts with
    1154             :         alpha, lower, upper as the first 3 entries. */
    1155             : 
    1156           0 :         if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
    1157           0 :           posix_class = 0;
    1158             : 
    1159             :         /* Or into the map we are building up to 3 of the static class
    1160             :         tables, or their negations. */
    1161             : 
    1162           0 :         posix_class *= 3;
    1163           0 :         for (i = 0; i < 3; i++)
    1164             :           {
    1165           0 :           int taboffset = posix_class_maps[posix_class + i];
    1166           0 :           if (taboffset < 0) break;
    1167           0 :           if (local_negate)
    1168           0 :             for (c = 0; c < 32; c++) class[c] |= ~cbits[c+taboffset];
    1169             :           else
    1170           0 :             for (c = 0; c < 32; c++) class[c] |= cbits[c+taboffset];
    1171             :           }
    1172             : 
    1173           0 :         ptr = tempptr + 1;
    1174           0 :         class_charcount = 10;  /* Set > 1; assumes more than 1 per class */
    1175           0 :         continue;
    1176             :         }
    1177             : 
    1178             :       /* Backslash may introduce a single character, or it may introduce one
    1179             :       of the specials, which just set a flag. Escaped items are checked for
    1180             :       validity in the pre-compiling pass. The sequence \b is a special case.
    1181             :       Inside a class (and only there) it is treated as backspace. Elsewhere
    1182             :       it marks a word boundary. Other escapes have preset maps ready to
    1183             :       or into the one we are building. We assume they have more than one
    1184             :       character in them, so set class_count bigger than one. */
    1185             : 
    1186           0 :       if (c == '\\')
    1187             :         {
    1188           0 :         c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
    1189           0 :         if (-c == ESC_b) c = '\b';
    1190           0 :         else if (c < 0)
    1191             :           {
    1192           0 :           register const uschar *cbits = cd->cbits;
    1193           0 :           class_charcount = 10;
    1194           0 :           switch (-c)
    1195             :             {
    1196           0 :             case ESC_d:
    1197           0 :             for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit];
    1198           0 :             continue;
    1199             : 
    1200           0 :             case ESC_D:
    1201           0 :             for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit];
    1202           0 :             continue;
    1203             : 
    1204           0 :             case ESC_w:
    1205           0 :             for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_word];
    1206           0 :             continue;
    1207             : 
    1208           0 :             case ESC_W:
    1209           0 :             for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_word];
    1210           0 :             continue;
    1211             : 
    1212           0 :             case ESC_s:
    1213           0 :             for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space];
    1214           0 :             continue;
    1215             : 
    1216           0 :             case ESC_S:
    1217           0 :             for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space];
    1218           0 :             continue;
    1219             : 
    1220           0 :             default:
    1221           0 :             *errorptr = ERR7;
    1222           0 :             goto FAILED;
    1223             :             }
    1224             :           }
    1225             : 
    1226             :         /* Fall through if single character, but don't at present allow
    1227             :         chars > 255 in UTF-8 mode. */
    1228             : 
    1229             : #ifdef SUPPORT_UTF8
    1230             :         if (c > 255)
    1231             :           {
    1232             :           *errorptr = ERR33;
    1233             :           goto FAILED;
    1234             :           }
    1235             : #endif
    1236             :         }
    1237             : 
    1238             :       /* A single character may be followed by '-' to form a range. However,
    1239             :       Perl does not permit ']' to be the end of the range. A '-' character
    1240             :       here is treated as a literal. */
    1241             : 
    1242           0 :       if (ptr[1] == '-' && ptr[2] != ']')
    1243             :         {
    1244             :         int d;
    1245           0 :         ptr += 2;
    1246           0 :         d = *ptr;
    1247             : 
    1248           0 :         if (d == 0)
    1249             :           {
    1250           0 :           *errorptr = ERR6;
    1251           0 :           goto FAILED;
    1252             :           }
    1253             : 
    1254             :         /* The second part of a range can be a single-character escape, but
    1255             :         not any of the other escapes. Perl 5.6 treats a hyphen as a literal
    1256             :         in such circumstances. */
    1257             : 
    1258           0 :         if (d == '\\')
    1259             :           {
    1260           0 :           const uschar *oldptr = ptr;
    1261           0 :           d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
    1262             : 
    1263             : #ifdef SUPPORT_UTF8
    1264             :           if (d > 255)
    1265             :             {
    1266             :             *errorptr = ERR33;
    1267             :             goto FAILED;
    1268             :             }
    1269             : #endif
    1270             :           /* \b is backspace; any other special means the '-' was literal */
    1271             : 
    1272           0 :           if (d < 0)
    1273             :             {
    1274           0 :             if (d == -ESC_b) d = '\b'; else
    1275             :               {
    1276           0 :               ptr = oldptr - 2;
    1277           0 :               goto SINGLE_CHARACTER;  /* A few lines below */
    1278             :               }
    1279             :             }
    1280             :           }
    1281             : 
    1282           0 :         if (d < c)
    1283             :           {
    1284           0 :           *errorptr = ERR8;
    1285           0 :           goto FAILED;
    1286             :           }
    1287             : 
    1288           0 :         for (; c <= d; c++)
    1289             :           {
    1290           0 :           class[c/8] |= (1 << (c&7));
    1291           0 :           if ((options & PCRE_CASELESS) != 0)
    1292             :             {
    1293           0 :             int uc = cd->fcc[c];           /* flip case */
    1294           0 :             class[uc/8] |= (1 << (uc&7));
    1295             :             }
    1296           0 :           class_charcount++;                /* in case a one-char range */
    1297           0 :           class_lastchar = c;
    1298             :           }
    1299           0 :         continue;   /* Go get the next char in the class */
    1300             :         }
    1301             : 
    1302             :       /* Handle a lone single character - we can get here for a normal
    1303             :       non-escape char, or after \ that introduces a single character. */
    1304             : 
    1305           0 :       SINGLE_CHARACTER:
    1306             : 
    1307           0 :       class [c/8] |= (1 << (c&7));
    1308           0 :       if ((options & PCRE_CASELESS) != 0)
    1309             :         {
    1310           0 :         c = cd->fcc[c];   /* flip case */
    1311           0 :         class[c/8] |= (1 << (c&7));
    1312             :         }
    1313           0 :       class_charcount++;
    1314           0 :       class_lastchar = c;
    1315             :       }
    1316             : 
    1317             :     /* Loop until ']' reached; the check for end of string happens inside the
    1318             :     loop. This "while" is the end of the "do" above. */
    1319             : 
    1320           0 :     while ((c = *(++ptr)) != ']');
    1321             : 
    1322             :     /* If class_charcount is 1 and class_lastchar is not negative, we saw
    1323             :     precisely one character. This doesn't need the whole 32-byte bit map.
    1324             :     We turn it into a 1-character OP_CHARS if the class is not negated, or
    1325             :     OP_NOT if it is negated. */
    1326             : 
    1327           0 :     if (class_charcount == 1 && class_lastchar >= 0)
    1328             :       {
    1329           0 :       if (negate_class)
    1330             :         {
    1331           0 :         code[-1] = OP_NOT;
    1332             :         }
    1333             :       else
    1334             :         {
    1335           0 :         code[-1] = OP_CHARS;
    1336           0 :         *code++ = 1;
    1337             :         }
    1338           0 :       *code++ = class_lastchar;
    1339             :       }
    1340             : 
    1341             :     /* Otherwise, negate the 32-byte map if necessary, and copy it into
    1342             :     the code vector. */
    1343             : 
    1344             :     else
    1345             :       {
    1346           0 :       if (negate_class)
    1347           0 :         for (c = 0; c < 32; c++) code[c] = ~class[c];
    1348             :       else
    1349           0 :         memcpy(code, class, 32);
    1350           0 :       code += 32;
    1351             :       }
    1352           0 :     break;
    1353             : 
    1354             :     /* Various kinds of repeat */
    1355             : 
    1356           0 :     case '{':
    1357           0 :     if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR;
    1358           0 :     ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd);
    1359           0 :     if (*errorptr != NULL) goto FAILED;
    1360           0 :     goto REPEAT;
    1361             : 
    1362           0 :     case '*':
    1363           0 :     repeat_min = 0;
    1364           0 :     repeat_max = -1;
    1365           0 :     goto REPEAT;
    1366             : 
    1367           0 :     case '+':
    1368           0 :     repeat_min = 1;
    1369           0 :     repeat_max = -1;
    1370           0 :     goto REPEAT;
    1371             : 
    1372           0 :     case '?':
    1373           0 :     repeat_min = 0;
    1374           0 :     repeat_max = 1;
    1375             : 
    1376           0 :     REPEAT:
    1377           0 :     if (previous == NULL)
    1378             :       {
    1379           0 :       *errorptr = ERR9;
    1380           0 :       goto FAILED;
    1381             :       }
    1382             : 
    1383             :     /* If the next character is '?' this is a minimizing repeat, by default,
    1384             :     but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
    1385             :     next character. */
    1386             : 
    1387           0 :     if (ptr[1] == '?')
    1388           0 :       { repeat_type = greedy_non_default; ptr++; }
    1389           0 :     else repeat_type = greedy_default;
    1390             : 
    1391             :     /* If previous was a string of characters, chop off the last one and use it
    1392             :     as the subject of the repeat. If there was only one character, we can
    1393             :     abolish the previous item altogether. A repeat with a zero minimum wipes
    1394             :     out any reqchar setting, backing up to the previous value. We must also
    1395             :     adjust the countlits value. */
    1396             : 
    1397           0 :     if (*previous == OP_CHARS)
    1398             :       {
    1399           0 :       int len = previous[1];
    1400             : 
    1401           0 :       if (repeat_min == 0) *reqchar = prevreqchar;
    1402           0 :       *countlits += repeat_min - 1;
    1403             : 
    1404           0 :       if (len == 1)
    1405             :         {
    1406           0 :         c = previous[2];
    1407           0 :         code = previous;
    1408             :         }
    1409             :       else
    1410             :         {
    1411           0 :         c = previous[len+1];
    1412           0 :         previous[1]--;
    1413           0 :         code--;
    1414             :         }
    1415           0 :       op_type = 0;                 /* Use single-char op codes */
    1416           0 :       goto OUTPUT_SINGLE_REPEAT;   /* Code shared with single character types */
    1417             :       }
    1418             : 
    1419             :     /* If previous was a single negated character ([^a] or similar), we use
    1420             :     one of the special opcodes, replacing it. The code is shared with single-
    1421             :     character repeats by adding a suitable offset into repeat_type. */
    1422             : 
    1423           0 :     else if ((int)*previous == OP_NOT)
    1424             :       {
    1425           0 :       op_type = OP_NOTSTAR - OP_STAR;  /* Use "not" opcodes */
    1426           0 :       c = previous[1];
    1427           0 :       code = previous;
    1428           0 :       goto OUTPUT_SINGLE_REPEAT;
    1429             :       }
    1430             : 
    1431             :     /* If previous was a character type match (\d or similar), abolish it and
    1432             :     create a suitable repeat item. The code is shared with single-character
    1433             :     repeats by adding a suitable offset into repeat_type. */
    1434             : 
    1435           0 :     else if ((int)*previous < OP_EODN || *previous == OP_ANY)
    1436             :       {
    1437           0 :       op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
    1438           0 :       c = *previous;
    1439           0 :       code = previous;
    1440             : 
    1441           0 :       OUTPUT_SINGLE_REPEAT:
    1442             : 
    1443             :       /* If the maximum is zero then the minimum must also be zero; Perl allows
    1444             :       this case, so we do too - by simply omitting the item altogether. */
    1445             : 
    1446           0 :       if (repeat_max == 0) goto END_REPEAT;
    1447             : 
    1448             :       /* Combine the op_type with the repeat_type */
    1449             : 
    1450           0 :       repeat_type += op_type;
    1451             : 
    1452             :       /* A minimum of zero is handled either as the special case * or ?, or as
    1453             :       an UPTO, with the maximum given. */
    1454             : 
    1455           0 :       if (repeat_min == 0)
    1456             :         {
    1457           0 :         if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
    1458           0 :           else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
    1459             :         else
    1460             :           {
    1461           0 :           *code++ = OP_UPTO + repeat_type;
    1462           0 :           *code++ = repeat_max >> 8;
    1463           0 :           *code++ = (repeat_max & 255);
    1464             :           }
    1465             :         }
    1466             : 
    1467             :       /* The case {1,} is handled as the special case + */
    1468             : 
    1469           0 :       else if (repeat_min == 1 && repeat_max == -1)
    1470           0 :         *code++ = OP_PLUS + repeat_type;
    1471             : 
    1472             :       /* The case {n,n} is just an EXACT, while the general case {n,m} is
    1473             :       handled as an EXACT followed by an UPTO. An EXACT of 1 is optimized. */
    1474             : 
    1475             :       else
    1476             :         {
    1477           0 :         if (repeat_min != 1)
    1478             :           {
    1479           0 :           *code++ = OP_EXACT + op_type;  /* NB EXACT doesn't have repeat_type */
    1480           0 :           *code++ = repeat_min >> 8;
    1481           0 :           *code++ = (repeat_min & 255);
    1482             :           }
    1483             : 
    1484             :         /* If the minimum is 1 and the previous item was a character string,
    1485             :         we either have to put back the item that got cancelled if the string
    1486             :         length was 1, or add the character back onto the end of a longer
    1487             :         string. For a character type nothing need be done; it will just get
    1488             :         put back naturally. Note that the final character is always going to
    1489             :         get added below. */
    1490             : 
    1491           0 :         else if (*previous == OP_CHARS)
    1492             :           {
    1493           0 :           if (code == previous) code += 2; else previous[1]++;
    1494             :           }
    1495             : 
    1496             :         /*  For a single negated character we also have to put back the
    1497             :         item that got cancelled. */
    1498             : 
    1499           0 :         else if (*previous == OP_NOT) code++;
    1500             : 
    1501             :         /* If the maximum is unlimited, insert an OP_STAR. */
    1502             : 
    1503           0 :         if (repeat_max < 0)
    1504             :           {
    1505           0 :           *code++ = c;
    1506           0 :           *code++ = OP_STAR + repeat_type;
    1507             :           }
    1508             : 
    1509             :         /* Else insert an UPTO if the max is greater than the min. */
    1510             : 
    1511           0 :         else if (repeat_max != repeat_min)
    1512             :           {
    1513           0 :           *code++ = c;
    1514           0 :           repeat_max -= repeat_min;
    1515           0 :           *code++ = OP_UPTO + repeat_type;
    1516           0 :           *code++ = repeat_max >> 8;
    1517           0 :           *code++ = (repeat_max & 255);
    1518             :           }
    1519             :         }
    1520             : 
    1521             :       /* The character or character type itself comes last in all cases. */
    1522             : 
    1523           0 :       *code++ = c;
    1524             :       }
    1525             : 
    1526             :     /* If previous was a character class or a back reference, we put the repeat
    1527             :     stuff after it, but just skip the item if the repeat was {0,0}. */
    1528             : 
    1529           0 :     else if (*previous == OP_CLASS || *previous == OP_REF)
    1530             :       {
    1531           0 :       if (repeat_max == 0)
    1532             :         {
    1533           0 :         code = previous;
    1534           0 :         goto END_REPEAT;
    1535             :         }
    1536           0 :       if (repeat_min == 0 && repeat_max == -1)
    1537           0 :         *code++ = OP_CRSTAR + repeat_type;
    1538           0 :       else if (repeat_min == 1 && repeat_max == -1)
    1539           0 :         *code++ = OP_CRPLUS + repeat_type;
    1540           0 :       else if (repeat_min == 0 && repeat_max == 1)
    1541           0 :         *code++ = OP_CRQUERY + repeat_type;
    1542             :       else
    1543             :         {
    1544           0 :         *code++ = OP_CRRANGE + repeat_type;
    1545           0 :         *code++ = repeat_min >> 8;
    1546           0 :         *code++ = repeat_min & 255;
    1547           0 :         if (repeat_max == -1) repeat_max = 0;  /* 2-byte encoding for max */
    1548           0 :         *code++ = repeat_max >> 8;
    1549           0 :         *code++ = repeat_max & 255;
    1550             :         }
    1551             :       }
    1552             : 
    1553             :     /* If previous was a bracket group, we may have to replicate it in certain
    1554             :     cases. */
    1555             : 
    1556           0 :     else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||
    1557           0 :              (int)*previous == OP_COND)
    1558           0 :       {
    1559             :       register int i;
    1560           0 :       int ketoffset = 0;
    1561           0 :       int len = code - previous;
    1562           0 :       uschar *bralink = NULL;
    1563             : 
    1564             :       /* If the maximum repeat count is unlimited, find the end of the bracket
    1565             :       by scanning through from the start, and compute the offset back to it
    1566             :       from the current code pointer. There may be an OP_OPT setting following
    1567             :       the final KET, so we can't find the end just by going back from the code
    1568             :       pointer. */
    1569             : 
    1570           0 :       if (repeat_max == -1)
    1571             :         {
    1572           0 :         register uschar *ket = previous;
    1573           0 :         do ket += (ket[1] << 8) + ket[2]; while (*ket != OP_KET);
    1574           0 :         ketoffset = code - ket;
    1575             :         }
    1576             : 
    1577             :       /* The case of a zero minimum is special because of the need to stick
    1578             :       OP_BRAZERO in front of it, and because the group appears once in the
    1579             :       data, whereas in other cases it appears the minimum number of times. For
    1580             :       this reason, it is simplest to treat this case separately, as otherwise
    1581             :       the code gets far too messy. There are several special subcases when the
    1582             :       minimum is zero. */
    1583             : 
    1584           0 :       if (repeat_min == 0)
    1585             :         {
    1586             :         /* If we set up a required char from the bracket, we must back off
    1587             :         to the previous value and reset the countlits value too. */
    1588             : 
    1589           0 :         if (subcountlits > 0)
    1590             :           {
    1591           0 :           *reqchar = prevreqchar;
    1592           0 :           *countlits -= subcountlits;
    1593             :           }
    1594             : 
    1595             :         /* If the maximum is also zero, we just omit the group from the output
    1596             :         altogether. */
    1597             : 
    1598           0 :         if (repeat_max == 0)
    1599             :           {
    1600           0 :           code = previous;
    1601           0 :           goto END_REPEAT;
    1602             :           }
    1603             : 
    1604             :         /* If the maximum is 1 or unlimited, we just have to stick in the
    1605             :         BRAZERO and do no more at this point. */
    1606             : 
    1607           0 :         if (repeat_max <= 1)
    1608             :           {
    1609           0 :           memmove(previous+1, previous, len);
    1610           0 :           code++;
    1611           0 :           *previous++ = OP_BRAZERO + repeat_type;
    1612             :           }
    1613             : 
    1614             :         /* If the maximum is greater than 1 and limited, we have to replicate
    1615             :         in a nested fashion, sticking OP_BRAZERO before each set of brackets.
    1616             :         The first one has to be handled carefully because it's the original
    1617             :         copy, which has to be moved up. The remainder can be handled by code
    1618             :         that is common with the non-zero minimum case below. We just have to
    1619             :         adjust the value of repeat_max, since one less copy is required. */
    1620             : 
    1621             :         else
    1622             :           {
    1623             :           int offset;
    1624           0 :           memmove(previous+4, previous, len);
    1625           0 :           code += 4;
    1626           0 :           *previous++ = OP_BRAZERO + repeat_type;
    1627           0 :           *previous++ = OP_BRA;
    1628             : 
    1629             :           /* We chain together the bracket offset fields that have to be
    1630             :           filled in later when the ends of the brackets are reached. */
    1631             : 
    1632           0 :           offset = (bralink == NULL)? 0 : previous - bralink;
    1633           0 :           bralink = previous;
    1634           0 :           *previous++ = offset >> 8;
    1635           0 :           *previous++ = offset & 255;
    1636             :           }
    1637             : 
    1638           0 :         repeat_max--;
    1639             :         }
    1640             : 
    1641             :       /* If the minimum is greater than zero, replicate the group as many
    1642             :       times as necessary, and adjust the maximum to the number of subsequent
    1643             :       copies that we need. */
    1644             : 
    1645             :       else
    1646             :         {
    1647           0 :         for (i = 1; i < repeat_min; i++)
    1648             :           {
    1649           0 :           memcpy(code, previous, len);
    1650           0 :           code += len;
    1651             :           }
    1652           0 :         if (repeat_max > 0) repeat_max -= repeat_min;
    1653             :         }
    1654             : 
    1655             :       /* This code is common to both the zero and non-zero minimum cases. If
    1656             :       the maximum is limited, it replicates the group in a nested fashion,
    1657             :       remembering the bracket starts on a stack. In the case of a zero minimum,
    1658             :       the first one was set up above. In all cases the repeat_max now specifies
    1659             :       the number of additional copies needed. */
    1660             : 
    1661           0 :       if (repeat_max >= 0)
    1662             :         {
    1663           0 :         for (i = repeat_max - 1; i >= 0; i--)
    1664             :           {
    1665           0 :           *code++ = OP_BRAZERO + repeat_type;
    1666             : 
    1667             :           /* All but the final copy start a new nesting, maintaining the
    1668             :           chain of brackets outstanding. */
    1669             : 
    1670           0 :           if (i != 0)
    1671             :             {
    1672             :             int offset;
    1673           0 :             *code++ = OP_BRA;
    1674           0 :             offset = (bralink == NULL)? 0 : code - bralink;
    1675           0 :             bralink = code;
    1676           0 :             *code++ = offset >> 8;
    1677           0 :             *code++ = offset & 255;
    1678             :             }
    1679             : 
    1680           0 :           memcpy(code, previous, len);
    1681           0 :           code += len;
    1682             :           }
    1683             : 
    1684             :         /* Now chain through the pending brackets, and fill in their length
    1685             :         fields (which are holding the chain links pro tem). */
    1686             : 
    1687           0 :         while (bralink != NULL)
    1688             :           {
    1689             :           int oldlinkoffset;
    1690           0 :           int offset = code - bralink + 1;
    1691           0 :           uschar *bra = code - offset;
    1692           0 :           oldlinkoffset = (bra[1] << 8) + bra[2];
    1693           0 :           bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
    1694           0 :           *code++ = OP_KET;
    1695           0 :           *code++ = bra[1] = offset >> 8;
    1696           0 :           *code++ = bra[2] = (offset & 255);
    1697             :           }
    1698             :         }
    1699             : 
    1700             :       /* If the maximum is unlimited, set a repeater in the final copy. We
    1701             :       can't just offset backwards from the current code point, because we
    1702             :       don't know if there's been an options resetting after the ket. The
    1703             :       correct offset was computed above. */
    1704             : 
    1705           0 :       else code[-ketoffset] = OP_KETRMAX + repeat_type;
    1706             :       }
    1707             : 
    1708             :     /* Else there's some kind of shambles */
    1709             : 
    1710             :     else
    1711             :       {
    1712           0 :       *errorptr = ERR11;
    1713           0 :       goto FAILED;
    1714             :       }
    1715             : 
    1716             :     /* In all cases we no longer have a previous item. */
    1717             : 
    1718           0 :     END_REPEAT:
    1719           0 :     previous = NULL;
    1720           0 :     break;
    1721             : 
    1722             : 
    1723             :     /* Start of nested bracket sub-expression, or comment or lookahead or
    1724             :     lookbehind or option setting or condition. First deal with special things
    1725             :     that can come after a bracket; all are introduced by ?, and the appearance
    1726             :     of any of them means that this is not a referencing group. They were
    1727             :     checked for validity in the first pass over the string, so we don't have to
    1728             :     check for syntax errors here.  */
    1729             : 
    1730           0 :     case '(':
    1731           0 :     newoptions = options;
    1732           0 :     condref = -1;
    1733             : 
    1734           0 :     if (*(++ptr) == '?')
    1735             :       {
    1736             :       int set, unset;
    1737             :       int *optset;
    1738             : 
    1739           0 :       switch (*(++ptr))
    1740             :         {
    1741           0 :         case '#':                 /* Comment; skip to ket */
    1742           0 :         ptr++;
    1743           0 :         while (*ptr != ')') ptr++;
    1744           0 :         continue;
    1745             : 
    1746           0 :         case ':':                 /* Non-extracting bracket */
    1747           0 :         bravalue = OP_BRA;
    1748           0 :         ptr++;
    1749           0 :         break;
    1750             : 
    1751           0 :         case '(':
    1752           0 :         bravalue = OP_COND;       /* Conditional group */
    1753           0 :         if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0)
    1754             :           {
    1755           0 :           condref = *ptr - '0';
    1756           0 :           while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
    1757           0 :           if (condref == 0)
    1758             :             {
    1759           0 :             *errorptr = ERR35;
    1760           0 :             goto FAILED;
    1761             :             }
    1762           0 :           ptr++;
    1763             :           }
    1764           0 :         else ptr--;
    1765           0 :         break;
    1766             : 
    1767           0 :         case '=':                 /* Positive lookahead */
    1768           0 :         bravalue = OP_ASSERT;
    1769           0 :         ptr++;
    1770           0 :         break;
    1771             : 
    1772           0 :         case '!':                 /* Negative lookahead */
    1773           0 :         bravalue = OP_ASSERT_NOT;
    1774           0 :         ptr++;
    1775           0 :         break;
    1776             : 
    1777           0 :         case '<':                 /* Lookbehinds */
    1778           0 :         switch (*(++ptr))
    1779             :           {
    1780           0 :           case '=':               /* Positive lookbehind */
    1781           0 :           bravalue = OP_ASSERTBACK;
    1782           0 :           ptr++;
    1783           0 :           break;
    1784             : 
    1785           0 :           case '!':               /* Negative lookbehind */
    1786           0 :           bravalue = OP_ASSERTBACK_NOT;
    1787           0 :           ptr++;
    1788           0 :           break;
    1789             : 
    1790           0 :           default:                /* Syntax error */
    1791           0 :           *errorptr = ERR24;
    1792           0 :           goto FAILED;
    1793             :           }
    1794           0 :         break;
    1795             : 
    1796           0 :         case '>':                 /* One-time brackets */
    1797           0 :         bravalue = OP_ONCE;
    1798           0 :         ptr++;
    1799           0 :         break;
    1800             : 
    1801           0 :         case 'R':                 /* Pattern recursion */
    1802           0 :         *code++ = OP_RECURSE;
    1803           0 :         ptr++;
    1804           0 :         continue;
    1805             : 
    1806           0 :         default:                  /* Option setting */
    1807           0 :         set = unset = 0;
    1808           0 :         optset = &set;
    1809             : 
    1810           0 :         while (*ptr != ')' && *ptr != ':')
    1811             :           {
    1812           0 :           switch (*ptr++)
    1813             :             {
    1814           0 :             case '-': optset = &unset; break;
    1815             : 
    1816           0 :             case 'i': *optset |= PCRE_CASELESS; break;
    1817           0 :             case 'm': *optset |= PCRE_MULTILINE; break;
    1818           0 :             case 's': *optset |= PCRE_DOTALL; break;
    1819           0 :             case 'x': *optset |= PCRE_EXTENDED; break;
    1820           0 :             case 'U': *optset |= PCRE_UNGREEDY; break;
    1821           0 :             case 'X': *optset |= PCRE_EXTRA; break;
    1822             : 
    1823           0 :             default:
    1824           0 :             *errorptr = ERR12;
    1825           0 :             goto FAILED;
    1826             :             }
    1827             :           }
    1828             : 
    1829             :         /* Set up the changed option bits, but don't change anything yet. */
    1830             : 
    1831           0 :         newoptions = (options | set) & (~unset);
    1832             : 
    1833             :         /* If the options ended with ')' this is not the start of a nested
    1834             :         group with option changes, so the options change at this level. At top
    1835             :         level there is nothing else to be done (the options will in fact have
    1836             :         been set from the start of compiling as a result of the first pass) but
    1837             :         at an inner level we must compile code to change the ims options if
    1838             :         necessary, and pass the new setting back so that it can be put at the
    1839             :         start of any following branches, and when this group ends, a resetting
    1840             :         item can be compiled. */
    1841             : 
    1842           0 :         if (*ptr == ')')
    1843             :           {
    1844           0 :           if ((options & PCRE_INGROUP) != 0 &&
    1845           0 :               (options & PCRE_IMS) != (newoptions & PCRE_IMS))
    1846             :             {
    1847           0 :             *code++ = OP_OPT;
    1848           0 :             *code++ = *optchanged = newoptions & PCRE_IMS;
    1849             :             }
    1850           0 :           options = newoptions;  /* Change options at this level */
    1851           0 :           previous = NULL;       /* This item can't be repeated */
    1852           0 :           continue;              /* It is complete */
    1853             :           }
    1854             : 
    1855             :         /* If the options ended with ':' we are heading into a nested group
    1856             :         with possible change of options. Such groups are non-capturing and are
    1857             :         not assertions of any kind. All we need to do is skip over the ':';
    1858             :         the newoptions value is handled below. */
    1859             : 
    1860           0 :         bravalue = OP_BRA;
    1861           0 :         ptr++;
    1862             :         }
    1863             :       }
    1864             : 
    1865             :     /* Else we have a referencing group; adjust the opcode. */
    1866             : 
    1867             :     else
    1868             :       {
    1869           0 :       if (++(*brackets) > EXTRACT_MAX)
    1870             :         {
    1871           0 :         *errorptr = ERR13;
    1872           0 :         goto FAILED;
    1873             :         }
    1874           0 :       bravalue = OP_BRA + *brackets;
    1875             :       }
    1876             : 
    1877             :     /* Process nested bracketed re. Assertions may not be repeated, but other
    1878             :     kinds can be. We copy code into a non-register variable in order to be able
    1879             :     to pass its address because some compilers complain otherwise. Pass in a
    1880             :     new setting for the ims options if they have changed. */
    1881             : 
    1882           0 :     previous = (bravalue >= OP_ONCE)? code : NULL;
    1883           0 :     *code = bravalue;
    1884           0 :     tempcode = code;
    1885             : 
    1886           0 :     if (!compile_regex(
    1887             :          options | PCRE_INGROUP,       /* Set for all nested groups */
    1888           0 :          ((options & PCRE_IMS) != (newoptions & PCRE_IMS))?
    1889             :            newoptions & PCRE_IMS : -1, /* Pass ims options if changed */
    1890             :          brackets,                     /* Bracket level */
    1891             :          &tempcode,                    /* Where to put code (updated) */
    1892             :          &ptr,                         /* Input pointer (updated) */
    1893             :          errorptr,                     /* Where to put an error message */
    1894             :          (bravalue == OP_ASSERTBACK ||
    1895             :           bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
    1896             :          condref,                      /* Condition reference number */
    1897             :          &subreqchar,                  /* For possible last char */
    1898             :          &subcountlits,                /* For literal count */
    1899             :          cd))                          /* Tables block */
    1900           0 :       goto FAILED;
    1901             : 
    1902             :     /* At the end of compiling, code is still pointing to the start of the
    1903             :     group, while tempcode has been updated to point past the end of the group
    1904             :     and any option resetting that may follow it. The pattern pointer (ptr)
    1905             :     is on the bracket. */
    1906             : 
    1907             :     /* If this is a conditional bracket, check that there are no more than
    1908             :     two branches in the group. */
    1909             : 
    1910           0 :     if (bravalue == OP_COND)
    1911             :       {
    1912           0 :       uschar *tc = code;
    1913           0 :       condcount = 0;
    1914             : 
    1915             :       do {
    1916           0 :          condcount++;
    1917           0 :          tc += (tc[1] << 8) | tc[2];
    1918             :          }
    1919           0 :       while (*tc != OP_KET);
    1920             : 
    1921           0 :       if (condcount > 2)
    1922             :         {
    1923           0 :         *errorptr = ERR27;
    1924           0 :         goto FAILED;
    1925             :         }
    1926             :       }
    1927             : 
    1928             :     /* Handle updating of the required character. If the subpattern didn't
    1929             :     set one, leave it as it was. Otherwise, update it for normal brackets of
    1930             :     all kinds, forward assertions, and conditions with two branches. Don't
    1931             :     update the literal count for forward assertions, however. If the bracket
    1932             :     is followed by a quantifier with zero repeat, we have to back off. Hence
    1933             :     the definition of prevreqchar and subcountlits outside the main loop so
    1934             :     that they can be accessed for the back off. */
    1935             : 
    1936           0 :     if (subreqchar > 0 &&
    1937           0 :          (bravalue >= OP_BRA || bravalue == OP_ONCE || bravalue == OP_ASSERT ||
    1938           0 :          (bravalue == OP_COND && condcount == 2)))
    1939             :       {
    1940           0 :       prevreqchar = *reqchar;
    1941           0 :       *reqchar = subreqchar;
    1942           0 :       if (bravalue != OP_ASSERT) *countlits += subcountlits;
    1943             :       }
    1944             : 
    1945             :     /* Now update the main code pointer to the end of the group. */
    1946             : 
    1947           0 :     code = tempcode;
    1948             : 
    1949             :     /* Error if hit end of pattern */
    1950             : 
    1951           0 :     if (*ptr != ')')
    1952             :       {
    1953           0 :       *errorptr = ERR14;
    1954           0 :       goto FAILED;
    1955             :       }
    1956           0 :     break;
    1957             : 
    1958             :     /* Check \ for being a real metacharacter; if not, fall through and handle
    1959             :     it as a data character at the start of a string. Escape items are checked
    1960             :     for validity in the pre-compiling pass. */
    1961             : 
    1962           0 :     case '\\':
    1963           0 :     tempptr = ptr;
    1964           0 :     c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
    1965             : 
    1966             :     /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
    1967             :     are arranged to be the negation of the corresponding OP_values. For the
    1968             :     back references, the values are ESC_REF plus the reference number. Only
    1969             :     back references and those types that consume a character may be repeated.
    1970             :     We can test for values between ESC_b and ESC_Z for the latter; this may
    1971             :     have to change if any new ones are ever created. */
    1972             : 
    1973           0 :     if (c < 0)
    1974             :       {
    1975           0 :       if (-c >= ESC_REF)
    1976             :         {
    1977           0 :         previous = code;
    1978           0 :         *code++ = OP_REF;
    1979           0 :         *code++ = -c - ESC_REF;
    1980             :         }
    1981             :       else
    1982             :         {
    1983           0 :         previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
    1984           0 :         *code++ = -c;
    1985             :         }
    1986           0 :       continue;
    1987             :       }
    1988             : 
    1989             :     /* Data character: reset and fall through */
    1990             : 
    1991           0 :     ptr = tempptr;
    1992           0 :     c = '\\';
    1993             : 
    1994             :     /* Handle a run of data characters until a metacharacter is encountered.
    1995             :     The first character is guaranteed not to be whitespace or # when the
    1996             :     extended flag is set. */
    1997             : 
    1998           0 :     NORMAL_CHAR:
    1999             :     default:
    2000           0 :     previous = code;
    2001           0 :     *code = OP_CHARS;
    2002           0 :     code += 2;
    2003           0 :     length = 0;
    2004             : 
    2005             :     do
    2006             :       {
    2007           0 :       if ((options & PCRE_EXTENDED) != 0)
    2008             :         {
    2009           0 :         if ((cd->ctypes[c] & ctype_space) != 0) continue;
    2010           0 :         if (c == '#')
    2011             :           {
    2012             :           /* The space before the ; is to avoid a warning on a silly compiler
    2013             :           on the Macintosh. */
    2014           0 :           while ((c = *(++ptr)) != 0 && c != '\n') ;
    2015           0 :           if (c == 0) break;
    2016           0 :           continue;
    2017             :           }
    2018             :         }
    2019             : 
    2020             :       /* Backslash may introduce a data char or a metacharacter. Escaped items
    2021             :       are checked for validity in the pre-compiling pass. Stop the string
    2022             :       before a metaitem. */
    2023             : 
    2024           0 :       if (c == '\\')
    2025             :         {
    2026           0 :         tempptr = ptr;
    2027           0 :         c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
    2028           0 :         if (c < 0) { ptr = tempptr; break; }
    2029             : 
    2030             :         /* If a character is > 127 in UTF-8 mode, we have to turn it into
    2031             :         two or more characters in the UTF-8 encoding. */
    2032             : 
    2033             : #ifdef SUPPORT_UTF8
    2034             :         if (c > 127 && (options & PCRE_UTF8) != 0)
    2035             :           {
    2036             :           uschar buffer[8];
    2037             :           int len = ord2utf8(c, buffer);
    2038             :           for (c = 0; c < len; c++) *code++ = buffer[c];
    2039             :           length += len;
    2040             :           continue;
    2041             :           }
    2042             : #endif
    2043             :         }
    2044             : 
    2045             :       /* Ordinary character or single-char escape */
    2046             : 
    2047           0 :       *code++ = c;
    2048           0 :       length++;
    2049             :       }
    2050             : 
    2051             :     /* This "while" is the end of the "do" above. */
    2052             : 
    2053           0 :     while (length < MAXLIT && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);
    2054             : 
    2055             :     /* Update the last character and the count of literals */
    2056             : 
    2057           0 :     prevreqchar = (length > 1)? code[-2] : *reqchar;
    2058           0 :     *reqchar = code[-1];
    2059           0 :     *countlits += length;
    2060             : 
    2061             :     /* Compute the length and set it in the data vector, and advance to
    2062             :     the next state. */
    2063             : 
    2064           0 :     previous[1] = length;
    2065           0 :     if (length < MAXLIT) ptr--;
    2066           0 :     break;
    2067             :     }
    2068             :   }                   /* end of big loop */
    2069             : 
    2070             : /* Control never reaches here by falling through, only by a goto for all the
    2071             : error states. Pass back the position in the pattern so that it can be displayed
    2072             : to the user for diagnosing the error. */
    2073             : 
    2074           0 : FAILED:
    2075           0 : *ptrptr = ptr;
    2076           0 : return FALSE;
    2077             : }
    2078             : 
    2079             : 
    2080             : 
    2081             : 
    2082             : /*************************************************
    2083             : *     Compile sequence of alternatives           *
    2084             : *************************************************/
    2085             : 
    2086             : /* On entry, ptr is pointing past the bracket character, but on return
    2087             : it points to the closing bracket, or vertical bar, or end of string.
    2088             : The code variable is pointing at the byte into which the BRA operator has been
    2089             : stored. If the ims options are changed at the start (for a (?ims: group) or
    2090             : during any branch, we need to insert an OP_OPT item at the start of every
    2091             : following branch to ensure they get set correctly at run time, and also pass
    2092             : the new options into every subsequent branch compile.
    2093             : 
    2094             : Argument:
    2095             :   options     the option bits
    2096             :   optchanged  new ims options to set as if (?ims) were at the start, or -1
    2097             :                for no change
    2098             :   brackets    -> int containing the number of extracting brackets used
    2099             :   codeptr     -> the address of the current code pointer
    2100             :   ptrptr      -> the address of the current pattern pointer
    2101             :   errorptr    -> pointer to error message
    2102             :   lookbehind  TRUE if this is a lookbehind assertion
    2103             :   condref     >= 0 for OPT_CREF setting at start of conditional group
    2104             :   reqchar     -> place to put the last required character, or a negative number
    2105             :   countlits   -> place to put the shortest literal count of any branch
    2106             :   cd          points to the data block with tables pointers
    2107             : 
    2108             : Returns:      TRUE on success
    2109             : */
    2110             : 
    2111             : static BOOL
    2112           0 : compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,
    2113             :   const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref,
    2114             :   int *reqchar, int *countlits, compile_data *cd)
    2115             : {
    2116           0 : const uschar *ptr = *ptrptr;
    2117           0 : uschar *code = *codeptr;
    2118           0 : uschar *last_branch = code;
    2119           0 : uschar *start_bracket = code;
    2120           0 : uschar *reverse_count = NULL;
    2121           0 : int oldoptions = options & PCRE_IMS;
    2122             : int branchreqchar, branchcountlits;
    2123             : 
    2124           0 : *reqchar = -1;
    2125           0 : *countlits = INT_MAX;
    2126           0 : code += 3;
    2127             : 
    2128             : /* At the start of a reference-based conditional group, insert the reference
    2129             : number as an OP_CREF item. */
    2130             : 
    2131           0 : if (condref >= 0)
    2132             :   {
    2133           0 :   *code++ = OP_CREF;
    2134           0 :   *code++ = condref;
    2135             :   }
    2136             : 
    2137             : /* Loop for each alternative branch */
    2138             : 
    2139             : for (;;)
    2140           0 :   {
    2141             :   int length;
    2142             : 
    2143             :   /* Handle change of options */
    2144             : 
    2145           0 :   if (optchanged >= 0)
    2146             :     {
    2147           0 :     *code++ = OP_OPT;
    2148           0 :     *code++ = optchanged;
    2149           0 :     options = (options & ~PCRE_IMS) | optchanged;
    2150             :     }
    2151             : 
    2152             :   /* Set up dummy OP_REVERSE if lookbehind assertion */
    2153             : 
    2154           0 :   if (lookbehind)
    2155             :     {
    2156           0 :     *code++ = OP_REVERSE;
    2157           0 :     reverse_count = code;
    2158           0 :     *code++ = 0;
    2159           0 :     *code++ = 0;
    2160             :     }
    2161             : 
    2162             :   /* Now compile the branch */
    2163             : 
    2164           0 :   if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged,
    2165             :       &branchreqchar, &branchcountlits, cd))
    2166             :     {
    2167           0 :     *ptrptr = ptr;
    2168           0 :     return FALSE;
    2169             :     }
    2170             : 
    2171             :   /* Fill in the length of the last branch */
    2172             : 
    2173           0 :   length = code - last_branch;
    2174           0 :   last_branch[1] = length >> 8;
    2175           0 :   last_branch[2] = length & 255;
    2176             : 
    2177             :   /* Save the last required character if all branches have the same; a current
    2178             :   value of -1 means unset, while -2 means "previous branch had no last required
    2179             :   char".  */
    2180             : 
    2181           0 :   if (*reqchar != -2)
    2182             :     {
    2183           0 :     if (branchreqchar >= 0)
    2184             :       {
    2185           0 :       if (*reqchar == -1) *reqchar = branchreqchar;
    2186           0 :       else if (*reqchar != branchreqchar) *reqchar = -2;
    2187             :       }
    2188           0 :     else *reqchar = -2;
    2189             :     }
    2190             : 
    2191             :   /* Keep the shortest literal count */
    2192             : 
    2193           0 :   if (branchcountlits < *countlits) *countlits = branchcountlits;
    2194             :   DPRINTF(("literal count = %d min=%d\n", branchcountlits, *countlits));
    2195             : 
    2196             :   /* If lookbehind, check that this branch matches a fixed-length string,
    2197             :   and put the length into the OP_REVERSE item. Temporarily mark the end of
    2198             :   the branch with OP_END. */
    2199             : 
    2200           0 :   if (lookbehind)
    2201             :     {
    2202           0 :     *code = OP_END;
    2203           0 :     length = find_fixedlength(last_branch, options);
    2204             :     DPRINTF(("fixed length = %d\n", length));
    2205           0 :     if (length < 0)
    2206             :       {
    2207           0 :       *errorptr = ERR25;
    2208           0 :       *ptrptr = ptr;
    2209           0 :       return FALSE;
    2210             :       }
    2211           0 :     reverse_count[0] = (length >> 8);
    2212           0 :     reverse_count[1] = length & 255;
    2213             :     }
    2214             : 
    2215             :   /* Reached end of expression, either ')' or end of pattern. Insert a
    2216             :   terminating ket and the length of the whole bracketed item, and return,
    2217             :   leaving the pointer at the terminating char. If any of the ims options
    2218             :   were changed inside the group, compile a resetting op-code following. */
    2219             : 
    2220           0 :   if (*ptr != '|')
    2221             :     {
    2222           0 :     length = code - start_bracket;
    2223           0 :     *code++ = OP_KET;
    2224           0 :     *code++ = length >> 8;
    2225           0 :     *code++ = length & 255;
    2226           0 :     if (optchanged >= 0)
    2227             :       {
    2228           0 :       *code++ = OP_OPT;
    2229           0 :       *code++ = oldoptions;
    2230             :       }
    2231           0 :     *codeptr = code;
    2232           0 :     *ptrptr = ptr;
    2233           0 :     return TRUE;
    2234             :     }
    2235             : 
    2236             :   /* Another branch follows; insert an "or" node and advance the pointer. */
    2237             : 
    2238           0 :   *code = OP_ALT;
    2239           0 :   last_branch = code;
    2240           0 :   code += 3;
    2241           0 :   ptr++;
    2242             :   }
    2243             : /* Control never reaches here */
    2244             : }
    2245             : 
    2246             : 
    2247             : 
    2248             : 
    2249             : /*************************************************
    2250             : *      Find first significant op code            *
    2251             : *************************************************/
    2252             : 
    2253             : /* This is called by several functions that scan a compiled expression looking
    2254             : for a fixed first character, or an anchoring op code etc. It skips over things
    2255             : that do not influence this. For one application, a change of caseless option is
    2256             : important.
    2257             : 
    2258             : Arguments:
    2259             :   code       pointer to the start of the group
    2260             :   options    pointer to external options
    2261             :   optbit     the option bit whose changing is significant, or
    2262             :              zero if none are
    2263             :   optstop    TRUE to return on option change, otherwise change the options
    2264             :                value and continue
    2265             : 
    2266             : Returns:     pointer to the first significant opcode
    2267             : */
    2268             : 
    2269             : static const uschar*
    2270           0 : first_significant_code(const uschar *code, int *options, int optbit,
    2271             :   BOOL optstop)
    2272             : {
    2273             : for (;;)
    2274             :   {
    2275           0 :   switch ((int)*code)
    2276             :     {
    2277           0 :     case OP_OPT:
    2278           0 :     if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))
    2279             :       {
    2280           0 :       if (optstop) return code;
    2281           0 :       *options = (int)code[1];
    2282             :       }
    2283           0 :     code += 2;
    2284           0 :     break;
    2285             : 
    2286           0 :     case OP_CREF:
    2287           0 :     code += 2;
    2288           0 :     break;
    2289             : 
    2290           0 :     case OP_WORD_BOUNDARY:
    2291             :     case OP_NOT_WORD_BOUNDARY:
    2292           0 :     code++;
    2293           0 :     break;
    2294             : 
    2295           0 :     case OP_ASSERT_NOT:
    2296             :     case OP_ASSERTBACK:
    2297             :     case OP_ASSERTBACK_NOT:
    2298           0 :     do code += (code[1] << 8) + code[2]; while (*code == OP_ALT);
    2299           0 :     code += 3;
    2300           0 :     break;
    2301             : 
    2302           0 :     default:
    2303           0 :     return code;
    2304             :     }
    2305             :   }
    2306             : /* Control never reaches here */
    2307             : }
    2308             : 
    2309             : 
    2310             : 
    2311             : 
    2312             : /*************************************************
    2313             : *          Check for anchored expression         *
    2314             : *************************************************/
    2315             : 
    2316             : /* Try to find out if this is an anchored regular expression. Consider each
    2317             : alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket
    2318             : all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then
    2319             : it's anchored. However, if this is a multiline pattern, then only OP_SOD
    2320             : counts, since OP_CIRC can match in the middle.
    2321             : 
    2322             : A branch is also implicitly anchored if it starts with .* and DOTALL is set,
    2323             : because that will try the rest of the pattern at all possible matching points,
    2324             : so there is no point trying them again.
    2325             : 
    2326             : Arguments:
    2327             :   code       points to start of expression (the bracket)
    2328             :   options    points to the options setting
    2329             : 
    2330             : Returns:     TRUE or FALSE
    2331             : */
    2332             : 
    2333             : static BOOL
    2334           0 : is_anchored(register const uschar *code, int *options)
    2335             : {
    2336             : do {
    2337           0 :    const uschar *scode = first_significant_code(code + 3, options,
    2338             :      PCRE_MULTILINE, FALSE);
    2339           0 :    register int op = *scode;
    2340           0 :    if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
    2341           0 :      { if (!is_anchored(scode, options)) return FALSE; }
    2342           0 :    else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) &&
    2343           0 :             (*options & PCRE_DOTALL) != 0)
    2344           0 :      { if (scode[1] != OP_ANY) return FALSE; }
    2345           0 :    else if (op != OP_SOD &&
    2346           0 :            ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
    2347           0 :      return FALSE;
    2348           0 :    code += (code[1] << 8) + code[2];
    2349             :    }
    2350           0 : while (*code == OP_ALT);
    2351           0 : return TRUE;
    2352             : }
    2353             : 
    2354             : 
    2355             : 
    2356             : /*************************************************
    2357             : *         Check for starting with ^ or .*        *
    2358             : *************************************************/
    2359             : 
    2360             : /* This is called to find out if every branch starts with ^ or .* so that
    2361             : "first char" processing can be done to speed things up in multiline
    2362             : matching and for non-DOTALL patterns that start with .* (which must start at
    2363             : the beginning or after \n).
    2364             : 
    2365             : Argument:  points to start of expression (the bracket)
    2366             : Returns:   TRUE or FALSE
    2367             : */
    2368             : 
    2369             : static BOOL
    2370           0 : is_startline(const uschar *code)
    2371             : {
    2372             : do {
    2373           0 :    const uschar *scode = first_significant_code(code + 3, NULL, 0, FALSE);
    2374           0 :    register int op = *scode;
    2375           0 :    if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
    2376           0 :      { if (!is_startline(scode)) return FALSE; }
    2377           0 :    else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
    2378           0 :      { if (scode[1] != OP_ANY) return FALSE; }
    2379           0 :    else if (op != OP_CIRC) return FALSE;
    2380           0 :    code += (code[1] << 8) + code[2];
    2381             :    }
    2382           0 : while (*code == OP_ALT);
    2383           0 : return TRUE;
    2384             : }
    2385             : 
    2386             : 
    2387             : 
    2388             : /*************************************************
    2389             : *          Check for fixed first char            *
    2390             : *************************************************/
    2391             : 
    2392             : /* Try to find out if there is a fixed first character. This is called for
    2393             : unanchored expressions, as it speeds up their processing quite considerably.
    2394             : Consider each alternative branch. If they all start with the same char, or with
    2395             : a bracket all of whose alternatives start with the same char (recurse ad lib),
    2396             : then we return that char, otherwise -1.
    2397             : 
    2398             : Arguments:
    2399             :   code       points to start of expression (the bracket)
    2400             :   options    pointer to the options (used to check casing changes)
    2401             : 
    2402             : Returns:     -1 or the fixed first char
    2403             : */
    2404             : 
    2405             : static int
    2406           0 : find_firstchar(const uschar *code, int *options)
    2407             : {
    2408           0 : register int c = -1;
    2409             : do {
    2410             :    int d;
    2411           0 :    const uschar *scode = first_significant_code(code + 3, options,
    2412             :      PCRE_CASELESS, TRUE);
    2413           0 :    register int op = *scode;
    2414             : 
    2415           0 :    if (op >= OP_BRA) op = OP_BRA;
    2416             : 
    2417           0 :    switch(op)
    2418             :      {
    2419           0 :      default:
    2420           0 :      return -1;
    2421             : 
    2422           0 :      case OP_BRA:
    2423             :      case OP_ASSERT:
    2424             :      case OP_ONCE:
    2425             :      case OP_COND:
    2426           0 :      if ((d = find_firstchar(scode, options)) < 0) return -1;
    2427           0 :      if (c < 0) c = d; else if (c != d) return -1;
    2428           0 :      break;
    2429             : 
    2430           0 :      case OP_EXACT:       /* Fall through */
    2431           0 :      scode++;
    2432             : 
    2433           0 :      case OP_CHARS:       /* Fall through */
    2434           0 :      scode++;
    2435             : 
    2436           0 :      case OP_PLUS:
    2437             :      case OP_MINPLUS:
    2438           0 :      if (c < 0) c = scode[1]; else if (c != scode[1]) return -1;
    2439           0 :      break;
    2440             :      }
    2441             : 
    2442           0 :    code += (code[1] << 8) + code[2];
    2443             :    }
    2444           0 : while (*code == OP_ALT);
    2445           0 : return c;
    2446             : }
    2447             : 
    2448             : 
    2449             : 
    2450             : 
    2451             : 
    2452             : /*************************************************
    2453             : *        Compile a Regular Expression            *
    2454             : *************************************************/
    2455             : 
    2456             : /* This function takes a string and returns a pointer to a block of store
    2457             : holding a compiled version of the expression.
    2458             : 
    2459             : Arguments:
    2460             :   pattern      the regular expression
    2461             :   options      various option bits
    2462             :   errorptr     pointer to pointer to error text
    2463             :   erroroffset  ptr offset in pattern where error was detected
    2464             :   tables       pointer to character tables or NULL
    2465             : 
    2466             : Returns:       pointer to compiled data block, or NULL on error,
    2467             :                with errorptr and erroroffset set
    2468             : */
    2469             : 
    2470             : pcre *
    2471           0 : pcre_compile(const char *pattern, int options, const char **errorptr,
    2472             :   int *erroroffset, const unsigned char *tables)
    2473             : {
    2474             : real_pcre *re;
    2475           0 : int length = 3;      /* For initial BRA plus length */
    2476             : int runlength;
    2477             : int c, reqchar, countlits;
    2478           0 : int bracount = 0;
    2479           0 : int top_backref = 0;
    2480           0 : int branch_extra = 0;
    2481             : int branch_newextra;
    2482           0 : unsigned int brastackptr = 0;
    2483             : size_t size;
    2484             : uschar *code;
    2485             : const uschar *ptr;
    2486             : compile_data compile_block;
    2487             : int brastack[BRASTACK_SIZE];
    2488             : uschar bralenstack[BRASTACK_SIZE];
    2489           0 : const size_t pattern_length = strlen(pattern);
    2490             : 
    2491             : #ifdef DEBUG
    2492             : uschar *code_base, *code_end;
    2493             : #endif
    2494             : 
    2495             : /* Can't support UTF8 unless PCRE has been compiled to include the code. */
    2496             : 
    2497             : #ifndef SUPPORT_UTF8
    2498           0 : if ((options & PCRE_UTF8) != 0)
    2499             :   {
    2500           0 :   *errorptr = ERR32;
    2501           0 :   return NULL;
    2502             :   }
    2503             : #endif
    2504             : 
    2505             : /* We can't pass back an error message if errorptr is NULL; I guess the best we
    2506             : can do is just return NULL. */
    2507             : 
    2508           0 : if (errorptr == NULL) return NULL;
    2509           0 : *errorptr = NULL;
    2510             : 
    2511             : /* However, we can give a message for this error */
    2512             : 
    2513           0 : if (erroroffset == NULL)
    2514             :   {
    2515           0 :   *errorptr = ERR16;
    2516           0 :   return NULL;
    2517             :   }
    2518           0 : *erroroffset = 0;
    2519             : 
    2520           0 : if ((options & ~PUBLIC_OPTIONS) != 0)
    2521             :   {
    2522           0 :   *errorptr = ERR17;
    2523           0 :   return NULL;
    2524             :   }
    2525             : 
    2526             : /* Set up pointers to the individual character tables */
    2527             : 
    2528           0 : if (tables == NULL) tables = pcre_default_tables;
    2529           0 : compile_block.lcc = tables + lcc_offset;
    2530           0 : compile_block.fcc = tables + fcc_offset;
    2531           0 : compile_block.cbits = tables + cbits_offset;
    2532           0 : compile_block.ctypes = tables + ctypes_offset;
    2533             : 
    2534             : /* Reflect pattern for debugging output */
    2535             : 
    2536             : DPRINTF(("------------------------------------------------------------------\n"));
    2537             : DPRINTF(("%s\n", pattern));
    2538             : 
    2539             : /* The first thing to do is to make a pass over the pattern to compute the
    2540             : amount of store required to hold the compiled code. This does not have to be
    2541             : perfect as long as errors are overestimates. At the same time we can detect any
    2542             : internal flag settings. Make an attempt to correct for any counted white space
    2543             : if an "extended" flag setting appears late in the pattern. We can't be so
    2544             : clever for #-comments. */
    2545             : 
    2546           0 : ptr = (const uschar *)(pattern - 1);
    2547           0 : while ((c = *(++ptr)) != 0)
    2548             :   {
    2549             :   int min, max;
    2550             :   int class_charcount;
    2551             : 
    2552           0 :   if ((options & PCRE_EXTENDED) != 0)
    2553             :     {
    2554           0 :     if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
    2555           0 :     if (c == '#')
    2556             :       {
    2557             :       /* The space before the ; is to avoid a warning on a silly compiler
    2558             :       on the Macintosh. */
    2559           0 :       while ((c = *(++ptr)) != 0 && c != '\n') ;
    2560           0 :       continue;
    2561             :       }
    2562             :     }
    2563             : 
    2564           0 :   switch(c)
    2565             :     {
    2566             :     /* A backslashed item may be an escaped "normal" character or a
    2567             :     character type. For a "normal" character, put the pointers and
    2568             :     character back so that tests for whitespace etc. in the input
    2569             :     are done correctly. */
    2570             : 
    2571           0 :     case '\\':
    2572             :       {
    2573           0 :       const uschar *save_ptr = ptr;
    2574           0 :       c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block);
    2575           0 :       if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
    2576           0 :       if (c >= 0)
    2577             :         {
    2578           0 :         ptr = save_ptr;
    2579           0 :         c = '\\';
    2580           0 :         goto NORMAL_CHAR;
    2581             :         }
    2582             :       }
    2583           0 :     length++;
    2584             : 
    2585             :     /* A back reference needs an additional char, plus either one or 5
    2586             :     bytes for a repeat. We also need to keep the value of the highest
    2587             :     back reference. */
    2588             : 
    2589           0 :     if (c <= -ESC_REF)
    2590             :       {
    2591           0 :       int refnum = -c - ESC_REF;
    2592           0 :       if (refnum > top_backref) top_backref = refnum;
    2593           0 :       length++;   /* For single back reference */
    2594           0 :       if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
    2595             :         {
    2596           0 :         ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
    2597           0 :         if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
    2598           0 :         if ((min == 0 && (max == 1 || max == -1)) ||
    2599           0 :           (min == 1 && max == -1))
    2600           0 :             length++;
    2601           0 :         else length += 5;
    2602           0 :         if (ptr[1] == '?') ptr++;
    2603             :         }
    2604             :       }
    2605           0 :     continue;
    2606             : 
    2607           0 :     case '^':
    2608             :     case '.':
    2609             :     case '$':
    2610             :     case '*':     /* These repeats won't be after brackets; */
    2611             :     case '+':     /* those are handled separately */
    2612             :     case '?':
    2613           0 :     length++;
    2614           0 :     continue;
    2615             : 
    2616             :     /* This covers the cases of repeats after a single char, metachar, class,
    2617             :     or back reference. */
    2618             : 
    2619           0 :     case '{':
    2620           0 :     if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR;
    2621           0 :     ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block);
    2622           0 :     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
    2623           0 :     if ((min == 0 && (max == 1 || max == -1)) ||
    2624           0 :       (min == 1 && max == -1))
    2625           0 :         length++;
    2626             :     else
    2627             :       {
    2628           0 :       length--;   /* Uncount the original char or metachar */
    2629           0 :       if (min == 1) length++; else if (min > 0) length += 4;
    2630           0 :       if (max > 0) length += 4; else length += 2;
    2631             :       }
    2632           0 :     if (ptr[1] == '?') ptr++;
    2633           0 :     continue;
    2634             : 
    2635             :     /* An alternation contains an offset to the next branch or ket. If any ims
    2636             :     options changed in the previous branch(es), and/or if we are in a
    2637             :     lookbehind assertion, extra space will be needed at the start of the
    2638             :     branch. This is handled by branch_extra. */
    2639             : 
    2640           0 :     case '|':
    2641           0 :     length += 3 + branch_extra;
    2642           0 :     continue;
    2643             : 
    2644             :     /* A character class uses 33 characters. Don't worry about character types
    2645             :     that aren't allowed in classes - they'll get picked up during the compile.
    2646             :     A character class that contains only one character uses 2 or 3 bytes,
    2647             :     depending on whether it is negated or not. Notice this where we can. */
    2648             : 
    2649           0 :     case '[':
    2650           0 :     class_charcount = 0;
    2651           0 :     if (*(++ptr) == '^') ptr++;
    2652             :     do
    2653             :       {
    2654           0 :       if (*ptr == '\\')
    2655             :         {
    2656           0 :         int ch = check_escape(&ptr, errorptr, bracount, options, TRUE,
    2657             :           &compile_block);
    2658           0 :         if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
    2659           0 :         if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
    2660             :         }
    2661           0 :       else class_charcount++;
    2662           0 :       ptr++;
    2663           0 :       if (*ptr == 0)
    2664             :         {
    2665           0 :         *errorptr = ERR6;
    2666           0 :         goto PCRE_ERROR_RETURN;
    2667             :         }
    2668             :       }
    2669           0 :     while (*ptr != ']');
    2670             : 
    2671             :     /* Repeats for negated single chars are handled by the general code */
    2672             : 
    2673           0 :     if (class_charcount == 1) length += 3; else
    2674             :       {
    2675           0 :       length += 33;
    2676             : 
    2677             :       /* A repeat needs either 1 or 5 bytes. */
    2678             : 
    2679           0 :       if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
    2680             :         {
    2681           0 :         ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
    2682           0 :         if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
    2683           0 :         if ((min == 0 && (max == 1 || max == -1)) ||
    2684           0 :           (min == 1 && max == -1))
    2685           0 :             length++;
    2686           0 :         else length += 5;
    2687           0 :         if (ptr[1] == '?') ptr++;
    2688             :         }
    2689             :       }
    2690           0 :     continue;
    2691             : 
    2692             :     /* Brackets may be genuine groups or special things */
    2693             : 
    2694           0 :     case '(':
    2695           0 :     branch_newextra = 0;
    2696             : 
    2697             :     /* Handle special forms of bracket, which all start (? */
    2698             : 
    2699           0 :     if (ptr[1] == '?')
    2700             :       {
    2701             :       int set, unset;
    2702             :       int *optset;
    2703             : 
    2704           0 :       switch (c = ptr[2])
    2705             :         {
    2706             :         /* Skip over comments entirely */
    2707           0 :         case '#':
    2708           0 :         ptr += 3;
    2709           0 :         while (*ptr != 0 && *ptr != ')') ptr++;
    2710           0 :         if (*ptr == 0)
    2711             :           {
    2712           0 :           *errorptr = ERR18;
    2713           0 :           goto PCRE_ERROR_RETURN;
    2714             :           }
    2715           0 :         continue;
    2716             : 
    2717             :         /* Non-referencing groups and lookaheads just move the pointer on, and
    2718             :         then behave like a non-special bracket, except that they don't increment
    2719             :         the count of extracting brackets. Ditto for the "once only" bracket,
    2720             :         which is in Perl from version 5.005. */
    2721             : 
    2722           0 :         case ':':
    2723             :         case '=':
    2724             :         case '!':
    2725             :         case '>':
    2726           0 :         ptr += 2;
    2727           0 :         break;
    2728             : 
    2729             :         /* A recursive call to the regex is an extension, to provide the
    2730             :         facility which can be obtained by $(?p{perl-code}) in Perl 5.6. */
    2731             : 
    2732           0 :         case 'R':
    2733           0 :         if (ptr[3] != ')')
    2734             :           {
    2735           0 :           *errorptr = ERR29;
    2736           0 :           goto PCRE_ERROR_RETURN;
    2737             :           }
    2738           0 :         ptr += 3;
    2739           0 :         length += 1;
    2740           0 :         break;
    2741             : 
    2742             :         /* Lookbehinds are in Perl from version 5.005 */
    2743             : 
    2744           0 :         case '<':
    2745           0 :         if (ptr[3] == '=' || ptr[3] == '!')
    2746             :           {
    2747           0 :           ptr += 3;
    2748           0 :           branch_newextra = 3;
    2749           0 :           length += 3;         /* For the first branch */
    2750           0 :           break;
    2751             :           }
    2752           0 :         *errorptr = ERR24;
    2753           0 :         goto PCRE_ERROR_RETURN;
    2754             : 
    2755             :         /* Conditionals are in Perl from version 5.005. The bracket must either
    2756             :         be followed by a number (for bracket reference) or by an assertion
    2757             :         group. */
    2758             : 
    2759           0 :         case '(':
    2760           0 :         if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0)
    2761             :           {
    2762           0 :           ptr += 4;
    2763           0 :           length += 2;
    2764           0 :           while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++;
    2765           0 :           if (*ptr != ')')
    2766             :             {
    2767           0 :             *errorptr = ERR26;
    2768           0 :             goto PCRE_ERROR_RETURN;
    2769             :             }
    2770             :           }
    2771             :         else   /* An assertion must follow */
    2772             :           {
    2773           0 :           ptr++;   /* Can treat like ':' as far as spacing is concerned */
    2774           0 :           if (ptr[2] != '?' ||
    2775           0 :              (ptr[3] != '=' && ptr[3] != '!' && ptr[3] != '<') )
    2776             :             {
    2777           0 :             ptr += 2;    /* To get right offset in message */
    2778           0 :             *errorptr = ERR28;
    2779           0 :             goto PCRE_ERROR_RETURN;
    2780             :             }
    2781             :           }
    2782           0 :         break;
    2783             : 
    2784             :         /* Else loop checking valid options until ) is met. Anything else is an
    2785             :         error. If we are without any brackets, i.e. at top level, the settings
    2786             :         act as if specified in the options, so massage the options immediately.
    2787             :         This is for backward compatibility with Perl 5.004. */
    2788             : 
    2789           0 :         default:
    2790           0 :         set = unset = 0;
    2791           0 :         optset = &set;
    2792           0 :         ptr += 2;
    2793             : 
    2794           0 :         for (;; ptr++)
    2795             :           {
    2796           0 :           c = *ptr;
    2797           0 :           switch (c)
    2798             :             {
    2799           0 :             case 'i':
    2800           0 :             *optset |= PCRE_CASELESS;
    2801           0 :             continue;
    2802             : 
    2803           0 :             case 'm':
    2804           0 :             *optset |= PCRE_MULTILINE;
    2805           0 :             continue;
    2806             : 
    2807           0 :             case 's':
    2808           0 :             *optset |= PCRE_DOTALL;
    2809           0 :             continue;
    2810             : 
    2811           0 :             case 'x':
    2812           0 :             *optset |= PCRE_EXTENDED;
    2813           0 :             continue;
    2814             : 
    2815           0 :             case 'X':
    2816           0 :             *optset |= PCRE_EXTRA;
    2817           0 :             continue;
    2818             : 
    2819           0 :             case 'U':
    2820           0 :             *optset |= PCRE_UNGREEDY;
    2821           0 :             continue;
    2822             : 
    2823           0 :             case '-':
    2824           0 :             optset = &unset;
    2825           0 :             continue;
    2826             : 
    2827             :             /* A termination by ')' indicates an options-setting-only item;
    2828             :             this is global at top level; otherwise nothing is done here and
    2829             :             it is handled during the compiling process on a per-bracket-group
    2830             :             basis. */
    2831             : 
    2832           0 :             case ')':
    2833           0 :             if (brastackptr == 0)
    2834             :               {
    2835           0 :               options = (options | set) & (~unset);
    2836           0 :               set = unset = 0;     /* To save length */
    2837             :               }
    2838             :             /* Fall through */
    2839             : 
    2840             :             /* A termination by ':' indicates the start of a nested group with
    2841             :             the given options set. This is again handled at compile time, but
    2842             :             we must allow for compiled space if any of the ims options are
    2843             :             set. We also have to allow for resetting space at the end of
    2844             :             the group, which is why 4 is added to the length and not just 2.
    2845             :             If there are several changes of options within the same group, this
    2846             :             will lead to an over-estimate on the length, but this shouldn't
    2847             :             matter very much. We also have to allow for resetting options at
    2848             :             the start of any alternations, which we do by setting
    2849             :             branch_newextra to 2. Finally, we record whether the case-dependent
    2850             :             flag ever changes within the regex. This is used by the "required
    2851             :             character" code. */
    2852             : 
    2853             :             case ':':
    2854           0 :             if (((set|unset) & PCRE_IMS) != 0)
    2855             :               {
    2856           0 :               length += 4;
    2857           0 :               branch_newextra = 2;
    2858           0 :               if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED;
    2859             :               }
    2860           0 :             goto END_OPTIONS;
    2861             : 
    2862             :             /* Unrecognized option character */
    2863             : 
    2864           0 :             default:
    2865           0 :             *errorptr = ERR12;
    2866           0 :             goto PCRE_ERROR_RETURN;
    2867             :             }
    2868             :           }
    2869             : 
    2870             :         /* If we hit a closing bracket, that's it - this is a freestanding
    2871             :         option-setting. We need to ensure that branch_extra is updated if
    2872             :         necessary. The only values branch_newextra can have here are 0 or 2.
    2873             :         If the value is 2, then branch_extra must either be 2 or 5, depending
    2874             :         on whether this is a lookbehind group or not. */
    2875             : 
    2876           0 :         END_OPTIONS:
    2877           0 :         if (c == ')')
    2878             :           {
    2879           0 :           if (branch_newextra == 2 && (branch_extra == 0 || branch_extra == 3))
    2880           0 :             branch_extra += branch_newextra;
    2881           0 :           continue;
    2882             :           }
    2883             : 
    2884             :         /* If options were terminated by ':' control comes here. Fall through
    2885             :         to handle the group below. */
    2886             :         }
    2887             :       }
    2888             : 
    2889             :     /* Extracting brackets must be counted so we can process escapes in a
    2890             :     Perlish way. */
    2891             : 
    2892           0 :     else bracount++;
    2893             : 
    2894             :     /* Non-special forms of bracket. Save length for computing whole length
    2895             :     at end if there's a repeat that requires duplication of the group. Also
    2896             :     save the current value of branch_extra, and start the new group with
    2897             :     the new value. If non-zero, this will either be 2 for a (?imsx: group, or 3
    2898             :     for a lookbehind assertion. */
    2899             : 
    2900           0 :     if (brastackptr >= sizeof(brastack)/sizeof(int))
    2901             :       {
    2902           0 :       *errorptr = ERR19;
    2903           0 :       goto PCRE_ERROR_RETURN;
    2904             :       }
    2905             : 
    2906           0 :     bralenstack[brastackptr] = branch_extra;
    2907           0 :     branch_extra = branch_newextra;
    2908             : 
    2909           0 :     brastack[brastackptr++] = length;
    2910           0 :     length += 3;
    2911           0 :     continue;
    2912             : 
    2913             :     /* Handle ket. Look for subsequent max/min; for certain sets of values we
    2914             :     have to replicate this bracket up to that many times. If brastackptr is
    2915             :     0 this is an unmatched bracket which will generate an error, but take care
    2916             :     not to try to access brastack[-1] when computing the length and restoring
    2917             :     the branch_extra value. */
    2918             : 
    2919           0 :     case ')':
    2920           0 :     length += 3;
    2921             :       {
    2922           0 :       int minval = 1;
    2923           0 :       int maxval = 1;
    2924             :       int duplength;
    2925             : 
    2926           0 :       if (brastackptr > 0)
    2927             :         {
    2928           0 :         duplength = length - brastack[--brastackptr];
    2929           0 :         branch_extra = bralenstack[brastackptr];
    2930             :         }
    2931           0 :       else duplength = 0;
    2932             : 
    2933             :       /* Leave ptr at the final char; for read_repeat_counts this happens
    2934             :       automatically; for the others we need an increment. */
    2935             : 
    2936           0 :       if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block))
    2937             :         {
    2938           0 :         ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr,
    2939             :           &compile_block);
    2940           0 :         if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
    2941             :         }
    2942           0 :       else if (c == '*') { minval = 0; maxval = -1; ptr++; }
    2943           0 :       else if (c == '+') { maxval = -1; ptr++; }
    2944           0 :       else if (c == '?') { minval = 0; ptr++; }
    2945             : 
    2946             :       /* If the minimum is zero, we have to allow for an OP_BRAZERO before the
    2947             :       group, and if the maximum is greater than zero, we have to replicate
    2948             :       maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting
    2949             :       bracket set - hence the 7. */
    2950             : 
    2951           0 :       if (minval == 0)
    2952             :         {
    2953           0 :         length++;
    2954           0 :         if (maxval > 0) length += (maxval - 1) * (duplength + 7);
    2955             :         }
    2956             : 
    2957             :       /* When the minimum is greater than zero, we have to replicate up to
    2958             :       minval-1 times, with no additions required in the copies. Then, if
    2959             :       there is a limited maximum we have to replicate up to maxval-1 times
    2960             :       allowing for a BRAZERO item before each optional copy and nesting
    2961             :       brackets for all but one of the optional copies. */
    2962             : 
    2963             :       else
    2964             :         {
    2965           0 :         length += (minval - 1) * duplength;
    2966           0 :         if (maxval > minval)   /* Need this test as maxval=-1 means no limit */
    2967           0 :           length += (maxval - minval) * (duplength + 7) - 6;
    2968             :         }
    2969             :       }
    2970           0 :     continue;
    2971             : 
    2972             :     /* Non-special character. For a run of such characters the length required
    2973             :     is the number of characters + 2, except that the maximum run length is 255.
    2974             :     We won't get a skipped space or a non-data escape or the start of a #
    2975             :     comment as the first character, so the length can't be zero. */
    2976             : 
    2977           0 :     NORMAL_CHAR:
    2978             :     default:
    2979           0 :     length += 2;
    2980           0 :     runlength = 0;
    2981             :     do
    2982             :       {
    2983           0 :       if ((options & PCRE_EXTENDED) != 0)
    2984             :         {
    2985           0 :         if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
    2986           0 :         if (c == '#')
    2987             :           {
    2988             :           /* The space before the ; is to avoid a warning on a silly compiler
    2989             :           on the Macintosh. */
    2990           0 :           while ((c = *(++ptr)) != 0 && c != '\n') ;
    2991           0 :           continue;
    2992             :           }
    2993             :         }
    2994             : 
    2995             :       /* Backslash may introduce a data char or a metacharacter; stop the
    2996             :       string before the latter. */
    2997             : 
    2998           0 :       if (c == '\\')
    2999             :         {
    3000           0 :         const uschar *saveptr = ptr;
    3001           0 :         c = check_escape(&ptr, errorptr, bracount, options, FALSE,
    3002             :           &compile_block);
    3003           0 :         if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
    3004           0 :         if (c < 0) { ptr = saveptr; break; }
    3005             : 
    3006             : #ifdef SUPPORT_UTF8
    3007             :         if (c > 127 && (options & PCRE_UTF8) != 0)
    3008             :           {
    3009             :           int i;
    3010             :           for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
    3011             :             if (c <= utf8_table1[i]) break;
    3012             :           runlength += i;
    3013             :           }
    3014             : #endif
    3015             :         }
    3016             : 
    3017             :       /* Ordinary character or single-char escape */
    3018             : 
    3019           0 :       runlength++;
    3020             : 
    3021           0 :       if ((const char *)ptr > pattern + pattern_length)
    3022             :         {
    3023           0 :         *errorptr = "internal error";
    3024           0 :         goto PCRE_ERROR_RETURN;
    3025             :         }
    3026             :       }
    3027             : 
    3028             :     /* This "while" is the end of the "do" above. */
    3029             : 
    3030           0 :     while (runlength < MAXLIT &&
    3031           0 :       (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0);
    3032             : 
    3033           0 :     ptr--;
    3034           0 :     length += runlength;
    3035           0 :     continue;
    3036             :     }
    3037             :   }
    3038             : 
    3039           0 : length += 4;    /* For final KET and END */
    3040             : 
    3041           0 : if (length > 65539)
    3042             :   {
    3043           0 :   *errorptr = ERR20;
    3044           0 :   return NULL;
    3045             :   }
    3046             : 
    3047             : /* Compute the size of data block needed and get it, either from malloc or
    3048             : externally provided function. We specify "code[0]" in the offsetof() expression
    3049             : rather than just "code", because it has been reported that one broken compiler
    3050             : fails on "code" because it is also an independent variable. It should make no
    3051             : difference to the value of the offsetof(). */
    3052             : 
    3053           0 : size = length + offsetof(real_pcre, code[0]);
    3054           0 : re = (real_pcre *)(pcre_malloc)(size);
    3055             : 
    3056           0 : if (re == NULL)
    3057             :   {
    3058           0 :   *errorptr = ERR21;
    3059           0 :   return NULL;
    3060             :   }
    3061             : 
    3062             : /* Put in the magic number, and save the size, options, and table pointer */
    3063             : 
    3064           0 : re->magic_number = MAGIC_NUMBER;
    3065           0 : re->size = size;
    3066           0 : re->options = options;
    3067           0 : re->tables = tables;
    3068             : 
    3069             : /* Set up a starting, non-extracting bracket, then compile the expression. On
    3070             : error, *errorptr will be set non-NULL, so we don't need to look at the result
    3071             : of the function here. */
    3072             : 
    3073           0 : ptr = (const uschar *)pattern;
    3074           0 : code = re->code;
    3075           0 : *code = OP_BRA;
    3076           0 : bracount = 0;
    3077           0 : (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1,
    3078             :   &reqchar, &countlits, &compile_block);
    3079           0 : re->top_bracket = bracount;
    3080           0 : re->top_backref = top_backref;
    3081             : 
    3082             : /* If not reached end of pattern on success, there's an excess bracket. */
    3083             : 
    3084           0 : if (*errorptr == NULL && *ptr != 0) *errorptr = ERR22;
    3085             : 
    3086             : /* Fill in the terminating state and check for disastrous overflow, but
    3087             : if debugging, leave the test till after things are printed out. */
    3088             : 
    3089           0 : *code++ = OP_END;
    3090             : 
    3091             : #ifndef DEBUG
    3092           0 : if (code - re->code > length) *errorptr = ERR23;
    3093             : #endif
    3094             : 
    3095             : /* Give an error if there's a back reference to a non-existent capturing
    3096             : subpattern. */
    3097             : 
    3098           0 : if (top_backref > re->top_bracket) *errorptr = ERR15;
    3099             : 
    3100             : /* Failed to compile */
    3101             : 
    3102           0 : if (*errorptr != NULL)
    3103             :   {
    3104           0 :   (pcre_free)(re);
    3105           0 :   PCRE_ERROR_RETURN:
    3106           0 :   *erroroffset = ptr - (const uschar *)pattern;
    3107           0 :   return NULL;
    3108             :   }
    3109             : 
    3110             : /* If the anchored option was not passed, set flag if we can determine that the
    3111             : pattern is anchored by virtue of ^ characters or \A or anything else (such as
    3112             : starting with .* when DOTALL is set).
    3113             : 
    3114             : Otherwise, see if we can determine what the first character has to be, because
    3115             : that speeds up unanchored matches no end. If not, see if we can set the
    3116             : PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
    3117             : start with ^, and also when all branches start with .* for non-DOTALL matches.
    3118             : */
    3119             : 
    3120           0 : if ((options & PCRE_ANCHORED) == 0)
    3121             :   {
    3122           0 :   int temp_options = options;
    3123           0 :   if (is_anchored(re->code, &temp_options))
    3124           0 :     re->options |= PCRE_ANCHORED;
    3125             :   else
    3126             :     {
    3127           0 :     int ch = find_firstchar(re->code, &temp_options);
    3128           0 :     if (ch >= 0)
    3129             :       {
    3130           0 :       re->first_char = ch;
    3131           0 :       re->options |= PCRE_FIRSTSET;
    3132             :       }
    3133           0 :     else if (is_startline(re->code))
    3134           0 :       re->options |= PCRE_STARTLINE;
    3135             :     }
    3136             :   }
    3137             : 
    3138             : /* Save the last required character if there are at least two literal
    3139             : characters on all paths, or if there is no first character setting. */
    3140             : 
    3141           0 : if (reqchar >= 0 && (countlits > 1 || (re->options & PCRE_FIRSTSET) == 0))
    3142             :   {
    3143           0 :   re->req_char = reqchar;
    3144           0 :   re->options |= PCRE_REQCHSET;
    3145             :   }
    3146             : 
    3147             : /* Print out the compiled data for debugging */
    3148             : 
    3149             : #ifdef DEBUG
    3150             : 
    3151             : printf("Length = %d top_bracket = %d top_backref = %d\n",
    3152             :   length, re->top_bracket, re->top_backref);
    3153             : 
    3154             : if (re->options != 0)
    3155             :   {
    3156             :   printf("%s%s%s%s%s%s%s%s%s\n",
    3157             :     ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
    3158             :     ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
    3159             :     ((re->options & PCRE_ICHANGED) != 0)? "case state changed " : "",
    3160             :     ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
    3161             :     ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
    3162             :     ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
    3163             :     ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
    3164             :     ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
    3165             :     ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
    3166             :   }
    3167             : 
    3168             : if ((re->options & PCRE_FIRSTSET) != 0)
    3169             :   {
    3170             :   if (isprint(re->first_char)) printf("First char = %c\n", re->first_char);
    3171             :     else printf("First char = \\x%02x\n", re->first_char);
    3172             :   }
    3173             : 
    3174             : if ((re->options & PCRE_REQCHSET) != 0)
    3175             :   {
    3176             :   if (isprint(re->req_char)) printf("Req char = %c\n", re->req_char);
    3177             :     else printf("Req char = \\x%02x\n", re->req_char);
    3178             :   }
    3179             : 
    3180             : code_end = code;
    3181             : code_base = code = re->code;
    3182             : 
    3183             : while (code < code_end)
    3184             :   {
    3185             :   int charlength;
    3186             : 
    3187             :   printf("%3d ", code - code_base);
    3188             : 
    3189             :   if (*code >= OP_BRA)
    3190             :     {
    3191             :     printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
    3192             :     code += 2;
    3193             :     }
    3194             : 
    3195             :   else switch(*code)
    3196             :     {
    3197             :     case OP_OPT:
    3198             :     printf(" %.2x %s", code[1], OP_names[*code]);
    3199             :     code++;
    3200             :     break;
    3201             : 
    3202             :     case OP_COND:
    3203             :     printf("%3d Cond", (code[1] << 8) + code[2]);
    3204             :     code += 2;
    3205             :     break;
    3206             : 
    3207             :     case OP_CREF:
    3208             :     printf(" %.2d %s", code[1], OP_names[*code]);
    3209             :     code++;
    3210             :     break;
    3211             : 
    3212             :     case OP_CHARS:
    3213             :     charlength = *(++code);
    3214             :     printf("%3d ", charlength);
    3215             :     while (charlength-- > 0)
    3216             :       if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);
    3217             :     break;
    3218             : 
    3219             :     case OP_KETRMAX:
    3220             :     case OP_KETRMIN:
    3221             :     case OP_ALT:
    3222             :     case OP_KET:
    3223             :     case OP_ASSERT:
    3224             :     case OP_ASSERT_NOT:
    3225             :     case OP_ASSERTBACK:
    3226             :     case OP_ASSERTBACK_NOT:
    3227             :     case OP_ONCE:
    3228             :     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
    3229             :     code += 2;
    3230             :     break;
    3231             : 
    3232             :     case OP_REVERSE:
    3233             :     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
    3234             :     code += 2;
    3235             :     break;
    3236             : 
    3237             :     case OP_STAR:
    3238             :     case OP_MINSTAR:
    3239             :     case OP_PLUS:
    3240             :     case OP_MINPLUS:
    3241             :     case OP_QUERY:
    3242             :     case OP_MINQUERY:
    3243             :     case OP_TYPESTAR:
    3244             :     case OP_TYPEMINSTAR:
    3245             :     case OP_TYPEPLUS:
    3246             :     case OP_TYPEMINPLUS:
    3247             :     case OP_TYPEQUERY:
    3248             :     case OP_TYPEMINQUERY:
    3249             :     if (*code >= OP_TYPESTAR)
    3250             :       printf("    %s", OP_names[code[1]]);
    3251             :     else if (isprint(c = code[1])) printf("    %c", c);
    3252             :       else printf("    \\x%02x", c);
    3253             :     printf("%s", OP_names[*code++]);
    3254             :     break;
    3255             : 
    3256             :     case OP_EXACT:
    3257             :     case OP_UPTO:
    3258             :     case OP_MINUPTO:
    3259             :     if (isprint(c = code[3])) printf("    %c{", c);
    3260             :       else printf("    \\x%02x{", c);
    3261             :     if (*code != OP_EXACT) printf("0,");
    3262             :     printf("%d}", (code[1] << 8) + code[2]);
    3263             :     if (*code == OP_MINUPTO) printf("?");
    3264             :     code += 3;
    3265             :     break;
    3266             : 
    3267             :     case OP_TYPEEXACT:
    3268             :     case OP_TYPEUPTO:
    3269             :     case OP_TYPEMINUPTO:
    3270             :     printf("    %s{", OP_names[code[3]]);
    3271             :     if (*code != OP_TYPEEXACT) printf(",");
    3272             :     printf("%d}", (code[1] << 8) + code[2]);
    3273             :     if (*code == OP_TYPEMINUPTO) printf("?");
    3274             :     code += 3;
    3275             :     break;
    3276             : 
    3277             :     case OP_NOT:
    3278             :     if (isprint(c = *(++code))) printf("    [^%c]", c);
    3279             :       else printf("    [^\\x%02x]", c);
    3280             :     break;
    3281             : 
    3282             :     case OP_NOTSTAR:
    3283             :     case OP_NOTMINSTAR:
    3284             :     case OP_NOTPLUS:
    3285             :     case OP_NOTMINPLUS:
    3286             :     case OP_NOTQUERY:
    3287             :     case OP_NOTMINQUERY:
    3288             :     if (isprint(c = code[1])) printf("    [^%c]", c);
    3289             :       else printf("    [^\\x%02x]", c);
    3290             :     printf("%s", OP_names[*code++]);
    3291             :     break;
    3292             : 
    3293             :     case OP_NOTEXACT:
    3294             :     case OP_NOTUPTO:
    3295             :     case OP_NOTMINUPTO:
    3296             :     if (isprint(c = code[3])) printf("    [^%c]{", c);
    3297             :       else printf("    [^\\x%02x]{", c);
    3298             :     if (*code != OP_NOTEXACT) printf(",");
    3299             :     printf("%d}", (code[1] << 8) + code[2]);
    3300             :     if (*code == OP_NOTMINUPTO) printf("?");
    3301             :     code += 3;
    3302             :     break;
    3303             : 
    3304             :     case OP_REF:
    3305             :     printf("    \\%d", *(++code));
    3306             :     code ++;
    3307             :     goto CLASS_REF_REPEAT;
    3308             : 
    3309             :     case OP_CLASS:
    3310             :       {
    3311             :       int i, min, max;
    3312             :       code++;
    3313             :       printf("    [");
    3314             : 
    3315             :       for (i = 0; i < 256; i++)
    3316             :         {
    3317             :         if ((code[i/8] & (1 << (i&7))) != 0)
    3318             :           {
    3319             :           int j;
    3320             :           for (j = i+1; j < 256; j++)
    3321             :             if ((code[j/8] & (1 << (j&7))) == 0) break;
    3322             :           if (i == '-' || i == ']') printf("\\");
    3323             :           if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);
    3324             :           if (--j > i)
    3325             :             {
    3326             :             printf("-");
    3327             :             if (j == '-' || j == ']') printf("\\");
    3328             :             if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);
    3329             :             }
    3330             :           i = j;
    3331             :           }
    3332             :         }
    3333             :       printf("]");
    3334             :       code += 32;
    3335             : 
    3336             :       CLASS_REF_REPEAT:
    3337             : 
    3338             :       switch(*code)
    3339             :         {
    3340             :         case OP_CRSTAR:
    3341             :         case OP_CRMINSTAR:
    3342             :         case OP_CRPLUS:
    3343             :         case OP_CRMINPLUS:
    3344             :         case OP_CRQUERY:
    3345             :         case OP_CRMINQUERY:
    3346             :         printf("%s", OP_names[*code]);
    3347             :         break;
    3348             : 
    3349             :         case OP_CRRANGE:
    3350             :         case OP_CRMINRANGE:
    3351             :         min = (code[1] << 8) + code[2];
    3352             :         max = (code[3] << 8) + code[4];
    3353             :         if (max == 0) printf("{%d,}", min);
    3354             :         else printf("{%d,%d}", min, max);
    3355             :         if (*code == OP_CRMINRANGE) printf("?");
    3356             :         code += 4;
    3357             :         break;
    3358             : 
    3359             :         default:
    3360             :         code--;
    3361             :         }
    3362             :       }
    3363             :     break;
    3364             : 
    3365             :     /* Anything else is just a one-node item */
    3366             : 
    3367             :     default:
    3368             :     printf("    %s", OP_names[*code]);
    3369             :     break;
    3370             :     }
    3371             : 
    3372             :   code++;
    3373             :   printf("\n");
    3374             :   }
    3375             : printf("------------------------------------------------------------------\n");
    3376             : 
    3377             : /* This check is done here in the debugging case so that the code that
    3378             : was compiled can be seen. */
    3379             : 
    3380             : if (code - re->code > length)
    3381             :   {
    3382             :   *errorptr = ERR23;
    3383             :   (pcre_free)(re);
    3384             :   *erroroffset = ptr - (uschar *)pattern;
    3385             :   return NULL;
    3386             :   }
    3387             : #endif
    3388             : 
    3389           0 : return (pcre *)re;
    3390             : }
    3391             : 
    3392             : 
    3393             : 
    3394             : /*************************************************
    3395             : *          Match a back-reference                *
    3396             : *************************************************/
    3397             : 
    3398             : /* If a back reference hasn't been set, the length that is passed is greater
    3399             : than the number of characters left in the string, so the match fails.
    3400             : 
    3401             : Arguments:
    3402             :   offset      index into the offset vector (entry is added to md->start_subject)
    3403             :   eptr        points into the subject
    3404             :   length      length to be matched
    3405             :   md          points to match data block
    3406             :   ims         the ims flags (only PCRE_CASELESS is tested here)
    3407             : 
    3408             : Returns:      TRUE if all "length" characters match, FALSE otherwise
    3409             : */
    3410             : 
    3411             : static BOOL
    3412           0 : match_ref(int offset, register const uschar *eptr, int length, match_data *md,
    3413             :   unsigned long int ims)
    3414             : {
    3415           0 : const uschar *p = md->start_subject + md->offset_vector[offset];
    3416             : /* p is the subject position that eptr is compared against below */
    3417             : #ifdef DEBUG
    3418             : if (eptr >= md->end_subject)
    3419             :   printf("matching subject <null>");
    3420             : else
    3421             :   {
    3422             :   printf("matching subject ");
    3423             :   pchars(eptr, length, TRUE, md);
    3424             :   }
    3425             : printf(" against backref ");
    3426             : pchars(p, length, FALSE, md);
    3427             : printf("\n");
    3428             : #endif
    3429             : 
    3430             : /* Always fail if not enough characters left between eptr and md->end_subject */
    3431             : 
    3432           0 : if (length > md->end_subject - eptr) return FALSE;
    3433             : 
    3434             : /* Separate the caseless case for speed; it compares through the md->lcc table */
    3435             : 
    3436           0 : if ((ims & PCRE_CASELESS) != 0)
    3437             :   {
    3438           0 :   while (length-- > 0)
    3439           0 :     if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
    3440             :   }
    3441             : else
    3442           0 :   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
    3443             : /* Every one of the "length" characters compared equal */
    3444           0 : return TRUE;
    3445             : }
    3446             : 
    3447             : 
    3448             : 
    3449             : /*************************************************
    3450             : *         Match from current position            *
    3451             : *************************************************/
    3452             : 
    3453             : /* On entry ecode points to the first opcode, and eptr to the first character
    3454             : in the subject string, while eptrb points to a chain of blocks recording the
    3455             : value of eptr at the start of bracketed groups - used for breaking infinite
    3456             : loops matching zero-length strings.
    3457             : 
    3458             : Arguments:
    3459             :    eptr        pointer in subject
    3460             :    ecode       position in code
    3461             :    offset_top  current top pointer
    3462             :    md          pointer to "static" info for the match
    3463             :    ims         current /i, /m, and /s options
    3464             :    eptrb       pointer to chain of blocks containing eptr at start of
    3465             :                  brackets - for testing for empty matches
    3466             :    flags       can contain
    3467             :                  match_condassert - this is an assertion condition
    3468             :                  match_isgroup - this is the start of a bracketed group
    3469             : 
    3470             : Returns:       TRUE if matched
    3471             : */
    3472             : 
    3473             : __attribute__((no_sanitize("memory"))) __attribute__((no_sanitize_memory)) static BOOL
    3474           0 : match(register const uschar *eptr, register const uschar *ecode,
    3475             :   int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
    3476             :   int flags)
    3477             : {
    3478           0 : unsigned long int original_ims = ims;   /* Save for resetting on ')' */
    3479             : eptrblock newptrb;
    3480             : 
    3481             : /* At the start of a bracketed group, add the current subject pointer to the
    3482             : stack of such pointers, to be re-instated at the end of the group when we hit
    3483             : the closing ket. When match() is called in other circumstances, we don't add to
    3484             : the stack. */
    3485             : 
    3486           0 : if ((flags & match_isgroup) != 0)
    3487             :   {
    3488           0 :   newptrb.prev = eptrb;
    3489           0 :   newptrb.saved_eptr = eptr;
    3490           0 :   eptrb = &newptrb;
    3491             :   }
    3492             : 
    3493             : /* Now start processing the operations. */
    3494             : 
    3495             : for (;;)
    3496           0 :   {
    3497           0 :   int op = (int)*ecode;
    3498             :   int min, max, ctype;
    3499             :   register int i;
    3500             :   register int c;
    3501           0 :   BOOL minimize = FALSE;
    3502             : 
    3503             :   /* Opening capturing bracket. If there is space in the offset vector, save
    3504             :   the current subject position in the working slot at the top of the vector. We
    3505             :   mustn't change the current values of the data slot, because they may be set
    3506             :   from a previous iteration of this group, and be referred to by a reference
    3507             :   inside the group.
    3508             : 
    3509             :   If the bracket fails to match, we need to restore this value and also the
    3510             :   values of the final offsets, in case they were set by a previous iteration of
    3511             :   the same bracket.
    3512             : 
    3513             :   If there isn't enough space in the offset vector, treat this as if it were a
    3514             :   non-capturing bracket. Don't worry about setting the flag for the error case
    3515             :   here; that is handled in the code for KET. */
    3516             : 
    3517           0 :   if (op > OP_BRA)
    3518             :     {
    3519           0 :     int number = op - OP_BRA;
    3520           0 :     int offset = number << 1;
    3521             : 
    3522             : #ifdef DEBUG
    3523             :     printf("start bracket %d subject=", number);
    3524             :     pchars(eptr, 16, TRUE, md);
    3525             :     printf("\n");
    3526             : #endif
    3527             : 
    3528           0 :     if (offset < md->offset_max)
    3529             :       {
    3530           0 :       int save_offset1 = md->offset_vector[offset];
    3531           0 :       int save_offset2 = md->offset_vector[offset+1];
    3532           0 :       int save_offset3 = md->offset_vector[md->offset_end - number];
    3533             : 
    3534             :       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
    3535           0 :       md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
    3536             : 
    3537             :       do
    3538             :         {
    3539           0 :         if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup))
    3540           0 :           return TRUE;
    3541           0 :         ecode += (ecode[1] << 8) + ecode[2];
    3542             :         }
    3543           0 :       while (*ecode == OP_ALT);
    3544             : 
    3545             :       DPRINTF(("bracket %d failed\n", number));
    3546             : 
    3547           0 :       md->offset_vector[offset] = save_offset1;
    3548           0 :       md->offset_vector[offset+1] = save_offset2;
    3549           0 :       md->offset_vector[md->offset_end - number] = save_offset3;
    3550           0 :       return FALSE;
    3551             :       }
    3552             : 
    3553             :     /* Insufficient room for saving captured contents */
    3554             : 
    3555           0 :     else op = OP_BRA;
    3556             :     }
    3557             : 
    3558             :   /* Other types of node can be handled by a switch */
    3559             : 
    3560           0 :   switch(op)
    3561             :     {
    3562           0 :     case OP_BRA:     /* Non-capturing bracket: optimized */
    3563             :     DPRINTF(("start bracket 0\n"));
    3564             :     do
    3565             :       {
    3566           0 :       if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup))
    3567           0 :         return TRUE;
    3568           0 :       ecode += (ecode[1] << 8) + ecode[2];
    3569             :       }
    3570           0 :     while (*ecode == OP_ALT);
    3571             :     DPRINTF(("bracket 0 failed\n"));
    3572           0 :     return FALSE;
    3573             : 
    3574             :     /* Conditional group: compilation checked that there are no more than
    3575             :     two branches. If the condition is false, skipping the first branch takes us
    3576             :     past the end if there is only one branch, but that's OK because that is
    3577             :     exactly what going to the ket would do. */
    3578             : 
    3579           0 :     case OP_COND:
    3580           0 :     if (ecode[3] == OP_CREF)         /* Condition is extraction test */
    3581             :       {
    3582           0 :       int offset = ecode[4] << 1;    /* Doubled reference number */
    3583           0 :       return match(eptr,
    3584           0 :         ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?
    3585           0 :           5 : 3 + (ecode[1] << 8) + ecode[2]),
    3586             :         offset_top, md, ims, eptrb, match_isgroup);
    3587             :       }
    3588             : 
    3589             :     /* The condition is an assertion. Call match() to evaluate it - setting
    3590             :     match_condassert in the flags causes it to stop at the end of an assertion. */
    3591             : 
    3592             :     else
    3593             :       {
    3594           0 :       if (match(eptr, ecode+3, offset_top, md, ims, NULL,
    3595             :           match_condassert | match_isgroup))
    3596             :         {
    3597           0 :         ecode += 3 + (ecode[4] << 8) + ecode[5];
    3598           0 :         while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2];
    3599             :         }
    3600           0 :       else ecode += (ecode[1] << 8) + ecode[2];
    3601           0 :       return match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup);
    3602             :       }
    3603             :     /* Control never reaches here */
    3604             : 
    3605             :     /* Skip over conditional reference data if encountered (should not be) */
    3606             : 
    3607           0 :     case OP_CREF:
    3608           0 :     ecode += 2;
    3609           0 :     break;
    3610             : 
    3611             :     /* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched
    3612             :     an empty string - recursion will then try other alternatives, if any. */
    3613             : 
    3614           0 :     case OP_END:
    3615           0 :     if (md->notempty && eptr == md->start_match) return FALSE;
    3616           0 :     md->end_match_ptr = eptr;          /* Record where we ended */
    3617           0 :     md->end_offset_top = offset_top;   /* and how many extracts were taken */
    3618           0 :     return TRUE;
    3619             : 
    3620             :     /* Change option settings */
    3621             : 
    3622           0 :     case OP_OPT:
    3623           0 :     ims = ecode[1];
    3624           0 :     ecode += 2;
    3625             :     DPRINTF(("ims set to %02lx\n", ims));
    3626           0 :     break;
    3627             : 
    3628             :     /* Assertion brackets. Check the alternative branches in turn - the
    3629             :     matching won't pass the KET for an assertion. If any one branch matches,
    3630             :     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
    3631             :     start of each branch to move the current point backwards, so the code at
    3632             :     this level is identical to the lookahead case. */
    3633             : 
    3634           0 :     case OP_ASSERT:
    3635             :     case OP_ASSERTBACK:
    3636             :     do
    3637             :       {
    3638           0 :       if (match(eptr, ecode+3, offset_top, md, ims, NULL, match_isgroup)) break;
    3639           0 :       ecode += (ecode[1] << 8) + ecode[2];
    3640             :       }
    3641           0 :     while (*ecode == OP_ALT);
    3642           0 :     if (*ecode == OP_KET) return FALSE;
    3643             : 
    3644             :     /* If checking an assertion for a condition, return TRUE. */
    3645             : 
    3646           0 :     if ((flags & match_condassert) != 0) return TRUE;
    3647             : 
    3648             :     /* Continue from after the assertion, updating the offsets high water
    3649             :     mark, since extracts may have been taken during the assertion. */
    3650             : 
    3651           0 :     do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
    3652           0 :     ecode += 3;
    3653           0 :     offset_top = md->end_offset_top;
    3654           0 :     continue;
    3655             : 
    3656             :     /* Negative assertion: all branches must fail to match */
    3657             : 
    3658           0 :     case OP_ASSERT_NOT:
    3659             :     case OP_ASSERTBACK_NOT:
    3660             :     do
    3661             :       {
    3662           0 :       if (match(eptr, ecode+3, offset_top, md, ims, NULL, match_isgroup))
    3663           0 :         return FALSE;
    3664           0 :       ecode += (ecode[1] << 8) + ecode[2];
    3665             :       }
    3666           0 :     while (*ecode == OP_ALT);
    3667             : 
    3668           0 :     if ((flags & match_condassert) != 0) return TRUE;
    3669             : 
    3670           0 :     ecode += 3;
    3671           0 :     continue;
    3672             : 
    3673             :     /* Move the subject pointer back. This occurs only at the start of
    3674             :     each branch of a lookbehind assertion. If we are too close to the start to
    3675             :     move back, this match function fails. When working with UTF-8 we move
    3676             :     back a number of characters, not bytes. */
    3677             : 
    3678           0 :     case OP_REVERSE:
    3679             : #ifdef SUPPORT_UTF8
    3680             :     c = (ecode[1] << 8) + ecode[2];
    3681             :     for (i = 0; i < c; i++)
    3682             :       {
    3683             :       eptr--;
    3684             :       BACKCHAR(eptr)
    3685             :       }
    3686             : #else
    3687           0 :     eptr -= (ecode[1] << 8) + ecode[2];
    3688             : #endif
    3689             : 
    3690           0 :     if (eptr < md->start_subject) return FALSE;
    3691           0 :     ecode += 3;
    3692           0 :     break;
    3693             : 
    3694             :     /* Recursion matches the current regex, nested. If there are any capturing
    3695             :     brackets started but not finished, we have to save their starting points
    3696             :     and reinstate them after the recursion. However, we don't know how many
    3697             :     such there are (offset_top records the completed total) so we just have
    3698             :     to save all the potential data. There may be up to 99 such values, which
    3699             :     is a bit large to put on the stack, but using malloc for small numbers
    3700             :     seems expensive. As a compromise, the stack is used when there are fewer
    3701             :     than 16 values to store; otherwise malloc is used. A problem is what to do
    3702             :     if the malloc fails ... there is no way of returning to the top level with
    3703             :     an error. Save the top 15 values on the stack, and accept that the rest
    3704             :     may be wrong. */
    3705             : 
    3706           0 :     case OP_RECURSE:
    3707             :       {
    3708             :       BOOL rc;
    3709             :       int *save;
    3710             :       int stacksave[15];
    3711             : 
    3712           0 :       c = md->offset_max;
    3713             : 
    3714           0 :       if (c < 16) save = stacksave; else
    3715             :         {
    3716           0 :         save = (int *)(pcre_malloc)((c+1) * sizeof(int));
    3717           0 :         if (save == NULL)
    3718             :           {
    3719           0 :           save = stacksave;
    3720           0 :           c = 15;
    3721             :           }
    3722             :         }
    3723             : 
    3724           0 :       for (i = 1; i <= c; i++)
    3725           0 :         save[i] = md->offset_vector[md->offset_end - i];
    3726           0 :       rc = match(eptr, md->start_pattern, offset_top, md, ims, eptrb,
    3727             :         match_isgroup);
    3728           0 :       for (i = 1; i <= c; i++)
    3729           0 :         md->offset_vector[md->offset_end - i] = save[i];
    3730           0 :       if (save != stacksave) (pcre_free)(save);
    3731           0 :       if (!rc) return FALSE;
    3732             : 
    3733             :       /* In case the recursion has set more capturing values, save the final
    3734             :       number, then move along the subject till after the recursive match,
    3735             :       and advance one byte in the pattern code. */
    3736             : 
    3737           0 :       offset_top = md->end_offset_top;
    3738           0 :       eptr = md->end_match_ptr;
    3739           0 :       ecode++;
    3740             :       }
    3741           0 :     break;
    3742             : 
    3743             :     /* "Once" brackets are like assertion brackets except that after a match,
    3744             :     the point in the subject string is not moved back. Thus there can never be
    3745             :     a move back into the brackets. Check the alternative branches in turn - the
    3746             :     matching won't pass the KET for this kind of subpattern. If any one branch
    3747             :     matches, we carry on as at the end of a normal bracket, leaving the subject
    3748             :     pointer. */
    3749             : 
    3750           0 :     case OP_ONCE:
    3751             :       {
    3752           0 :       const uschar *prev = ecode;
    3753           0 :       const uschar *saved_eptr = eptr;
    3754             : 
    3755             :       do
    3756             :         {
    3757           0 :         if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup))
    3758           0 :           break;
    3759           0 :         ecode += (ecode[1] << 8) + ecode[2];
    3760             :         }
    3761           0 :       while (*ecode == OP_ALT);
    3762             : 
    3763             :       /* If we hit the end of the group (which could be repeated), fail */
    3764             : 
    3765           0 :       if (*ecode != OP_ONCE && *ecode != OP_ALT) return FALSE;
    3766             : 
    3767             :       /* Continue as from after the assertion, updating the offsets high water
    3768             :       mark, since extracts may have been taken. */
    3769             : 
    3770           0 :       do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
    3771             : 
    3772           0 :       offset_top = md->end_offset_top;
    3773           0 :       eptr = md->end_match_ptr;
    3774             : 
    3775             :       /* For a non-repeating ket, just continue at this level. This also
    3776             :       happens for a repeating ket if no characters were matched in the group.
    3777             :       This is the forcible breaking of infinite loops as implemented in Perl
    3778             :       5.005. If there is an options reset, it will get obeyed in the normal
    3779             :       course of events. */
    3780             : 
    3781           0 :       if (*ecode == OP_KET || eptr == saved_eptr)
    3782             :         {
    3783           0 :         ecode += 3;
    3784           0 :         break;
    3785             :         }
    3786             : 
    3787             :       /* The repeating kets try the rest of the pattern or restart from the
    3788             :       preceding bracket, in the appropriate order. We need to reset any options
    3789             :       that changed within the bracket before re-running it, so check the next
    3790             :       opcode. */
    3791             : 
    3792           0 :       if (ecode[3] == OP_OPT)
    3793             :         {
    3794           0 :         ims = (ims & ~PCRE_IMS) | ecode[4];
    3795             :         DPRINTF(("ims set to %02lx at group repeat\n", ims));
    3796             :         }
    3797             : 
    3798           0 :       if (*ecode == OP_KETRMIN)
    3799             :         {
    3800           0 :         if (match(eptr, ecode+3, offset_top, md, ims, eptrb, 0) ||
    3801           0 :             match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup))
    3802           0 :               return TRUE;
    3803             :         }
    3804             :       else  /* OP_KETRMAX */
    3805             :         {
    3806           0 :         if (match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup) ||
    3807           0 :             match(eptr, ecode+3, offset_top, md, ims, eptrb, 0)) return TRUE;
    3808             :         }
    3809             :       }
    3810           0 :     return FALSE;
    3811             : 
    3812             :     /* An alternation is the end of a branch; scan along to find the end of the
    3813             :     bracketed group and go to there. */
    3814             : 
    3815           0 :     case OP_ALT:
    3816           0 :     do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
    3817           0 :     break;
    3818             : 
    3819             :     /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
    3820             :     that it may occur zero times. It may repeat infinitely, or not at all -
    3821             :     i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
    3822             :     repeat limits are compiled as a number of copies, with the optional ones
    3823             :     preceded by BRAZERO or BRAMINZERO. */
    3824             : 
    3825           0 :     case OP_BRAZERO:
    3826             :       {
    3827           0 :       const uschar *next = ecode+1;
    3828           0 :       if (match(eptr, next, offset_top, md, ims, eptrb, match_isgroup))
    3829           0 :         return TRUE;
    3830           0 :       do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
    3831           0 :       ecode = next + 3;
    3832             :       }
    3833           0 :     break;
    3834             : 
    3835           0 :     case OP_BRAMINZERO:
    3836             :       {
    3837           0 :       const uschar *next = ecode+1;
    3838           0 :       do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
    3839           0 :       if (match(eptr, next+3, offset_top, md, ims, eptrb, match_isgroup))
    3840           0 :         return TRUE;
    3841           0 :       ecode++;
    3842             :       }
    3843           0 :     break;
    3844             : 
    3845             :     /* End of a group, repeated or non-repeating. If we are at the end of
    3846             :     an assertion "group", stop matching and return TRUE, but record the
    3847             :     current high water mark for use by positive assertions. Do this also
    3848             :     for the "once" (non-backing-up) groups. */
    3849             : 
    3850           0 :     case OP_KET:
    3851             :     case OP_KETRMIN:
    3852             :     case OP_KETRMAX:
    3853             :       {
    3854           0 :       const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
    3855           0 :       const uschar *saved_eptr = eptrb->saved_eptr;
    3856             : 
    3857           0 :       eptrb = eptrb->prev;    /* Back up the stack of bracket start pointers */
    3858             : 
    3859           0 :       if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
    3860           0 :           *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
    3861           0 :           *prev == OP_ONCE)
    3862             :         {
    3863           0 :         md->end_match_ptr = eptr;      /* For ONCE */
    3864           0 :         md->end_offset_top = offset_top;
    3865           0 :         return TRUE;
    3866             :         }
    3867             : 
    3868             :       /* In all other cases except a conditional group we have to check the
    3869             :       group number back at the start and if necessary complete handling an
    3870             :       extraction by setting the offsets and bumping the high water mark. */
    3871             : 
    3872           0 :       if (*prev != OP_COND)
    3873             :         {
    3874           0 :         int number = *prev - OP_BRA;
    3875           0 :         int offset = number << 1;
    3876             : 
    3877             : #ifdef DEBUG
    3878             :         printf("end bracket %d", number);
    3879             :         printf("\n");
    3880             : #endif
    3881             : 
    3882           0 :         if (number > 0)
    3883             :           {
    3884           0 :           if (offset >= md->offset_max) md->offset_overflow = TRUE; else
    3885             :             {
    3886           0 :             md->offset_vector[offset] =
    3887           0 :               md->offset_vector[md->offset_end - number];
    3888           0 :             md->offset_vector[offset+1] = eptr - md->start_subject;
    3889           0 :             if (offset_top <= offset) offset_top = offset + 2;
    3890             :             }
    3891             :           }
    3892             :         }
    3893             : 
    3894             :       /* Reset the value of the ims flags, in case they got changed during
    3895             :       the group. */
    3896             : 
    3897           0 :       ims = original_ims;
    3898             :       DPRINTF(("ims reset to %02lx\n", ims));
    3899             : 
    3900             :       /* For a non-repeating ket, just continue at this level. This also
    3901             :       happens for a repeating ket if no characters were matched in the group.
    3902             :       This is the forcible breaking of infinite loops as implemented in Perl
    3903             :       5.005. If there is an options reset, it will get obeyed in the normal
    3904             :       course of events. */
    3905             : 
    3906           0 :       if (*ecode == OP_KET || eptr == saved_eptr)
    3907             :         {
    3908           0 :         ecode += 3;
    3909           0 :         break;
    3910             :         }
    3911             : 
    3912             :       /* The repeating kets try the rest of the pattern or restart from the
    3913             :       preceding bracket, in the appropriate order. */
    3914             : 
    3915           0 :       if (*ecode == OP_KETRMIN)
    3916             :         {
    3917           0 :         if (match(eptr, ecode+3, offset_top, md, ims, eptrb, 0) ||
    3918           0 :             match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup))
    3919           0 :               return TRUE;
    3920             :         }
    3921             :       else  /* OP_KETRMAX */
    3922             :         {
    3923           0 :         if (match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup) ||
    3924           0 :             match(eptr, ecode+3, offset_top, md, ims, eptrb, 0)) return TRUE;
    3925             :         }
    3926             :       }
    3927           0 :     return FALSE;
    3928             : 
    3929             :     /* Start of subject unless notbol, or after internal newline if multiline */
    3930             : 
    3931           0 :     case OP_CIRC:
    3932           0 :     if (md->notbol && eptr == md->start_subject) return FALSE;
    3933           0 :     if ((ims & PCRE_MULTILINE) != 0)
    3934             :       {
    3935           0 :       if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;
    3936           0 :       ecode++;
    3937           0 :       break;
    3938             :       }
    3939             :     /* ... else fall through */
    3940             : 
    3941             :     /* Start of subject assertion */
    3942             : 
    3943             :     case OP_SOD:
    3944           0 :     if (eptr != md->start_subject) return FALSE;
    3945           0 :     ecode++;
    3946           0 :     break;
    3947             : 
    3948             :     /* Assert before internal newline if multiline, or before a terminating
    3949             :     newline unless endonly is set, else end of subject unless noteol is set. */
    3950             : 
    3951           0 :     case OP_DOLL:
    3952           0 :     if ((ims & PCRE_MULTILINE) != 0)
    3953             :       {
    3954           0 :       if (eptr < md->end_subject) { if (*eptr != '\n') return FALSE; }
    3955           0 :         else { if (md->noteol) return FALSE; }
    3956           0 :       ecode++;
    3957           0 :       break;
    3958             :       }
    3959             :     else
    3960             :       {
    3961           0 :       if (md->noteol) return FALSE;
    3962           0 :       if (!md->endonly)
    3963             :         {
    3964           0 :         if (eptr < md->end_subject - 1 ||
    3965           0 :            (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
    3966             : 
    3967           0 :         ecode++;
    3968           0 :         break;
    3969             :         }
    3970             :       }
    3971             :     /* ... else fall through */
    3972             : 
    3973             :     /* End of subject assertion (\z) */
    3974             : 
    3975             :     case OP_EOD:
    3976           0 :     if (eptr < md->end_subject) return FALSE;
    3977           0 :     ecode++;
    3978           0 :     break;
    3979             : 
    3980             :     /* End of subject or ending \n assertion (\Z) */
    3981             : 
    3982           0 :     case OP_EODN:
    3983           0 :     if (eptr < md->end_subject - 1 ||
    3984           0 :        (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
    3985           0 :     ecode++;
    3986           0 :     break;
    3987             : 
    3988             :     /* Word boundary assertions */
    3989             : 
    3990           0 :     case OP_NOT_WORD_BOUNDARY:
    3991             :     case OP_WORD_BOUNDARY:
    3992             :       {
    3993           0 :       BOOL prev_is_word = (eptr != md->start_subject) &&
    3994           0 :         ((md->ctypes[eptr[-1]] & ctype_word) != 0);
    3995           0 :       BOOL cur_is_word = (eptr < md->end_subject) &&
    3996           0 :         ((md->ctypes[*eptr] & ctype_word) != 0);
    3997           0 :       if ((*ecode++ == OP_WORD_BOUNDARY)?
    3998             :            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
    3999           0 :         return FALSE;
    4000             :       }
    4001           0 :     break;
    4002             : 
    4003             :     /* Match a single character type; inline for speed */
    4004             : 
    4005           0 :     case OP_ANY:
    4006           0 :     if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')
    4007           0 :       return FALSE;
    4008           0 :     if (eptr++ >= md->end_subject) return FALSE;
    4009             : #ifdef SUPPORT_UTF8
    4010             :     if (md->utf8)
    4011             :       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
    4012             : #endif
    4013           0 :     ecode++;
    4014           0 :     break;
    4015             : 
    4016           0 :     case OP_NOT_DIGIT:
    4017           0 :     if (eptr >= md->end_subject ||
    4018           0 :        (md->ctypes[*eptr++] & ctype_digit) != 0)
    4019           0 :       return FALSE;
    4020           0 :     ecode++;
    4021           0 :     break;
    4022             : 
    4023           0 :     case OP_DIGIT:
    4024           0 :     if (eptr >= md->end_subject ||
    4025           0 :        (md->ctypes[*eptr++] & ctype_digit) == 0)
    4026           0 :       return FALSE;
    4027           0 :     ecode++;
    4028           0 :     break;
    4029             : 
    4030           0 :     case OP_NOT_WHITESPACE:
    4031           0 :     if (eptr >= md->end_subject ||
    4032           0 :        (md->ctypes[*eptr++] & ctype_space) != 0)
    4033           0 :       return FALSE;
    4034           0 :     ecode++;
    4035           0 :     break;
    4036             : 
    4037           0 :     case OP_WHITESPACE:
    4038           0 :     if (eptr >= md->end_subject ||
    4039           0 :        (md->ctypes[*eptr++] & ctype_space) == 0)
    4040           0 :       return FALSE;
    4041           0 :     ecode++;
    4042           0 :     break;
    4043             : 
    4044           0 :     case OP_NOT_WORDCHAR:
    4045           0 :     if (eptr >= md->end_subject ||
    4046           0 :        (md->ctypes[*eptr++] & ctype_word) != 0)
    4047           0 :       return FALSE;
    4048           0 :     ecode++;
    4049           0 :     break;
    4050             : 
    4051           0 :     case OP_WORDCHAR:
    4052           0 :     if (eptr >= md->end_subject ||
    4053           0 :        (md->ctypes[*eptr++] & ctype_word) == 0)
    4054           0 :       return FALSE;
    4055           0 :     ecode++;
    4056           0 :     break;
    4057             : 
    4058             :     /* Match a back reference, possibly repeatedly. Look past the end of the
    4059             :     item to see if there is repeat information following. The code is similar
    4060             :     to that for character classes, but repeated for efficiency. Then obey
    4061             :     similar code to character type repeats - written out again for speed.
    4062             :     However, if the referenced string is the empty string, always treat
    4063             :     it as matched, any number of times (otherwise there could be infinite
    4064             :     loops). */
    4065             : 
    4066           0 :     case OP_REF:
    4067             :       {
    4068             :       int length;
    4069           0 :       int offset = ecode[1] << 1;                /* Doubled reference number */
    4070           0 :       ecode += 2;                                /* Advance past the item */
    4071             : 
    4072             :       /* If the reference is unset, set the length to be longer than the amount
    4073             :       of subject left; this ensures that every attempt at a match fails. We
    4074             :       can't just fail here, because of the possibility of quantifiers with zero
    4075             :       minima. */
    4076             : 
    4077           0 :       length = (offset >= offset_top || md->offset_vector[offset] < 0)?
    4078           0 :         md->end_subject - eptr + 1 :
    4079           0 :         md->offset_vector[offset+1] - md->offset_vector[offset];
    4080             : 
    4081             :       /* Set up for repetition, or handle the non-repeated case */
    4082             : 
    4083           0 :       switch (*ecode)
    4084             :         {
    4085           0 :         case OP_CRSTAR:
    4086             :         case OP_CRMINSTAR:
    4087             :         case OP_CRPLUS:
    4088             :         case OP_CRMINPLUS:
    4089             :         case OP_CRQUERY:
    4090             :         case OP_CRMINQUERY:
    4091           0 :         c = *ecode++ - OP_CRSTAR;
    4092           0 :         minimize = (c & 1) != 0;
    4093           0 :         min = rep_min[c];                 /* Pick up values from tables; */
    4094           0 :         max = rep_max[c];                 /* zero for max => infinity */
    4095           0 :         if (max == 0) max = INT_MAX;
    4096           0 :         break;
    4097             : 
    4098           0 :         case OP_CRRANGE:
    4099             :         case OP_CRMINRANGE:
    4100           0 :         minimize = (*ecode == OP_CRMINRANGE);
    4101           0 :         min = (ecode[1] << 8) + ecode[2];
    4102           0 :         max = (ecode[3] << 8) + ecode[4];
    4103           0 :         if (max == 0) max = INT_MAX;
    4104           0 :         ecode += 5;
    4105           0 :         break;
    4106             : 
    4107           0 :         default:               /* No repeat follows */
    4108           0 :         if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
    4109           0 :         eptr += length;
    4110           0 :         continue;              /* With the main loop */
    4111             :         }
    4112             : 
    4113             :       /* If the length of the reference is zero, just continue with the
    4114             :       main loop. */
    4115             : 
    4116           0 :       if (length == 0) continue;
    4117             : 
    4118             :       /* First, ensure the minimum number of matches are present. We get back
    4119             :       the length of the reference string explicitly rather than passing the
    4120             :       address of eptr, so that eptr can be a register variable. */
    4121             : 
    4122           0 :       for (i = 1; i <= min; i++)
    4123             :         {
    4124           0 :         if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
    4125           0 :         eptr += length;
    4126             :         }
    4127             : 
    4128             :       /* If min = max, continue at the same level without recursion.
    4129             :       They are not both allowed to be zero. */
    4130             : 
    4131           0 :       if (min == max) continue;
    4132             : 
    4133             :       /* If minimizing, keep trying and advancing the pointer */
    4134             : 
    4135           0 :       if (minimize)
    4136             :         {
    4137           0 :         for (i = min;; i++)
    4138             :           {
    4139           0 :           if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
    4140           0 :             return TRUE;
    4141           0 :           if (i >= max || !match_ref(offset, eptr, length, md, ims))
    4142           0 :             return FALSE;
    4143           0 :           eptr += length;
    4144             :           }
    4145             :         /* Control never gets here */
    4146             :         }
    4147             : 
    4148             :       /* If maximizing, find the longest string and work backwards */
    4149             : 
    4150             :       else
    4151             :         {
    4152           0 :         const uschar *pp = eptr;
    4153           0 :         for (i = min; i < max; i++)
    4154             :           {
    4155           0 :           if (!match_ref(offset, eptr, length, md, ims)) break;
    4156           0 :           eptr += length;
    4157             :           }
    4158           0 :         while (eptr >= pp)
    4159             :           {
    4160           0 :           if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
    4161           0 :             return TRUE;
    4162           0 :           eptr -= length;
    4163             :           }
    4164           0 :         return FALSE;
    4165             :         }
    4166             :       }
    4167             :     /* Control never gets here */
    4168             : 
    4169             : 
    4170             : 
    4171             :     /* Match a character class, possibly repeatedly. Look past the end of the
    4172             :     item to see if there is repeat information following. Then obey similar
    4173             :     code to character type repeats - written out again for speed. */
    4174             : 
    4175           0 :     case OP_CLASS:
    4176             :       {
    4177           0 :       const uschar *data = ecode + 1;  /* Save for matching */
    4178           0 :       ecode += 33;                     /* Advance past the item */
    4179             : 
    4180           0 :       switch (*ecode)
    4181             :         {
    4182           0 :         case OP_CRSTAR:
    4183             :         case OP_CRMINSTAR:
    4184             :         case OP_CRPLUS:
    4185             :         case OP_CRMINPLUS:
    4186             :         case OP_CRQUERY:
    4187             :         case OP_CRMINQUERY:
    4188           0 :         c = *ecode++ - OP_CRSTAR;
    4189           0 :         minimize = (c & 1) != 0;
    4190           0 :         min = rep_min[c];                 /* Pick up values from tables; */
    4191           0 :         max = rep_max[c];                 /* zero for max => infinity */
    4192           0 :         if (max == 0) max = INT_MAX;
    4193           0 :         break;
    4194             : 
    4195           0 :         case OP_CRRANGE:
    4196             :         case OP_CRMINRANGE:
    4197           0 :         minimize = (*ecode == OP_CRMINRANGE);
    4198           0 :         min = (ecode[1] << 8) + ecode[2];
    4199           0 :         max = (ecode[3] << 8) + ecode[4];
    4200           0 :         if (max == 0) max = INT_MAX;
    4201           0 :         ecode += 5;
    4202           0 :         break;
    4203             : 
    4204           0 :         default:               /* No repeat follows */
    4205           0 :         min = max = 1;
    4206           0 :         break;
    4207             :         }
    4208             : 
    4209             :       /* First, ensure the minimum number of matches are present. */
    4210             : 
    4211           0 :       for (i = 1; i <= min; i++)
    4212             :         {
    4213           0 :         if (eptr >= md->end_subject) return FALSE;
    4214           0 :         GETCHARINC(c, eptr)         /* Get character; increment eptr */
    4215             : 
    4216             : #ifdef SUPPORT_UTF8
    4217             :         /* We do not yet support class members > 255 */
    4218             :         if (c > 255) return FALSE;
    4219             : #endif
    4220             : 
    4221           0 :         if ((data[c/8] & (1 << (c&7))) != 0) continue;
    4222           0 :         return FALSE;
    4223             :         }
    4224             : 
    4225             :       /* If max == min we can continue with the main loop without the
    4226             :       need to recurse. */
    4227             : 
    4228           0 :       if (min == max) continue;
    4229             : 
    4230             :       /* If minimizing, keep testing the rest of the expression and advancing
    4231             :       the pointer while it matches the class. */
    4232             : 
    4233           0 :       if (minimize)
    4234             :         {
    4235           0 :         for (i = min;; i++)
    4236             :           {
    4237           0 :           if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
    4238           0 :             return TRUE;
    4239           0 :           if (i >= max || eptr >= md->end_subject) return FALSE;
    4240           0 :           GETCHARINC(c, eptr)       /* Get character; increment eptr */
    4241             : 
    4242             : #ifdef SUPPORT_UTF8
    4243             :           /* We do not yet support class members > 255 */
    4244             :           if (c > 255) return FALSE;
    4245             : #endif
    4246           0 :           if ((data[c/8] & (1 << (c&7))) != 0) continue;
    4247           0 :           return FALSE;
    4248             :           }
    4249             :         /* Control never gets here */
    4250             :         }
    4251             : 
    4252             :       /* If maximizing, find the longest possible run, then work backwards. */
    4253             : 
    4254             :       else
    4255             :         {
    4256           0 :         const uschar *pp = eptr;
    4257           0 :         int len = 1;
    4258           0 :         for (i = min; i < max; i++)
    4259             :           {
    4260           0 :           if (eptr >= md->end_subject) break;
    4261           0 :           GETCHARLEN(c, eptr, len)  /* Get character, set length if UTF-8 */
    4262             : 
    4263             : #ifdef SUPPORT_UTF8
    4264             :           /* We do not yet support class members > 255 */
    4265             :           if (c > 255) break;
    4266             : #endif
    4267           0 :           if ((data[c/8] & (1 << (c&7))) == 0) break;
    4268           0 :           eptr += len;
    4269             :           }
    4270             : 
    4271           0 :         while (eptr >= pp)
    4272             :           {
    4273           0 :           if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
    4274           0 :             return TRUE;
    4275             : 
    4276             : #ifdef SUPPORT_UTF8
    4277             :           BACKCHAR(eptr)
    4278             : #endif
    4279             :           }
    4280           0 :         return FALSE;
    4281             :         }
    4282             :       }
    4283             :     /* Control never gets here */
    4284             : 
    4285             :     /* Match a run of characters */
    4286             : 
    4287           0 :     case OP_CHARS:
    4288             :       {
    4289           0 :       register int length = ecode[1];
    4290           0 :       ecode += 2;
    4291             : 
    4292             : #ifdef DEBUG    /* Sigh. Some compilers never learn. */
    4293             :       if (eptr >= md->end_subject)
    4294             :         printf("matching subject <null> against pattern ");
    4295             :       else
    4296             :         {
    4297             :         printf("matching subject ");
    4298             :         pchars(eptr, length, TRUE, md);
    4299             :         printf(" against pattern ");
    4300             :         }
    4301             :       pchars(ecode, length, FALSE, md);
    4302             :       printf("\n");
    4303             : #endif
    4304             : 
    4305           0 :       if (length > md->end_subject - eptr) return FALSE;
    4306           0 :       if ((ims & PCRE_CASELESS) != 0)
    4307             :         {
    4308           0 :         while (length-- > 0)
    4309           0 :           if (md->lcc[*ecode++] != md->lcc[*eptr++])
    4310           0 :             return FALSE;
    4311             :         }
    4312             :       else
    4313             :         {
    4314           0 :         while (length-- > 0) if (*ecode++ != *eptr++) return FALSE;
    4315             :         }
    4316             :       }
    4317           0 :     break;
    4318             : 
    4319             :     /* Match a single character repeatedly; different opcodes share code. */
    4320             : 
    4321           0 :     case OP_EXACT:
    4322           0 :     min = max = (ecode[1] << 8) + ecode[2];
    4323           0 :     ecode += 3;
    4324           0 :     goto REPEATCHAR;
    4325             : 
    4326           0 :     case OP_UPTO:
    4327             :     case OP_MINUPTO:
    4328           0 :     min = 0;
    4329           0 :     max = (ecode[1] << 8) + ecode[2];
    4330           0 :     minimize = *ecode == OP_MINUPTO;
    4331           0 :     ecode += 3;
    4332           0 :     goto REPEATCHAR;
    4333             : 
    4334           0 :     case OP_STAR:
    4335             :     case OP_MINSTAR:
    4336             :     case OP_PLUS:
    4337             :     case OP_MINPLUS:
    4338             :     case OP_QUERY:
    4339             :     case OP_MINQUERY:
    4340           0 :     c = *ecode++ - OP_STAR;
    4341           0 :     minimize = (c & 1) != 0;
    4342           0 :     min = rep_min[c];                 /* Pick up values from tables; */
    4343           0 :     max = rep_max[c];                 /* zero for max => infinity */
    4344           0 :     if (max == 0) max = INT_MAX;
    4345             : 
    4346             :     /* Common code for all repeated single-character matches. We can give
    4347             :     up quickly if there are fewer than the minimum number of characters left in
    4348             :     the subject. */
    4349             : 
    4350           0 :     REPEATCHAR:
    4351           0 :     if (min > md->end_subject - eptr) return FALSE;
    4352           0 :     c = *ecode++;
    4353             : 
    4354             :     /* The code is duplicated for the caseless and caseful cases, for speed,
    4355             :     since matching characters is likely to be quite common. First, ensure the
    4356             :     minimum number of matches are present. If min = max, continue at the same
    4357             :     level without recursing. Otherwise, if minimizing, keep trying the rest of
    4358             :     the expression and advancing one matching character if failing, up to the
    4359             :     maximum. Alternatively, if maximizing, find the maximum number of
    4360             :     characters and work backwards. */
    4361             : 
    4362             :     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
    4363             :       max, eptr));
    4364             : 
    4365           0 :     if ((ims & PCRE_CASELESS) != 0)
    4366             :       {
    4367           0 :       c = md->lcc[c];
    4368           0 :       for (i = 1; i <= min; i++)
    4369           0 :         if (c != md->lcc[*eptr++]) return FALSE;
    4370           0 :       if (min == max) continue;
    4371           0 :       if (minimize)
    4372             :         {
    4373           0 :         for (i = min;; i++)
    4374             :           {
    4375           0 :           if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
    4376           0 :             return TRUE;
    4377           0 :           if (i >= max || eptr >= md->end_subject ||
    4378           0 :               c != md->lcc[*eptr++])
    4379           0 :             return FALSE;
    4380             :           }
    4381             :         /* Control never gets here */
    4382             :         }
    4383             :       else
    4384             :         {
    4385           0 :         const uschar *pp = eptr;
    4386           0 :         for (i = min; i < max; i++)
    4387             :           {
    4388           0 :           if (eptr >= md->end_subject || c != md->lcc[*eptr]) break;
    4389           0 :           eptr++;
    4390             :           }
    4391           0 :         while (eptr >= pp)
    4392           0 :           if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
    4393           0 :             return TRUE;
    4394           0 :         return FALSE;
    4395             :         }
    4396             :       /* Control never gets here */
    4397             :       }
    4398             : 
    4399             :     /* Caseful comparisons */
    4400             : 
    4401             :     else
    4402             :       {
    4403           0 :       for (i = 1; i <= min; i++) if (c != *eptr++) return FALSE;
    4404           0 :       if (min == max) continue;
    4405           0 :       if (minimize)
    4406             :         {
    4407           0 :         for (i = min;; i++)
    4408             :           {
    4409           0 :           if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
    4410           0 :             return TRUE;
    4411           0 :           if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;
    4412             :           }
    4413             :         /* Control never gets here */
    4414             :         }
    4415             :       else
    4416             :         {
    4417           0 :         const uschar *pp = eptr;
    4418           0 :         for (i = min; i < max; i++)
    4419             :           {
    4420           0 :           if (eptr >= md->end_subject || c != *eptr) break;
    4421           0 :           eptr++;
    4422             :           }
    4423           0 :         while (eptr >= pp)
    4424           0 :          if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
    4425           0 :            return TRUE;
    4426           0 :         return FALSE;
    4427             :         }
    4428             :       }
    4429             :     /* Control never gets here */
    4430             : 
    4431             :     /* Match a negated single character */
    4432             : 
    4433           0 :     case OP_NOT:
    4434           0 :     if (eptr >= md->end_subject) return FALSE;
    4435           0 :     ecode++;
    4436           0 :     if ((ims & PCRE_CASELESS) != 0)
    4437             :       {
    4438           0 :       if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE;
    4439             :       }
    4440             :     else
    4441             :       {
    4442           0 :       if (*ecode++ == *eptr++) return FALSE;
    4443             :       }
    4444           0 :     break;
    4445             : 
    4446             :     /* Match a negated single character repeatedly. This is almost a repeat of
    4447             :     the code for a repeated single character, but I haven't found a nice way of
    4448             :     commoning these up that doesn't require a test of the positive/negative
    4449             :     option for each character match. Maybe that wouldn't add very much to the
    4450             :     time taken, but character matching *is* what this is all about... */
    4451             : 
    4452           0 :     case OP_NOTEXACT:
    4453           0 :     min = max = (ecode[1] << 8) + ecode[2];
    4454           0 :     ecode += 3;
    4455           0 :     goto REPEATNOTCHAR;
    4456             : 
    4457           0 :     case OP_NOTUPTO:
    4458             :     case OP_NOTMINUPTO:
    4459           0 :     min = 0;
    4460           0 :     max = (ecode[1] << 8) + ecode[2];
    4461           0 :     minimize = *ecode == OP_NOTMINUPTO;
    4462           0 :     ecode += 3;
    4463           0 :     goto REPEATNOTCHAR;
    4464             : 
    4465           0 :     case OP_NOTSTAR:
    4466             :     case OP_NOTMINSTAR:
    4467             :     case OP_NOTPLUS:
    4468             :     case OP_NOTMINPLUS:
    4469             :     case OP_NOTQUERY:
    4470             :     case OP_NOTMINQUERY:
    4471           0 :     c = *ecode++ - OP_NOTSTAR;
    4472           0 :     minimize = (c & 1) != 0;
    4473           0 :     min = rep_min[c];                 /* Pick up values from tables; */
    4474           0 :     max = rep_max[c];                 /* zero for max => infinity */
    4475           0 :     if (max == 0) max = INT_MAX;
    4476             : 
    4477             :     /* Common code for all repeated negated single-character matches. We can
    4478             :     give up quickly if there are fewer than the minimum number of characters
    4479             :     left in the subject. */
    4480             : 
    4481           0 :     REPEATNOTCHAR:
    4482           0 :     if (min > md->end_subject - eptr) return FALSE;
    4483           0 :     c = *ecode++;
    4484             : 
    4485             :     /* The code is duplicated for the caseless and caseful cases, for speed,
    4486             :     since matching characters is likely to be quite common. First, ensure the
    4487             :     minimum number of matches are present. If min = max, continue at the same
    4488             :     level without recursing. Otherwise, if minimizing, keep trying the rest of
    4489             :     the expression and advancing one matching character if failing, up to the
    4490             :     maximum. Alternatively, if maximizing, find the maximum number of
    4491             :     characters and work backwards. */
    4492             : 
    4493             :     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
    4494             :       max, eptr));
    4495             : 
    4496           0 :     if ((ims & PCRE_CASELESS) != 0)
    4497             :       {
    4498           0 :       c = md->lcc[c];
    4499           0 :       for (i = 1; i <= min; i++)
    4500           0 :         if (c == md->lcc[*eptr++]) return FALSE;
    4501           0 :       if (min == max) continue;
    4502           0 :       if (minimize)
    4503             :         {
    4504           0 :         for (i = min;; i++)
    4505             :           {
    4506           0 :           if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
    4507           0 :             return TRUE;
    4508           0 :           if (i >= max || eptr >= md->end_subject ||
    4509           0 :               c == md->lcc[*eptr++])
    4510           0 :             return FALSE;
    4511             :           }
    4512             :         /* Control never gets here */
    4513             :         }
    4514             :       else
    4515             :         {
    4516           0 :         const uschar *pp = eptr;
    4517           0 :         for (i = min; i < max; i++)
    4518             :           {
    4519           0 :           if (eptr >= md->end_subject || c == md->lcc[*eptr]) break;
    4520           0 :           eptr++;
    4521             :           }
    4522           0 :         while (eptr >= pp)
    4523           0 :           if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
    4524           0 :             return TRUE;
    4525           0 :         return FALSE;
    4526             :         }
    4527             :       /* Control never gets here */
    4528             :       }
    4529             : 
    4530             :     /* Caseful comparisons */
    4531             : 
    4532             :     else
    4533             :       {
    4534           0 :       for (i = 1; i <= min; i++) if (c == *eptr++) return FALSE;
    4535           0 :       if (min == max) continue;
    4536           0 :       if (minimize)
    4537             :         {
    4538           0 :         for (i = min;; i++)
    4539             :           {
    4540           0 :           if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
    4541           0 :             return TRUE;
    4542           0 :           if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;
    4543             :           }
    4544             :         /* Control never gets here */
    4545             :         }
    4546             :       else
    4547             :         {
    4548           0 :         const uschar *pp = eptr;
    4549           0 :         for (i = min; i < max; i++)
    4550             :           {
    4551           0 :           if (eptr >= md->end_subject || c == *eptr) break;
    4552           0 :           eptr++;
    4553             :           }
    4554           0 :         while (eptr >= pp)
    4555           0 :          if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
    4556           0 :            return TRUE;
    4557           0 :         return FALSE;
    4558             :         }
    4559             :       }
    4560             :     /* Control never gets here */
    4561             : 
    4562             :     /* Match a single character type repeatedly; several different opcodes
    4563             :     share code. This is very similar to the code for single characters, but we
    4564             :     repeat it in the interests of efficiency. */
    4565             : 
    4566           0 :     case OP_TYPEEXACT:
    4567           0 :     min = max = (ecode[1] << 8) + ecode[2];
    4568           0 :     minimize = TRUE;
    4569           0 :     ecode += 3;
    4570           0 :     goto REPEATTYPE;
    4571             : 
    4572           0 :     case OP_TYPEUPTO:
    4573             :     case OP_TYPEMINUPTO:
    4574           0 :     min = 0;
    4575           0 :     max = (ecode[1] << 8) + ecode[2];
    4576           0 :     minimize = *ecode == OP_TYPEMINUPTO;
    4577           0 :     ecode += 3;
    4578           0 :     goto REPEATTYPE;
    4579             : 
    4580           0 :     case OP_TYPESTAR:
    4581             :     case OP_TYPEMINSTAR:
    4582             :     case OP_TYPEPLUS:
    4583             :     case OP_TYPEMINPLUS:
    4584             :     case OP_TYPEQUERY:
    4585             :     case OP_TYPEMINQUERY:
    4586           0 :     c = *ecode++ - OP_TYPESTAR;
    4587           0 :     minimize = (c & 1) != 0;
    4588           0 :     min = rep_min[c];                 /* Pick up values from tables; */
    4589           0 :     max = rep_max[c];                 /* zero for max => infinity */
    4590           0 :     if (max == 0) max = INT_MAX;
    4591             : 
    4592             :     /* Common code for all repeated single character type matches */
    4593             : 
    4594           0 :     REPEATTYPE:
    4595           0 :     ctype = *ecode++;      /* Code for the character type */
    4596             : 
    4597             :     /* First, ensure the minimum number of matches are present. Use inline
    4598             :     code for maximizing the speed, and do the type test once at the start
    4599             :     (i.e. keep it out of the loop). Also we can test that there are at least
    4600             :     the minimum number of bytes before we start, except when doing '.' in
    4601             :     UTF8 mode. Leave the test in place in all cases; in the special case we have
    4602             :     to test after each character. */
    4603             : 
    4604           0 :     if (min > md->end_subject - eptr) return FALSE;
    4605           0 :     if (min > 0) switch(ctype)
    4606             :       {
    4607           0 :       case OP_ANY:
    4608             : #ifdef SUPPORT_UTF8
    4609             :       if (md->utf8)
    4610             :         {
    4611             :         for (i = 1; i <= min; i++)
    4612             :           {
    4613             :           if (eptr >= md->end_subject ||
    4614             :              (*eptr++ == '\n' && (ims & PCRE_DOTALL) == 0))
    4615             :             return FALSE;
    4616             :           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
    4617             :           }
    4618             :         break;
    4619             :         }
    4620             : #endif
    4621             :       /* Non-UTF8 can be faster */
    4622           0 :       if ((ims & PCRE_DOTALL) == 0)
    4623           0 :         { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }
    4624           0 :       else eptr += min;
    4625           0 :       break;
    4626             : 
    4627           0 :       case OP_NOT_DIGIT:
    4628           0 :       for (i = 1; i <= min; i++)
    4629           0 :         if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE;
    4630           0 :       break;
    4631             : 
    4632           0 :       case OP_DIGIT:
    4633           0 :       for (i = 1; i <= min; i++)
    4634           0 :         if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE;
    4635           0 :       break;
    4636             : 
    4637           0 :       case OP_NOT_WHITESPACE:
    4638           0 :       for (i = 1; i <= min; i++)
    4639           0 :         if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE;
    4640           0 :       break;
    4641             : 
    4642           0 :       case OP_WHITESPACE:
    4643           0 :       for (i = 1; i <= min; i++)
    4644           0 :         if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE;
    4645           0 :       break;
    4646             : 
    4647           0 :       case OP_NOT_WORDCHAR:
    4648           0 :       for (i = 1; i <= min; i++)
    4649           0 :         if ((md->ctypes[*eptr++] & ctype_word) != 0)
    4650           0 :           return FALSE;
    4651           0 :       break;
    4652             : 
    4653           0 :       case OP_WORDCHAR:
    4654           0 :       for (i = 1; i <= min; i++)
    4655           0 :         if ((md->ctypes[*eptr++] & ctype_word) == 0)
    4656           0 :           return FALSE;
    4657           0 :       break;
    4658             :       }
    4659             : 
    4660             :     /* If min = max, continue at the same level without recursing */
    4661             : 
    4662           0 :     if (min == max) continue;
    4663             : 
    4664             :     /* If minimizing, we have to test the rest of the pattern before each
    4665             :     subsequent match. */
    4666             : 
    4667           0 :     if (minimize)
    4668             :       {
    4669           0 :       for (i = min;; i++)
    4670             :         {
    4671           0 :         if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) return TRUE;
    4672           0 :         if (i >= max || eptr >= md->end_subject) return FALSE;
    4673             : 
    4674           0 :         c = *eptr++;
    4675             :         switch(ctype)
    4676             :           {
    4677           0 :           case OP_ANY:
    4678           0 :           if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE;
    4679             : #ifdef SUPPORT_UTF8
    4680             :           if (md->utf8)
    4681             :             while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
    4682             : #endif
    4683           0 :           break;
    4684             : 
    4685           0 :           case OP_NOT_DIGIT:
    4686           0 :           if ((md->ctypes[c] & ctype_digit) != 0) return FALSE;
    4687           0 :           break;
    4688             : 
    4689           0 :           case OP_DIGIT:
    4690           0 :           if ((md->ctypes[c] & ctype_digit) == 0) return FALSE;
    4691           0 :           break;
    4692             : 
    4693           0 :           case OP_NOT_WHITESPACE:
    4694           0 :           if ((md->ctypes[c] & ctype_space) != 0) return FALSE;
    4695           0 :           break;
    4696             : 
    4697           0 :           case OP_WHITESPACE:
    4698           0 :           if  ((md->ctypes[c] & ctype_space) == 0) return FALSE;
    4699           0 :           break;
    4700             : 
    4701           0 :           case OP_NOT_WORDCHAR:
    4702           0 :           if ((md->ctypes[c] & ctype_word) != 0) return FALSE;
    4703           0 :           break;
    4704             : 
    4705           0 :           case OP_WORDCHAR:
    4706           0 :           if ((md->ctypes[c] & ctype_word) == 0) return FALSE;
    4707           0 :           break;
    4708             :           }
    4709           0 :         }
    4710             :       /* Control never gets here */
    4711             :       }
    4712             : 
    4713             :     /* If maximizing it is worth using inline code for speed, doing the type
    4714             :     test once at the start (i.e. keep it out of the loop). */
    4715             : 
    4716             :     else
    4717             :       {
    4718           0 :       const uschar *pp = eptr;
    4719             :       switch(ctype)
    4720             :         {
    4721           0 :         case OP_ANY:
    4722             : 
    4723             :         /* Special code is required for UTF8, but when the maximum is unlimited
    4724             :         we don't need it. */
    4725             : 
    4726             : #ifdef SUPPORT_UTF8
    4727             :         if (md->utf8 && max < INT_MAX)
    4728             :           {
    4729             :           if ((ims & PCRE_DOTALL) == 0)
    4730             :             {
    4731             :             for (i = min; i < max; i++)
    4732             :               {
    4733             :               if (eptr >= md->end_subject || *eptr++ == '\n') break;
    4734             :               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
    4735             :               }
    4736             :             }
    4737             :           else
    4738             :             {
    4739             :             for (i = min; i < max; i++)
    4740             :               {
    4741             :               eptr++;
    4742             :               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
    4743             :               }
    4744             :             }
    4745             :           break;
    4746             :           }
    4747             : #endif
    4748             :         /* Non-UTF8 can be faster */
    4749           0 :         if ((ims & PCRE_DOTALL) == 0)
    4750             :           {
    4751           0 :           for (i = min; i < max; i++)
    4752             :             {
    4753           0 :             if (eptr >= md->end_subject || *eptr == '\n') break;
    4754           0 :             eptr++;
    4755             :             }
    4756             :           }
    4757             :         else
    4758             :           {
    4759           0 :           c = max - min;
    4760           0 :           if (c > md->end_subject - eptr) c = md->end_subject - eptr;
    4761           0 :           eptr += c;
    4762             :           }
    4763           0 :         break;
    4764             : 
    4765           0 :         case OP_NOT_DIGIT:
    4766           0 :         for (i = min; i < max; i++)
    4767             :           {
    4768           0 :           if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
    4769             :             break;
    4770           0 :           eptr++;
    4771             :           }
    4772           0 :         break;
    4773             : 
    4774           0 :         case OP_DIGIT:
    4775           0 :         for (i = min; i < max; i++)
    4776             :           {
    4777           0 :           if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
    4778             :             break;
    4779           0 :           eptr++;
    4780             :           }
    4781           0 :         break;
    4782             : 
    4783           0 :         case OP_NOT_WHITESPACE:
    4784           0 :         for (i = min; i < max; i++)
    4785             :           {
    4786           0 :           if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
    4787             :             break;
    4788           0 :           eptr++;
    4789             :           }
    4790           0 :         break;
    4791             : 
    4792           0 :         case OP_WHITESPACE:
    4793           0 :         for (i = min; i < max; i++)
    4794             :           {
    4795           0 :           if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
    4796             :             break;
    4797           0 :           eptr++;
    4798             :           }
    4799           0 :         break;
    4800             : 
    4801           0 :         case OP_NOT_WORDCHAR:
    4802           0 :         for (i = min; i < max; i++)
    4803             :           {
    4804           0 :           if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
    4805             :             break;
    4806           0 :           eptr++;
    4807             :           }
    4808           0 :         break;
    4809             : 
    4810           0 :         case OP_WORDCHAR:
    4811           0 :         for (i = min; i < max; i++)
    4812             :           {
    4813           0 :           if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
    4814             :             break;
    4815           0 :           eptr++;
    4816             :           }
    4817           0 :         break;
    4818             :         }
    4819             : 
    4820           0 :       while (eptr >= pp)
    4821             :         {
    4822           0 :         if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
    4823           0 :           return TRUE;
    4824             : #ifdef SUPPORT_UTF8
    4825             :         if (md->utf8)
    4826             :           while (eptr > pp && (*eptr & 0xc0) == 0x80) eptr--;
    4827             : #endif
    4828             :         }
    4829           0 :       return FALSE;
    4830             :       }
    4831             :     /* Control never gets here */
    4832             : 
    4833             :     /* There's been some horrible disaster. */
    4834             : 
    4835           0 :     default:
    4836             :     DPRINTF(("Unknown opcode %d\n", *ecode));
    4837           0 :     md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
    4838           0 :     return FALSE;
    4839             :     }
    4840             : 
    4841             :   /* Do not stick any code in here without much thought; it is assumed
    4842             :   that "continue" in the code above comes out to here to repeat the main
    4843             :   loop. */
    4844             : 
    4845             :   }             /* End of main loop */
    4846             : /* Control never reaches here */
    4847             : }
    4848             : 
    4849             : 
    4850             : 
    4851             : 
    4852             : /*************************************************
    4853             : *         Execute a Regular Expression           *
    4854             : *************************************************/
    4855             : 
    4856             : /* This function applies a compiled re to a subject string and picks out
    4857             : portions of the string if it matches. Two elements in the vector are set for
    4858             : each substring: the offsets to the start and end of the substring.
    4859             : 
    4860             : Arguments:
    4861             :   external_re     points to the compiled expression
    4862             :   external_extra  points to "hints" from pcre_study() or is NULL
    4863             :   subject         points to the subject string
    4864             :   length          length of subject string (may contain binary zeros)
    4865             :   start_offset    where to start in the subject string
    4866             :   options         option bits
    4867             :   offsets         points to a vector of ints to be filled in with offsets
    4868             :   offsetcount     the number of elements in the vector
    4869             : 
    4870             : Returns:          > 0 => success; value is the number of elements filled in
    4871             :                   = 0 => success, but offsets is not big enough
    4872             :                    -1 => failed to match
    4873             :                  < -1 => some kind of unexpected problem
    4874             : */
    4875             : 
    4876             : __attribute__((no_sanitize("memory"))) __attribute__((no_sanitize_memory)) int
    4877           0 : pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
    4878             :   const char *subject, int length, int start_offset, int options, int *offsets,
    4879             :   int offsetcount)
    4880             : {
    4881             : int resetcount, ocount;
    4882           0 : int first_char = -1;
    4883           0 : int req_char = -1;
    4884           0 : int req_char2 = -1;
    4885           0 : unsigned long int ims = 0;
    4886             : match_data match_block;
    4887           0 : const uschar *start_bits = NULL;
    4888           0 : const uschar *start_match = (const uschar *)subject + start_offset;
    4889             : const uschar *end_subject;
    4890           0 : const uschar *req_char_ptr = start_match - 1;
    4891           0 : const real_pcre *re = (const real_pcre *)external_re;
    4892           0 : const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
    4893           0 : BOOL using_temporary_offsets = FALSE;
    4894           0 : BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
    4895           0 : BOOL startline = (re->options & PCRE_STARTLINE) != 0;
    4896             : 
    4897           0 : if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
    4898             : 
    4899           0 : if (re == NULL || subject == NULL ||
    4900           0 :    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
    4901           0 : if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
    4902             : 
    4903           0 : match_block.start_pattern = re->code;
    4904           0 : match_block.start_subject = (const uschar *)subject;
    4905           0 : match_block.end_subject = match_block.start_subject + length;
    4906           0 : end_subject = match_block.end_subject;
    4907             : 
    4908           0 : match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
    4909           0 : match_block.utf8 = (re->options & PCRE_UTF8) != 0;
    4910             : 
    4911           0 : match_block.notbol = (options & PCRE_NOTBOL) != 0;
    4912           0 : match_block.noteol = (options & PCRE_NOTEOL) != 0;
    4913           0 : match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
    4914             : 
    4915           0 : match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */
    4916             : 
    4917           0 : match_block.lcc = re->tables + lcc_offset;
    4918           0 : match_block.ctypes = re->tables + ctypes_offset;
    4919             : 
    4920             : /* The ims options can vary during the matching as a result of the presence
    4921             : of (?ims) items in the pattern. They are kept in a local variable so that
    4922             : restoring at the exit of a group is easy. */
    4923             : 
    4924           0 : ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
    4925             : 
    4926             : /* If the expression has got more back references than the offsets supplied can
    4927             : hold, we get a temporary bit of working store to use during the matching.
    4928             : Otherwise, we can use the vector supplied, rounding down its size to a multiple
    4929             : of 3. */
    4930             : 
    4931           0 : ocount = offsetcount - (offsetcount % 3);
    4932             : 
    4933           0 : if (re->top_backref > 0 && re->top_backref >= ocount/3)
    4934             :   {
    4935           0 :   ocount = re->top_backref * 3 + 3;
    4936           0 :   match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
    4937           0 :   if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
    4938           0 :   using_temporary_offsets = TRUE;
    4939             :   DPRINTF(("Got memory to hold back references\n"));
    4940             :   }
    4941           0 : else match_block.offset_vector = offsets;
    4942             : 
    4943           0 : match_block.offset_end = ocount;
    4944           0 : match_block.offset_max = (2*ocount)/3;
    4945           0 : match_block.offset_overflow = FALSE;
    4946             : 
    4947             : /* Compute the minimum number of offsets that we need to reset each time. Doing
    4948             : this makes a huge difference to execution time when there aren't many brackets
    4949             : in the pattern. */
    4950             : 
    4951           0 : resetcount = 2 + re->top_bracket * 2;
    4952           0 : if (resetcount > offsetcount) resetcount = ocount;
    4953             : 
    4954             : /* Reset the working variable associated with each extraction. These should
    4955             : never be used unless previously set, but they get saved and restored, and so we
    4956             : initialize them to avoid reading uninitialized locations. */
    4957             : 
    4958           0 : if (match_block.offset_vector != NULL)
    4959             :   {
    4960           0 :   register int *iptr = match_block.offset_vector + ocount;
    4961           0 :   register int *iend = iptr - resetcount/2 + 1;
    4962           0 :   while (--iptr >= iend) *iptr = -1;
    4963             :   }
    4964             : 
    4965             : /* Set up the first character to match, if available. The first_char value is
    4966             : never set for an anchored regular expression, but the anchoring may be forced
    4967             : at run time, so we have to test for anchoring. The first char may be unset for
    4968             : an unanchored pattern, of course. If there's no first char and the pattern was
    4969             : studied, there may be a bitmap of possible first characters. */
    4970             : 
    4971           0 : if (!anchored)
    4972             :   {
    4973           0 :   if ((re->options & PCRE_FIRSTSET) != 0)
    4974             :     {
    4975           0 :     first_char = re->first_char;
    4976           0 :     if ((ims & PCRE_CASELESS) != 0) first_char = match_block.lcc[first_char];
    4977             :     }
    4978             :   else
    4979           0 :     if (!startline && extra != NULL &&
    4980           0 :       (extra->options & PCRE_STUDY_MAPPED) != 0)
    4981           0 :         start_bits = extra->start_bits;
    4982             :   }
    4983             : 
    4984             : /* For anchored or unanchored matches, there may be a "last known required
    4985             : character" set. If the PCRE_CASELESS is set, implying that the match starts
    4986             : caselessly, or if there are any changes of this flag within the regex, set up
    4987             : both cases of the character. Otherwise set the two values the same, which will
    4988             : avoid duplicate testing (which takes significant time). This covers the vast
    4989             : majority of cases. It will be suboptimal when the case flag changes in a regex
    4990             : and the required character in fact is caseful. */
    4991             : 
    4992           0 : if ((re->options & PCRE_REQCHSET) != 0)
    4993             :   {
    4994           0 :   req_char = re->req_char;
    4995           0 :   req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0)?
    4996           0 :     (re->tables + fcc_offset)[req_char] : req_char;
    4997             :   }
    4998             : 
    4999             : /* Loop for handling unanchored repeated matching attempts; for anchored regexs
    5000             : the loop runs just once. */
    5001             : 
    5002             : do
    5003             :   {
    5004             :   int rc;
    5005           0 :   register int *iptr = match_block.offset_vector;
    5006           0 :   register int *iend = iptr + resetcount;
    5007             : 
    5008             :   /* Reset the maximum number of extractions we might see. */
    5009             : 
    5010           0 :   while (iptr < iend) *iptr++ = -1;
    5011             : 
    5012             :   /* Advance to a unique first char if possible */
    5013             : 
    5014           0 :   if (first_char >= 0)
    5015             :     {
    5016           0 :     if ((ims & PCRE_CASELESS) != 0)
    5017           0 :       while (start_match < end_subject &&
    5018           0 :              match_block.lcc[*start_match] != first_char)
    5019           0 :         start_match++;
    5020             :     else
    5021           0 :       while (start_match < end_subject && *start_match != first_char)
    5022           0 :         start_match++;
    5023             :     }
    5024             : 
    5025             :   /* Or to just after \n for a multiline match if possible */
    5026             : 
    5027           0 :   else if (startline)
    5028             :     {
    5029           0 :     if (start_match > match_block.start_subject + start_offset)
    5030             :       {
    5031           0 :       while (start_match < end_subject && start_match[-1] != '\n')
    5032           0 :         start_match++;
    5033             :       }
    5034             :     }
    5035             : 
    5036             :   /* Or to a non-unique first char after study */
    5037             : 
    5038           0 :   else if (start_bits != NULL)
    5039             :     {
    5040           0 :     while (start_match < end_subject)
    5041             :       {
    5042           0 :       register int c = *start_match;
    5043           0 :       if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
    5044             :       }
    5045             :     }
    5046             : 
    5047             : #ifdef DEBUG  /* Sigh. Some compilers never learn. */
    5048             :   printf(">>>> Match against: ");
    5049             :   pchars(start_match, end_subject - start_match, TRUE, &match_block);
    5050             :   printf("\n");
    5051             : #endif
    5052             : 
    5053             :   /* If req_char is set, we know that that character must appear in the subject
    5054             :   for the match to succeed. If the first character is set, req_char must be
    5055             :   later in the subject; otherwise the test starts at the match point. This
    5056             :   optimization can save a huge amount of backtracking in patterns with nested
    5057             :   unlimited repeats that aren't going to match. We don't know what the state of
    5058             :   case matching may be when this character is hit, so test for it in both its
    5059             :   cases if necessary. However, the different cased versions will not be set up
    5060             :   unless PCRE_CASELESS was given or the casing state changes within the regex.
    5061             :   Writing separate code makes it go faster, as does using an autoincrement and
    5062             :   backing off on a match. */
    5063             : 
    5064           0 :   if (req_char >= 0)
    5065             :     {
    5066           0 :     register const uschar *p = start_match + ((first_char >= 0)? 1 : 0);
    5067             : 
    5068             :     /* We don't need to repeat the search if we haven't yet reached the
    5069             :     place we found it at last time. */
    5070             : 
    5071           0 :     if (p > req_char_ptr)
    5072             :       {
    5073             :       /* Do a single test if no case difference is set up */
    5074             : 
    5075           0 :       if (req_char == req_char2)
    5076             :         {
    5077           0 :         while (p < end_subject)
    5078             :           {
    5079           0 :           if (*p++ == req_char) { p--; break; }
    5080             :           }
    5081             :         }
    5082             : 
    5083             :       /* Otherwise test for either case */
    5084             : 
    5085             :       else
    5086             :         {
    5087           0 :         while (p < end_subject)
    5088             :           {
    5089           0 :           register int pp = *p++;
    5090           0 :           if (pp == req_char || pp == req_char2) { p--; break; }
    5091             :           }
    5092             :         }
    5093             : 
    5094             :       /* If we can't find the required character, break the matching loop */
    5095             : 
    5096           0 :       if (p >= end_subject) break;
    5097             : 
    5098             :       /* If we have found the required character, save the point where we
    5099             :       found it, so that we don't search again next time round the loop if
    5100             :       the start hasn't passed this character yet. */
    5101             : 
    5102           0 :       req_char_ptr = p;
    5103             :       }
    5104             :     }
    5105             : 
    5106             :   /* When a match occurs, substrings will be set for all internal extractions;
    5107             :   we just need to set up the whole thing as substring 0 before returning. If
    5108             :   there were too many extractions, set the return code to zero. In the case
    5109             :   where we had to get some local store to hold offsets for backreferences, copy
    5110             :   those back references that we can. In this case there need not be overflow
    5111             :   if certain parts of the pattern were not used. */
    5112             : 
    5113           0 :   match_block.start_match = start_match;
    5114           0 :   if (!match(start_match, re->code, 2, &match_block, ims, NULL, match_isgroup))
    5115           0 :     continue;
    5116             : 
    5117             :   /* Copy the offset information from temporary store if necessary */
    5118             : 
    5119           0 :   if (using_temporary_offsets)
    5120             :     {
    5121           0 :     if (offsetcount >= 4)
    5122             :       {
    5123           0 :       memcpy(offsets + 2, match_block.offset_vector + 2,
    5124           0 :         (offsetcount - 2) * sizeof(int));
    5125             :       DPRINTF(("Copied offsets from temporary memory\n"));
    5126             :       }
    5127           0 :     if (match_block.end_offset_top > offsetcount)
    5128           0 :       match_block.offset_overflow = TRUE;
    5129             : 
    5130             :     DPRINTF(("Freeing temporary memory\n"));
    5131           0 :     (pcre_free)(match_block.offset_vector);
    5132             :     }
    5133             : 
    5134           0 :   rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
    5135             : 
    5136           0 :   if (match_block.offset_end < 2) rc = 0; else
    5137             :     {
    5138           0 :     offsets[0] = start_match - match_block.start_subject;
    5139           0 :     offsets[1] = match_block.end_match_ptr - match_block.start_subject;
    5140             :     }
    5141             : 
    5142             :   DPRINTF((">>>> returning %d\n", rc));
    5143           0 :   return rc;
    5144             :   }
    5145             : 
    5146             : /* This "while" is the end of the "do" above */
    5147             : 
    5148           0 : while (!anchored &&
    5149           0 :        match_block.errorcode == PCRE_ERROR_NOMATCH &&
    5150           0 :        start_match++ < end_subject);
    5151             : 
    5152           0 : if (using_temporary_offsets)
    5153             :   {
    5154             :   DPRINTF(("Freeing temporary memory\n"));
    5155           0 :   (pcre_free)(match_block.offset_vector);
    5156             :   }
    5157             : 
    5158             : DPRINTF((">>>> returning %d\n", match_block.errorcode));
    5159             : 
    5160           0 : return match_block.errorcode;
    5161             : }
    5162             : 
    5163             : /* End of pcre.c */

Generated by: LCOV version 1.14