LCOV - code coverage report
Current view: top level - fuzz/pcre - study.c (source / functions) Hit Total Coverage
Test: trace.lcov_info_final Lines: 0 158 0.0 %
Date: 2021-02-22 04:51:02 Functions: 0 3 0.0 %

          Line data    Source code
       1             : /*************************************************
       2             : *      Perl-Compatible Regular Expressions       *
       3             : *************************************************/
       4             : 
       5             : /*
       6             : This is a library of functions to support regular expressions whose syntax
       7             : and semantics are as close as possible to those of the Perl 5 language. See
       8             : the file Tech.Notes for some information on the internals.
       9             : 
      10             : Written by: Philip Hazel <ph10@cam.ac.uk>
      11             : 
      12             :            Copyright (c) 1997-2000 University of Cambridge
      13             : 
      14             : -----------------------------------------------------------------------------
      15             : Permission is granted to anyone to use this software for any purpose on any
      16             : computer system, and to redistribute it freely, subject to the following
      17             : restrictions:
      18             : 
      19             : 1. This software is distributed in the hope that it will be useful,
      20             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      21             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
      22             : 
      23             : 2. The origin of this software must not be misrepresented, either by
      24             :    explicit claim or by omission.
      25             : 
      26             : 3. Altered versions must be plainly marked as such, and must not be
      27             :    misrepresented as being the original software.
      28             : 
      29             : 4. If PCRE is embedded in any software that is released under the GNU
      30             :    General Purpose Licence (GPL), then the terms of that licence shall
      31             :    supersede any condition above with which it is incompatible.
      32             : -----------------------------------------------------------------------------
      33             : */
      34             : 
      35             : 
      36             : /* Include the internals header, which itself includes Standard C headers plus
      37             : the external pcre header. */
      38             : 
      39             : #include "internal.h"
      40             : 
      41             : 
      42             : 
      43             : /*************************************************
      44             : *      Set a bit and maybe its alternate case    *
      45             : *************************************************/
      46             : 
      47             : /* Given a character, set its bit in the table, and also the bit for the other
      48             : version of a letter if we are caseless.
      49             : 
      50             : Arguments:
      51             :   start_bits    points to the bit map
      52             :   c             is the character
      53             :   caseless      the caseless flag
      54             :   cd            the block with char table pointers
      55             : 
      56             : Returns:        nothing
      57             : */
      58             : 
      59             : static void
      60           0 : set_bit(uschar *start_bits, int c, BOOL caseless, compile_data *cd)
      61             : {
      62           0 : start_bits[c/8] |= (1 << (c&7));
      63           0 : if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
      64           0 :   start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
      65           0 : }
      66             : 
      67             : 
      68             : 
      69             : /*************************************************
      70             : *          Create bitmap of starting chars       *
      71             : *************************************************/
      72             : 
      73             : /* This function scans a compiled unanchored expression and attempts to build a
      74             : bitmap of the set of initial characters. If it can't, it returns FALSE. As time
      75             : goes by, we may be able to get more clever at doing this.
      76             : 
      77             : Arguments:
      78             :   code         points to an expression
      79             :   start_bits   points to a 32-byte table, initialized to 0
      80             :   caseless     the current state of the caseless flag
      81             :   cd           the block with char table pointers
      82             : 
      83             : Returns:       TRUE if table built, FALSE otherwise
      84             : */
      85             : 
      86             : static BOOL
      87           0 : set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
      88             :   compile_data *cd)
      89             : {
      90             : register int c;
      91             : 
      92             : /* This next statement and the later reference to dummy are here in order to
      93             : trick the optimizer of the IBM C compiler for OS/2 into generating correct
      94             : code. Apparently IBM isn't going to fix the problem, and we would rather not
      95             : disable optimization (in this module it actually makes a big difference, and
      96             : the pcre module can use all the optimization it can get). */
      97             : 
      98             : volatile int dummy;
      99             : 
     100             : do
     101             :   {
     102           0 :   const uschar *tcode = code + 3;
     103           0 :   BOOL try_next = TRUE;
     104             : 
     105           0 :   while (try_next)
     106             :     {
     107           0 :     try_next = FALSE;
     108             : 
     109             :     /* If a branch starts with a bracket or a positive lookahead assertion,
     110             :     recurse to set bits from within them. That's all for this branch. */
     111             : 
     112           0 :     if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
     113             :       {
     114           0 :       if (!set_start_bits(tcode, start_bits, caseless, cd))
     115           0 :         return FALSE;
     116             :       }
     117             : 
     118           0 :     else switch(*tcode)
     119             :       {
     120           0 :       default:
     121           0 :       return FALSE;
     122             : 
     123             :       /* Skip over lookbehind and negative lookahead assertions */
     124             : 
     125           0 :       case OP_ASSERT_NOT:
     126             :       case OP_ASSERTBACK:
     127             :       case OP_ASSERTBACK_NOT:
     128           0 :       try_next = TRUE;
     129           0 :       do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
     130           0 :       tcode += 3;
     131           0 :       break;
     132             : 
     133             :       /* Skip over an option setting, changing the caseless flag */
     134             : 
     135           0 :       case OP_OPT:
     136           0 :       caseless = (tcode[1] & PCRE_CASELESS) != 0;
     137           0 :       tcode += 2;
     138           0 :       try_next = TRUE;
     139           0 :       break;
     140             : 
     141             :       /* BRAZERO does the bracket, but carries on. */
     142             : 
     143           0 :       case OP_BRAZERO:
     144             :       case OP_BRAMINZERO:
     145           0 :       if (!set_start_bits(++tcode, start_bits, caseless, cd))
     146           0 :         return FALSE;
     147           0 :       dummy = 1;
     148           0 :       do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
     149           0 :       tcode += 3;
     150           0 :       try_next = TRUE;
     151           0 :       break;
     152             : 
     153             :       /* Single-char * or ? sets the bit and tries the next item */
     154             : 
     155           0 :       case OP_STAR:
     156             :       case OP_MINSTAR:
     157             :       case OP_QUERY:
     158             :       case OP_MINQUERY:
     159           0 :       set_bit(start_bits, tcode[1], caseless, cd);
     160           0 :       tcode += 2;
     161           0 :       try_next = TRUE;
     162           0 :       break;
     163             : 
     164             :       /* Single-char upto sets the bit and tries the next */
     165             : 
     166           0 :       case OP_UPTO:
     167             :       case OP_MINUPTO:
     168           0 :       set_bit(start_bits, tcode[3], caseless, cd);
     169           0 :       tcode += 4;
     170           0 :       try_next = TRUE;
     171           0 :       break;
     172             : 
     173             :       /* At least one single char sets the bit and stops */
     174             : 
     175           0 :       case OP_EXACT:       /* Fall through */
     176           0 :       tcode++;
     177             : 
     178           0 :       case OP_CHARS:       /* Fall through */
     179           0 :       tcode++;
     180             : 
     181           0 :       case OP_PLUS:
     182             :       case OP_MINPLUS:
     183           0 :       set_bit(start_bits, tcode[1], caseless, cd);
     184           0 :       break;
     185             : 
     186             :       /* Single character type sets the bits and stops */
     187             : 
     188           0 :       case OP_NOT_DIGIT:
     189           0 :       for (c = 0; c < 32; c++)
     190           0 :         start_bits[c] |= ~cd->cbits[c+cbit_digit];
     191           0 :       break;
     192             : 
     193           0 :       case OP_DIGIT:
     194           0 :       for (c = 0; c < 32; c++)
     195           0 :         start_bits[c] |= cd->cbits[c+cbit_digit];
     196           0 :       break;
     197             : 
     198           0 :       case OP_NOT_WHITESPACE:
     199           0 :       for (c = 0; c < 32; c++)
     200           0 :         start_bits[c] |= ~cd->cbits[c+cbit_space];
     201           0 :       break;
     202             : 
     203           0 :       case OP_WHITESPACE:
     204           0 :       for (c = 0; c < 32; c++)
     205           0 :         start_bits[c] |= cd->cbits[c+cbit_space];
     206           0 :       break;
     207             : 
     208           0 :       case OP_NOT_WORDCHAR:
     209           0 :       for (c = 0; c < 32; c++)
     210           0 :         start_bits[c] |= ~cd->cbits[c+cbit_word];
     211           0 :       break;
     212             : 
     213           0 :       case OP_WORDCHAR:
     214           0 :       for (c = 0; c < 32; c++)
     215           0 :         start_bits[c] |= cd->cbits[c+cbit_word];
     216           0 :       break;
     217             : 
     218             :       /* One or more character type fudges the pointer and restarts, knowing
     219             :       it will hit a single character type and stop there. */
     220             : 
     221           0 :       case OP_TYPEPLUS:
     222             :       case OP_TYPEMINPLUS:
     223           0 :       tcode++;
     224           0 :       try_next = TRUE;
     225           0 :       break;
     226             : 
     227           0 :       case OP_TYPEEXACT:
     228           0 :       tcode += 3;
     229           0 :       try_next = TRUE;
     230           0 :       break;
     231             : 
     232             :       /* Zero or more repeats of character types set the bits and then
     233             :       try again. */
     234             : 
     235           0 :       case OP_TYPEUPTO:
     236             :       case OP_TYPEMINUPTO:
     237           0 :       tcode += 2;               /* Fall through */
     238             : 
     239           0 :       case OP_TYPESTAR:
     240             :       case OP_TYPEMINSTAR:
     241             :       case OP_TYPEQUERY:
     242             :       case OP_TYPEMINQUERY:
     243           0 :       switch(tcode[1])
     244             :         {
     245           0 :         case OP_NOT_DIGIT:
     246           0 :         for (c = 0; c < 32; c++)
     247           0 :           start_bits[c] |= ~cd->cbits[c+cbit_digit];
     248           0 :         break;
     249             : 
     250           0 :         case OP_DIGIT:
     251           0 :         for (c = 0; c < 32; c++)
     252           0 :           start_bits[c] |= cd->cbits[c+cbit_digit];
     253           0 :         break;
     254             : 
     255           0 :         case OP_NOT_WHITESPACE:
     256           0 :         for (c = 0; c < 32; c++)
     257           0 :           start_bits[c] |= ~cd->cbits[c+cbit_space];
     258           0 :         break;
     259             : 
     260           0 :         case OP_WHITESPACE:
     261           0 :         for (c = 0; c < 32; c++)
     262           0 :           start_bits[c] |= cd->cbits[c+cbit_space];
     263           0 :         break;
     264             : 
     265           0 :         case OP_NOT_WORDCHAR:
     266           0 :         for (c = 0; c < 32; c++)
     267           0 :           start_bits[c] |= ~cd->cbits[c+cbit_word];
     268           0 :         break;
     269             : 
     270           0 :         case OP_WORDCHAR:
     271           0 :         for (c = 0; c < 32; c++)
     272           0 :           start_bits[c] |= cd->cbits[c+cbit_word];
     273           0 :         break;
     274             :         }
     275             : 
     276           0 :       tcode += 2;
     277           0 :       try_next = TRUE;
     278           0 :       break;
     279             : 
     280             :       /* Character class: set the bits and either carry on or not,
     281             :       according to the repeat count. */
     282             : 
     283           0 :       case OP_CLASS:
     284             :         {
     285           0 :         tcode++;
     286           0 :         for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
     287           0 :         tcode += 32;
     288           0 :         switch (*tcode)
     289             :           {
     290           0 :           case OP_CRSTAR:
     291             :           case OP_CRMINSTAR:
     292             :           case OP_CRQUERY:
     293             :           case OP_CRMINQUERY:
     294           0 :           tcode++;
     295           0 :           try_next = TRUE;
     296           0 :           break;
     297             : 
     298           0 :           case OP_CRRANGE:
     299             :           case OP_CRMINRANGE:
     300           0 :           if (((tcode[1] << 8) + tcode[2]) == 0)
     301             :             {
     302           0 :             tcode += 5;
     303           0 :             try_next = TRUE;
     304             :             }
     305           0 :           break;
     306             :           }
     307           0 :         }
     308           0 :       break; /* End of class handling */
     309             : 
     310             :       }      /* End of switch */
     311             :     }        /* End of try_next loop */
     312             : 
     313           0 :   code += (code[1] << 8) + code[2];   /* Advance to next branch */
     314             :   }
     315           0 : while (*code == OP_ALT);
     316           0 : return TRUE;
     317             : }
     318             : 
     319             : 
     320             : 
     321             : /*************************************************
     322             : *          Study a compiled expression           *
     323             : *************************************************/
     324             : 
     325             : /* This function is handed a compiled expression that it must study to produce
     326             : information that will speed up the matching. It returns a pcre_extra block
     327             : which then gets handed back to pcre_exec().
     328             : 
     329             : Arguments:
     330             :   re        points to the compiled expression
     331             :   options   contains option bits
     332             :   errorptr  points to where to place error messages;
     333             :             set NULL unless error
     334             : 
     335             : Returns:    pointer to a pcre_extra block,
     336             :             NULL on error or if no optimization possible
     337             : */
     338             : 
     339             : pcre_extra *
     340           0 : pcre_study(const pcre *external_re, int options, const char **errorptr)
     341             : {
     342             : uschar start_bits[32];
     343             : real_pcre_extra *extra;
     344           0 : const real_pcre *re = (const real_pcre *)external_re;
     345             : compile_data compile_block;
     346             : 
     347           0 : *errorptr = NULL;
     348             : 
     349           0 : if (re == NULL || re->magic_number != MAGIC_NUMBER)
     350             :   {
     351           0 :   *errorptr = "argument is not a compiled regular expression";
     352           0 :   return NULL;
     353             :   }
     354             : 
     355           0 : if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
     356             :   {
     357           0 :   *errorptr = "unknown or incorrect option bit(s) set";
     358           0 :   return NULL;
     359             :   }
     360             : 
     361             : /* For an anchored pattern, or an unchored pattern that has a first char, or a
     362             : multiline pattern that matches only at "line starts", no further processing at
     363             : present. */
     364             : 
     365           0 : if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
     366           0 :   return NULL;
     367             : 
     368             : /* Set the character tables in the block which is passed around */
     369             : 
     370           0 : compile_block.lcc = re->tables + lcc_offset;
     371           0 : compile_block.fcc = re->tables + fcc_offset;
     372           0 : compile_block.cbits = re->tables + cbits_offset;
     373           0 : compile_block.ctypes = re->tables + ctypes_offset;
     374             : 
     375             : /* See if we can find a fixed set of initial characters for the pattern. */
     376             : 
     377           0 : memset(start_bits, 0, 32 * sizeof(uschar));
     378           0 : if (!set_start_bits(re->code, start_bits, (re->options & PCRE_CASELESS) != 0,
     379           0 :   &compile_block)) return NULL;
     380             : 
     381             : /* Get an "extra" block and put the information therein. */
     382             : 
     383           0 : extra = (real_pcre_extra *)(pcre_malloc)(sizeof(real_pcre_extra));
     384             : 
     385           0 : if (extra == NULL)
     386             :   {
     387           0 :   *errorptr = "failed to get memory";
     388           0 :   return NULL;
     389             :   }
     390             : 
     391           0 : extra->options = PCRE_STUDY_MAPPED;
     392           0 : memcpy(extra->start_bits, start_bits, sizeof(start_bits));
     393             : 
     394           0 : return (pcre_extra *)extra;
     395             : }
     396             : 
     397             : /* End of study.c */

Generated by: LCOV version 1.14