LCOV - code coverage report
Current view: top level - fuzz - urlmatch.c (source / functions) Hit Total Coverage
Test: trace.lcov_info_final Lines: 311 378 82.3 %
Date: 2021-02-22 04:51:02 Functions: 20 22 90.9 %

          Line data    Source code
       1             : /*********************************************************************
       2             :  *
       3             :  * File        :  $Source: /cvsroot/ijbswa/current/urlmatch.c,v $
       4             :  *
       5             :  * Purpose     :  Defines functions to match URLs against URL
       6             :  *                patterns.
       7             :  *
       8             :  * Copyright   :  Written by and Copyright (C) 2001-2020
       9             :  *                the Privoxy team. https://www.privoxy.org/
      10             :  *
      11             :  *                Based on the Internet Junkbuster originally written
      12             :  *                by and Copyright (C) 1997 Anonymous Coders and
      13             :  *                Junkbusters Corporation.  http://www.junkbusters.com
      14             :  *
      15             :  *                This program is free software; you can redistribute it
      16             :  *                and/or modify it under the terms of the GNU General
      17             :  *                Public License as published by the Free Software
      18             :  *                Foundation; either version 2 of the License, or (at
      19             :  *                your option) any later version.
      20             :  *
      21             :  *                This program is distributed in the hope that it will
      22             :  *                be useful, but WITHOUT ANY WARRANTY; without even the
      23             :  *                implied warranty of MERCHANTABILITY or FITNESS FOR A
      24             :  *                PARTICULAR PURPOSE.  See the GNU General Public
      25             :  *                License for more details.
      26             :  *
      27             :  *                The GNU General Public License should be included with
      28             :  *                this file.  If not, you can view it at
      29             :  *                http://www.gnu.org/copyleft/gpl.html
      30             :  *                or write to the Free Software Foundation, Inc., 59
      31             :  *                Temple Place - Suite 330, Boston, MA  02111-1307, USA.
      32             :  *
      33             :  *********************************************************************/
      34             : 
      35             : 
      36             : #include "config.h"
      37             : 
      38             : #ifndef _WIN32
      39             : #include <stdio.h>
      40             : #include <sys/types.h>
      41             : #endif
      42             : 
      43             : #include <stdlib.h>
      44             : #include <ctype.h>
      45             : #include <assert.h>
      46             : #include <string.h>
      47             : 
      48             : #if !defined(_WIN32)
      49             : #include <unistd.h>
      50             : #endif
      51             : 
      52             : #include "project.h"
      53             : #include "urlmatch.h"
      54             : #include "ssplit.h"
      55             : #include "miscutil.h"
      56             : #include "errlog.h"
      57             : 
      58             : enum regex_anchoring
      59             : {
      60             :    NO_ANCHORING,
      61             :    LEFT_ANCHORED,
      62             :    RIGHT_ANCHORED,
      63             :    RIGHT_ANCHORED_HOST
      64             : };
      65             : static jb_err compile_vanilla_host_pattern(struct pattern_spec *url, const char *host_pattern);
      66             : #ifdef FEATURE_PCRE_HOST_PATTERNS
      67             : static jb_err compile_pcre_host_pattern(struct pattern_spec *url, const char *host_pattern);
      68             : #endif
      69             : 
      70             : /*********************************************************************
      71             :  *
      72             :  * Function    :  free_http_request
      73             :  *
      74             :  * Description :  Frees the members of a http_request structure
      75             :  *
      76             :  * Parameters  :
      77             :  *          1  :  http = points to a http_request structure to free
      78             :  *
      79             :  * Returns     :  N/A
      80             :  *
      81             :  *********************************************************************/
      82       35411 : void free_http_request(struct http_request *http)
      83             : {
      84       35411 :    assert(http);
      85             : 
      86       35411 :    freez(http->cmd);
      87       35411 :    freez(http->ocmd);
      88       35411 :    freez(http->gpc);
      89       35411 :    freez(http->host);
      90       35411 :    freez(http->url);
      91       35411 :    freez(http->hostport);
      92       35411 :    freez(http->path);
      93       35411 :    freez(http->version);
      94       35411 :    freez(http->host_ip_addr_str);
      95       35411 :    freez(http->dbuffer);
      96       35411 :    freez(http->dvec);
      97       35411 :    http->dcount = 0;
      98       35411 : }
      99             : 
     100             : 
     101             : /*********************************************************************
     102             :  *
     103             :  * Function    :  init_domain_components
     104             :  *
     105             :  * Description :  Splits the domain name so we can compare it
     106             :  *                against wildcards. It used to be part of
     107             :  *                parse_http_url, but was separated because the
     108             :  *                same code is required in chat in case of
     109             :  *                intercepted requests.
     110             :  *
     111             :  * Parameters  :
     112             :  *          1  :  http = pointer to the http structure to hold elements.
     113             :  *
     114             :  * Returns     :  JB_ERR_OK on success
     115             :  *                JB_ERR_PARSE if the host has no domain
     116             :  *                             components or more than SZ(vec).
     117             :  *
     118             :  *********************************************************************/
     119       35120 : jb_err init_domain_components(struct http_request *http)
     120             : {
     121             :    char *vec[BUFFER_SIZE];
     122             :    size_t size;
     123             :    char *p;
     124             : 
     125       35120 :    http->dbuffer = strdup_or_die(http->host);
     126             : 
     127             :    /* map to lower case */
     128      529977 :    for (p = http->dbuffer; *p ; p++)
     129             :    {
     130      494857 :       *p = (char)privoxy_tolower(*p);
     131             :    }
     132             : 
     133             :    /* split the domain name into components */
     134       35120 :    http->dcount = ssplit(http->dbuffer, ".", vec, SZ(vec));
     135             : 
     136       35120 :    if (http->dcount <= 0)
     137             :    {
     138             :       /*
     139             :        * Error: More than SZ(vec) components in domain
     140             :        *    or: no components in domain
     141             :        */
     142         633 :       log_error(LOG_LEVEL_ERROR, "More than SZ(vec) components in domain or none at all.");
     143         633 :       return JB_ERR_PARSE;
     144             :    }
     145             : 
     146             :    /* save a copy of the pointers in dvec */
     147       34487 :    size = (size_t)http->dcount * sizeof(*http->dvec);
     148             : 
     149       34487 :    http->dvec = malloc_or_die(size);
     150             : 
     151       34487 :    memcpy(http->dvec, vec, size);
     152             : 
     153       34487 :    return JB_ERR_OK;
     154             : }
     155             : 
     156             : 
     157             : /*********************************************************************
     158             :  *
     159             :  * Function    :  url_requires_percent_encoding
     160             :  *
     161             :  * Description :  Checks if a URL contains invalid characters
     162             :  *                according to RFC 3986 that should be percent-encoded.
     163             :  *                Does not verify whether or not the passed string
     164             :  *                actually is a valid URL.
     165             :  *
     166             :  * Parameters  :
     167             :  *          1  :  url = URL to check
     168             :  *
     169             :  * Returns     :  TRUE if percent-encoding is required, FALSE otherwise
     170             :  *
     171             :  *********************************************************************/
     172        1186 : int url_requires_percent_encoding(const char *url)
     173             : {
     174             :    static const char allowed_characters[128] = {
     175             :       '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
     176             :       '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
     177             :       '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
     178             :       '\0', '\0', '\0', '!',  '\0', '#',  '$',  '%',  '&',  '\'',
     179             :       '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',  '0',  '1',
     180             :       '2',  '3',  '4',  '5',  '6',  '7',  '8',  '9',  ':',  ';',
     181             :       '\0', '=',  '\0', '?',  '@',  'A',  'B',  'C',  'D',  'E',
     182             :       'F',  'G',  'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
     183             :       'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',  'X',  'Y',
     184             :       'Z',  '[',  '\0', ']',  '\0', '_',  '\0', 'a',  'b',  'c',
     185             :       'd',  'e',  'f',  'g',  'h',  'i',  'j',  'k',  'l',  'm',
     186             :       'n',  'o',  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
     187             :       'x',  'y',  'z',  '\0', '\0', '\0', '~',  '\0'
     188             :    };
     189             : 
     190       40554 :    while (*url != '\0')
     191             :    {
     192       39682 :       const unsigned int i = (unsigned char)*url++;
     193       39682 :       if (i >= sizeof(allowed_characters) || '\0' == allowed_characters[i])
     194             :       {
     195         314 :          return TRUE;
     196             :       }
     197             :    }
     198             : 
     199         872 :    return FALSE;
     200             : 
     201             : }
     202             : 
     203             : 
     204             : /*********************************************************************
     205             :  *
     206             :  * Function    :  parse_http_url
     207             :  *
     208             :  * Description :  Parse out the host and port from the URL.  Find the
     209             :  *                hostname & path, port (if ':'), and/or password (if '@')
     210             :  *
     211             :  * Parameters  :
     212             :  *          1  :  url = URL (or is it URI?) to break down
     213             :  *          2  :  http = pointer to the http structure to hold elements.
     214             :  *                       Must be initialized with valid values (like NULLs).
     215             :  *          3  :  require_protocol = Whether or not URLs without
     216             :  *                                   protocol are acceptable.
     217             :  *
     218             :  * Returns     :  JB_ERR_OK on success
     219             :  *                JB_ERR_PARSE on malformed URL or if the host
     220             :  *                             can't be split into domain components.
     221             :  *
     222             :  *********************************************************************/
     223       35290 : jb_err parse_http_url(const char *url, struct http_request *http, int require_protocol)
     224             : {
     225       35290 :    int host_available = 1; /* A proxy can dream. */
     226             : 
     227             :    /*
     228             :     * Save our initial URL
     229             :     */
     230       35290 :    http->url = strdup_or_die(url);
     231             : 
     232             :    /*
     233             :     * Check for * URI. If found, we're done.
     234             :     */
     235       35290 :    if (*http->url == '*')
     236             :    {
     237          41 :       http->path = strdup_or_die("*");
     238          41 :       http->hostport = strdup_or_die("");
     239          41 :       if (http->url[1] != '\0')
     240             :       {
     241           1 :          return JB_ERR_PARSE;
     242             :       }
     243          40 :       return JB_ERR_OK;
     244             :    }
     245             : 
     246             : 
     247             :    /*
     248             :     * Split URL into protocol,hostport,path.
     249             :     */
     250             :    {
     251             :       char *buf;
     252             :       char *url_noproto;
     253             :       char *url_path;
     254             : 
     255       35249 :       buf = strdup_or_die(url);
     256             : 
     257             :       /* Find the start of the URL in our scratch space */
     258       35249 :       url_noproto = buf;
     259       35249 :       if (strncmpic(url_noproto, "http://",  7) == 0)
     260             :       {
     261       17246 :          url_noproto += 7;
     262             :       }
     263       18003 :       else if (strncmpic(url_noproto, "https://", 8) == 0)
     264             :       {
     265             :          /*
     266             :           * Should only happen when called from cgi_show_url_info()
     267             :           * or when the request was https-inspected and the request
     268             :           * line got rewritten.
     269             :           */
     270         117 :          url_noproto += 8;
     271         117 :          http->ssl = 1;
     272             :       }
     273       17886 :       else if (*url_noproto == '/')
     274             :       {
     275             :         /*
     276             :          * Short request line without protocol and host.
     277             :          * Most likely because the client's request
     278             :          * was intercepted and redirected into Privoxy.
     279             :          */
     280       17263 :          http->host = NULL;
     281       17263 :          host_available = 0;
     282             :       }
     283         623 :       else if (require_protocol)
     284             :       {
     285          77 :          freez(buf);
     286          77 :          return JB_ERR_PARSE;
     287             :       }
     288             : 
     289       35172 :       url_path = strchr(url_noproto, '/');
     290       35172 :       if (url_path != NULL)
     291             :       {
     292             :          /*
     293             :           * Got a path.
     294             :           *
     295             :           * If FEATURE_HTTPS_INSPECTION isn't available, ignore the
     296             :           * path for https URLs so that we get consistent behaviour
     297             :           * if a https URL is parsed. When the URL is actually
     298             :           * retrieved, https hides the path part.
     299             :           */
     300       33484 :          http->path = strdup_or_die(
     301             : #ifndef FEATURE_HTTPS_INSPECTION
     302             :             http->ssl ? "/" :
     303             : #endif
     304             :             url_path
     305             :          );
     306       33484 :          *url_path = '\0';
     307       33484 :          http->hostport = string_tolower(url_noproto);
     308             :       }
     309             :       else
     310             :       {
     311             :          /*
     312             :           * Repair broken HTTP requests that don't contain a path,
     313             :           * or CONNECT requests
     314             :           */
     315        1688 :          http->path = strdup_or_die("/");
     316        1688 :          http->hostport = string_tolower(url_noproto);
     317             :       }
     318             : 
     319       35172 :       freez(buf);
     320             : 
     321       35172 :       if (http->hostport == NULL)
     322             :       {
     323           0 :          return JB_ERR_PARSE;
     324             :       }
     325             :    }
     326             : 
     327       35172 :    if (!host_available)
     328             :    {
     329             :       /* Without host, there is nothing left to do here */
     330       17263 :       return JB_ERR_OK;
     331             :    }
     332             : 
     333             :    /*
     334             :     * Split hostport into user/password (ignored), host, port.
     335             :     */
     336             :    {
     337             :       char *buf;
     338             :       char *host;
     339             :       char *port;
     340             : 
     341       17909 :       buf = strdup_or_die(http->hostport);
     342             : 
     343             :       /* check if url contains username and/or password */
     344       17909 :       host = strchr(buf, '@');
     345       17909 :       if (host != NULL)
     346             :       {
     347             :          /* Contains username/password, skip it and the @ sign. */
     348           4 :          host++;
     349             :       }
     350             :       else
     351             :       {
     352             :          /* No username or password. */
     353       17905 :          host = buf;
     354             :       }
     355             : 
     356             :       /* Move after hostname before port number */
     357       17909 :       if (*host == '[')
     358             :       {
     359             :          /* Numeric IPv6 address delimited by brackets */
     360           4 :          host++;
     361           4 :          port = strchr(host, ']');
     362             : 
     363           4 :          if (port == NULL)
     364             :          {
     365             :             /* Missing closing bracket */
     366           1 :             freez(buf);
     367           1 :             return JB_ERR_PARSE;
     368             :          }
     369             : 
     370           3 :          *port++ = '\0';
     371             : 
     372           3 :          if (*port == '\0')
     373             :          {
     374           1 :             port = NULL;
     375             :          }
     376           2 :          else if (*port != ':')
     377             :          {
     378             :             /* Garbage after closing bracket */
     379           1 :             freez(buf);
     380           1 :             return JB_ERR_PARSE;
     381             :          }
     382             :       }
     383             :       else
     384             :       {
     385             :          /* Plain non-escaped hostname */
     386       17905 :          port = strchr(host, ':');
     387             :       }
     388             : 
     389             :       /* check if url contains port */
     390       17907 :       if (port != NULL)
     391             :       {
     392             :          /* Contains port */
     393             :          char *endptr;
     394             :          long parsed_port;
     395             :          /* Terminate hostname and point to start of port string */
     396         871 :          *port++ = '\0';
     397         871 :          parsed_port = strtol(port, &endptr, 10);
     398         871 :          if ((parsed_port <= 0) || (parsed_port > 65535) || (*endptr != '\0'))
     399             :          {
     400          86 :             log_error(LOG_LEVEL_ERROR, "Invalid port in URL: %s.", url);
     401          86 :             freez(buf);
     402          86 :             return JB_ERR_PARSE;
     403             :          }
     404         785 :          http->port = (int)parsed_port;
     405             :       }
     406             :       else
     407             :       {
     408             :          /* No port specified. */
     409       17036 :          http->port = (http->ssl ? 443 : 80);
     410             :       }
     411             : 
     412       17821 :       http->host = strdup_or_die(host);
     413             : 
     414       17821 :       freez(buf);
     415             :    }
     416             : 
     417             :    /* Split domain name so we can compare it against wildcards */
     418       17821 :    return init_domain_components(http);
     419             : 
     420             : }
     421             : 
     422             : 
     423             : /*********************************************************************
     424             :  *
     425             :  * Function    :  unknown_method
     426             :  *
     427             :  * Description :  Checks whether a method is unknown.
     428             :  *
     429             :  * Parameters  :
     430             :  *          1  :  method = points to a http method
     431             :  *
     432             :  * Returns     :  TRUE if it's unknown, FALSE otherwise.
     433             :  *
     434             :  *********************************************************************/
     435       34108 : static int unknown_method(const char *method)
     436             : {
     437             :    static const char * const known_http_methods[] = {
     438             :       /* Basic HTTP request type */
     439             :       "GET", "HEAD", "POST", "PUT", "DELETE", "OPTIONS", "TRACE", "CONNECT",
     440             :       /* webDAV extensions (RFC2518) */
     441             :       "PROPFIND", "PROPPATCH", "MOVE", "COPY", "MKCOL", "LOCK", "UNLOCK",
     442             :       /*
     443             :        * Microsoft webDAV extension for Exchange 2000.  See:
     444             :        * http://lists.w3.org/Archives/Public/w3c-dist-auth/2002JanMar/0001.html
     445             :        * http://msdn.microsoft.com/library/en-us/wss/wss/_webdav_methods.asp
     446             :        */
     447             :       "BCOPY", "BMOVE", "BDELETE", "BPROPFIND", "BPROPPATCH",
     448             :       /*
     449             :        * Another Microsoft webDAV extension for Exchange 2000.  See:
     450             :        * http://systems.cs.colorado.edu/grunwald/MobileComputing/Papers/draft-cohen-gena-p-base-00.txt
     451             :        * http://lists.w3.org/Archives/Public/w3c-dist-auth/2002JanMar/0001.html
     452             :        * http://msdn.microsoft.com/library/en-us/wss/wss/_webdav_methods.asp
     453             :        */
     454             :       "SUBSCRIBE", "UNSUBSCRIBE", "NOTIFY", "POLL",
     455             :       /*
     456             :        * Yet another WebDAV extension, this time for
     457             :        * Web Distributed Authoring and Versioning (RFC3253)
     458             :        */
     459             :       "VERSION-CONTROL", "REPORT", "CHECKOUT", "CHECKIN", "UNCHECKOUT",
     460             :       "MKWORKSPACE", "UPDATE", "LABEL", "MERGE", "BASELINE-CONTROL", "MKACTIVITY",
     461             :       /*
     462             :        * The PATCH method is defined by RFC5789, the format of the
     463             :        * actual patch in the body depends on the application, but from
     464             :        * Privoxy's point of view it doesn't matter.
     465             :        */
     466             :       "PATCH",
     467             :    };
     468             :    int i;
     469             : 
     470       83054 :    for (i = 0; i < SZ(known_http_methods); i++)
     471             :    {
     472       82895 :       if (0 == strcmpic(method, known_http_methods[i]))
     473             :       {
     474       33949 :          return FALSE;
     475             :       }
     476             :    }
     477             : 
     478         159 :    return TRUE;
     479             : 
     480             : }
     481             : 
     482             : 
     483             : /*********************************************************************
     484             :  *
     485             :  * Function    :  normalize_http_version
     486             :  *
     487             :  * Description :  Take a supported HTTP version string and remove
     488             :  *                leading zeroes etc., reject unsupported versions.
     489             :  *
     490             :  *                This is an explicit RFC 2616 (3.1) MUST and
     491             :  *                RFC 7230 mandates that intermediaries send their
     492             :  *                own HTTP-version in forwarded messages.
     493             :  *
     494             :  * Parameters  :
     495             :  *          1  :  http_version = HTTP version string
     496             :  *
     497             :  * Returns     :  JB_ERR_OK on success
     498             :  *                JB_ERR_PARSE if the HTTP version is unsupported
     499             :  *
     500             :  *********************************************************************/
     501       33949 : static jb_err normalize_http_version(char *http_version)
     502             : {
     503             :    unsigned int major_version;
     504             :    unsigned int minor_version;
     505             : 
     506       33949 :    if (2 != sscanf(http_version, "HTTP/%u.%u", &major_version, &minor_version))
     507             :    {
     508          73 :       log_error(LOG_LEVEL_ERROR, "Unsupported HTTP version: %s", http_version);
     509          73 :       return JB_ERR_PARSE;
     510             :    }
     511             : 
     512       33876 :    if (major_version != 1 || (minor_version != 0 && minor_version != 1))
     513             :    {
     514           3 :       log_error(LOG_LEVEL_ERROR, "The only supported HTTP "
     515             :          "versions are 1.0 and 1.1. This rules out: %s", http_version);
     516           3 :       return JB_ERR_PARSE;
     517             :    }
     518             : 
     519       33873 :    assert(strlen(http_version) >= 8);
     520       33873 :    snprintf(http_version, 9, "HTTP/%u.%u", major_version, minor_version);
     521             : 
     522       33873 :    return JB_ERR_OK;
     523             : 
     524             : }
     525             : 
     526             : 
     527             : /*********************************************************************
     528             :  *
     529             :  * Function    :  parse_http_request
     530             :  *
     531             :  * Description :  Split a request line into method, URL and HTTP
     532             :  *                version, validate them and parse the URL.
     533             :  *
     534             :  * Parameters  :
     535             :  *          1  :  req = HTTP request line to break down
     536             :  *          2  :  http = pointer to the http structure to hold elements
     537             :  *
     538             :  * Returns     :  JB_ERR_OK on success
     539             :  *                JB_ERR_PARSE on malformed request line, unknown
     540             :  *                             method, unsupported version or bad URL.
     541             :  *
     542             :  *********************************************************************/
     543       34320 : jb_err parse_http_request(const char *req, struct http_request *http)
     544             : {
     545             :    char *buf;
     546             :    char *v[3];
     547             :    int n;
     548             :    jb_err err;
     549             : 
     550       34320 :    memset(http, '\0', sizeof(*http));
     551             : 
     552       34320 :    buf = strdup_or_die(req);
     553             : 
     554       34320 :    n = ssplit(buf, " \r\n", v, SZ(v));
     555       34320 :    if (n != 3)
     556             :    {
     557         212 :       freez(buf);
     558         212 :       return JB_ERR_PARSE;
     559             :    }
     560             : 
     561             :    /*
     562             :     * Fail in case of unknown methods
     563             :     * which we might not handle correctly.
     564             :     *
     565             :     * XXX: There should be a config option
     566             :     * to forward requests with unknown methods
     567             :     * anyway. Most of them don't need special
     568             :     * steps.
     569             :     */
     570       34108 :    if (unknown_method(v[0]))
     571             :    {
     572         159 :       log_error(LOG_LEVEL_ERROR, "Unknown HTTP method detected: %s", v[0]);
     573         159 :       freez(buf);
     574         159 :       return JB_ERR_PARSE;
     575             :    }
     576             : 
     577       33949 :    if (JB_ERR_OK != normalize_http_version(v[2]))
     578             :    {
     579          76 :       freez(buf);
     580          76 :       return JB_ERR_PARSE;
     581             :    }
     582             : 
     583       33873 :    http->ssl = !strcmpic(v[0], "CONNECT");
     584             : 
     585       33873 :    err = parse_http_url(v[1], http, !http->ssl);
     586       33873 :    if (err)
     587             :    {
     588          25 :       freez(buf);
     589          25 :       return err;
     590             :    }
     591             : 
     592             :    /*
     593             :     * Copy the details into the structure
     594             :     */
     595       33848 :    http->cmd = strdup_or_die(req);
     596       33848 :    http->gpc = strdup_or_die(v[0]);
     597       33848 :    http->version = strdup_or_die(v[2]);
     598       33848 :    http->ocmd = strdup_or_die(http->cmd);
     599             : 
     600       33848 :    freez(buf);
     601             : 
     602       33848 :    return JB_ERR_OK;
     603             : 
     604             : }
     605             : 
     606             : 
     607             : /*********************************************************************
     608             :  *
     609             :  * Function    :  compile_pattern
     610             :  *
     611             :  * Description :  Compiles a path, host or TAG pattern into a case-insensitive POSIX extended regex.
     612             :  *
     613             :  * Parameters  :
     614             :  *          1  :  pattern = The pattern to compile. If it is empty, *regex is set to NULL and JB_ERR_OK is returned.
     615             :  *          2  :  anchoring = How the regex should be modified
     616             :  *                            before compilation. One of
     617             :  *                            NO_ANCHORING, LEFT_ANCHORED,
     618             :  *                            RIGHT_ANCHORED or RIGHT_ANCHORED_HOST.
     619             :  *          3  :  url     = In case of failures, the spec member is
     620             :  *                          logged and free_pattern_spec() is called on it.
     621             :  *          4  :  regex   = Where the compiled regex should be stored.
     622             :  *
     623             :  * Returns     :  JB_ERR_OK - Success
     624             :  *                JB_ERR_PARSE - Cannot parse regex
     625             :  *
     626             :  *********************************************************************/
     627       34410 : static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchoring,
     628             :                               struct pattern_spec *url, regex_t **regex)
     629             : {
     630             :    int errcode;
     631       34410 :    const char *fmt = NULL; /* snprintf() format that wraps the pattern in the requested anchors */
     632             :    char *rebuf;
     633             :    size_t rebuf_size;
     634             : 
     635       34410 :    assert(pattern);
     636             : 
     637       34410 :    if (pattern[0] == '\0')
     638             :    {
     639           1 :       *regex = NULL; /* Nothing to compile */
     640           1 :       return JB_ERR_OK;
     641             :    }
     642             : 
     643       34409 :    switch (anchoring)
     644             :    {
     645       12396 :       case NO_ANCHORING:
     646       12396 :          fmt = "%s";
     647       12396 :          break;
     648           0 :       case RIGHT_ANCHORED:
     649           0 :          fmt = "%s$";
     650           0 :          break;
     651           0 :       case RIGHT_ANCHORED_HOST:
     652           0 :          fmt = "%s\\.?$"; /* Allows one optional trailing dot before the end */
     653           0 :          break;
     654       22013 :       case LEFT_ANCHORED:
     655       22013 :          fmt = "^%s";
     656       22013 :          break;
     657           0 :       default:
     658           0 :          log_error(LOG_LEVEL_FATAL,
     659             :             "Invalid anchoring in compile_pattern %d", anchoring); /* NOTE(review): fmt is still NULL here; presumably LOG_LEVEL_FATAL does not return, otherwise strlen(fmt) below dereferences NULL - confirm */
     660             :    }
     661       34409 :    rebuf_size = strlen(pattern) + strlen(fmt); /* strlen(fmt) counts the two-byte "%s" placeholder, which leaves room for the terminating NUL */
     662       34409 :    rebuf = malloc_or_die(rebuf_size);
     663       34409 :    *regex = zalloc_or_die(sizeof(**regex));
     664             : 
     665       34409 :    snprintf(rebuf, rebuf_size, fmt, pattern);
     666             : 
     667       34409 :    errcode = regcomp(*regex, rebuf, (REG_EXTENDED|REG_NOSUB|REG_ICASE));
     668             : 
     669       34409 :    if (errcode)
     670             :    {
     671          93 :       size_t errlen = regerror(errcode, *regex, rebuf, rebuf_size); /* rebuf is reused to hold the error message */
     672          93 :       if (errlen > (rebuf_size - (size_t)1))
     673             :       {
     674           8 :          errlen = rebuf_size - (size_t)1; /* Clamp so the write below stays inside rebuf */
     675             :       }
     676          93 :       rebuf[errlen] = '\0';
     677          93 :       log_error(LOG_LEVEL_ERROR, "error compiling %s from %s: %s",
     678             :          pattern, url->spec, rebuf);
     679          93 :       free_pattern_spec(url); /* NOTE(review): when *regex is stored inside url, as the visible callers do, this reaches regfree() for a regex whose regcomp() failed - confirm the regex library tolerates that */
     680          93 :       freez(rebuf);
     681             : 
     682          93 :       return JB_ERR_PARSE;
     683             :    }
     684       34316 :    freez(rebuf);
     685             : 
     686       34316 :    return JB_ERR_OK;
     687             : 
     688             : }
     689             : 
     690             : 
     691             : /*********************************************************************
     692             :  *
     693             :  * Function    :  compile_url_pattern
     694             :  *
     695             :  * Description :  Splits an URL pattern into its path, port list and host parts and compiles them.
     696             :  *
     697             :  * Parameters  :
     698             :  *          1  :  url = Target pattern_spec to be filled in.
     699             :  *          2  :  buf = The url pattern to compile. Modified in place while it is split.
     700             :  *
     701             :  * Returns     :  JB_ERR_OK - Success
     702             :  *                JB_ERR_MEMORY - Out of memory (NOTE(review): no path visible in this function returns it)
     703             :  *                JB_ERR_PARSE - Cannot parse regex, or garbage follows the '>' of a bracketed address
     704             :  *
     705             :  *********************************************************************/
     706       53236 : static jb_err compile_url_pattern(struct pattern_spec *url, char *buf)
     707             : {
     708             :    char *p;
     709       53236 :    const size_t prefix_length = 18; /* Length of "PCRE-HOST-PATTERN:" */
     710             : 
     711             : #ifdef FEATURE_PCRE_HOST_PATTERNS
     712       53236 :    if (strncmpic(buf, "PCRE-HOST-PATTERN:", prefix_length) == 0)
     713             :    {
     714           0 :       url->pattern.url_spec.host_regex_type = PCRE_HOST_PATTERN;
     715             :       /* Overwrite the "PCRE-HOST-PATTERN:" prefix */
     716           0 :       memmove(buf, buf+prefix_length, strlen(buf+prefix_length)+1);
     717             :    }
     718             :    else
     719             :    {
     720       53236 :       url->pattern.url_spec.host_regex_type = VANILLA_HOST_PATTERN;
     721             :    }
     722             : #else
     723             :    if (strncmpic(buf, "PCRE-HOST-PATTERN:", prefix_length) == 0)
     724             :    {
     725             :       log_error(LOG_LEVEL_ERROR,
     726             :          "PCRE-HOST-PATTERN detected while Privoxy has been compiled "
     727             :          "without FEATURE_PCRE_HOST_PATTERNS: %s",
     728             :          buf);
     729             :       /* Overwrite the "PCRE-HOST-PATTERN:" prefix */
     730             :       memmove(buf, buf+prefix_length, strlen(buf+prefix_length)+1);
     731             :       /*
     732             :        * The pattern will probably not work as expected.
     733             :        * We don't simply return JB_ERR_PARSE here so the
     734             :        * regression tests can be loaded with and without
     735             :        * FEATURE_PCRE_HOST_PATTERNS.
     736             :        */
     737             :    }
     738             : #endif
     739             : 
     740       53236 :    p = strchr(buf, '/'); /* The first slash starts the path part */
     741       53236 :    if (NULL != p)
     742             :    {
     743             :       /*
     744             :        * Only compile the regex if it consists of more than
     745             :        * a single slash, otherwise it wouldn't affect the result.
     746             :        */
     747       37520 :       if (p[1] != '\0')
     748             :       {
     749             :          /*
     750             :           * XXX: does it make sense to compile the slash at the beginning?
     751             :           */
     752       22013 :          jb_err err = compile_pattern(p, LEFT_ANCHORED, url, &url->pattern.url_spec.preg);
     753             : 
     754       22013 :          if (JB_ERR_OK != err)
     755             :          {
     756          91 :             return err;
     757             :          }
     758             :       }
     759       37429 :       *p = '\0'; /* Cut off the path, buf now only holds the host and port part */
     760             :    }
     761             : 
     762             :    /*
     763             :     * IPv6 numeric hostnames can contain colons, thus we need
     764             :     * to delimit the hostname before the real port separator.
     765             :     * As brackets are already used in the hostname pattern,
     766             :     * we use angle brackets ('<', '>') instead.
     767             :     */
     768       53145 :    if ((buf[0] == '<') && (NULL != (p = strchr(buf + 1, '>'))))
     769             :    {
     770          22 :       *p++ = '\0'; /* Terminate the address at the '>', p now points behind it */
     771          22 :       buf++; /* Skip the '<' */
     772             : 
     773          22 :       if (*p == '\0')
     774             :       {
     775             :          /* IPv6 address without port number */
     776          16 :          p = NULL;
     777             :       }
     778           6 :       else if (*p != ':')
     779             :       {
     780             :          /* Garbage after address delimiter. NOTE(review): unlike the other failure paths this returns without free_pattern_spec(url), so url->spec and a path regex compiled above stay allocated unless the caller releases them. */
     781           1 :          return JB_ERR_PARSE;
     782             :       }
     783             :    }
     784             :    else
     785             :    {
     786       53123 :       p = strchr(buf, ':'); /* The first colon separates host and port list */
     787             :    }
     788             : 
     789       53144 :    if (NULL != p)
     790             :    {
     791        3306 :       *p++ = '\0'; /* Terminate the host part, p now points to the port list */
     792        3306 :       url->pattern.url_spec.port_list = strdup_or_die(p);
     793             :    }
     794             :    else
     795             :    {
     796       49838 :       url->pattern.url_spec.port_list = NULL;
     797             :    }
     798             : 
     799       53144 :    if (buf[0] != '\0') /* An empty host part needs no host pattern */
     800             :    {
     801             : #ifdef FEATURE_PCRE_HOST_PATTERNS
     802       37604 :       if (url->pattern.url_spec.host_regex_type == PCRE_HOST_PATTERN)
     803             :       {
     804           0 :          return compile_pcre_host_pattern(url, buf);
     805             :       }
     806             :       else
     807             : #endif
     808             :       {
     809       37604 :          return compile_vanilla_host_pattern(url, buf);
     810             :       }
     811             :    }
     812             : 
     813       15540 :    return JB_ERR_OK;
     814             : 
     815             : }
     816             : 
     817             : 
     818             : #ifdef FEATURE_PCRE_HOST_PATTERNS
     819             : /*********************************************************************
     820             :  *
     821             :  * Function    :  compile_pcre_host_pattern
     822             :  *
     823             :  * Description :  Compiles a pcre host pattern into url->pattern.url_spec.host_regex via compile_pattern().
     824             :  *
     825             :  * Parameters  :
     826             :  *          1  :  url = Target pattern_spec to be filled in.
     827             :  *          2  :  host_pattern = Host pattern to compile.
     828             :  *
     829             :  * Returns     :  JB_ERR_OK - Success
     830             :  *                JB_ERR_MEMORY - Out of memory (NOTE(review): not returned by the compile_pattern() code in this file)
     831             :  *                JB_ERR_PARSE - Cannot parse regex
     832             :  *
     833             :  *********************************************************************/
     834           0 : static jb_err compile_pcre_host_pattern(struct pattern_spec *url, const char *host_pattern)
     835             : {
     836           0 :    return compile_pattern(host_pattern, RIGHT_ANCHORED_HOST, url, &url->pattern.url_spec.host_regex); /* Anchored at the end of the host, with an optional trailing dot */
     837             : }
     838             : #endif /* def FEATURE_PCRE_HOST_PATTERNS */
     839             : 
     840             : 
     841             : /*********************************************************************
     842             :  *
     843             :  * Function    :  compile_vanilla_host_pattern
     844             :  *
     845             :  * Description :  Parses an old-school host pattern: records the anchoring and splits a lower-cased copy into dot-separated components.
     846             :  *
     847             :  * Parameters  :
     848             :  *          1  :  url = Target pattern_spec to be filled in.
     849             :  *          2  :  host_pattern = Host pattern to parse. Must not be empty, its last character is inspected.
     850             :  *
     851             :  * Returns     :  JB_ERR_OK - Success
     852             :  *                JB_ERR_PARSE - ssplit() reported an error, free_pattern_spec() has been called on url
     853             :  *
     854             :  *********************************************************************/
     855       37604 : static jb_err compile_vanilla_host_pattern(struct pattern_spec *url, const char *host_pattern)
     856             : {
     857             :    char *v[150];
     858             :    size_t size;
     859             :    char *p;
     860             : 
     861             :    /*
     862             :     * Parse domain part: a trailing or leading dot marks that side as unanchored
     863             :     */
     864       37604 :    if (host_pattern[strlen(host_pattern) - 1] == '.')
     865             :    {
     866        3172 :       url->pattern.url_spec.unanchored |= ANCHOR_RIGHT;
     867             :    }
     868       37604 :    if (host_pattern[0] == '.')
     869             :    {
     870       18688 :       url->pattern.url_spec.unanchored |= ANCHOR_LEFT;
     871             :    }
     872             : 
     873             :    /*
     874             :     * Keep a private copy of the pattern that can be modified
     875             :     */
     876       37604 :    url->pattern.url_spec.dbuffer = strdup_or_die(host_pattern);
     877             : 
     878             :    /*
     879             :     * Map to lower case
     880             :     */
     881      488996 :    for (p = url->pattern.url_spec.dbuffer; *p ; p++)
     882             :    {
     883      451392 :       *p = (char)privoxy_tolower(*p);
     884             :    }
     885             : 
     886             :    /*
     887             :     * Split the domain name into components (presumably v[] then points into dbuffer - confirm against ssplit())
     888             :     */
     889       37604 :    url->pattern.url_spec.dcount = ssplit(url->pattern.url_spec.dbuffer, ".", v, SZ(v));
     890             : 
     891       37604 :    if (url->pattern.url_spec.dcount < 0)
     892             :    {
     893           1 :       free_pattern_spec(url);
     894           1 :       return JB_ERR_PARSE;
     895             :    }
     896       37603 :    else if (url->pattern.url_spec.dcount != 0)
     897             :    {
     898             :       /*
     899             :        * Save a copy of the pointers in dvec
     900             :        */
     901       37592 :       size = (size_t)url->pattern.url_spec.dcount * sizeof(*url->pattern.url_spec.dvec);
     902             : 
     903       37592 :       url->pattern.url_spec.dvec = malloc_or_die(size);
     904             : 
     905       37592 :       memcpy(url->pattern.url_spec.dvec, v, size);
     906             :    }
     907             :    /*
     908             :     * else dcount == 0 in which case we needn't do anything,
     909             :     * since dvec will never be accessed and the pattern will
     910             :     * match all domains.
     911             :     */
     912       37603 :    return JB_ERR_OK;
     913             : }
     914             : 
     915             : 
     916             : /*********************************************************************
     917             :  *
     918             :  * Function    :  simplematch
     919             :  *
     920             :  * Description :  String matching, with a (greedy) '*' wildcard that
     921             :  *                stands for zero or more arbitrary characters, '?'
     922             :  *                for any single character, and character classes in
     923             :  *                [], which take both enumerations and ranges.
     924             :  *
     925             :  * Parameters  :
     926             :  *          1  :  pattern = pattern for matching
     927             :  *          2  :  text    = text to be matched
     928             :  *
     929             :  * Returns     :  0 if match, else nonzero
     930             :  *
     931             :  *********************************************************************/
     932      408098 : static int simplematch(const char *pattern, const char *text)
     933             : {
     934      408098 :    const unsigned char *pat = (const unsigned char *)pattern;
     935      408098 :    const unsigned char *txt = (const unsigned char *)text;
     936      408098 :    const unsigned char *fallback = pat; /* Where the pattern is rewound to in wildcard mode */
     937      408098 :    int wildcard = 0;
     938             : 
     939      408098 :    unsigned char lastchar = 'a'; /* Lower bound for the next range; only updated by literal class characters */
     940             :    unsigned i;
     941             :    unsigned char charmap[32]; /* Bitmap with one bit per unsigned char value (32 * 8 = 256) */
     942             : 
     943     1069560 :    while (*txt)
     944             :    {
     945             : 
     946             :       /* End of pattern but not end of text? */
     947      927419 :       if (*pat == '\0')
     948             :       {
     949         367 :          if (wildcard)
     950             :          {
     951           0 :             pat = fallback;
     952             :          }
     953             :          else
     954             :          {
     955         367 :             return 1;
     956             :          }
     957             :       }
     958             : 
     959             :       /* '*' in the pattern?  */
     960      927052 :       if (*pat == '*')
     961             :       {
     962             : 
     963             :          /* The pattern ends afterwards? Speed up the return. */
     964           0 :          if (*++pat == '\0')
     965             :          {
     966           0 :             return 0;
     967             :          }
     968             : 
     969             :          /* Else, set wildcard mode and remember position after '*' */
     970           0 :          wildcard = 1;
     971           0 :          fallback = pat;
     972             :       }
     973             : 
     974             :       /* Character range specification? Build the bitmap of accepted characters. */
     975      927052 :       if (*pat == '[')
     976             :       {
     977           0 :          memset(charmap, '\0', sizeof(charmap));
     978             : 
     979           0 :          while (*++pat != ']') /* Leaves pat on the closing ']' */
     980             :          {
     981           0 :             if (!*pat)
     982             :             {
     983           0 :                return 1; /* Unterminated class: no match */
     984             :             }
     985           0 :             else if (*pat == '-')
     986             :             {
     987           0 :                if ((*++pat == ']') || *pat == '\0')
     988             :                {
     989           0 :                   return(1); /* Range without upper bound: no match */
     990             :                }
     991           0 :                for (i = lastchar; i <= *pat; i++)
     992             :                {
     993           0 :                   charmap[i / 8] |= (unsigned char)(1 << (i % 8));
     994             :                }
     995             :             }
     996             :             else
     997             :             {
     998           0 :                charmap[*pat / 8] |= (unsigned char)(1 << (*pat % 8));
     999           0 :                lastchar = *pat;
    1000             :             }
    1001             :          }
    1002             :       } /* -END- if Character range specification */
    1003             : 
    1004             : 
    1005             :       /*
    1006             :        * Char match, or char range match? NOTE(review): charmap is only initialized when a '[' was parsed, so a ']' in the pattern without a preceding '[' reads it uninitialized here.
    1007             :        */
    1008      927052 :       if ((*pat == *txt)
    1009      265590 :        || (*pat == '?')
    1010      265590 :        || ((*pat == ']') && (charmap[*txt / 8] & (1 << (*txt % 8)))))
    1011             :       {
    1012             :          /*
    1013             :           * Success: Go ahead
    1014             :           */
    1015      661462 :          pat++;
    1016             :       }
    1017      265590 :       else if (!wildcard)
    1018             :       {
    1019             :          /*
    1020             :           * No match && no wildcard: No luck
    1021             :           */
    1022      265590 :          return 1;
    1023             :       }
    1024           0 :       else if (pat != fallback)
    1025             :       {
    1026             :          /*
    1027             :           * Increment text pointer if in char range matching
    1028             :           */
    1029           0 :          if (*pat == ']')
    1030             :          {
    1031           0 :             txt++;
    1032             :          }
    1033             :          /*
    1034             :           * Wildcard mode && nonmatch beyond fallback: Rewind pattern
    1035             :           */
    1036           0 :          pat = fallback;
    1037             :          /*
    1038             :           * Restart matching from current text pointer
    1039             :           */
    1040           0 :          continue;
    1041             :       }
    1042      661462 :       txt++;
    1043             :    }
    1044             : 
    1045             :    /* Skip a single trailing '*', it matches the empty rest of the text */
    1046      142141 :    if (*pat == '*') pat++;
    1047             : 
    1048             :    /* If this is the pattern's end, fine! Otherwise the leftover character is returned as nonzero. */
    1049      142141 :    return(*pat);
    1050             : 
    1051             : }
    1052             : 
    1053             : 
    1054             : /*********************************************************************
    1055             :  *
    1056             :  * Function    :  simple_domaincmp
    1057             :  *
    1058             :  * Description :  Domain-wise Compare fqdn's.  The comparison is
    1059             :  *                both left- and right-anchored.  The individual
    1060             :  *                domain names are compared pairwise with simplematch().
    1061             :  *                This is only used by domain_match.
    1062             :  *
    1063             :  * Parameters  :
    1064             :  *          1  :  pv = array of patterns to compare
    1065             :  *          2  :  fv = array of domain components to compare
    1066             :  *          3  :  len = number of entries compared from both arrays
    1067             :  *                      (both arrays are the same length - if they
    1068             :  *                      weren't, it couldn't possibly be a match).
    1069             :  *
    1070             :  * Returns     :  0 => domains are equivalent (also when len is 0), else no match.
    1071             :  *
    1072             :  *********************************************************************/
    1073      325998 : static int simple_domaincmp(char **pv, char **fv, int len)
    1074             : {
    1075             :    int n;
    1076             : 
    1077      460859 :    for (n = 0; n < len; n++)
    1078             :    {
    1079      408098 :       if (simplematch(pv[n], fv[n]))
    1080             :       {
    1081      273237 :          return 1; /* The first component that doesn't match decides */
    1082             :       }
    1083             :    }
    1084             : 
    1085       52761 :    return 0;
    1086             : 
    1087             : }
    1088             : 
    1089             : 
    1090             : /*********************************************************************
    1091             :  *
    1092             :  * Function    :  domain_match
    1093             :  *
    1094             :  * Description :  Domain-wise Compare fqdn's. Governed by the bitmap in
    1095             :  *                p->pattern.url_spec.unanchored, the comparison is un-, left-,
    1096             :  *                right-anchored, or both.
    1097             :  *                The individual domain names are compared with
    1098             :  *                simplematch().
    1099             :  *
    1100             :  * Parameters  :
    1101             :  *          1  :  p = pattern spec whose dvec/dcount hold the domain pattern components.
    1102             :  *          2  :  fqdn = request whose dvec/dcount hold the domain name components to compare against.
    1103             :  *
    1104             :  * Returns     :  0 => domains are equivalent, else no match.
    1105             :  *
    1106             :  *********************************************************************/
    1107      415692 : static int domain_match(const struct pattern_spec *p, const struct http_request *fqdn)
    1108             : {
    1109             :    char **pv, **fv;  /* vectors  */
    1110             :    int    plen, flen;
    1111      415692 :    int unanchored = p->pattern.url_spec.unanchored & (ANCHOR_RIGHT | ANCHOR_LEFT);
    1112             : 
    1113      415692 :    plen = p->pattern.url_spec.dcount;
    1114      415692 :    flen = fqdn->dcount;
    1115             : 
    1116      415692 :    if (flen < plen)
    1117             :    {
    1118             :       /* fqdn is too short to match this pattern */
    1119       79800 :       return 1;
    1120             :    }
    1121             : 
    1122      335892 :    pv   = p->pattern.url_spec.dvec;
    1123      335892 :    fv   = fqdn->dvec;
    1124             : 
    1125      335892 :    if (unanchored == ANCHOR_LEFT)
    1126             :    {
    1127             :       /*
    1128             :        * Only the left side is unanchored, i.e. right anchored.
    1129             :        *
    1130             :        * Convert this into a fully anchored pattern with
    1131             :        * the fqdn and pattern the same length
    1132             :        */
    1133      152930 :       fv += (flen - plen); /* flen - plen >= 0 due to check above */
    1134      152930 :       return simple_domaincmp(pv, fv, plen);
    1135             :    }
    1136      182962 :    else if (unanchored == 0)
    1137             :    {
    1138             :       /* Fully anchored, check length */
    1139      148929 :       if (flen != plen)
    1140             :       {
    1141       57753 :          return 1;
    1142             :       }
    1143       91176 :       return simple_domaincmp(pv, fv, plen);
    1144             :    }
    1145       34033 :    else if (unanchored == ANCHOR_RIGHT)
    1146             :    {
    1147             :       /* Only the right side is unanchored, i.e. left anchored: ignore all extra in fqdn */
    1148           0 :       return simple_domaincmp(pv, fv, plen);
    1149             :    }
    1150             :    else
    1151             :    {
    1152             :       /* Unanchored: try every start position in fqdn that leaves room for plen components */
    1153             :       int n;
    1154       34033 :       int maxn = flen - plen;
    1155      114896 :       for (n = 0; n <= maxn; n++)
    1156             :       {
    1157       81892 :          if (!simple_domaincmp(pv, fv, plen))
    1158             :          {
    1159        1029 :             return 0;
    1160             :          }
    1161             :          /*
    1162             :           * Doesn't match from start of fqdn
    1163             :           * Try skipping first part of fqdn
    1164             :           */
    1165       80863 :          fv++;
    1166             :       }
    1167       33004 :       return 1;
    1168             :    }
    1169             : 
    1170             : }
    1171             : 
    1172             : 
    1173             : /*********************************************************************
    1174             :  *
    1175             :  * Function    :  create_pattern_spec
    1176             :  *
    1177             :  * Description :  Creates a "pattern_spec" structure from a string.
    1178             :  *                When finished, free with free_pattern_spec().
    1179             :  *
    1180             :  * Parameters  :
    1181             :  *          1  :  pattern = Target pattern_spec to be filled in.
    1182             :  *                          Will be zeroed before use.
    1183             :  *          2  :  buf = Source pattern, null terminated.  NOTE: The
    1184             :  *                      contents of this buffer may be modified by this
    1185             :  *                      function.  The original contents are copied to
    1186             :  *                      pattern->spec before parsing starts; some of the
    1187             :  *                      failure paths release that copy again through
    1188             :  *                      free_pattern_spec().
    1189             :  *
    1190             :  * Returns     :  JB_ERR_OK - Success
    1191             :  *                JB_ERR_PARSE - Cannot parse regex (Detailed message
    1192             :  *                               written to system log)
    1193             :  *
    1194             :  *********************************************************************/
    1195       65633 : jb_err create_pattern_spec(struct pattern_spec *pattern, char *buf)
    1196             : {
    1197             :    static const struct
    1198             :    {
    1199             :       /** The tag pattern prefix to match */
    1200             :       const char *prefix;
    1201             : 
    1202             :       /** The length of the prefix to match */
    1203             :       const size_t prefix_length;
    1204             : 
    1205             :       /** The pattern flag */
    1206             :       const unsigned flag;
    1207             :    } tag_pattern[] = {
    1208             :       { "TAG:",              4, PATTERN_SPEC_TAG_PATTERN},
    1209             :  #ifdef FEATURE_CLIENT_TAGS
    1210             :       { "CLIENT-TAG:",      11, PATTERN_SPEC_CLIENT_TAG_PATTERN},
    1211             :  #endif
    1212             :       { "NO-REQUEST-TAG:",  15, PATTERN_SPEC_NO_REQUEST_TAG_PATTERN},
    1213             :       { "NO-RESPONSE-TAG:", 16, PATTERN_SPEC_NO_RESPONSE_TAG_PATTERN}
    1214             :    };
    1215             :    int i;
    1216             : 
    1217       65633 :    assert(pattern);
    1218       65633 :    assert(buf);
    1219             : 
    1220       65633 :    memset(pattern, '\0', sizeof(*pattern));
    1221             : 
    1222             :    /* Remember the original specification for the CGI pages. */
    1223       65633 :    pattern->spec = strdup_or_die(buf);
    1224             : 
    1225             :    /* Check if it's a tag pattern, the prefix is compared with strncmpic() */
    1226      290970 :    for (i = 0; i < SZ(tag_pattern); i++)
    1227             :    {
    1228      237734 :       if (0 == strncmpic(pattern->spec, tag_pattern[i].prefix, tag_pattern[i].prefix_length))
    1229             :       {
    1230             :          /* The regex starts after the prefix */
    1231       12397 :          const char *tag_regex = buf + tag_pattern[i].prefix_length;
    1232             : 
    1233       12397 :          pattern->flags |= tag_pattern[i].flag;
    1234             : 
    1235       12397 :          return compile_pattern(tag_regex, NO_ANCHORING, pattern,
    1236             :             &pattern->pattern.tag_regex);
    1237             :       }
    1238             :    }
    1239             : 
    1240             :    /* If it isn't a tag pattern it must be an URL pattern. */
    1241       53236 :    pattern->flags |= PATTERN_SPEC_URL_PATTERN;
    1242             : 
    1243       53236 :    return compile_url_pattern(pattern, buf);
    1244             : 
    1245             : }
    1246             : 
    1247             : 
    1248             : /*********************************************************************
    1249             :  *
    1250             :  * Function    :  free_pattern_spec
    1251             :  *
    1252             :  * Description :  Called from the "unloaders" and on compile failures.
    1253             :  *                Frees the pattern structure members, not the structure itself.
    1254             :  *
    1255             :  * Parameters  :
    1256             :  *          1  :  pattern = pointer to a pattern_spec structure. NULL is accepted and ignored.
    1257             :  *
    1258             :  * Returns     :  N/A
    1259             :  *
    1260             :  *********************************************************************/
    1261         574 : void free_pattern_spec(struct pattern_spec *pattern)
    1262             : {
    1263         574 :    if (pattern == NULL) return;
    1264             : 
    1265         574 :    freez(pattern->spec);
    1266             : #ifdef FEATURE_PCRE_HOST_PATTERNS
    1267         574 :    if (pattern->pattern.url_spec.host_regex)
    1268             :    {
    1269           4 :       regfree(pattern->pattern.url_spec.host_regex); /* Release the compiled regex before the regex_t itself */
    1270           4 :       freez(pattern->pattern.url_spec.host_regex);
    1271             :    }
    1272             : #endif /* def FEATURE_PCRE_HOST_PATTERNS */
    1273         574 :    freez(pattern->pattern.url_spec.dbuffer);
    1274         574 :    freez(pattern->pattern.url_spec.dvec);
    1275         574 :    pattern->pattern.url_spec.dcount = 0;
    1276         574 :    freez(pattern->pattern.url_spec.port_list);
    1277         574 :    if (pattern->pattern.url_spec.preg)
    1278             :    {
    1279         327 :       regfree(pattern->pattern.url_spec.preg);
    1280         327 :       freez(pattern->pattern.url_spec.preg);
    1281             :    }
    1282         574 :    if (pattern->pattern.tag_regex) /* NOTE(review): url_spec members and tag_regex are both released regardless of pattern->flags; if they share storage this needs a second look - confirm against the struct definition */
    1283             :    {
    1284           0 :       regfree(pattern->pattern.tag_regex);
    1285           0 :       freez(pattern->pattern.tag_regex);
    1286             :    }
    1287             : }
    1288             : 
    1289             : 
    1290             : /*********************************************************************
    1291             :  *
    1292             :  * Function    :  port_matches
    1293             :  *
    1294             :  * Description :  Compares a port against a port list.
    1295             :  *
    1296             :  * Parameters  :
    1297             :  *          1  :  port      = The port to check.
    1298             :  *          2  :  port_list = The list of port to compare with.
    1299             :  *
    1300             :  * Returns     :  TRUE for yes, FALSE otherwise.
    1301             :  *
    1302             :  *********************************************************************/
    1303      588897 : static int port_matches(const int port, const char *port_list)
    1304             : {
    1305      588897 :    return ((NULL == port_list) || match_portlist(port_list, port));
    1306             : }
    1307             : 
    1308             : 
    1309             : /*********************************************************************
    1310             :  *
    1311             :  * Function    :  host_matches
    1312             :  *
    1313             :  * Description :  Compares a host against a host pattern.
    1314             :  *
    1315             :  * Parameters  :
    1316             :  *          1  :  url = The URL to match
    1317             :  *          2  :  pattern = The URL pattern
    1318             :  *
    1319             :  * Returns     :  TRUE for yes, FALSE otherwise.
    1320             :  *
    1321             :  *********************************************************************/
    1322      555459 : static int host_matches(const struct http_request *http,
    1323             :                         const struct pattern_spec *pattern)
    1324             : {
    1325      555459 :    assert(http->host != NULL);
    1326             : #ifdef FEATURE_PCRE_HOST_PATTERNS
    1327      555459 :    if (pattern->pattern.url_spec.host_regex_type == PCRE_HOST_PATTERN)
    1328             :    {
    1329           0 :       return ((NULL == pattern->pattern.url_spec.host_regex)
    1330           0 :          || (0 == regexec(pattern->pattern.url_spec.host_regex,
    1331           0 :                http->host, 0, NULL, 0)));
    1332             :    }
    1333             : #endif
    1334      555459 :    return ((NULL == pattern->pattern.url_spec.dbuffer) || (0 == domain_match(pattern, http)));
    1335             : }
    1336             : 
    1337             : 
    1338             : /*********************************************************************
    1339             :  *
    1340             :  * Function    :  path_matches
    1341             :  *
    1342             :  * Description :  Compares a path against a path pattern.
    1343             :  *
    1344             :  * Parameters  :
    1345             :  *          1  :  path = The path to match
    1346             :  *          2  :  pattern = The URL pattern
    1347             :  *
    1348             :  * Returns     :  TRUE for yes, FALSE otherwise.
    1349             :  *
    1350             :  *********************************************************************/
    1351      192528 : static int path_matches(const char *path, const struct pattern_spec *pattern)
    1352             : {
    1353      192528 :    return ((NULL == pattern->pattern.url_spec.preg)
    1354      192528 :       || (0 == regexec(pattern->pattern.url_spec.preg, path, 0, NULL, 0)));
    1355             : }
    1356             : 
    1357             : 
    1358             : /*********************************************************************
    1359             :  *
    1360             :  * Function    :  url_match
    1361             :  *
    1362             :  * Description :  Compare a URL against a URL pattern.
    1363             :  *
    1364             :  * Parameters  :
    1365             :  *          1  :  pattern = a URL pattern
    1366             :  *          2  :  url = URL to match
    1367             :  *
    1368             :  * Returns     :  Nonzero if the URL matches the pattern, else 0.
    1369             :  *
    1370             :  *********************************************************************/
    1371      727461 : int url_match(const struct pattern_spec *pattern,
    1372             :               const struct http_request *http)
    1373             : {
    1374      727461 :    if (!(pattern->flags & PATTERN_SPEC_URL_PATTERN))
    1375             :    {
    1376             :       /* It's not an URL pattern and thus shouldn't be matched against URLs */
    1377      138564 :       return 0;
    1378             :    }
    1379             : 
    1380      588897 :    return (port_matches(http->port, pattern->pattern.url_spec.port_list)
    1381      588897 :       && host_matches(http, pattern) && path_matches(http->path, pattern));
    1382             : 
    1383             : }
    1384             : 
    1385             : 
    1386             : /*********************************************************************
    1387             :  *
    1388             :  * Function    :  match_portlist
    1389             :  *
    1390             :  * Description :  Check if a given number is covered by a comma
    1391             :  *                separated list of numbers and ranges (a,b-c,d,..)
    1392             :  *
    1393             :  * Parameters  :
    1394             :  *          1  :  portlist = String with list
    1395             :  *          2  :  port = port to check
    1396             :  *
    1397             :  * Returns     :  0 => no match
    1398             :  *                1 => match
    1399             :  *
    1400             :  *********************************************************************/
    1401       35152 : int match_portlist(const char *portlist, int port)
    1402             : {
    1403             :    char *min, *max, *next, *portlist_copy;
    1404             : 
    1405       35152 :    min = portlist_copy = strdup_or_die(portlist);
    1406             : 
    1407             :    /*
    1408             :     * Zero-terminate first item and remember offset for next
    1409             :     */
    1410       35152 :    if (NULL != (next = strchr(portlist_copy, (int) ',')))
    1411             :    {
    1412         511 :       *next++ = '\0';
    1413             :    }
    1414             : 
    1415             :    /*
    1416             :     * Loop through all items, checking for match
    1417             :     */
    1418       69432 :    while (NULL != min)
    1419             :    {
    1420       35799 :       if (NULL == (max = strchr(min, (int) '-')))
    1421             :       {
    1422             :          /*
    1423             :           * No dash, check for equality
    1424             :           */
    1425       35799 :          if (port == atoi(min))
    1426             :          {
    1427        1519 :             freez(portlist_copy);
    1428        1519 :             return(1);
    1429             :          }
    1430             :       }
    1431             :       else
    1432             :       {
    1433             :          /*
    1434             :           * This is a range, so check if between min and max,
    1435             :           * or, if max was omitted, between min and 65K
    1436             :           */
    1437           0 :          *max++ = '\0';
    1438           0 :          if (port >= atoi(min) && port <= (atoi(max) ? atoi(max) : 65535))
    1439             :          {
    1440           0 :             freez(portlist_copy);
    1441           0 :             return(1);
    1442             :          }
    1443             : 
    1444             :       }
    1445             : 
    1446             :       /*
    1447             :        * Jump to next item
    1448             :        */
    1449       34280 :       min = next;
    1450             : 
    1451             :       /*
    1452             :        * Zero-terminate next item and remember offset for n+1
    1453             :        */
    1454       34280 :       if ((NULL != next) && (NULL != (next = strchr(next, (int) ','))))
    1455             :       {
    1456         450 :          *next++ = '\0';
    1457             :       }
    1458             :    }
    1459             : 
    1460       33633 :    freez(portlist_copy);
    1461       33633 :    return 0;
    1462             : 
    1463             : }
    1464             : 
    1465             : 
    1466             : /*********************************************************************
    1467             :  *
    1468             :  * Function    :  parse_forwarder_address
    1469             :  *
    1470             :  * Description :  Parse out the username, password, host and port from
    1471             :  *                a forwarder address.
    1472             :  *
    1473             :  * Parameters  :
    1474             :  *          1  :  address = The forwarder address to parse.
    1475             :  *          2  :  hostname = Used to return the hostname. NULL on error.
    1476             :  *          3  :  port = Used to return the port. Untouched if no port
    1477             :  *                       is specified.
    1478             :  *          4  :  username = Used to return the username if any.
    1479             :  *          5  :  password = Used to return the password if any.
    1480             :  *
    1481             :  * Returns     :  JB_ERR_OK on success
    1482             :  *                JB_ERR_MEMORY on out of memory
    1483             :  *                JB_ERR_PARSE on malformed address.
    1484             :  *
    1485             :  *********************************************************************/
    1486           0 : jb_err parse_forwarder_address(char *address, char **hostname, int *port,
    1487             :                                char **username, char **password)
    1488             : {
    1489             :    char *p;
    1490             :    char *tmp;
    1491             : 
    1492           0 :    tmp = *hostname = strdup_or_die(address);
    1493             : 
    1494             :    /* Parse username and password */
    1495           0 :    if (username && password && (NULL != (p = strchr(*hostname, '@'))))
    1496             :    {
    1497           0 :       *p++ = '\0';
    1498           0 :       *username = strdup_or_die(*hostname);
    1499           0 :       *hostname = strdup_or_die(p);
    1500             : 
    1501           0 :       if (NULL != (p = strchr(*username, ':')))
    1502             :       {
    1503           0 :          *p++ = '\0';
    1504           0 :          *password = strdup_or_die(p);
    1505             :       }
    1506           0 :       freez(tmp);
    1507             :    }
    1508             : 
    1509             :    /* Parse hostname and port */
    1510           0 :    p = *hostname;
    1511           0 :    if ((*p == '[') && (NULL == strchr(p, ']')))
    1512             :    {
    1513             :       /* XXX: Should do some more validity checks here. */
    1514           0 :       return JB_ERR_PARSE;
    1515             :    }
    1516             : 
    1517           0 :    if ((**hostname == '[') && (NULL != (p = strchr(*hostname, ']'))))
    1518             :    {
    1519           0 :       *p++ = '\0';
    1520           0 :       memmove(*hostname, (*hostname + 1), (size_t)(p - *hostname));
    1521           0 :       if (*p == ':')
    1522             :       {
    1523           0 :          *port = (int)strtol(++p, NULL, 0);
    1524             :       }
    1525             :    }
    1526           0 :    else if (NULL != (p = strchr(*hostname, ':')))
    1527             :    {
    1528           0 :       *p++ = '\0';
    1529           0 :       *port = (int)strtol(p, NULL, 0);
    1530             :    }
    1531             : 
    1532           0 :    return JB_ERR_OK;
    1533             : 
    1534             : }
    1535             : 
    1536             : 
    1537             : /*
    1538             :   Local Variables:
    1539             :   tab-width: 3
    1540             :   end:
    1541             : */

Generated by: LCOV version 1.14