LCOV - code coverage report
Current view: top level - fuzz - encode.c (source / functions) Hit Total Coverage
Test: trace.lcov_info_final Lines: 94 96 97.9 %
Date: 2021-02-22 04:51:02 Functions: 7 7 100.0 %

          Line data    Source code
       1             : /*********************************************************************
       2             :  *
       3             :  * File        :  $Source: /cvsroot/ijbswa/current/encode.c,v $
       4             :  *
       5             :  * Purpose     :  Functions to encode and decode URLs, and also to
       6             :  *                encode cookies and HTML text.
       7             :  *
       8             :  * Copyright   :  Written by and Copyright (C) 2001 the
       9             :  *                Privoxy team. https://www.privoxy.org/
      10             :  *
      11             :  *                Based on the Internet Junkbuster originally written
      12             :  *                by and Copyright (C) 1997 Anonymous Coders and
      13             :  *                Junkbusters Corporation.  http://www.junkbusters.com
      14             :  *
      15             :  *                This program is free software; you can redistribute it
      16             :  *                and/or modify it under the terms of the GNU General
      17             :  *                Public License as published by the Free Software
      18             :  *                Foundation; either version 2 of the License, or (at
      19             :  *                your option) any later version.
      20             :  *
      21             :  *                This program is distributed in the hope that it will
      22             :  *                be useful, but WITHOUT ANY WARRANTY; without even the
      23             :  *                implied warranty of MERCHANTABILITY or FITNESS FOR A
      24             :  *                PARTICULAR PURPOSE.  See the GNU General Public
      25             :  *                License for more details.
      26             :  *
      27             :  *                The GNU General Public License should be included with
      28             :  *                this file.  If not, you can view it at
      29             :  *                http://www.gnu.org/copyleft/gpl.html
      30             :  *                or write to the Free Software Foundation, Inc., 59
      31             :  *                Temple Place - Suite 330, Boston, MA  02111-1307, USA.
      32             :  *
      33             :  *********************************************************************/
      34             : 
      35             : 
      36             : #include "config.h"
      37             : 
      38             : #include <stdio.h>
      39             : #include <stdlib.h>
      40             : #include <string.h>
      41             : #include <assert.h>
      42             : 
      43             : #include "miscutil.h"
      44             : #include "encode.h"
      45             : 
      46             : /*
                     :  * Maps special characters in a URL to their equivalent % codes.
                     :  *
                     :  * The table is indexed by the byte value (as an unsigned char).  An
                     :  * empty string means the byte is copied through unencoded; any other
                     :  * entry is the three-character "%XX" escape (upper-case hex) for that
                     :  * index, which together with its terminating NUL fills the [4] slot.
                     :  *
                     :  * Bytes left unencoded: '*', '-', '.', '@', '_', '0'-'9', 'A'-'Z' and
                     :  * 'a'-'z'.  Index 0 is also empty.
                     :  */
      47             : static const char url_code_map[256][4] = {
      48             :    "",    "%01", "%02", "%03", "%04", "%05", "%06", "%07", "%08", "%09",
      49             :    "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", "%10", "%11", "%12", "%13",
      50             :    "%14", "%15", "%16", "%17", "%18", "%19", "%1A", "%1B", "%1C", "%1D",
      51             :    "%1E", "%1F", "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
      52             :    "%28", "%29", "",    "%2B", "%2C", "",    "",    "%2F", "",    "",
      53             :    "",    "",    "",    "",    "",    "",    "",    "",    "%3A", "%3B",
      54             :    "%3C", "%3D", "%3E", "%3F", "",    "",    "",    "",    "",    "",
      55             :    "",    "",    "",    "",    "",    "",    "",    "",    "",    "",
      56             :    "",    "",    "",    "",    "",    "",    "",    "",    "",    "",
      57             :    "",    "%5B", "%5C", "%5D", "%5E", "",    "%60", "",    "",    "",
      58             :    "",    "",    "",    "",    "",    "",    "",    "",    "",    "",
      59             :    "",    "",    "",    "",    "",    "",    "",    "",    "",    "",
      60             :    "",    "",    "",    "%7B", "%7C", "%7D", "%7E", "%7F", "%80", "%81",
      61             :    "%82", "%83", "%84", "%85", "%86", "%87", "%88", "%89", "%8A", "%8B",
      62             :    "%8C", "%8D", "%8E", "%8F", "%90", "%91", "%92", "%93", "%94", "%95",
      63             :    "%96", "%97", "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
      64             :    "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", "%A8", "%A9",
      65             :    "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", "%B0", "%B1", "%B2", "%B3",
      66             :    "%B4", "%B5", "%B6", "%B7", "%B8", "%B9", "%BA", "%BB", "%BC", "%BD",
      67             :    "%BE", "%BF", "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
      68             :    "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", "%D0", "%D1",
      69             :    "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", "%D8", "%D9", "%DA", "%DB",
      70             :    "%DC", "%DD", "%DE", "%DF", "%E0", "%E1", "%E2", "%E3", "%E4", "%E5",
      71             :    "%E6", "%E7", "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
      72             :    "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", "%F8", "%F9",
      73             :    "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
      74             : };
      75             : 
      76             : /*
                     :  * Maps special characters in HTML to their equivalent entities.
                     :  *
                     :  * The table is indexed by the byte value (as an unsigned char).  A
                     :  * NULL entry means the byte needs no replacement; a non-NULL entry is
                     :  * the entity string to emit instead of the byte.
                     :  *
                     :  * Mapped bytes: '"' (34) -> "&quot;", '&' (38) -> "&amp;",
                     :  * '\'' (39) -> "&#39;", '<' (60) -> "&lt;" and '>' (62) -> "&gt;".
                     :  * The longest replacement is six characters ("&quot;").
                     :  */
      77             : static const char * const html_code_map[256] = {
      78             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      79             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      80             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      81             :    NULL, NULL, NULL, NULL,"&quot;",NULL,NULL,NULL,"&amp;","&#39;",
      82             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      83             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      84             :    "&lt;",NULL,"&gt;",NULL,NULL, NULL, NULL, NULL, NULL, NULL,
      85             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      86             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      87             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      88             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      89             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      90             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      91             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      92             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      93             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      94             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      95             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      96             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      97             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      98             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
      99             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     100             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     101             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     102             :    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     103             :    NULL, NULL, NULL, NULL, NULL, NULL
     104             : };
     105             : 
     106             : 
     107             : /*********************************************************************
     108             :  *
     109             :  * Function    :  html_encode
     110             :  *
     111             :  * Description :  Encodes a string so it's not interpreted as
     112             :  *                containing HTML tags or entities.
     113             :  *                Replaces <, >, &, and " with the appropriate HTML
     114             :  *                entities.
     115             :  *
     116             :  * Parameters  :
     117             :  *          1  :  s = String to encode.  Null-terminated.
     118             :  *
     119             :  * Returns     :  Encoded string, newly allocated on the heap.
     120             :  *                Caller is responsible for freeing it with free().
     121             :  *                If s is NULL, or on out-of memory, returns NULL.
     122             :  *
     123             :  *********************************************************************/
     124      619959 : char * html_encode(const char *s)
     125             : {
     126             :    char * buf;
     127             :    size_t buf_size;
     128             : 
     129      619959 :    if (s == NULL)
     130             :    {
     131         108 :       return NULL;
     132             :    }
     133             : 
     134             :    /* each input char can expand to at most 6 chars */
     135      619851 :    buf_size = (strlen(s) * 6) + 1;
     136      619851 :    buf = (char *) malloc(buf_size);
     137             : 
     138      619851 :    if (buf)
     139             :    {
     140             :       char c;
     141      619851 :       char * p = buf;
     142    11663564 :       while ((c = *s++) != '\0')
     143             :       {
     144    11043713 :          const char * replace_with = html_code_map[(unsigned char) c];
     145    11043713 :          if (replace_with != NULL)
     146             :          {
     147       21272 :             const size_t bytes_written = (size_t)(p - buf);
     148       21272 :             assert(bytes_written < buf_size);
     149       21272 :             p += strlcpy(p, replace_with, buf_size - bytes_written);
     150             :          }
     151             :          else
     152             :          {
     153    11022441 :             *p++ = c;
     154             :          }
     155             :       }
     156             : 
     157      619851 :       *p = '\0';
     158             : 
     159      619851 :       assert(strlen(buf) < buf_size);
     160             :    }
     161             : 
     162      619851 :    return(buf);
     163             : }
     164             : 
     165             : 
     166             : /*********************************************************************
     167             :  *
     168             :  * Function    :  html_encode_and_free_original
     169             :  *
     170             :  * Description :  Encodes a string so it's not interpreted as
     171             :  *                containing HTML tags or entities.
     172             :  *                Replaces <, >, &, and " with the appropriate HTML
     173             :  *                entities.  Free()s original string.
     174             :  *                If original string is NULL, simply returns NULL.
     175             :  *
     176             :  * Parameters  :
     177             :  *          1  :  s = String to encode.  Null-terminated.
     178             :  *
     179             :  * Returns     :  Encoded string, newly allocated on the heap.
     180             :  *                Caller is responsible for freeing it with free().
     181             :  *                If s is NULL, or on out-of memory, returns NULL.
     182             :  *
     183             :  *********************************************************************/
     184         337 : char * html_encode_and_free_original(char *s)
     185             : {
     186             :    char * result;
     187             : 
     188         337 :    if (s == NULL)
     189             :    {
     190           0 :       return NULL;
     191             :    }
     192             : 
     193         337 :    result = html_encode(s);
     194         337 :    free(s);
     195             : 
     196         337 :    return result;
     197             : }
     198             : 
     199             : 
     200             : /*********************************************************************
     201             :  *
     202             :  * Function    :  url_encode
     203             :  *
     204             :  * Description :  Encodes a string so it can be used in a URL
     205             :  *                query string.  Replaces special characters with
     206             :  *                the appropriate %xx codes.
     207             :  *
     208             :  *                XXX: url_query_encode() would be a more fitting
     209             :  *                     name.
     210             :  *
     211             :  * Parameters  :
     212             :  *          1  :  s = String to encode.  Null-terminated.
     213             :  *
     214             :  * Returns     :  Encoded string, newly allocated on the heap.
     215             :  *                Caller is responsible for freeing it with free().
     216             :  *                If s is NULL, or on out-of memory, returns NULL.
     217             :  *
     218             :  *********************************************************************/
     219       32718 : char * url_encode(const char *s)
     220             : {
     221             :    char * buf;
     222             :    size_t buf_size;
     223             : 
     224       32718 :    if (s == NULL)
     225             :    {
     226           0 :       return NULL;
     227             :    }
     228             : 
     229             :    /* each input char can expand to at most 3 chars */
     230       32718 :    buf_size = (strlen(s) * 3) + 1;
     231       32718 :    buf = (char *) malloc(buf_size);
     232             : 
     233       32718 :    if (buf)
     234             :    {
     235             :       char c;
     236       32718 :       char * p = buf;
     237      692180 :       while((c = *s++) != '\0')
     238             :       {
     239      659462 :          const char *replace_with = url_code_map[(unsigned char) c];
     240      659462 :          if (*replace_with != '\0')
     241             :          {
     242      142647 :             const size_t bytes_written = (size_t)(p - buf);
     243      142647 :             assert(bytes_written < buf_size);
     244      142647 :             p += strlcpy(p, replace_with, buf_size - bytes_written);
     245             :          }
     246             :          else
     247             :          {
     248      516815 :             *p++ = c;
     249             :          }
     250             :       }
     251             : 
     252       32718 :       *p = '\0';
     253             : 
     254       32718 :       assert(strlen(buf) < buf_size);
     255             :    }
     256             : 
     257       32718 :    return(buf);
     258             : }
     259             : 
     260             : 
     261             : /*********************************************************************
     262             :  *
     263             :  * Function    :  xdtoi
     264             :  *
     265             :  * Description :  Converts a single hex digit to an integer.
     266             :  *
     267             :  * Parameters  :
     268             :  *          1  :  d = in the range of ['0'..'9', 'A'..'F', 'a'..'f']
     269             :  *
     270             :  * Returns     :  The integer value, or -1 for non-hex characters.
     271             :  *
     272             :  *********************************************************************/
     273       16773 : static int xdtoi(const int d)
     274             : {
     275       16773 :    if ((d >= '0') && (d <= '9'))
     276             :    {
     277        7286 :       return(d - '0');
     278             :    }
     279        9487 :    else if ((d >= 'a') && (d <= 'f'))
     280             :    {
     281         999 :       return(d - 'a' + 10);
     282             :    }
     283        8488 :    else if ((d >= 'A') && (d <= 'F'))
     284             :    {
     285        6127 :       return(d - 'A' + 10);
     286             :    }
     287             :    else
     288             :    {
     289        2361 :       return(-1);
     290             :    }
     291             : }
     292             : 
     293             : 
     294             : /*********************************************************************
     295             :  *
     296             :  * Function    :  xtoi
     297             :  *
     298             :  * Description :  Hex string to integer conversion.
     299             :  *
     300             :  * Parameters  :
     301             :  *          1  :  s = a 2 digit hex string (e.g. "1f").  Only the
     302             :  *                    first two characters will be looked at.
     303             :  *
     304             :  * Returns     :  The integer value, or 0 for non-hex strings.
     305             :  *
     306             :  *********************************************************************/
     307        9311 : int xtoi(const char *s)
     308             : {
     309             :    int d1;
     310             : 
     311        9311 :    d1 = xdtoi(*s);
     312        9311 :    if (d1 >= 0)
     313             :    {
     314        7462 :       int d2 = xdtoi(*(s+1));
     315        7462 :       if (d2 >= 0)
     316             :       {
     317        6950 :          return (d1 << 4) + d2;
     318             :       }
     319             :    }
     320             : 
     321        2361 :    return 0;
     322             : }
     323             : 
     324             : 
     325             : /*********************************************************************
     326             :  *
     327             :  * Function    :  url_decode
     328             :  *
     329             :  * Description :  Decodes a URL query string, replacing %xx codes
     330             :  *                with their decoded form.
     331             :  *
     332             :  * Parameters  :
     333             :  *          1  :  s = String to decode.  Null-terminated.
     334             :  *
     335             :  * Returns     :  Decoded string, newly allocated on the heap.
     336             :  *                Caller is responsible for freeing it with free().
     337             :  *
     338             :  *********************************************************************/
     339      157519 : char *url_decode(const char * s)
     340             : {
     341      157519 :    char *buf = malloc(strlen(s) + 1);
     342      157519 :    char *q = buf;
     343             : 
     344      157519 :    if (buf)
     345             :    {
     346     1751821 :       while (*s)
     347             :       {
     348     1594302 :          switch (*s)
     349             :          {
     350       37950 :             case '+':
     351       37950 :                s++;
     352       37950 :                *q++ = ' ';
     353       37950 :                break;
     354             : 
     355        9311 :             case '%':
     356        9311 :                if ((*q = (char)xtoi(s + 1)) != '\0')
     357             :                {
     358        6736 :                   s += 3;
     359        6736 :                   q++;
     360             :                }
     361             :                else
     362             :                {
     363             :                   /* malformed, just use it */
     364        2575 :                   *q++ = *s++;
     365             :                }
     366        9311 :                break;
     367             : 
     368     1547041 :             default:
     369     1547041 :                *q++ = *s++;
     370     1547041 :                break;
     371             :          }
     372             :       }
     373      157519 :       *q = '\0';
     374             :    }
     375             : 
     376      157519 :    return(buf);
     377             : 
     378             : }
     379             : 
     380             : 
     381             : /*********************************************************************
     382             :  *
     383             :  * Function    :  percent_encode_url
     384             :  *
     385             :  * Description :  Percent-encodes a string so it no longer contains
     386             :  *                any characters that aren't valid in an URL according
     387             :  *                to RFC 3986.
     388             :  *
     389             :  *                XXX: Do not confuse with encode_url()
     390             :  *
     391             :  * Parameters  :
     392             :  *          1  :  s = String to encode.  Null-terminated.
     393             :  *
     394             :  * Returns     :  Encoded string, newly allocated on the heap.
     395             :  *                Caller is responsible for freeing it with free().
     396             :  *                If s is NULL, or on out-of memory, returns NULL.
     397             :  *
     398             :  *********************************************************************/
     399         314 : char *percent_encode_url(const char *s)
     400             : {
     401             :    static const char allowed_characters[128] = {
     402             :       '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
     403             :       '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
     404             :       '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
     405             :       '\0', '\0', '\0', '!',  '\0', '#',  '$',  '%',  '&',  '\'',
     406             :       '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',  '0',  '1',
     407             :       '2',  '3',  '4',  '5',  '6',  '7',  '8',  '9',  ':',  ';',
     408             :       '\0', '=',  '\0', '?',  '@',  'A',  'B',  'C',  'D',  'E',
     409             :       'F',  'G',  'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
     410             :       'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',  'X',  'Y',
     411             :       'Z',  '[',  '\0', ']',  '\0', '_',  '\0', 'a',  'b',  'c',
     412             :       'd',  'e',  'f',  'g',  'h',  'i',  'j',  'k',  'l',  'm',
     413             :       'n',  'o',  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
     414             :       'x',  'y',  'z',  '\0', '\0', '\0', '~',  '\0'
     415             :    };
     416             :    char *buf;
     417             :    size_t buf_size;
     418             : 
     419         314 :    assert(s != NULL);
     420             : 
     421             :    /* Each input char can expand to at most 3 chars. */
     422         314 :    buf_size = (strlen(s) * 3) + 1;
     423         314 :    buf = (char *)malloc(buf_size);
     424             : 
     425         314 :    if (buf != NULL)
     426             :    {
     427             :       char c;
     428         314 :       char *p = buf;
     429       15756 :       while ((c = *s++) != '\0')
     430             :       {
     431       15442 :          const unsigned int i = (unsigned char)c;
     432       15442 :          if (i >= sizeof(allowed_characters) || '\0' == allowed_characters[i])
     433        4263 :          {
     434        4263 :             const char *replace_with = url_code_map[i];
     435        4263 :             assert(*replace_with != '\0');
     436        4263 :             if (*replace_with != '\0')
     437             :             {
     438        4263 :                const size_t bytes_written = (size_t)(p - buf);
     439        4263 :                assert(bytes_written < buf_size);
     440        4263 :                p += strlcpy(p, replace_with, buf_size - bytes_written);
     441             :             }
     442             :          }
     443             :          else
     444             :          {
     445       11179 :             *p++ = c;
     446             :          }
     447             :       }
     448         314 :       *p = '\0';
     449             : 
     450         314 :       assert(strlen(buf) < buf_size);
     451             :    }
     452             : 
     453         314 :    return(buf);
     454             : 
     455             : }
     456             : 
     457             : 
     458             : /*
     459             :   Local Variables:
     460             :   tab-width: 3
     461             :   end:
     462             : */

Generated by: LCOV version 1.14