aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorMichal Nazarewicz2016-07-17 03:09:38 +0200
committerMichal Nazarewicz2016-08-02 15:39:10 +0200
commit4538a5e37e8dacde4b3e828d832c4c558a146912 (patch)
tree43a158bf0635a01bf5946730ac439fd0b3b8f606 /src
parente7257061317c604492d20f26f312b9e925aa1860 (diff)
downloademacs-4538a5e37e8dacde4b3e828d832c4c558a146912.tar.gz
emacs-4538a5e37e8dacde4b3e828d832c4c558a146912.zip
Refactor regex character class parsing in [:name:]
The re_wctype function is used in three separate places, and in all of
those places almost identical code extracting the name from [:name:]
surrounds it. Furthermore, re_wctype requires a NUL-terminated string,
so the name of the character class is copied to a temporary buffer.

The code duplication and unnecessary memory copying can be avoided by
pushing the responsibility of parsing the whole [:name:] sequence to
the function.

Furthermore, since the function now has access to the length of the
character class name (since it’s doing the parsing), it can take
advantage of that information by skipping some string comparisons and
using a constant-length memcmp instead of strcmp, which needs to take
care of NUL bytes.

* src/regex.c (re_wctype): Delete function. Replace it with:
(re_wctype_parse): New function which parses a whole [:name:] string
and returns a RECC_* constant or -1 if the string is not of [:name:]
format.
(regex_compile): Use re_wctype_parse.
* src/syntax.c (skip_chars): Use re_wctype_parse.
Diffstat (limited to 'src')
-rw-r--r--src/regex.c310
-rw-r--r--src/regex.h14
-rw-r--r--src/syntax.c96
3 files changed, 181 insertions, 239 deletions
diff --git a/src/regex.c b/src/regex.c
index 1f2a1f086de..3a25835f452 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -1969,29 +1969,96 @@ struct range_table_work_area
1969 1969
1970#if ! WIDE_CHAR_SUPPORT 1970#if ! WIDE_CHAR_SUPPORT
1971 1971
1972/* Map a string to the char class it names (if any). */ 1972/* Parse a character class, i.e. string such as "[:name:]". *strp
1973 points to the string to be parsed and limit is length, in bytes, of
1974 that string.
1975
1976 If *strp points to a string that begins with "[:name:]", where name is
1977 a non-empty sequence of lower case letters, *strp will be advanced past the
1978 closing square bracket and RECC_* constant which maps to the name will be
1979 returned. If name is not a valid character class name, zero (i.e. RECC_ERROR)
1980 is returned.
1981
1982 Otherwise, if *strp doesn’t begin with "[:name:]", -1 is returned.
1983
1984 The function can be used on ASCII and multibyte (UTF-8-encoded) strings.
1985 */
1973re_wctype_t 1986re_wctype_t
1974re_wctype (const_re_char *str) 1987re_wctype_parse (const unsigned char **strp, unsigned limit)
1975{ 1988{
1976 const char *string = (const char *) str; 1989 const char *beg = (const char *)*strp, *it;
1977 if (STREQ (string, "alnum")) return RECC_ALNUM; 1990
1978 else if (STREQ (string, "alpha")) return RECC_ALPHA; 1991 if (limit < 4 || beg[0] != '[' || beg[1] != ':')
1979 else if (STREQ (string, "word")) return RECC_WORD; 1992 return -1;
1980 else if (STREQ (string, "ascii")) return RECC_ASCII; 1993
1981 else if (STREQ (string, "nonascii")) return RECC_NONASCII; 1994 beg += 2; /* skip opening ‘[:’ */
1982 else if (STREQ (string, "graph")) return RECC_GRAPH; 1995 limit -= 3; /* opening ‘[:’ and half of closing ‘:]’; --limit handles rest */
1983 else if (STREQ (string, "lower")) return RECC_LOWER; 1996 for (it = beg; it[0] != ':' || it[1] != ']'; ++it)
1984 else if (STREQ (string, "print")) return RECC_PRINT; 1997 if (!--limit)
1985 else if (STREQ (string, "punct")) return RECC_PUNCT; 1998 return -1;
1986 else if (STREQ (string, "space")) return RECC_SPACE; 1999
1987 else if (STREQ (string, "upper")) return RECC_UPPER; 2000 *strp = (const unsigned char *)(it + 2);
1988 else if (STREQ (string, "unibyte")) return RECC_UNIBYTE; 2001
1989 else if (STREQ (string, "multibyte")) return RECC_MULTIBYTE; 2002 /* Sort tests in the length=five case by frequency of the classes to minimise
1990 else if (STREQ (string, "digit")) return RECC_DIGIT; 2003 number of times we fail the comparison. The frequencies of character class
1991 else if (STREQ (string, "xdigit")) return RECC_XDIGIT; 2004 names used in Emacs sources as of 2016-07-27:
1992 else if (STREQ (string, "cntrl")) return RECC_CNTRL; 2005
1993 else if (STREQ (string, "blank")) return RECC_BLANK; 2006 $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + |
1994 else return 0; 2007 sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr
2008 213 [:alnum:]
2009 104 [:alpha:]
2010 62 [:space:]
2011 39 [:digit:]
2012 36 [:blank:]
2013 26 [:word:]
2014 26 [:upper:]
2015 21 [:lower:]
2016 10 [:xdigit:]
2017 10 [:punct:]
2018 10 [:ascii:]
2019 4 [:nonascii:]
2020 4 [:graph:]
2021 2 [:print:]
2022 2 [:cntrl:]
2023 1 [:ff:]
2024
2025 If you update this list, consider also updating chain of or’ed conditions
2026 in execute_charset function.
2027 */
2028
2029 switch (it - beg) {
2030 case 4:
2031 if (!memcmp (beg, "word", 4)) return RECC_WORD;
2032 break;
2033 case 5:
2034 if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM;
2035 if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA;
2036 if (!memcmp (beg, "space", 5)) return RECC_SPACE;
2037 if (!memcmp (beg, "digit", 5)) return RECC_DIGIT;
2038 if (!memcmp (beg, "blank", 5)) return RECC_BLANK;
2039 if (!memcmp (beg, "upper", 5)) return RECC_UPPER;
2040 if (!memcmp (beg, "lower", 5)) return RECC_LOWER;
2041 if (!memcmp (beg, "punct", 5)) return RECC_PUNCT;
2042 if (!memcmp (beg, "ascii", 5)) return RECC_ASCII;
2043 if (!memcmp (beg, "graph", 5)) return RECC_GRAPH;
2044 if (!memcmp (beg, "print", 5)) return RECC_PRINT;
2045 if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL;
2046 break;
2047 case 6:
2048 if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT;
2049 break;
2050 case 7:
2051 if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE;
2052 break;
2053 case 8:
2054 if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII;
2055 break;
2056 case 9:
2057 if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE;
2058 break;
2059 }
2060
2061 return RECC_ERROR;
1995} 2062}
1996 2063
1997/* True if CH is in the char class CC. */ 2064/* True if CH is in the char class CC. */
@@ -2776,10 +2843,74 @@ regex_compile (const_re_char *pattern, size_t size, reg_syntax_t syntax,
2776 { 2843 {
2777 boolean escaped_char = false; 2844 boolean escaped_char = false;
2778 const unsigned char *p2 = p; 2845 const unsigned char *p2 = p;
2846 re_wctype_t cc;
2779 re_wchar_t ch; 2847 re_wchar_t ch;
2780 2848
2781 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2849 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2782 2850
2851 /* See if we're at the beginning of a possible character
2852 class. */
2853 if (syntax & RE_CHAR_CLASSES &&
2854 (cc = re_wctype_parse(&p, pend - p)) != -1)
2855 {
2856 if (cc == 0)
2857 FREE_STACK_RETURN (REG_ECTYPE);
2858
2859 if (p == pend)
2860 FREE_STACK_RETURN (REG_EBRACK);
2861
2862#ifndef emacs
2863 for (ch = 0; ch < (1 << BYTEWIDTH); ++ch)
2864 if (re_iswctype (btowc (ch), cc))
2865 {
2866 c = TRANSLATE (ch);
2867 if (c < (1 << BYTEWIDTH))
2868 SET_LIST_BIT (c);
2869 }
2870#else /* emacs */
2871 /* Most character classes in a multibyte match just set
2872 a flag. Exceptions are is_blank, is_digit, is_cntrl, and
2873 is_xdigit, since they can only match ASCII characters.
2874 We don't need to handle them for multibyte. They are
2875 distinguished by a negative wctype. */
2876
2877 /* Setup the gl_state object to its buffer-defined value.
2878 This hardcodes the buffer-global syntax-table for ASCII
2879 chars, while the other chars will obey syntax-table
2880 properties. It's not ideal, but it's the way it's been
2881 done until now. */
2882 SETUP_BUFFER_SYNTAX_TABLE ();
2883
2884 for (ch = 0; ch < 256; ++ch)
2885 {
2886 c = RE_CHAR_TO_MULTIBYTE (ch);
2887 if (! CHAR_BYTE8_P (c)
2888 && re_iswctype (c, cc))
2889 {
2890 SET_LIST_BIT (ch);
2891 c1 = TRANSLATE (c);
2892 if (c1 == c)
2893 continue;
2894 if (ASCII_CHAR_P (c1))
2895 SET_LIST_BIT (c1);
2896 else if ((c1 = RE_CHAR_TO_UNIBYTE (c1)) >= 0)
2897 SET_LIST_BIT (c1);
2898 }
2899 }
2900 SET_RANGE_TABLE_WORK_AREA_BIT
2901 (range_table_work, re_wctype_to_bit (cc));
2902#endif /* emacs */
2903 /* In most cases the matching rule for char classes only
2904 uses the syntax table for multibyte chars, so that the
2905 content of the syntax-table is not hardcoded in the
2906 range_table. SPACE and WORD are the two exceptions. */
2907 if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD)))
2908 bufp->used_syntax = 1;
2909
2910 /* Repeat the loop. */
2911 continue;
2912 }
2913
2783 /* Don't translate yet. The range TRANSLATE(X..Y) cannot 2914 /* Don't translate yet. The range TRANSLATE(X..Y) cannot
2784 always be determined from TRANSLATE(X) and TRANSLATE(Y) 2915 always be determined from TRANSLATE(X) and TRANSLATE(Y)
2785 So the translation is done later in a loop. Example: 2916 So the translation is done later in a loop. Example:
@@ -2803,119 +2934,6 @@ regex_compile (const_re_char *pattern, size_t size, reg_syntax_t syntax,
2803 break; 2934 break;
2804 } 2935 }
2805 2936
2806 /* See if we're at the beginning of a possible character
2807 class. */
2808
2809 if (!escaped_char &&
2810 syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2811 {
2812 /* Leave room for the null. */
2813 unsigned char str[CHAR_CLASS_MAX_LENGTH + 1];
2814 const unsigned char *class_beg;
2815
2816 PATFETCH (c);
2817 c1 = 0;
2818 class_beg = p;
2819
2820 /* If pattern is `[[:'. */
2821 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2822
2823 for (;;)
2824 {
2825 PATFETCH (c);
2826 if ((c == ':' && *p == ']') || p == pend)
2827 break;
2828 if (c1 < CHAR_CLASS_MAX_LENGTH)
2829 str[c1++] = c;
2830 else
2831 /* This is in any case an invalid class name. */
2832 str[0] = '\0';
2833 }
2834 str[c1] = '\0';
2835
2836 /* If isn't a word bracketed by `[:' and `:]':
2837 undo the ending character, the letters, and
2838 leave the leading `:' and `[' (but set bits for
2839 them). */
2840 if (c == ':' && *p == ']')
2841 {
2842 re_wctype_t cc = re_wctype (str);
2843
2844 if (cc == 0)
2845 FREE_STACK_RETURN (REG_ECTYPE);
2846
2847 /* Throw away the ] at the end of the character
2848 class. */
2849 PATFETCH (c);
2850
2851 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2852
2853#ifndef emacs
2854 for (ch = 0; ch < (1 << BYTEWIDTH); ++ch)
2855 if (re_iswctype (btowc (ch), cc))
2856 {
2857 c = TRANSLATE (ch);
2858 if (c < (1 << BYTEWIDTH))
2859 SET_LIST_BIT (c);
2860 }
2861#else /* emacs */
2862 /* Most character classes in a multibyte match
2863 just set a flag. Exceptions are is_blank,
2864 is_digit, is_cntrl, and is_xdigit, since
2865 they can only match ASCII characters. We
2866 don't need to handle them for multibyte.
2867 They are distinguished by a negative wctype. */
2868
2869 /* Setup the gl_state object to its buffer-defined
2870 value. This hardcodes the buffer-global
2871 syntax-table for ASCII chars, while the other chars
2872 will obey syntax-table properties. It's not ideal,
2873 but it's the way it's been done until now. */
2874 SETUP_BUFFER_SYNTAX_TABLE ();
2875
2876 for (ch = 0; ch < 256; ++ch)
2877 {
2878 c = RE_CHAR_TO_MULTIBYTE (ch);
2879 if (! CHAR_BYTE8_P (c)
2880 && re_iswctype (c, cc))
2881 {
2882 SET_LIST_BIT (ch);
2883 c1 = TRANSLATE (c);
2884 if (c1 == c)
2885 continue;
2886 if (ASCII_CHAR_P (c1))
2887 SET_LIST_BIT (c1);
2888 else if ((c1 = RE_CHAR_TO_UNIBYTE (c1)) >= 0)
2889 SET_LIST_BIT (c1);
2890 }
2891 }
2892 SET_RANGE_TABLE_WORK_AREA_BIT
2893 (range_table_work, re_wctype_to_bit (cc));
2894#endif /* emacs */
2895 /* In most cases the matching rule for char classes
2896 only uses the syntax table for multibyte chars,
2897 so that the content of the syntax-table is not
2898 hardcoded in the range_table. SPACE and WORD are
2899 the two exceptions. */
2900 if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD)))
2901 bufp->used_syntax = 1;
2902
2903 /* Repeat the loop. */
2904 continue;
2905 }
2906 else
2907 {
2908 /* Go back to right after the "[:". */
2909 p = class_beg;
2910 SET_LIST_BIT ('[');
2911
2912 /* Because the `:' may start the range, we
2913 can't simply set bit and repeat the loop.
2914 Instead, just set it to C and handle below. */
2915 c = ':';
2916 }
2917 }
2918
2919 if (p < pend && p[0] == '-' && p[1] != ']') 2937 if (p < pend && p[0] == '-' && p[1] != ']')
2920 { 2938 {
2921 2939
@@ -4659,28 +4677,8 @@ execute_charset (const_re_char **pp, unsigned c, unsigned corig, bool unibyte)
4659 re_wchar_t range_start, range_end; 4677 re_wchar_t range_start, range_end;
4660 4678
4661 /* Sort tests by the most commonly used classes with some adjustment to which 4679 /* Sort tests by the most commonly used classes with some adjustment to which
4662 tests are easiest to perform. Frequencies of character class names used in 4680 tests are easiest to perform. Take a look at comment in re_wctype_parse
4663 Emacs sources as of 2016-07-15: 4681 for table with frequencies of character class names. */
4664
4665 $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + |
4666 sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr
4667 213 [:alnum:]
4668 104 [:alpha:]
4669 62 [:space:]
4670 39 [:digit:]
4671 36 [:blank:]
4672 26 [:upper:]
4673 24 [:word:]
4674 21 [:lower:]
4675 10 [:punct:]
4676 10 [:ascii:]
4677 9 [:xdigit:]
4678 4 [:nonascii:]
4679 4 [:graph:]
4680 2 [:print:]
4681 2 [:cntrl:]
4682 1 [:ff:]
4683 */
4684 4682
4685 if ((class_bits & BIT_MULTIBYTE) || 4683 if ((class_bits & BIT_MULTIBYTE) ||
4686 (class_bits & BIT_ALNUM && ISALNUM (c)) || 4684 (class_bits & BIT_ALNUM && ISALNUM (c)) ||
diff --git a/src/regex.h b/src/regex.h
index 817167a07ca..01b659addbb 100644
--- a/src/regex.h
+++ b/src/regex.h
@@ -585,25 +585,13 @@ extern void regfree (regex_t *__preg);
585/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ 585/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
586# include <wchar.h> 586# include <wchar.h>
587# include <wctype.h> 587# include <wctype.h>
588#endif
589 588
590#if WIDE_CHAR_SUPPORT
591/* The GNU C library provides support for user-defined character classes
592 and the functions from ISO C amendment 1. */
593# ifdef CHARCLASS_NAME_MAX
594# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
595# else
596/* This shouldn't happen but some implementation might still have this
597 problem. Use a reasonable default value. */
598# define CHAR_CLASS_MAX_LENGTH 256
599# endif
600typedef wctype_t re_wctype_t; 589typedef wctype_t re_wctype_t;
601typedef wchar_t re_wchar_t; 590typedef wchar_t re_wchar_t;
602# define re_wctype wctype 591# define re_wctype wctype
603# define re_iswctype iswctype 592# define re_iswctype iswctype
604# define re_wctype_to_bit(cc) 0 593# define re_wctype_to_bit(cc) 0
605#else 594#else
606# define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'. */
607# ifndef emacs 595# ifndef emacs
608# define btowc(c) c 596# define btowc(c) c
609# endif 597# endif
@@ -621,7 +609,7 @@ typedef enum { RECC_ERROR = 0,
621} re_wctype_t; 609} re_wctype_t;
622 610
623extern char re_iswctype (int ch, re_wctype_t cc); 611extern char re_iswctype (int ch, re_wctype_t cc);
624extern re_wctype_t re_wctype (const unsigned char* str); 612extern re_wctype_t re_wctype_parse (const unsigned char **strp, unsigned limit);
625 613
626typedef int re_wchar_t; 614typedef int re_wchar_t;
627 615
diff --git a/src/syntax.c b/src/syntax.c
index f8d987b377c..667de402ec4 100644
--- a/src/syntax.c
+++ b/src/syntax.c
@@ -1691,44 +1691,22 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1691 /* At first setup fastmap. */ 1691 /* At first setup fastmap. */
1692 while (i_byte < size_byte) 1692 while (i_byte < size_byte)
1693 { 1693 {
1694 c = str[i_byte++]; 1694 if (handle_iso_classes)
1695
1696 if (handle_iso_classes && c == '['
1697 && i_byte < size_byte
1698 && str[i_byte] == ':')
1699 { 1695 {
1700 const unsigned char *class_beg = str + i_byte + 1; 1696 const unsigned char *ch = str + i_byte;
1701 const unsigned char *class_end = class_beg; 1697 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1702 const unsigned char *class_limit = str + size_byte - 2;
1703 /* Leave room for the null. */
1704 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1705 re_wctype_t cc;
1706
1707 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1708 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1709
1710 while (class_end < class_limit
1711 && *class_end >= 'a' && *class_end <= 'z')
1712 class_end++;
1713
1714 if (class_end == class_beg
1715 || *class_end != ':' || class_end[1] != ']')
1716 goto not_a_class_name;
1717
1718 memcpy (class_name, class_beg, class_end - class_beg);
1719 class_name[class_end - class_beg] = 0;
1720
1721 cc = re_wctype (class_name);
1722 if (cc == 0) 1698 if (cc == 0)
1723 error ("Invalid ISO C character class"); 1699 error ("Invalid ISO C character class");
1724 1700 if (cc != -1)
1725 iso_classes = Fcons (make_number (cc), iso_classes); 1701 {
1726 1702 iso_classes = Fcons (make_number (cc), iso_classes);
1727 i_byte = class_end + 2 - str; 1703 i_byte = ch - str;
1728 continue; 1704 continue;
1705 }
1729 } 1706 }
1730 1707
1731 not_a_class_name: 1708 c = str[i_byte++];
1709
1732 if (c == '\\') 1710 if (c == '\\')
1733 { 1711 {
1734 if (i_byte == size_byte) 1712 if (i_byte == size_byte)
@@ -1808,54 +1786,32 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1808 while (i_byte < size_byte) 1786 while (i_byte < size_byte)
1809 { 1787 {
1810 int leading_code = str[i_byte]; 1788 int leading_code = str[i_byte];
1811 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1812 i_byte += len;
1813 1789
1814 if (handle_iso_classes && c == '[' 1790 if (handle_iso_classes)
1815 && i_byte < size_byte
1816 && STRING_CHAR (str + i_byte) == ':')
1817 { 1791 {
1818 const unsigned char *class_beg = str + i_byte + 1; 1792 const unsigned char *ch = str + i_byte;
1819 const unsigned char *class_end = class_beg; 1793 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1820 const unsigned char *class_limit = str + size_byte - 2;
1821 /* Leave room for the null. */
1822 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1823 re_wctype_t cc;
1824
1825 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1826 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1827
1828 while (class_end < class_limit
1829 && *class_end >= 'a' && *class_end <= 'z')
1830 class_end++;
1831
1832 if (class_end == class_beg
1833 || *class_end != ':' || class_end[1] != ']')
1834 goto not_a_class_name_multibyte;
1835
1836 memcpy (class_name, class_beg, class_end - class_beg);
1837 class_name[class_end - class_beg] = 0;
1838
1839 cc = re_wctype (class_name);
1840 if (cc == 0) 1794 if (cc == 0)
1841 error ("Invalid ISO C character class"); 1795 error ("Invalid ISO C character class");
1842 1796 if (cc != -1)
1843 iso_classes = Fcons (make_number (cc), iso_classes); 1797 {
1844 1798 iso_classes = Fcons (make_number (cc), iso_classes);
1845 i_byte = class_end + 2 - str; 1799 i_byte = ch - str;
1846 continue; 1800 continue;
1801 }
1847 } 1802 }
1848 1803
1849 not_a_class_name_multibyte: 1804 if (leading_code== '\\')
1850 if (c == '\\')
1851 { 1805 {
1852 if (i_byte == size_byte) 1806 if (++i_byte == size_byte)
1853 break; 1807 break;
1854 1808
1855 leading_code = str[i_byte]; 1809 leading_code = str[i_byte];
1856 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1857 i_byte += len;
1858 } 1810 }
1811 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1812 i_byte += len;
1813
1814
1859 /* Treat `-' as range character only if another character 1815 /* Treat `-' as range character only if another character
1860 follows. */ 1816 follows. */
1861 if (i_byte + 1 < size_byte 1817 if (i_byte + 1 < size_byte