aboutsummaryrefslogtreecommitdiffstats
path: root/src/syntax.c
diff options
context:
space:
mode:
author Michal Nazarewicz 2016-07-17 03:09:38 +0200
committer Michal Nazarewicz 2016-08-02 15:39:10 +0200
commit 4538a5e37e8dacde4b3e828d832c4c558a146912 (patch)
tree 43a158bf0635a01bf5946730ac439fd0b3b8f606 /src/syntax.c
parent e7257061317c604492d20f26f312b9e925aa1860 (diff)
download emacs-4538a5e37e8dacde4b3e828d832c4c558a146912.tar.gz
emacs-4538a5e37e8dacde4b3e828d832c4c558a146912.zip
Refactor regex character class parsing in [:name:]
The re_wctype function is used in three separate places, and in all of those places almost identical code extracting the name from [:name:] surrounds it. Furthermore, re_wctype requires a NUL-terminated string, so the name of the character class is copied to a temporary buffer.

The code duplication and unnecessary memory copying can be avoided by pushing the responsibility of parsing the whole [:name:] sequence to the function.

Furthermore, since the function now has access to the length of the character class name (since it’s doing the parsing), it can take advantage of that information by skipping some string comparisons and using a constant-length memcmp instead of strcmp, which needs to take care of NUL bytes.

* src/regex.c (re_wctype): Delete function. Replace it with:
(re_wctype_parse): New function which parses a whole [:name:] string and returns a RECC_* constant or -1 if the string is not of [:name:] format.
(regex_compile): Use re_wctype_parse.
* src/syntax.c (skip_chars): Use re_wctype_parse.
Diffstat (limited to 'src/syntax.c')
-rw-r--r-- src/syntax.c 96
1 files changed, 26 insertions, 70 deletions
diff --git a/src/syntax.c b/src/syntax.c
index f8d987b377c..667de402ec4 100644
--- a/src/syntax.c
+++ b/src/syntax.c
@@ -1691,44 +1691,22 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1691 /* At first setup fastmap. */ 1691 /* At first setup fastmap. */
1692 while (i_byte < size_byte) 1692 while (i_byte < size_byte)
1693 { 1693 {
1694 c = str[i_byte++]; 1694 if (handle_iso_classes)
1695
1696 if (handle_iso_classes && c == '['
1697 && i_byte < size_byte
1698 && str[i_byte] == ':')
1699 { 1695 {
1700 const unsigned char *class_beg = str + i_byte + 1; 1696 const unsigned char *ch = str + i_byte;
1701 const unsigned char *class_end = class_beg; 1697 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1702 const unsigned char *class_limit = str + size_byte - 2;
1703 /* Leave room for the null. */
1704 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1705 re_wctype_t cc;
1706
1707 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1708 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1709
1710 while (class_end < class_limit
1711 && *class_end >= 'a' && *class_end <= 'z')
1712 class_end++;
1713
1714 if (class_end == class_beg
1715 || *class_end != ':' || class_end[1] != ']')
1716 goto not_a_class_name;
1717
1718 memcpy (class_name, class_beg, class_end - class_beg);
1719 class_name[class_end - class_beg] = 0;
1720
1721 cc = re_wctype (class_name);
1722 if (cc == 0) 1698 if (cc == 0)
1723 error ("Invalid ISO C character class"); 1699 error ("Invalid ISO C character class");
1724 1700 if (cc != -1)
1725 iso_classes = Fcons (make_number (cc), iso_classes); 1701 {
1726 1702 iso_classes = Fcons (make_number (cc), iso_classes);
1727 i_byte = class_end + 2 - str; 1703 i_byte = ch - str;
1728 continue; 1704 continue;
1705 }
1729 } 1706 }
1730 1707
1731 not_a_class_name: 1708 c = str[i_byte++];
1709
1732 if (c == '\\') 1710 if (c == '\\')
1733 { 1711 {
1734 if (i_byte == size_byte) 1712 if (i_byte == size_byte)
@@ -1808,54 +1786,32 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1808 while (i_byte < size_byte) 1786 while (i_byte < size_byte)
1809 { 1787 {
1810 int leading_code = str[i_byte]; 1788 int leading_code = str[i_byte];
1811 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1812 i_byte += len;
1813 1789
1814 if (handle_iso_classes && c == '[' 1790 if (handle_iso_classes)
1815 && i_byte < size_byte
1816 && STRING_CHAR (str + i_byte) == ':')
1817 { 1791 {
1818 const unsigned char *class_beg = str + i_byte + 1; 1792 const unsigned char *ch = str + i_byte;
1819 const unsigned char *class_end = class_beg; 1793 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1820 const unsigned char *class_limit = str + size_byte - 2;
1821 /* Leave room for the null. */
1822 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1823 re_wctype_t cc;
1824
1825 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1826 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1827
1828 while (class_end < class_limit
1829 && *class_end >= 'a' && *class_end <= 'z')
1830 class_end++;
1831
1832 if (class_end == class_beg
1833 || *class_end != ':' || class_end[1] != ']')
1834 goto not_a_class_name_multibyte;
1835
1836 memcpy (class_name, class_beg, class_end - class_beg);
1837 class_name[class_end - class_beg] = 0;
1838
1839 cc = re_wctype (class_name);
1840 if (cc == 0) 1794 if (cc == 0)
1841 error ("Invalid ISO C character class"); 1795 error ("Invalid ISO C character class");
1842 1796 if (cc != -1)
1843 iso_classes = Fcons (make_number (cc), iso_classes); 1797 {
1844 1798 iso_classes = Fcons (make_number (cc), iso_classes);
1845 i_byte = class_end + 2 - str; 1799 i_byte = ch - str;
1846 continue; 1800 continue;
1801 }
1847 } 1802 }
1848 1803
1849 not_a_class_name_multibyte: 1804 if (leading_code== '\\')
1850 if (c == '\\')
1851 { 1805 {
1852 if (i_byte == size_byte) 1806 if (++i_byte == size_byte)
1853 break; 1807 break;
1854 1808
1855 leading_code = str[i_byte]; 1809 leading_code = str[i_byte];
1856 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1857 i_byte += len;
1858 } 1810 }
1811 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1812 i_byte += len;
1813
1814
1859 /* Treat `-' as range character only if another character 1815 /* Treat `-' as range character only if another character
1860 follows. */ 1816 follows. */
1861 if (i_byte + 1 < size_byte 1817 if (i_byte + 1 < size_byte