aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Mattias Engdegård 2019-06-28 10:20:55 +0200
committer Mattias Engdegård 2019-06-28 17:30:18 +0200
commita1f76adfb03c23bb4242928e8efe6193c301f0c1 (patch)
tree7e2a5c58656ffbe78d34dc58639d7cd5bf8f943a /src
parentaae5bf4438712c9fe761c5e4b5a871192852cd97 (diff)
downloademacs-a1f76adfb03c23bb4242928e8efe6193c301f0c1.tar.gz
emacs-a1f76adfb03c23bb4242928e8efe6193c301f0c1.zip
Correct regexp matching of raw bytes
Make regexp matching of raw bytes work in all combinations of unibyte and multibyte patterns and targets, as exact strings and in character alternatives (bug#3687). * src/regex-emacs.c (analyze_first): Include raw byte in fastmap when pattern is a multibyte exact string. Include leading byte in fastmap for raw bytes in character alternatives. (re_match_2_internal): Decrement the byte count by the number of bytes in the pattern character, not 1. * test/src/regex-emacs-tests.el (regexp-unibyte-unibyte) (regexp-multibyte-unibyte, regexp-unibyte-mutibyte) (regexp-multibyte-multibyte): New tests.
Diffstat (limited to 'src')
-rw-r--r--src/regex-emacs.c24
1 files changed, 20 insertions, 4 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index c353a78fb4f..5887eaa30c7 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -2794,6 +2794,7 @@ static int
2794analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte) 2794analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte)
2795{ 2795{
2796 int j, k; 2796 int j, k;
2797 int nbits;
2797 bool not; 2798 bool not;
2798 2799
2799 /* If all elements for base leading-codes in fastmap is set, this 2800 /* If all elements for base leading-codes in fastmap is set, this
@@ -2854,7 +2855,14 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte)
2854 each byte is a character. Thus, this works in both 2855 each byte is a character. Thus, this works in both
2855 cases. */ 2856 cases. */
2856 fastmap[p[1]] = 1; 2857 fastmap[p[1]] = 1;
2857 if (! multibyte) 2858 if (multibyte)
2859 {
2860 /* Cover the case of matching a raw char in a
2861 multibyte regexp against unibyte. */
2862 if (CHAR_BYTE8_HEAD_P (p[1]))
2863 fastmap[CHAR_TO_BYTE8 (STRING_CHAR (p + 1))] = 1;
2864 }
2865 else
2858 { 2866 {
2859 /* For the case of matching this unibyte regex 2867 /* For the case of matching this unibyte regex
2860 against multibyte, we must set a leading code of 2868 against multibyte, we must set a leading code of
@@ -2886,11 +2894,18 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte)
2886 case charset: 2894 case charset:
2887 if (!fastmap) break; 2895 if (!fastmap) break;
2888 not = (re_opcode_t) *(p - 1) == charset_not; 2896 not = (re_opcode_t) *(p - 1) == charset_not;
2889 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++; 2897 nbits = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
2890 j >= 0; j--) 2898 p++;
2899 for (j = 0; j < nbits; j++)
2891 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not) 2900 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
2892 fastmap[j] = 1; 2901 fastmap[j] = 1;
2893 2902
2903 /* To match raw bytes (in the 80..ff range) against multibyte
2904 strings, add their leading bytes to the fastmap. */
2905 for (j = 0x80; j < nbits; j++)
2906 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
2907 fastmap[CHAR_LEADING_CODE (BYTE8_TO_CHAR (j))] = 1;
2908
2894 if (/* Any leading code can possibly start a character 2909 if (/* Any leading code can possibly start a character
2895 which doesn't match the specified set of characters. */ 2910 which doesn't match the specified set of characters. */
2896 not 2911 not
@@ -4251,8 +4266,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
4251 } 4266 }
4252 p += pat_charlen; 4267 p += pat_charlen;
4253 d++; 4268 d++;
4269 mcnt -= pat_charlen;
4254 } 4270 }
4255 while (--mcnt); 4271 while (mcnt > 0);
4256 4272
4257 break; 4273 break;
4258 4274