(regex_compile): Substitute whitespace_regexp for spaces, if it is nonzero.

(whitespace_regexp): New variable. (re_set_whitespace_regexp): New function.
author: Richard M. Stallman 2004-11-19 19:36:09 +0000
committer: Richard M. Stallman 2004-11-19 19:36:09 +0000
commit: f9b0fd9964ef94bce907db72a826df1347a9ffed (patch)
tree: 7f1c68a41081d8c324facd0ba6353859f57a79f6 /src
parent: e1c1c5a7f4b92cadec79150bd55a96f33ad0742a (diff)
download: emacs-f9b0fd9964ef94bce907db72a826df1347a9ffed.tar.gz
emacs-f9b0fd9964ef94bce907db72a826df1347a9ffed.zip
1 files changed, 71 insertions, 2 deletions
diff --git a/src/regex.c b/src/regex.c
index 36787238a32..459c0ab26b1 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -1250,7 +1250,7 @@ reg_syntax_t re_syntax_options;
 reg_syntax_t
 re_set_syntax (syntax)
-    reg_syntax_t syntax;
+     reg_syntax_t syntax;
 {
  reg_syntax_t ret = re_syntax_options;
@@ -1258,6 +1258,17 @@ re_set_syntax (syntax)
  return ret;
 }
 WEAK_ALIAS (__re_set_syntax, re_set_syntax)
+/* Regexp to use to replace spaces, or NULL meaning don't.  */
+static re_char *whitespace_regexp;
+void
+re_set_whitespace_regexp (regexp)
+     re_char *regexp;
+{
+  whitespace_regexp = regexp;
+}
+WEAK_ALIAS (__re_set_syntax, re_set_syntax)
 /* This table gives an error message for each of the error codes listed
   in regex.h.  Obviously the order here has to be same as there.
@@ -2436,6 +2447,15 @@ regex_compile (pattern, size, syntax, bufp)
  /* If the object matched can contain multibyte characters.  */
  const boolean multibyte = RE_MULTIBYTE_P (bufp);
+  /* Nonzero if we have pushed down into a subpattern.  */
+  int in_subpattern = 0;
+  /* These hold the values of p, pattern, and pend from the main
+     pattern when we have pushed into a subpattern.  */
+  re_char *main_p;
+  re_char *main_pattern;
+  re_char *main_pend;
 #ifdef DEBUG
  debug++;
  DEBUG_PRINT1 ("\nCompiling pattern: ");
@@ -2498,12 +2518,61 @@ regex_compile (pattern, size, syntax, bufp)
  begalt = b = bufp->buffer;
  /* Loop through the uncompiled pattern until we're at the end.  */
-  while (p != pend)
+  while (1)
    {
+      if (p == pend)
+        {
+          /* If this is the end of an included regexp,
+             pop back to the main regexp and try again.  */
+          if (in_subpattern)
+            {
+              in_subpattern = 0;
+              pattern = main_pattern;
+              p = main_p;
+              pend = main_pend;
+              continue;
+            }
+          /* If this is the end of the main regexp, we are done.  */
+          break;
+        }
      PATFETCH (c);
      switch (c)
        {
+        case ' ':
+          {
+            re_char *p1 = p;
+            /* If there's no special whitespace regexp, treat
+               spaces normally.  */
+            if (!whitespace_regexp)
+              goto normal_char;
+            /* Peek past following spaces.  */
+            while (p1 != pend)
+              {
+                if (*p1 != ' ')
+                  break;
+                p1++;
+              }
+            /* If the spaces are followed by a repetition op,
+               treat them normally.  */
+            if (p1 == pend
+                || (*p1 == '*' || *p1 == '+' || *p1 == '?'
+                    || (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{')))
+              goto normal_char;
+            /* Replace the spaces with the whitespace regexp.  */
+            in_subpattern = 1;
+            main_p = p1;
+            main_pend = pend;
+            main_pattern = pattern;
+            p = pattern = whitespace_regexp;
+            pend = p + strlen (p);
+            break;
+          }    
        case '^':
          {
            if (   /* If at start of pattern, it's an operator.  */
author	Richard M. Stallman	2004-11-19 19:36:09 +0000
committer	Richard M. Stallman	2004-11-19 19:36:09 +0000
commit	f9b0fd9964ef94bce907db72a826df1347a9ffed (patch)
tree	7f1c68a41081d8c324facd0ba6353859f57a79f6 /src
parent	e1c1c5a7f4b92cadec79150bd55a96f33ad0742a (diff)
download	emacs-f9b0fd9964ef94bce907db72a826df1347a9ffed.tar.gz emacs-f9b0fd9964ef94bce907db72a826df1347a9ffed.zip

diff --git a/src/regex.c b/src/regex.c
index 36787238a32..459c0ab26b1 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -1250,7 +1250,7 @@ reg_syntax_t re_syntax_options;
1250		1250
1251	reg_syntax_t	1251	reg_syntax_t
1252	re_set_syntax (syntax)	1252	re_set_syntax (syntax)
1253	reg_syntax_t syntax;	1253	reg_syntax_t syntax;
1254	{	1254	{
1255	reg_syntax_t ret = re_syntax_options;	1255	reg_syntax_t ret = re_syntax_options;
1256		1256
@@ -1258,6 +1258,17 @@ re_set_syntax (syntax)
1258	return ret;	1258	return ret;
1259	}	1259	}
1260	WEAK_ALIAS (__re_set_syntax, re_set_syntax)	1260	WEAK_ALIAS (__re_set_syntax, re_set_syntax)
		1261
		1262	/* Regexp to use to replace spaces, or NULL meaning don't. */
		1263	static re_char *whitespace_regexp;
		1264
		1265	void
		1266	re_set_whitespace_regexp (regexp)
		1267	re_char *regexp;
		1268	{
		1269	whitespace_regexp = regexp;
		1270	}
		1271	WEAK_ALIAS (__re_set_syntax, re_set_syntax)
1261		1272
1262	/* This table gives an error message for each of the error codes listed	1273	/* This table gives an error message for each of the error codes listed
1263	in regex.h. Obviously the order here has to be same as there.	1274	in regex.h. Obviously the order here has to be same as there.
@@ -2436,6 +2447,15 @@ regex_compile (pattern, size, syntax, bufp)
2436	/* If the object matched can contain multibyte characters. */	2447	/* If the object matched can contain multibyte characters. */
2437	const boolean multibyte = RE_MULTIBYTE_P (bufp);	2448	const boolean multibyte = RE_MULTIBYTE_P (bufp);
2438		2449
		2450	/* Nonzero if we have pushed down into a subpattern. */
		2451	int in_subpattern = 0;
		2452
		2453	/* These hold the values of p, pattern, and pend from the main
		2454	pattern when we have pushed into a subpattern. */
		2455	re_char *main_p;
		2456	re_char *main_pattern;
		2457	re_char *main_pend;
		2458
2439	#ifdef DEBUG	2459	#ifdef DEBUG
2440	debug++;	2460	debug++;
2441	DEBUG_PRINT1 ("\nCompiling pattern: ");	2461	DEBUG_PRINT1 ("\nCompiling pattern: ");
@@ -2498,12 +2518,61 @@ regex_compile (pattern, size, syntax, bufp)
2498	begalt = b = bufp->buffer;	2518	begalt = b = bufp->buffer;
2499		2519
2500	/* Loop through the uncompiled pattern until we're at the end. */	2520	/* Loop through the uncompiled pattern until we're at the end. */
2501	while (p != pend)	2521	while (1)
2502	{	2522	{
		2523	if (p == pend)
		2524	{
		2525	/* If this is the end of an included regexp,
		2526	pop back to the main regexp and try again. */
		2527	if (in_subpattern)
		2528	{
		2529	in_subpattern = 0;
		2530	pattern = main_pattern;
		2531	p = main_p;
		2532	pend = main_pend;
		2533	continue;
		2534	}
		2535	/* If this is the end of the main regexp, we are done. */
		2536	break;
		2537	}
		2538
2503	PATFETCH (c);	2539	PATFETCH (c);
2504		2540
2505	switch (c)	2541	switch (c)
2506	{	2542	{
		2543	case ' ':
		2544	{
		2545	re_char *p1 = p;
		2546
		2547	/* If there's no special whitespace regexp, treat
		2548	spaces normally. */
		2549	if (!whitespace_regexp)
		2550	goto normal_char;
		2551
		2552	/* Peek past following spaces. */
		2553	while (p1 != pend)
		2554	{
		2555	if (*p1 != ' ')
		2556	break;
		2557	p1++;
		2558	}
		2559	/* If the spaces are followed by a repetition op,
		2560	treat them normally. */
		2561	if (p1 == pend
		2562	|| (*p1 == '*' || *p1 == '+' || *p1 == '?'
		2563	|| (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{')))
		2564	goto normal_char;
		2565
		2566	/* Replace the spaces with the whitespace regexp. */
		2567	in_subpattern = 1;
		2568	main_p = p1;
		2569	main_pend = pend;
		2570	main_pattern = pattern;
		2571	p = pattern = whitespace_regexp;
		2572	pend = p + strlen (p);
		2573	break;
		2574	}
		2575
2507	case '^':	2576	case '^':
2508	{	2577	{
2509	if ( /* If at start of pattern, it's an operator. */	2578	if ( /* If at start of pattern, it's an operator. */