diff options
| author | Eli Zaretskii | 2015-04-14 21:57:23 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2015-04-14 21:57:23 +0300 |
| commit | 95cee7f6a6c9332296e386ca6e6fcce3141e5d13 (patch) | |
| tree | b8be0617b508b2868d0892cddb8a7b6bd945822d | |
| parent | 807a0e98f00057ae9d60ecafb5b8c0c98bc4cdb5 (diff) | |
| download | emacs-95cee7f6a6c9332296e386ca6e6fcce3141e5d13.tar.gz emacs-95cee7f6a6c9332296e386ca6e6fcce3141e5d13.zip | |
Improve the commit-msg Git hook for unibyte environments
* build-aux/git-hooks/commit-msg: Set LC_ALL=C before running Awk
in unibyte environments. (Suggested by Paul Eggert
<eggert@cs.ucla.edu>.) Use a more accurate approximation to
[:print:], based on UTF-8 sequences of the unprintable characters.
| -rwxr-xr-x | build-aux/git-hooks/commit-msg | 12 |
1 file changed, 9 insertions, 3 deletions
diff --git a/build-aux/git-hooks/commit-msg b/build-aux/git-hooks/commit-msg index 6e31dbcbdbe..96613765d32 100755 --- a/build-aux/git-hooks/commit-msg +++ b/build-aux/git-hooks/commit-msg | |||
| @@ -36,8 +36,11 @@ at_sign=`$awk "$print_at_sign" </dev/null 2>/dev/null` | |||
| 36 | if test "$at_sign" != @; then | 36 | if test "$at_sign" != @; then |
| 37 | at_sign=`LC_ALL=en_US.UTF-8 $awk "$print_at_sign" </dev/null 2>/dev/null` | 37 | at_sign=`LC_ALL=en_US.UTF-8 $awk "$print_at_sign" </dev/null 2>/dev/null` |
| 38 | if test "$at_sign" = @; then | 38 | if test "$at_sign" = @; then |
| 39 | LC_ALL=en_US.UTF-8; export LC_ALL | 39 | LC_ALL=en_US.UTF-8 |
| 40 | else | ||
| 41 | LC_ALL=C | ||
| 40 | fi | 42 | fi |
| 43 | export LC_ALL | ||
| 41 | fi | 44 | fi |
| 42 | 45 | ||
| 43 | # Check the log entry. | 46 | # Check the log entry. |
| @@ -45,10 +48,13 @@ exec $awk -v at_sign="$at_sign" -v cent_sign="$cent_sign" ' | |||
| 45 | BEGIN { | 48 | BEGIN { |
| 46 | # These regular expressions assume traditional Unix unibyte behavior. | 49 | # These regular expressions assume traditional Unix unibyte behavior. |
| 47 | # They are needed for old or broken versions of awk, e.g., | 50 | # They are needed for old or broken versions of awk, e.g., |
| 48 | # mawk 1.3.3 (1996), or gawk on MSYS (2015). | 51 | # mawk 1.3.3 (1996), or gawk on MSYS (2015), and/or for systems that |
| 52 | # cannot use UTF-8 as the codeset for the locale. | ||
| 49 | space = "[ \f\n\r\t\v]" | 53 | space = "[ \f\n\r\t\v]" |
| 50 | non_space = "[^ \f\n\r\t\v]" | 54 | non_space = "[^ \f\n\r\t\v]" |
| 51 | non_print = "[\1-\37\177]" | 55 | # The non_print below rejects control characters and surrogates |
| 56 | # UTF-8 for: 0x01-0x1f 0x7f 0x80-0x9f 0xd800-0xdbff 0xdc00-0xdfff | ||
| 57 | non_print = "[\1-\37\177]|\302[\200-\237]|\355[\240-\277][\200-\277]" | ||
| 52 | 58 | ||
| 53 | # Prefer POSIX regular expressions if available, as they do a | 59 | # Prefer POSIX regular expressions if available, as they do a |
| 54 | # better job of checking. Similarly, prefer POSIX negated | 60 | # better job of checking. Similarly, prefer POSIX negated |