aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Eli Zaretskii 2015-04-14 21:57:23 +0300
committer Eli Zaretskii 2015-04-14 21:57:23 +0300
commit 95cee7f6a6c9332296e386ca6e6fcce3141e5d13 (patch)
tree b8be0617b508b2868d0892cddb8a7b6bd945822d
parent 807a0e98f00057ae9d60ecafb5b8c0c98bc4cdb5 (diff)
download emacs-95cee7f6a6c9332296e386ca6e6fcce3141e5d13.tar.gz
emacs-95cee7f6a6c9332296e386ca6e6fcce3141e5d13.zip
Improve the commit-msg Git hook for unibyte environments
* build-aux/git-hooks/commit-msg: Set LC_ALL=C before running Awk in unibyte environments. (Suggested by Paul Eggert <eggert@cs.ucla.edu>.) Use a more accurate approximation to [:print:], based on UTF-8 sequences of the unprintable characters.
-rwxr-xr-x build-aux/git-hooks/commit-msg 12
1 files changed, 9 insertions, 3 deletions
diff --git a/build-aux/git-hooks/commit-msg b/build-aux/git-hooks/commit-msg
index 6e31dbcbdbe..96613765d32 100755
--- a/build-aux/git-hooks/commit-msg
+++ b/build-aux/git-hooks/commit-msg
@@ -36,8 +36,11 @@ at_sign=`$awk "$print_at_sign" </dev/null 2>/dev/null`
36if test "$at_sign" != @; then 36if test "$at_sign" != @; then
37 at_sign=`LC_ALL=en_US.UTF-8 $awk "$print_at_sign" </dev/null 2>/dev/null` 37 at_sign=`LC_ALL=en_US.UTF-8 $awk "$print_at_sign" </dev/null 2>/dev/null`
38 if test "$at_sign" = @; then 38 if test "$at_sign" = @; then
39 LC_ALL=en_US.UTF-8; export LC_ALL 39 LC_ALL=en_US.UTF-8
40 else
41 LC_ALL=C
40 fi 42 fi
43 export LC_ALL
41fi 44fi
42 45
43# Check the log entry. 46# Check the log entry.
@@ -45,10 +48,13 @@ exec $awk -v at_sign="$at_sign" -v cent_sign="$cent_sign" '
45 BEGIN { 48 BEGIN {
46 # These regular expressions assume traditional Unix unibyte behavior. 49 # These regular expressions assume traditional Unix unibyte behavior.
47 # They are needed for old or broken versions of awk, e.g., 50 # They are needed for old or broken versions of awk, e.g.,
48 # mawk 1.3.3 (1996), or gawk on MSYS (2015). 51 # mawk 1.3.3 (1996), or gawk on MSYS (2015), and/or for systems that
52 # cannot use UTF-8 as the codeset for the locale.
49 space = "[ \f\n\r\t\v]" 53 space = "[ \f\n\r\t\v]"
50 non_space = "[^ \f\n\r\t\v]" 54 non_space = "[^ \f\n\r\t\v]"
51 non_print = "[\1-\37\177]" 55 # The non_print below rejects control characters and surrogates
56 # UTF-8 for: 0x01-0x1f 0x7f 0x80-0x9f 0xd800-0xdbff 0xdc00-0xdfff
57 non_print = "[\1-\37\177]|\302[\200-\237]|\355[\240-\277][\200-\277]"
52 58
53 # Prefer POSIX regular expressions if available, as they do a 59 # Prefer POSIX regular expressions if available, as they do a
54 # better job of checking. Similarly, prefer POSIX negated 60 # better job of checking. Similarly, prefer POSIX negated