diff options
| author | Eli Zaretskii | 2023-07-27 11:36:00 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2023-07-27 11:36:00 +0300 |
| commit | 2dc5f17c3ecf6864fdcb8ebae73c02a8d04c415a (patch) | |
| tree | 2321e4abe8ab2ad3d52ec5abf55304584892ee59 | |
| parent | b936ff0963e69d30d77cec5323a95bc2385cf212 (diff) | |
| download | emacs-2dc5f17c3ecf6864fdcb8ebae73c02a8d04c415a.tar.gz emacs-2dc5f17c3ecf6864fdcb8ebae73c02a8d04c415a.zip | |
Support Posix-standard pax extended header in tar files
* lisp/tar-mode.el (pax-extended-attribute-record-regexp)
(tar-attr-vector): New variables.
(pax-gid-index, pax-gname-index, pax-linkpath-index)
(pax-mtime-index, pax-path-index, pax-size-index, pax-uid-index)
(pax-uname-index): New constants.
(pax-header-gid, pax-header-gname, pax-header-linkpath)
(pax-header-mtime, pax-header-path, pax-header-size)
(pax-header-uid, pax-header-uname): New accessors to pax header.
(pax-decode-string, tar-parse-pax-extended-header): New functions.
(tar-header-block-tokenize): Recognize and handle Posix-standard
pax extended header, and use its attributes instead of those in
the standard tar header. (Bug#64686)
| -rw-r--r-- | lisp/tar-mode.el | 260 |
1 files changed, 199 insertions, 61 deletions
diff --git a/lisp/tar-mode.el b/lisp/tar-mode.el index c9206028e94..4e9843123b0 100644 --- a/lisp/tar-mode.el +++ b/lisp/tar-mode.el | |||
| @@ -215,6 +215,99 @@ Preserve the modified states of the buffers and set `tar-data-swapped'." | |||
| 215 | "Round S up to the next multiple of 512." | 215 | "Round S up to the next multiple of 512." |
| 216 | (ash (ash (+ s 511) -9) 9)) | 216 | (ash (ash (+ s 511) -9) 9)) |
| 217 | 217 | ||
| 218 | ;; Reference: | ||
| 219 | ;; https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_02 | ||
| 220 | (defconst pax-extended-attribute-record-regexp | ||
| 221 | ;; We omit attributes that are "reserved" by Posix, since no | ||
| 222 | ;; processing has been defined for them. | ||
| 223 | "\\([0-9]+\\) \\(gid\\|gname\\|hdrcharset\\|linkpath\\|mtime\\|path\\|size\\|uid\\|uname\\)=" | ||
| 224 | "Regular expression for looking up extended attributes in a | ||
| 225 | Posix-standard pax extended header of a tar file. | ||
| 226 | Only attributes that `tar-mode' can grok are mentioned.") | ||
| 227 | |||
| 228 | (defconst pax-gid-index 0) | ||
| 229 | (defconst pax-gname-index 1) | ||
| 230 | (defconst pax-linkpath-index 2) | ||
| 231 | (defconst pax-mtime-index 3) | ||
| 232 | (defconst pax-path-index 4) | ||
| 233 | (defconst pax-size-index 5) | ||
| 234 | (defconst pax-uid-index 6) | ||
| 235 | (defconst pax-uname-index 7) | ||
| 236 | (defsubst pax-header-gid (attr-vec) | ||
| 237 | (aref attr-vec pax-gid-index)) | ||
| 238 | (defsubst pax-header-gname (attr-vec) | ||
| 239 | (aref attr-vec pax-gname-index)) | ||
| 240 | (defsubst pax-header-linkpath (attr-vec) | ||
| 241 | (aref attr-vec pax-linkpath-index)) | ||
| 242 | (defsubst pax-header-mtime (attr-vec) | ||
| 243 | (aref attr-vec pax-mtime-index)) | ||
| 244 | (defsubst pax-header-path (attr-vec) | ||
| 245 | (aref attr-vec pax-path-index)) | ||
| 246 | (defsubst pax-header-size (attr-vec) | ||
| 247 | (aref attr-vec pax-size-index)) | ||
| 248 | (defsubst pax-header-uid (attr-vec) | ||
| 249 | (aref attr-vec pax-uid-index)) | ||
| 250 | (defsubst pax-header-uname (attr-vec) | ||
| 251 | (aref attr-vec pax-uid-index)) | ||
| 252 | |||
| 253 | (defsubst pax-decode-string (str coding) | ||
| 254 | (if str | ||
| 255 | (decode-coding-string str coding) | ||
| 256 | str)) | ||
| 257 | |||
| 258 | (defvar tar-attr-vector '[nil nil nil nil nil nil nil nil]) | ||
| 259 | (defun tar-parse-pax-extended-header (pos) | ||
| 260 | "Parse a pax external header of a Posix-format tar file." | ||
| 261 | (let ((end (+ pos 512)) | ||
| 262 | (result tar-attr-vector) | ||
| 263 | (coding 'utf-8-unix) | ||
| 264 | attr value record-len value-len) | ||
| 265 | (dotimes (i 8) | ||
| 266 | (aset result i nil)) | ||
| 267 | (goto-char pos) | ||
| 268 | (while (and (< pos end) | ||
| 269 | (re-search-forward pax-extended-attribute-record-regexp | ||
| 270 | end 'move)) | ||
| 271 | (setq record-len (string-to-number (match-string 1)) | ||
| 272 | attr (match-string 2) | ||
| 273 | value-len (- record-len | ||
| 274 | (length (match-string 1)) | ||
| 275 | 1 | ||
| 276 | (length (match-string 2)) | ||
| 277 | 2) | ||
| 278 | value (buffer-substring (point) (+ (point) value-len))) | ||
| 279 | (setq pos (goto-char (+ (point) value-len 1))) | ||
| 280 | (cond | ||
| 281 | ((equal attr "gid") | ||
| 282 | (aset result pax-gid-index value)) | ||
| 283 | ((equal attr "gname") | ||
| 284 | (aset result pax-gname-index value)) | ||
| 285 | ((equal attr "linkpath") | ||
| 286 | (aset result pax-linkpath-index value)) | ||
| 287 | ((equal attr "mtime") | ||
| 288 | (aset result pax-mtime-index (string-to-number value))) | ||
| 289 | ((equal attr "path") | ||
| 290 | (aset result pax-path-index value)) | ||
| 291 | ((equal attr "size") | ||
| 292 | (aset result pax-size-index value)) | ||
| 293 | ((equal attr "uid") | ||
| 294 | (aset result pax-uid-index value)) | ||
| 295 | ((equal attr "uname") | ||
| 296 | (aset result pax-uname-index value)) | ||
| 297 | ((equal attr "hdrcharset") | ||
| 298 | (setq coding (if (equal value "BINARY") 'no-conversion 'utf-8-unix)))) | ||
| 299 | (setq pos (+ pos (skip-chars-forward "\000")))) | ||
| 300 | ;; Decode string-valued attributes. | ||
| 301 | (aset result pax-gname-index | ||
| 302 | (pax-decode-string (aref result pax-gname-index) coding)) | ||
| 303 | (aset result pax-linkpath-index | ||
| 304 | (pax-decode-string (aref result pax-linkpath-index) coding)) | ||
| 305 | (aset result pax-path-index | ||
| 306 | (pax-decode-string (aref result pax-path-index) coding)) | ||
| 307 | (aset result pax-uname-index | ||
| 308 | (pax-decode-string (aref result pax-uname-index) coding)) | ||
| 309 | result)) | ||
| 310 | |||
| 218 | (defun tar-header-block-tokenize (pos coding &optional disable-slash) | 311 | (defun tar-header-block-tokenize (pos coding &optional disable-slash) |
| 219 | "Return a `tar-header' structure. | 312 | "Return a `tar-header' structure. |
| 220 | This is a list of name, mode, uid, gid, size, | 313 | This is a list of name, mode, uid, gid, size, |
| @@ -271,67 +364,112 @@ of the file header. This is used for \"old GNU\" Tar format." | |||
| 271 | (if (and (null link-p) (null disable-slash) (string-match "/\\'" name)) | 364 | (if (and (null link-p) (null disable-slash) (string-match "/\\'" name)) |
| 272 | (setq link-p 5)) ; directory | 365 | (setq link-p 5)) ; directory |
| 273 | 366 | ||
| 274 | (if (and (equal name "././@LongLink") | 367 | (if (member magic-str '("ustar " "ustar\0")) |
| 275 | ;; Supposedly @LongLink is only used for GNUTAR | 368 | (if (equal name "././@LongLink") |
| 276 | ;; format (i.e. "ustar ") but some POSIX Tar files | 369 | ;; Supposedly @LongLink is only used for GNUTAR |
| 277 | ;; (with "ustar\0") have been seen using it as well. | 370 | ;; format (i.e. "ustar ") but some POSIX Tar files |
| 278 | (member magic-str '("ustar " "ustar\0"))) | 371 | ;; (with "ustar\0") have been seen using it as well. |
| 279 | ;; This is a GNU Tar long-file-name header. | 372 | ;; This is a GNU Tar long-file-name header. |
| 280 | (let* ((size (tar-parse-octal-integer | 373 | (let* ((size (tar-parse-octal-integer |
| 281 | string tar-size-offset tar-time-offset)) | 374 | string tar-size-offset tar-time-offset)) |
| 282 | ;; The long name is in the next 512-byte block. | 375 | ;; The long name is in the next 512-byte block. |
| 283 | ;; We've already moved POS there, when we computed | 376 | ;; We've already moved POS there, when we |
| 284 | ;; STRING above. | 377 | ;; computed STRING above. |
| 285 | (name (decode-coding-string | 378 | (name (decode-coding-string |
| 286 | ;; -1 so as to strip the terminating 0 byte. | 379 | ;; -1 so as to strip the terminating 0 byte. |
| 287 | (buffer-substring pos (+ pos size -1)) coding)) | 380 | (buffer-substring pos (+ pos size -1)) coding)) |
| 288 | ;; Tokenize the header of the _real_ file entry, | 381 | ;; Tokenize the header of the _real_ file entry, |
| 289 | ;; which is further 512 bytes into the archive. | 382 | ;; which is further 512 bytes into the archive. |
| 290 | (descriptor (tar-header-block-tokenize | 383 | (descriptor (tar-header-block-tokenize |
| 291 | (+ pos (tar-roundup-512 size)) coding | 384 | (+ pos (tar-roundup-512 size)) coding |
| 292 | ;; Don't intuit directories from | 385 | ;; Don't intuit directories from |
| 293 | ;; the trailing slash, because the | 386 | ;; the trailing slash, because the |
| 294 | ;; truncated name might by chance end | 387 | ;; truncated name might by chance end |
| 295 | ;; in a slash. | 388 | ;; in a slash. |
| 296 | 'ignore-trailing-slash))) | 389 | 'ignore-trailing-slash))) |
| 297 | ;; Fix the descriptor of the real file entry by using | 390 | ;; Fix the descriptor of the real file entry by using |
| 298 | ;; the information from the long name entry. | 391 | ;; the information from the long name entry. |
| 299 | (cond | 392 | (cond |
| 300 | ((eq link-p (- ?L ?0)) ;GNUTYPE_LONGNAME. | 393 | ((eq link-p (- ?L ?0)) ;GNUTYPE_LONGNAME. |
| 301 | (setf (tar-header-name descriptor) name)) | 394 | (setf (tar-header-name descriptor) name)) |
| 302 | ((eq link-p (- ?K ?0)) ;GNUTYPE_LONGLINK. | 395 | ((eq link-p (- ?K ?0)) ;GNUTYPE_LONGLINK. |
| 303 | (setf (tar-header-link-name descriptor) name)) | 396 | (setf (tar-header-link-name descriptor) name)) |
| 304 | (t | 397 | (t |
| 305 | (message "Unrecognized GNU Tar @LongLink format"))) | 398 | (message "Unrecognized GNU Tar @LongLink format"))) |
| 306 | ;; Fix the "link-type" attribute, based on the long name. | 399 | ;; Fix the "link-type" attribute, based on the long name. |
| 307 | (if (and (null (tar-header-link-type descriptor)) | 400 | (if (and (null (tar-header-link-type descriptor)) |
| 308 | (string-match "/\\'" name)) | 401 | (string-match "/\\'" name)) |
| 309 | (setf (tar-header-link-type descriptor) 5)) ; directory | 402 | (setf (tar-header-link-type descriptor) 5)) ; directory |
| 310 | (setf (tar-header-header-start descriptor) | 403 | (setf (tar-header-header-start descriptor) |
| 311 | (copy-marker (- pos 512) t)) | 404 | (copy-marker (- pos 512) t)) |
| 312 | descriptor) | 405 | descriptor) |
| 313 | 406 | ;; Posix pax extended header. FIXME: support ?g as well. | |
| 314 | (make-tar-header | 407 | (if (eq link-p (- ?x ?0)) |
| 315 | (copy-marker pos nil) | 408 | ;; Get whatever attributes are in the extended header, |
| 316 | name | 409 | (let* ((pax-attrs (tar-parse-pax-extended-header pos)) |
| 317 | (tar-parse-octal-integer string tar-mode-offset tar-uid-offset) | 410 | (gid (pax-header-gid pax-attrs)) |
| 318 | (tar-parse-octal-integer string tar-uid-offset tar-gid-offset) | 411 | (gname (pax-header-gname pax-attrs)) |
| 319 | (tar-parse-octal-integer string tar-gid-offset tar-size-offset) | 412 | (linkpath (pax-header-linkpath pax-attrs)) |
| 320 | (tar-parse-octal-integer string tar-size-offset tar-time-offset) | 413 | (mtime (pax-header-mtime pax-attrs)) |
| 321 | (tar-parse-octal-integer string tar-time-offset tar-chk-offset) | 414 | (path (pax-header-path pax-attrs)) |
| 322 | (tar-parse-octal-integer string tar-chk-offset tar-linkp-offset) | 415 | (size (pax-header-size pax-attrs)) |
| 323 | link-p | 416 | (uid (pax-header-uid pax-attrs)) |
| 324 | linkname | 417 | (uname (pax-header-uname pax-attrs)) |
| 325 | uname-valid-p | 418 | ;; Tokenize the header of the _real_ file entry, |
| 326 | (when uname-valid-p | 419 | ;; which is further 512 bytes into the archive. |
| 327 | (decode-coding-string | 420 | (descriptor |
| 328 | (substring string tar-uname-offset uname-end) coding)) | 421 | (tar-header-block-tokenize (+ pos 512) coding |
| 329 | (when uname-valid-p | 422 | 'ignore-trailing-slash))) |
| 330 | (decode-coding-string | 423 | ;; Fix the descriptor of the real file entry by |
| 331 | (substring string tar-gname-offset gname-end) coding)) | 424 | ;; overriding some of the fields with the information |
| 332 | (tar-parse-octal-integer string tar-dmaj-offset tar-dmin-offset) | 425 | ;; from the extended header. |
| 333 | (tar-parse-octal-integer string tar-dmin-offset tar-prefix-offset) | 426 | (if gid |
| 334 | )))))) | 427 | (setf (tar-header-gid descriptor) gid)) |
| 428 | (if gname | ||
| 429 | (setf (tar-header-gname descriptor) gname)) | ||
| 430 | (if linkpath | ||
| 431 | (setf (tar-header-link-name descriptor) linkpath)) | ||
| 432 | (if mtime | ||
| 433 | (setf (tar-header-date descriptor) mtime)) | ||
| 434 | (if path | ||
| 435 | (setf (tar-header-name descriptor) path)) | ||
| 436 | (if size | ||
| 437 | (setf (tar-header-size descriptor) size)) | ||
| 438 | (if uid | ||
| 439 | (setf (tar-header-uid descriptor) uid)) | ||
| 440 | (if uname | ||
| 441 | (setf (tar-header-uname descriptor) uname)) | ||
| 442 | descriptor) | ||
| 443 | |||
| 444 | (make-tar-header | ||
| 445 | (copy-marker pos nil) | ||
| 446 | name | ||
| 447 | (tar-parse-octal-integer string tar-mode-offset | ||
| 448 | tar-uid-offset) | ||
| 449 | (tar-parse-octal-integer string tar-uid-offset | ||
| 450 | tar-gid-offset) | ||
| 451 | (tar-parse-octal-integer string tar-gid-offset | ||
| 452 | tar-size-offset) | ||
| 453 | (tar-parse-octal-integer string tar-size-offset | ||
| 454 | tar-time-offset) | ||
| 455 | (tar-parse-octal-integer string tar-time-offset | ||
| 456 | tar-chk-offset) | ||
| 457 | (tar-parse-octal-integer string tar-chk-offset | ||
| 458 | tar-linkp-offset) | ||
| 459 | link-p | ||
| 460 | linkname | ||
| 461 | uname-valid-p | ||
| 462 | (when uname-valid-p | ||
| 463 | (decode-coding-string | ||
| 464 | (substring string tar-uname-offset uname-end) coding)) | ||
| 465 | (when uname-valid-p | ||
| 466 | (decode-coding-string | ||
| 467 | (substring string tar-gname-offset gname-end) coding)) | ||
| 468 | (tar-parse-octal-integer string tar-dmaj-offset | ||
| 469 | tar-dmin-offset) | ||
| 470 | (tar-parse-octal-integer string tar-dmin-offset | ||
| 471 | tar-prefix-offset) | ||
| 472 | )))))))) | ||
| 335 | 473 | ||
| 336 | ;; Pseudo-field. | 474 | ;; Pseudo-field. |
| 337 | (defun tar-header-data-end (descriptor) | 475 | (defun tar-header-data-end (descriptor) |