aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEli Zaretskii2023-07-27 11:36:00 +0300
committerEli Zaretskii2023-07-27 11:36:00 +0300
commit2dc5f17c3ecf6864fdcb8ebae73c02a8d04c415a (patch)
tree2321e4abe8ab2ad3d52ec5abf55304584892ee59
parentb936ff0963e69d30d77cec5323a95bc2385cf212 (diff)
downloademacs-2dc5f17c3ecf6864fdcb8ebae73c02a8d04c415a.tar.gz
emacs-2dc5f17c3ecf6864fdcb8ebae73c02a8d04c415a.zip
Support Posix-standard pax extended header in tar files
* lisp/tar-mode.el (pax-extended-attribute-record-regexp) (tar-attr-vector): New variables. (pax-gid-index, pax-gname-index, pax-linkpath-index) (pax-mtime-index, pax-path-index, pax-size-index, pax-uid-index) (pax-uname-index): New constants. (pax-header-gid, pax-header-gname, pax-header-linkpath) (pax-header-mtime, pax-header-path, pax-header-size) (pax-header-uid, pax-header-uname): New accessors to pax header. (pax-decode-string, tar-parse-pax-extended-header): New functions. (tar-header-block-tokenize): Recognize and handle Posix-standard pax extended header, and use its attributes instead of those in the standard tar header. (Bug#64686)
-rw-r--r--lisp/tar-mode.el260
1 files changed, 199 insertions, 61 deletions
diff --git a/lisp/tar-mode.el b/lisp/tar-mode.el
index c9206028e94..4e9843123b0 100644
--- a/lisp/tar-mode.el
+++ b/lisp/tar-mode.el
@@ -215,6 +215,99 @@ Preserve the modified states of the buffers and set `tar-data-swapped'."
215 "Round S up to the next multiple of 512." 215 "Round S up to the next multiple of 512."
216 (ash (ash (+ s 511) -9) 9)) 216 (ash (ash (+ s 511) -9) 9))
217 217
218;; Reference:
219;; https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_02
220(defconst pax-extended-attribute-record-regexp
221 ;; We omit attributes that are "reserved" by Posix, since no
222 ;; processing has been defined for them.
223 "\\([0-9]+\\) \\(gid\\|gname\\|hdrcharset\\|linkpath\\|mtime\\|path\\|size\\|uid\\|uname\\)="
224 "Regular expression for looking up extended attributes in a
225Posix-standard pax extended header of a tar file.
226Only attributes that `tar-mode' can grok are mentioned.")
227
228(defconst pax-gid-index 0)
229(defconst pax-gname-index 1)
230(defconst pax-linkpath-index 2)
231(defconst pax-mtime-index 3)
232(defconst pax-path-index 4)
233(defconst pax-size-index 5)
234(defconst pax-uid-index 6)
235(defconst pax-uname-index 7)
236(defsubst pax-header-gid (attr-vec)
237 (aref attr-vec pax-gid-index))
238(defsubst pax-header-gname (attr-vec)
239 (aref attr-vec pax-gname-index))
240(defsubst pax-header-linkpath (attr-vec)
241 (aref attr-vec pax-linkpath-index))
242(defsubst pax-header-mtime (attr-vec)
243 (aref attr-vec pax-mtime-index))
244(defsubst pax-header-path (attr-vec)
245 (aref attr-vec pax-path-index))
246(defsubst pax-header-size (attr-vec)
247 (aref attr-vec pax-size-index))
248(defsubst pax-header-uid (attr-vec)
249 (aref attr-vec pax-uid-index))
250(defsubst pax-header-uname (attr-vec)
251 (aref attr-vec pax-uid-index))
252
253(defsubst pax-decode-string (str coding)
254 (if str
255 (decode-coding-string str coding)
256 str))
257
258(defvar tar-attr-vector '[nil nil nil nil nil nil nil nil])
259(defun tar-parse-pax-extended-header (pos)
260 "Parse a pax external header of a Posix-format tar file."
261 (let ((end (+ pos 512))
262 (result tar-attr-vector)
263 (coding 'utf-8-unix)
264 attr value record-len value-len)
265 (dotimes (i 8)
266 (aset result i nil))
267 (goto-char pos)
268 (while (and (< pos end)
269 (re-search-forward pax-extended-attribute-record-regexp
270 end 'move))
271 (setq record-len (string-to-number (match-string 1))
272 attr (match-string 2)
273 value-len (- record-len
274 (length (match-string 1))
275 1
276 (length (match-string 2))
277 2)
278 value (buffer-substring (point) (+ (point) value-len)))
279 (setq pos (goto-char (+ (point) value-len 1)))
280 (cond
281 ((equal attr "gid")
282 (aset result pax-gid-index value))
283 ((equal attr "gname")
284 (aset result pax-gname-index value))
285 ((equal attr "linkpath")
286 (aset result pax-linkpath-index value))
287 ((equal attr "mtime")
288 (aset result pax-mtime-index (string-to-number value)))
289 ((equal attr "path")
290 (aset result pax-path-index value))
291 ((equal attr "size")
292 (aset result pax-size-index value))
293 ((equal attr "uid")
294 (aset result pax-uid-index value))
295 ((equal attr "uname")
296 (aset result pax-uname-index value))
297 ((equal attr "hdrcharset")
298 (setq coding (if (equal value "BINARY") 'no-conversion 'utf-8-unix))))
299 (setq pos (+ pos (skip-chars-forward "\000"))))
300 ;; Decode string-valued attributes.
301 (aset result pax-gname-index
302 (pax-decode-string (aref result pax-gname-index) coding))
303 (aset result pax-linkpath-index
304 (pax-decode-string (aref result pax-linkpath-index) coding))
305 (aset result pax-path-index
306 (pax-decode-string (aref result pax-path-index) coding))
307 (aset result pax-uname-index
308 (pax-decode-string (aref result pax-uname-index) coding))
309 result))
310
218(defun tar-header-block-tokenize (pos coding &optional disable-slash) 311(defun tar-header-block-tokenize (pos coding &optional disable-slash)
219 "Return a `tar-header' structure. 312 "Return a `tar-header' structure.
220This is a list of name, mode, uid, gid, size, 313This is a list of name, mode, uid, gid, size,
@@ -271,67 +364,112 @@ of the file header. This is used for \"old GNU\" Tar format."
271 (if (and (null link-p) (null disable-slash) (string-match "/\\'" name)) 364 (if (and (null link-p) (null disable-slash) (string-match "/\\'" name))
272 (setq link-p 5)) ; directory 365 (setq link-p 5)) ; directory
273 366
274 (if (and (equal name "././@LongLink") 367 (if (member magic-str '("ustar " "ustar\0"))
275 ;; Supposedly @LongLink is only used for GNUTAR 368 (if (equal name "././@LongLink")
276 ;; format (i.e. "ustar ") but some POSIX Tar files 369 ;; Supposedly @LongLink is only used for GNUTAR
277 ;; (with "ustar\0") have been seen using it as well. 370 ;; format (i.e. "ustar ") but some POSIX Tar files
278 (member magic-str '("ustar " "ustar\0"))) 371 ;; (with "ustar\0") have been seen using it as well.
279 ;; This is a GNU Tar long-file-name header. 372 ;; This is a GNU Tar long-file-name header.
280 (let* ((size (tar-parse-octal-integer 373 (let* ((size (tar-parse-octal-integer
281 string tar-size-offset tar-time-offset)) 374 string tar-size-offset tar-time-offset))
282 ;; The long name is in the next 512-byte block. 375 ;; The long name is in the next 512-byte block.
283 ;; We've already moved POS there, when we computed 376 ;; We've already moved POS there, when we
284 ;; STRING above. 377 ;; computed STRING above.
285 (name (decode-coding-string 378 (name (decode-coding-string
286 ;; -1 so as to strip the terminating 0 byte. 379 ;; -1 so as to strip the terminating 0 byte.
287 (buffer-substring pos (+ pos size -1)) coding)) 380 (buffer-substring pos (+ pos size -1)) coding))
288 ;; Tokenize the header of the _real_ file entry, 381 ;; Tokenize the header of the _real_ file entry,
289 ;; which is further 512 bytes into the archive. 382 ;; which is further 512 bytes into the archive.
290 (descriptor (tar-header-block-tokenize 383 (descriptor (tar-header-block-tokenize
291 (+ pos (tar-roundup-512 size)) coding 384 (+ pos (tar-roundup-512 size)) coding
292 ;; Don't intuit directories from 385 ;; Don't intuit directories from
293 ;; the trailing slash, because the 386 ;; the trailing slash, because the
294 ;; truncated name might by chance end 387 ;; truncated name might by chance end
295 ;; in a slash. 388 ;; in a slash.
296 'ignore-trailing-slash))) 389 'ignore-trailing-slash)))
297 ;; Fix the descriptor of the real file entry by using 390 ;; Fix the descriptor of the real file entry by using
298 ;; the information from the long name entry. 391 ;; the information from the long name entry.
299 (cond 392 (cond
300 ((eq link-p (- ?L ?0)) ;GNUTYPE_LONGNAME. 393 ((eq link-p (- ?L ?0)) ;GNUTYPE_LONGNAME.
301 (setf (tar-header-name descriptor) name)) 394 (setf (tar-header-name descriptor) name))
302 ((eq link-p (- ?K ?0)) ;GNUTYPE_LONGLINK. 395 ((eq link-p (- ?K ?0)) ;GNUTYPE_LONGLINK.
303 (setf (tar-header-link-name descriptor) name)) 396 (setf (tar-header-link-name descriptor) name))
304 (t 397 (t
305 (message "Unrecognized GNU Tar @LongLink format"))) 398 (message "Unrecognized GNU Tar @LongLink format")))
306 ;; Fix the "link-type" attribute, based on the long name. 399 ;; Fix the "link-type" attribute, based on the long name.
307 (if (and (null (tar-header-link-type descriptor)) 400 (if (and (null (tar-header-link-type descriptor))
308 (string-match "/\\'" name)) 401 (string-match "/\\'" name))
309 (setf (tar-header-link-type descriptor) 5)) ; directory 402 (setf (tar-header-link-type descriptor) 5)) ; directory
310 (setf (tar-header-header-start descriptor) 403 (setf (tar-header-header-start descriptor)
311 (copy-marker (- pos 512) t)) 404 (copy-marker (- pos 512) t))
312 descriptor) 405 descriptor)
313 406 ;; Posix pax extended header. FIXME: support ?g as well.
314 (make-tar-header 407 (if (eq link-p (- ?x ?0))
315 (copy-marker pos nil) 408 ;; Get whatever attributes are in the extended header,
316 name 409 (let* ((pax-attrs (tar-parse-pax-extended-header pos))
317 (tar-parse-octal-integer string tar-mode-offset tar-uid-offset) 410 (gid (pax-header-gid pax-attrs))
318 (tar-parse-octal-integer string tar-uid-offset tar-gid-offset) 411 (gname (pax-header-gname pax-attrs))
319 (tar-parse-octal-integer string tar-gid-offset tar-size-offset) 412 (linkpath (pax-header-linkpath pax-attrs))
320 (tar-parse-octal-integer string tar-size-offset tar-time-offset) 413 (mtime (pax-header-mtime pax-attrs))
321 (tar-parse-octal-integer string tar-time-offset tar-chk-offset) 414 (path (pax-header-path pax-attrs))
322 (tar-parse-octal-integer string tar-chk-offset tar-linkp-offset) 415 (size (pax-header-size pax-attrs))
323 link-p 416 (uid (pax-header-uid pax-attrs))
324 linkname 417 (uname (pax-header-uname pax-attrs))
325 uname-valid-p 418 ;; Tokenize the header of the _real_ file entry,
326 (when uname-valid-p 419 ;; which is further 512 bytes into the archive.
327 (decode-coding-string 420 (descriptor
328 (substring string tar-uname-offset uname-end) coding)) 421 (tar-header-block-tokenize (+ pos 512) coding
329 (when uname-valid-p 422 'ignore-trailing-slash)))
330 (decode-coding-string 423 ;; Fix the descriptor of the real file entry by
331 (substring string tar-gname-offset gname-end) coding)) 424 ;; overriding some of the fields with the information
332 (tar-parse-octal-integer string tar-dmaj-offset tar-dmin-offset) 425 ;; from the extended header.
333 (tar-parse-octal-integer string tar-dmin-offset tar-prefix-offset) 426 (if gid
334 )))))) 427 (setf (tar-header-gid descriptor) gid))
428 (if gname
429 (setf (tar-header-gname descriptor) gname))
430 (if linkpath
431 (setf (tar-header-link-name descriptor) linkpath))
432 (if mtime
433 (setf (tar-header-date descriptor) mtime))
434 (if path
435 (setf (tar-header-name descriptor) path))
436 (if size
437 (setf (tar-header-size descriptor) size))
438 (if uid
439 (setf (tar-header-uid descriptor) uid))
440 (if uname
441 (setf (tar-header-uname descriptor) uname))
442 descriptor)
443
444 (make-tar-header
445 (copy-marker pos nil)
446 name
447 (tar-parse-octal-integer string tar-mode-offset
448 tar-uid-offset)
449 (tar-parse-octal-integer string tar-uid-offset
450 tar-gid-offset)
451 (tar-parse-octal-integer string tar-gid-offset
452 tar-size-offset)
453 (tar-parse-octal-integer string tar-size-offset
454 tar-time-offset)
455 (tar-parse-octal-integer string tar-time-offset
456 tar-chk-offset)
457 (tar-parse-octal-integer string tar-chk-offset
458 tar-linkp-offset)
459 link-p
460 linkname
461 uname-valid-p
462 (when uname-valid-p
463 (decode-coding-string
464 (substring string tar-uname-offset uname-end) coding))
465 (when uname-valid-p
466 (decode-coding-string
467 (substring string tar-gname-offset gname-end) coding))
468 (tar-parse-octal-integer string tar-dmaj-offset
469 tar-dmin-offset)
470 (tar-parse-octal-integer string tar-dmin-offset
471 tar-prefix-offset)
472 ))))))))
335 473
336;; Pseudo-field. 474;; Pseudo-field.
337(defun tar-header-data-end (descriptor) 475(defun tar-header-data-end (descriptor)