diff options
| author | Lars Ingebrigtsen | 2019-09-21 18:13:05 +0200 |
|---|---|---|
| committer | Lars Ingebrigtsen | 2019-09-21 18:13:11 +0200 |
| commit | 535b65875e7e47e1fd6bec1753f687592ae600b8 (patch) | |
| tree | 4a54aee8bf3a1cff9a032d323a0a2ae4a4d5091d | |
| parent | 56985dd8a69fc2729422cf8f95efbd03ee6b021e (diff) | |
| download | emacs-535b65875e7e47e1fd6bec1753f687592ae600b8.tar.gz emacs-535b65875e7e47e1fd6bec1753f687592ae600b8.zip | |
Add an Exif parsing library
* lisp/image/exif.el: New file (bug#23070).
* test/lisp/image/exif-tests.el: Add some basic tests.
| -rw-r--r-- | etc/NEWS | 4 | ||||
| -rw-r--r-- | lisp/image/exif.el | 224 | ||||
| -rw-r--r-- | test/data/image/black.jpg | bin | 0 -> 52456 bytes | |||
| -rw-r--r-- | test/lisp/image/exif-tests.el | 44 |
4 files changed, 272 insertions, 0 deletions
| @@ -2567,6 +2567,10 @@ left to higher-level functions. | |||
| 2567 | 2567 | ||
| 2568 | ** Image mode | 2568 | ** Image mode |
| 2569 | 2569 | ||
| 2570 | *** An Exif library has been added that can parse JPEG files and | ||
| 2571 | output data about creation times and orientation and the like. | ||
| 2572 | 'exif-parse' is the main interface function. | ||
| 2573 | |||
| 2570 | *** 'image-mode' started using ImageMagick by default for all images | 2574 | *** 'image-mode' started using ImageMagick by default for all images |
| 2571 | some years back. It now respects 'imagemagick-types-inhibit' as a way | 2575 | some years back. It now respects 'imagemagick-types-inhibit' as a way |
| 2572 | to disable that. | 2576 | to disable that. |
diff --git a/lisp/image/exif.el b/lisp/image/exif.el new file mode 100644 index 00000000000..2ec256bb2ee --- /dev/null +++ b/lisp/image/exif.el | |||
| @@ -0,0 +1,224 @@ | |||
| 1 | ;;; exif.el --- parsing Exif data in JPEG images -*- lexical-binding: t -*- | ||
| 2 | |||
| 3 | ;; Copyright (C) 2019 Free Software Foundation, Inc. | ||
| 4 | |||
| 5 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | ||
| 6 | ;; Keywords: images | ||
| 7 | |||
| 8 | ;; This file is part of GNU Emacs. | ||
| 9 | |||
| 10 | ;; GNU Emacs is free software: you can redistribute it and/or modify | ||
| 11 | ;; it under the terms of the GNU General Public License as published by | ||
| 12 | ;; the Free Software Foundation, either version 3 of the License, or | ||
| 13 | ;; (at your option) any later version. | ||
| 14 | |||
| 15 | ;; GNU Emacs is distributed in the hope that it will be useful, | ||
| 16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 18 | ;; GNU General Public License for more details. | ||
| 19 | |||
| 20 | ;; You should have received a copy of the GNU General Public License | ||
| 21 | ;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. | ||
| 22 | |||
| 23 | ;;; Commentary: | ||
| 24 | |||
| 25 | ;; Specification at: | ||
| 26 | |||
| 27 | ;; https://www.media.mit.edu/pia/Research/deepview/exif.html | ||
| 28 | ;; but it's kinda er not very easy to read. | ||
| 29 | |||
| 30 | ;; The JPEG format is: | ||
| 31 | ;; | ||
| 32 | ;; FFD8 and then any number of chunks on the format: FFxx SSSS ..., | ||
| 33 | ;; where FFxx is the ID, and SSSS is the length of the chunk plus 2. | ||
| 34 | ;; When you get to ID FFDA, the image itself is over and you can stop | ||
| 35 | ;; parsing. | ||
| 36 | ;; | ||
| 37 | ;; The Exif data is in the TIFF format. It starts off with the six | ||
| 38 | ;; bytes "Exif^0^0". | ||
| 39 | ;; | ||
| 40 | ;; Then either "II" or "MM", where "II" means little-endian and "MM" | ||
| 41 | ;; means big-endian. All subsequent numbers should be read in | ||
| 42 | ;; according to this. | ||
| 43 | ;; | ||
| 44 | ;; Next follows two bytes that should always represent 0x2a, and then | ||
| 45 | ;; four bytes that's the offset to where the IFD "image file | ||
| 46 | ;; directory" starts. (It's an offset from the start of this chunk; | ||
| 47 | ;; i.e., where "II"/"MM" is; all offsets in the TIFF format are from | ||
| 48 | ;; this point.) | ||
| 49 | ;; | ||
| 50 | ;; The IFD starts with two bytes that says how many entries there are | ||
| 51 | ;; in the directory, and then that number of entries follows, and then | ||
| 52 | ;; an offset to the next IFD. | ||
| 53 | |||
| 54 | ;; Usage: (exif-parse "test.jpg") => | ||
| 55 | ;; ((:tag 274 :tag-name orientation :format 3 :format-type short :value 1) | ||
| 56 | ;; (:tag 282 :tag-name x-resolution :format 5 :format-type rational :value | ||
| 57 | ;; (180 . 1)) | ||
| 58 | ;; (:tag 306 :tag-name date-time :format 2 :format-type ascii | ||
| 59 | ;; :value "2019:09:21 16:22:13") | ||
| 60 | ;; ...) | ||
| 61 | |||
| 62 | ;;; Code: | ||
| 63 | |||
| 64 | (require 'cl-lib) | ||
| 65 | |||
| 66 | (defvar exif-tag-alist | ||
| 67 | '((11 processing-software) | ||
| 68 | (271 make) | ||
| 69 | (272 model) | ||
| 70 | (274 orientation) | ||
| 71 | (282 x-resolution) | ||
| 72 | (283 y-resolution) | ||
| 73 | (296 resolution-unit) | ||
| 74 | (305 software) | ||
| 75 | (306 date-time)) | ||
| 76 | "Alist of tag values and their names.") | ||
| 77 | |||
| 78 | (defun exif-parse (file) | ||
| 79 | "Parse FILE (a JPEG file) and return the Exif data, if any. | ||
| 80 | The return value is a list of Exif items." | ||
| 81 | (when-let ((app1 (cdr (assq #xffe1 (exif--parse-jpeg file))))) | ||
| 82 | (exif--parse-exif-chunk app1))) | ||
| 83 | |||
| 84 | (defun exif--parse-jpeg (file) | ||
| 85 | (with-temp-buffer | ||
| 86 | (set-buffer-multibyte nil) | ||
| 87 | (insert-file-contents-literally file) | ||
| 88 | (unless (= (exif--read-number-be 2) #xffd8) ; SOI (start of image) | ||
| 89 | (error "Not a valid JPEG file")) | ||
| 90 | (cl-loop for segment = (exif--read-number-be 2) | ||
| 91 | for size = (exif--read-number-be 2) | ||
| 92 | ;; Stop parsing when we get to SOS (start of stream); | ||
| 93 | ;; this is when the image itself starts, and there will | ||
| 94 | ;; be no more chunks of interest after that. | ||
| 95 | while (not (= segment #xffda)) | ||
| 96 | collect (cons segment (exif--read-chunk (- size 2)))))) | ||
| 97 | |||
| 98 | (defun exif--parse-exif-chunk (data) | ||
| 99 | (with-temp-buffer | ||
| 100 | (set-buffer-multibyte nil) | ||
| 101 | (insert data) | ||
| 102 | (goto-char (point-min)) | ||
| 103 | ;; The Exif data is in the APP1 JPEG chunk and starts with | ||
| 104 | ;; "Exif\0\0". | ||
| 105 | (unless (equal (exif--read-chunk 6) (string ?E ?x ?i ?f ?\0 ?\0)) | ||
| 106 | (error "Not a valid Exif chunk")) | ||
| 107 | (delete-region (point-min) (point)) | ||
| 108 | (let* ((endian-marker (exif--read-chunk 2)) | ||
| 109 | (le (cond | ||
| 110 | ;; "Morotola" is big-endian. | ||
| 111 | ((equal endian-marker "MM") | ||
| 112 | nil) | ||
| 113 | ;; "Intel" is little-endian. | ||
| 114 | ((equal endian-marker "II") | ||
| 115 | t) | ||
| 116 | (t | ||
| 117 | (error "Invalid endian-ness %s" endian-marker))))) | ||
| 118 | ;; Another magical number. | ||
| 119 | (unless (= (exif--read-number 2 le) #x002a) | ||
| 120 | (error "Invalid TIFF header length")) | ||
| 121 | (let ((offset (exif--read-number 2 le))) | ||
| 122 | ;; Jump to where the IFD (directory) starts and parse it. | ||
| 123 | (goto-char (1+ offset)) | ||
| 124 | (exif--parse-directory le))))) | ||
| 125 | |||
| 126 | (defun exif--field-format (number) | ||
| 127 | (cl-case number | ||
| 128 | (1 (cons 'byte 1)) | ||
| 129 | (2 (cons 'ascii 1)) | ||
| 130 | (3 (cons 'short 2)) | ||
| 131 | (4 (cons 'long 4)) | ||
| 132 | (5 (cons 'rational 8)) | ||
| 133 | (otherwise (cons 'unknown 1)))) | ||
| 134 | |||
| 135 | (defun exif--parse-directory (le) | ||
| 136 | (let ((dir | ||
| 137 | (cl-loop repeat (exif--read-number 2 le) | ||
| 138 | for tag = (exif--read-number 2 le) | ||
| 139 | for format = (exif--read-number 2 le) | ||
| 140 | for field-format = (exif--field-format format) | ||
| 141 | ;; The actual length is the number in this field | ||
| 142 | ;; times the "inherent" length of the field format | ||
| 143 | ;; (i.e., "long integer" (4 bytes) or "ascii" (1 | ||
| 144 | ;; byte). | ||
| 145 | for length = (* (exif--read-number 4 le) | ||
| 146 | (cdr field-format)) | ||
| 147 | for value = (exif--read-number 4 le) | ||
| 148 | collect (list :tag tag | ||
| 149 | :tag-name (cadr (assq tag exif-tag-alist)) | ||
| 150 | :format format | ||
| 151 | :format-type (car field-format) | ||
| 152 | :value (exif--process-value | ||
| 153 | (if (> length 4) | ||
| 154 | ;; If the length of the data | ||
| 155 | ;; is more than 4 bytes, then | ||
| 156 | ;; it's actually stored after | ||
| 157 | ;; this directory, and the | ||
| 158 | ;; value here is just the | ||
| 159 | ;; offset to use to find the | ||
| 160 | ;; data. | ||
| 161 | (buffer-substring | ||
| 162 | (1+ value) (+ (1+ value) length)) | ||
| 163 | ;; The value is stored | ||
| 164 | ;; directly in the directory. | ||
| 165 | value) | ||
| 166 | (car field-format) | ||
| 167 | le))))) | ||
| 168 | (let ((next (exif--read-number 4 le))) | ||
| 169 | (if (> next 0) | ||
| 170 | ;; There's more than one directory; if so, jump to it and | ||
| 171 | ;; keep parsing. | ||
| 172 | (progn | ||
| 173 | (goto-char (1+ next)) | ||
| 174 | (append dir (exif--parse-directory le))) | ||
| 175 | ;; We've reached the end of the directories. | ||
| 176 | dir)))) | ||
| 177 | |||
| 178 | (defun exif--process-value (value type le) | ||
| 179 | "Do type-based post-processing of the value." | ||
| 180 | (cl-case type | ||
| 181 | ;; Chop off trailing zero byte. | ||
| 182 | ('ascii (substring value 0 (1- (length value)))) | ||
| 183 | ('rational (with-temp-buffer | ||
| 184 | (set-buffer-multibyte nil) | ||
| 185 | (insert value) | ||
| 186 | (goto-char (point-min)) | ||
| 187 | (cons (exif--read-number 4 le) | ||
| 188 | (exif--read-number 4 le)))) | ||
| 189 | (otherwise value))) | ||
| 190 | |||
| 191 | (defun exif--read-chunk (bytes) | ||
| 192 | "Return BYTES octets from the buffer and advance point that much." | ||
| 193 | (prog1 | ||
| 194 | (buffer-substring (point) (+ (point) bytes)) | ||
| 195 | (forward-char bytes))) | ||
| 196 | |||
| 197 | (defun exif--read-number-be (bytes) | ||
| 198 | "Read BYTES octets from the buffer as a chunk of big-endian bytes. | ||
| 199 | Advance point to after the read bytes." | ||
| 200 | (let ((sum 0)) | ||
| 201 | (dotimes (_ bytes) | ||
| 202 | (setq sum (+ (* sum 256) (following-char))) | ||
| 203 | (forward-char 1)) | ||
| 204 | sum)) | ||
| 205 | |||
| 206 | (defun exif--read-number-le (bytes) | ||
| 207 | "Read BYTES octets from the buffer as a chunk of low-endian bytes. | ||
| 208 | Advance point to after the read bytes." | ||
| 209 | (let ((sum 0)) | ||
| 210 | (dotimes (i bytes) | ||
| 211 | (setq sum (+ (* (following-char) (expt 256 i)) sum)) | ||
| 212 | (forward-char 1)) | ||
| 213 | sum)) | ||
| 214 | |||
| 215 | (defun exif--read-number (bytes lower-endian) | ||
| 216 | "Read BYTES octets from the buffer with endianness determined by LOWER-ENDIAN. | ||
| 217 | Advance point to after the read bytes." | ||
| 218 | (if lower-endian | ||
| 219 | (exif--read-number-le bytes) | ||
| 220 | (exif--read-number-be bytes))) | ||
| 221 | |||
| 222 | (provide 'exif) | ||
| 223 | |||
| 224 | ;;; exif.el ends here | ||
diff --git a/test/data/image/black.jpg b/test/data/image/black.jpg new file mode 100644 index 00000000000..be9af2a9a05 --- /dev/null +++ b/test/data/image/black.jpg | |||
| Binary files differ | |||
diff --git a/test/lisp/image/exif-tests.el b/test/lisp/image/exif-tests.el new file mode 100644 index 00000000000..d6b46980d77 --- /dev/null +++ b/test/lisp/image/exif-tests.el | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | ;;; exif-tests.el --- tests for exif.el -*- lexical-binding: t -*- | ||
| 2 | |||
| 3 | ;; Copyright (C) 2019 Free Software Foundation, Inc. | ||
| 4 | |||
| 5 | ;; This file is part of GNU Emacs. | ||
| 6 | |||
| 7 | ;; GNU Emacs is free software: you can redistribute it and/or modify | ||
| 8 | ;; it under the terms of the GNU General Public License as published by | ||
| 9 | ;; the Free Software Foundation, either version 3 of the License, or | ||
| 10 | ;; (at your option) any later version. | ||
| 11 | |||
| 12 | ;; GNU Emacs is distributed in the hope that it will be useful, | ||
| 13 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 15 | ;; GNU General Public License for more details. | ||
| 16 | |||
| 17 | ;; You should have received a copy of the GNU General Public License | ||
| 18 | ;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. | ||
| 19 | |||
| 20 | ;;; Code: | ||
| 21 | |||
| 22 | (require 'ert) | ||
| 23 | (require 'exif) | ||
| 24 | (require 'seq) | ||
| 25 | |||
| 26 | (defun test-image-file (name) | ||
| 27 | (expand-file-name | ||
| 28 | name (expand-file-name "data/image" | ||
| 29 | (or (getenv "EMACS_TEST_DIRECTORY") | ||
| 30 | "../../")))) | ||
| 31 | |||
| 32 | (defun exif-elem (exif elem) | ||
| 33 | (plist-get (seq-find (lambda (e) | ||
| 34 | (eq elem (plist-get e :tag-name))) | ||
| 35 | exif) | ||
| 36 | :value)) | ||
| 37 | |||
| 38 | (ert-deftest test-exif-parse () | ||
| 39 | (let ((exif (exif-parse (test-image-file "black.jpg")))) | ||
| 40 | (should (equal (exif-elem exif 'make) "Panasonic")) | ||
| 41 | (should (equal (exif-elem exif 'orientation) 1)) | ||
| 42 | (should (equal (exif-elem exif 'x-resolution) '(180 . 1))))) | ||
| 43 | |||
| 44 | ;;; exif-tests.el ends here | ||