diff options
| author | Lars Ingebrigtsen | 2019-07-29 14:22:31 +0200 |
|---|---|---|
| committer | Lars Ingebrigtsen | 2019-07-29 14:22:38 +0200 |
| commit | fa04c8b87e50a2e2b0d021958f637be8f475d8bc (patch) | |
| tree | 304b07bc09258970ecf7c4a97fd53e85d4cddfe8 /lisp/calendar | |
| parent | 6cfda69d72cb9debefc48d0d95e341d389e7303a (diff) | |
| download | emacs-fa04c8b87e50a2e2b0d021958f637be8f475d8bc.tar.gz emacs-fa04c8b87e50a2e2b0d021958f637be8f475d8bc.zip | |
Add an ISO 8601 parsing library
* doc/lispref/os.texi (Time Parsing): Document it.
* lisp/calendar/iso8601.el: New file.
* test/lisp/calendar/iso8601-tests.el: Test ISO8601 parsing functions.
Diffstat (limited to 'lisp/calendar')
| -rw-r--r-- | lisp/calendar/iso8601.el | 370 |
1 files changed, 370 insertions, 0 deletions
diff --git a/lisp/calendar/iso8601.el b/lisp/calendar/iso8601.el new file mode 100644 index 00000000000..ab0077ac58d --- /dev/null +++ b/lisp/calendar/iso8601.el | |||
| @@ -0,0 +1,370 @@ | |||
| 1 | ;;; iso8601.el --- parse ISO 8601 date/time strings -*- lexical-binding:t -*- | ||
| 2 | |||
| 3 | ;; Copyright (C) 2019 Free Software Foundation, Inc. | ||
| 4 | |||
| 5 | ;; Keywords: dates | ||
| 6 | |||
| 7 | ;; This file is part of GNU Emacs. | ||
| 8 | |||
| 9 | ;; GNU Emacs is free software: you can redistribute it and/or modify | ||
| 10 | ;; it under the terms of the GNU General Public License as published by | ||
| 11 | ;; the Free Software Foundation, either version 3 of the License, or | ||
| 12 | ;; (at your option) any later version. | ||
| 13 | |||
| 14 | ;; GNU Emacs is distributed in the hope that it will be useful, | ||
| 15 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 16 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 17 | ;; GNU General Public License for more details. | ||
| 18 | |||
| 19 | ;; You should have received a copy of the GNU General Public License | ||
| 20 | ;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. | ||
| 21 | |||
| 22 | ;;; Commentary: | ||
| 23 | |||
| 24 | ;; ISO8601 times basically look like 1985-04-01T15:23:49... Or so | ||
| 25 | ;; you'd think. This is what everybody means when they say "ISO8601", | ||
| 26 | ;; but it's in reality a quite large collection of syntaxes, including | ||
| 27 | ;; week numbers, ordinal dates, durations and intervals. This package | ||
| 28 | ;; has functions for parsing them all. | ||
| 29 | ;; | ||
| 30 | ;; The interface functions are `iso8601-parse', `iso8601-parse-date', | ||
| 31 | ;; `iso8601-parse-time', `iso8601-parse-zone', | ||
| 32 | ;; `iso8601-parse-duration' and `iso8601-parse-interval'. They all | ||
| 33 | ;; return decoded time objects, except the last one, which returns a | ||
| 34 | ;; list of three of them. | ||
| 35 | ;; | ||
| 36 | ;; (iso8601-parse-interval "P1Y2M10DT2H30M/2008W32T153000-01") | ||
| 37 | ;; '((0 0 13 24 5 2007 nil nil -3600) | ||
| 38 | ;; (0 30 15 3 8 2008 nil nil -3600) | ||
| 39 | ;; (0 30 2 10 2 1 nil nil nil)) | ||
| 40 | ;; | ||
| 41 | ;; | ||
| 42 | ;; The standard can be found at: | ||
| 43 | ;; | ||
| 44 | ;; http://www.loc.gov/standards/datetime/iso-tc154-wg5_n0038_iso_wd_8601-1_2016-02-16.pdf | ||
| 45 | ;; | ||
| 46 | ;; The Wikipedia page on the standard is also informative: | ||
| 47 | ;; | ||
| 48 | ;; https://en.wikipedia.org/wiki/ISO_8601 | ||
| 49 | ;; | ||
| 50 | ;; RFC3339 defines the subset that everybody thinks of as "ISO8601". | ||
| 51 | |||
| 52 | ;;; Code: | ||
| 53 | |||
| 54 | (require 'time-date) | ||
| 55 | (require 'cl-lib) | ||
| 56 | |||
| 57 | (defun iso8601--concat-regexps (regexps) | ||
| 58 | (mapconcat (lambda (regexp) | ||
| 59 | (concat "\\(?:" | ||
| 60 | (replace-regexp-in-string "(" "(?:" regexp) | ||
| 61 | "\\)")) | ||
| 62 | regexps "\\|")) | ||
| 63 | |||
| 64 | (defconst iso8601--year-match | ||
| 65 | "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)") | ||
| 66 | (defconst iso8601--full-date-match | ||
| 67 | "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-?\\([0-9][0-9]\\)-?\\([0-9][0-9]\\)") | ||
| 68 | (defconst iso8601--without-day-match | ||
| 69 | "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-\\([0-9][0-9]\\)") | ||
| 70 | (defconst iso8601--outdated-date-match | ||
| 71 | "--\\([0-9][0-9]\\)-?\\([0-9][0-9]\\)") | ||
| 72 | (defconst iso8601--week-date-match | ||
| 73 | "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-?W\\([0-9][0-9]\\)-?\\([0-9]\\)?") | ||
| 74 | (defconst iso8601--ordinal-date-match | ||
| 75 | "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-?\\([0-9][0-9][0-9]\\)") | ||
| 76 | (defconst iso8601--date-match | ||
| 77 | (iso8601--concat-regexps | ||
| 78 | (list iso8601--year-match | ||
| 79 | iso8601--full-date-match | ||
| 80 | iso8601--without-day-match | ||
| 81 | iso8601--outdated-date-match | ||
| 82 | iso8601--week-date-match | ||
| 83 | iso8601--ordinal-date-match))) | ||
| 84 | |||
| 85 | (defconst iso8601--time-match | ||
| 86 | "\\([0-9][0-9]\\):?\\([0-9][0-9]\\)?:?\\([0-9][0-9]\\)?\\.?\\([0-9][0-9][0-9]\\)?") | ||
| 87 | |||
| 88 | (defconst iso8601--zone-match | ||
| 89 | "\\(Z\\|\\([-+]\\)\\([0-9][0-9]\\):?\\([0-9][0-9]\\)?\\)") | ||
| 90 | |||
| 91 | (defconst iso8601--full-time-match | ||
| 92 | (concat "\\(" (replace-regexp-in-string "(" "(?:" iso8601--time-match) "\\)" | ||
| 93 | "\\(" iso8601--zone-match "\\)?")) | ||
| 94 | |||
| 95 | (defconst iso8601--combined-match | ||
| 96 | (concat "\\(" iso8601--date-match "\\)" | ||
| 97 | "\\(?:T\\(" | ||
| 98 | (replace-regexp-in-string "(" "(?:" iso8601--time-match) | ||
| 99 | "\\)" | ||
| 100 | "\\(" iso8601--zone-match "\\)?\\)?")) | ||
| 101 | |||
| 102 | (defconst iso8601--duration-full-match | ||
| 103 | "P\\([0-9]+Y\\)?\\([0-9]+M\\)?\\([0-9]+D\\)?\\(T\\([0-9]+H\\)?\\([0-9]+M\\)?\\([0-9]+S\\)?\\)?") | ||
| 104 | (defconst iso8601--duration-week-match | ||
| 105 | "P\\([0-9]+\\)W") | ||
| 106 | (defconst iso8601--duration-combined-match | ||
| 107 | (concat "P" iso8601--combined-match)) | ||
| 108 | (defconst iso8601--duration-match | ||
| 109 | (iso8601--concat-regexps | ||
| 110 | (list iso8601--duration-full-match | ||
| 111 | iso8601--duration-week-match | ||
| 112 | iso8601--duration-combined-match))) | ||
| 113 | |||
| 114 | (defun iso8601-parse (string) | ||
| 115 | "Parse an ISO 8601 date/time string and return a `decoded-time' structure. | ||
| 116 | |||
| 117 | The ISO 8601 date/time strings look like \"2008-03-02T13:47:30\", | ||
| 118 | but shorter, incomplete strings like \"2008-03-02\" are valid, as | ||
| 119 | well as variants like \"2008W32\" (week number) and | ||
| 120 | \"2008-234\" (ordinal day number)." | ||
| 121 | (if (not (iso8601-valid-p string)) | ||
| 122 | (signal 'wrong-type-argument string) | ||
| 123 | (let* ((date-string (match-string 1 string)) | ||
| 124 | (time-string (match-string 2 string)) | ||
| 125 | (zone-string (match-string 3 string)) | ||
| 126 | (date (iso8601-parse-date date-string))) | ||
| 127 | ;; The time portion is optional. | ||
| 128 | (when time-string | ||
| 129 | (let ((time (iso8601-parse-time time-string))) | ||
| 130 | (setf (decoded-time-hour date) (decoded-time-hour time)) | ||
| 131 | (setf (decoded-time-minute date) (decoded-time-minute time)) | ||
| 132 | (setf (decoded-time-second date) (decoded-time-second time)))) | ||
| 133 | ;; The time zone is optional. | ||
| 134 | (when zone-string | ||
| 135 | (setf (decoded-time-zone date) | ||
| 136 | ;; The time zone in decoded times are in seconds. | ||
| 137 | (* (iso8601-parse-zone zone-string) 60))) | ||
| 138 | date))) | ||
| 139 | |||
| 140 | (defun iso8601-parse-date (string) | ||
| 141 | "Parse STRING (which should be on ISO 8601 format) and return a time value." | ||
| 142 | (cond | ||
| 143 | ;; Just a year: [-+]YYYY. | ||
| 144 | ((iso8601--match iso8601--year-match string) | ||
| 145 | (iso8601--decoded-time | ||
| 146 | :year (iso8601--adjust-year (match-string 1 string) | ||
| 147 | (match-string 2 string)))) | ||
| 148 | ;; Calendar dates: YYYY-MM-DD and variants. | ||
| 149 | ((iso8601--match iso8601--full-date-match string) | ||
| 150 | (iso8601--decoded-time | ||
| 151 | :year (iso8601--adjust-year (match-string 1 string) | ||
| 152 | (match-string 2 string)) | ||
| 153 | :month (match-string 3 string) | ||
| 154 | :day (match-string 4 string))) | ||
| 155 | ;; Calendar date without day: YYYY-MM. | ||
| 156 | ((iso8601--match iso8601--without-day-match string) | ||
| 157 | (iso8601--decoded-time | ||
| 158 | :year (iso8601--adjust-year (match-string 1 string) | ||
| 159 | (match-string 2 string)) | ||
| 160 | :month (match-string 3 string))) | ||
| 161 | ;; Outdated date without year: --MM-DD | ||
| 162 | ((iso8601--match iso8601--outdated-date-match string) | ||
| 163 | (iso8601--decoded-time | ||
| 164 | :month (match-string 1 string) | ||
| 165 | :day (match-string 2 string))) | ||
| 166 | ;; Week dates: YYYY-Www-D | ||
| 167 | ((iso8601--match iso8601--week-date-match string) | ||
| 168 | (let* ((year (iso8601--adjust-year (match-string 1 string) | ||
| 169 | (match-string 2 string))) | ||
| 170 | (week (string-to-number (match-string 3 string))) | ||
| 171 | (day-of-week (and (match-string 4 string) | ||
| 172 | (string-to-number (match-string 4 string)))) | ||
| 173 | (jan-start (decoded-time-weekday | ||
| 174 | (decode-time | ||
| 175 | (iso8601--encode-time | ||
| 176 | (iso8601--decoded-time :year year | ||
| 177 | :month 1 | ||
| 178 | :day 4))))) | ||
| 179 | (correction (+ (if (zerop jan-start) 7 jan-start) | ||
| 180 | 3)) | ||
| 181 | (ordinal (+ (* week 7) (or day-of-week 0) (- correction)))) | ||
| 182 | (cond | ||
| 183 | ;; Monday 29 December 2008 is written "2009-W01-1". | ||
| 184 | ((< ordinal 1) | ||
| 185 | (setq year (1- year) | ||
| 186 | ordinal (+ ordinal (if (date-leap-year-p year) | ||
| 187 | 366 365)))) | ||
| 188 | ;; Sunday 3 January 2010 is written "2009-W53-7". | ||
| 189 | ((> ordinal (if (date-leap-year-p year) | ||
| 190 | 366 365)) | ||
| 191 | (setq ordinal (- ordinal (if (date-leap-year-p year) | ||
| 192 | 366 365)) | ||
| 193 | year (1+ year)))) | ||
| 194 | (let ((month-day (date-ordinal-to-time year ordinal))) | ||
| 195 | (iso8601--decoded-time :year year | ||
| 196 | :month (decoded-time-month month-day) | ||
| 197 | :day (decoded-time-day month-day))))) | ||
| 198 | ;; Ordinal dates: YYYY-DDD | ||
| 199 | ((iso8601--match iso8601--ordinal-date-match string) | ||
| 200 | (let* ((year (iso8601--adjust-year (match-string 1 string) | ||
| 201 | (match-string 2 string))) | ||
| 202 | (ordinal (string-to-number (match-string 3 string))) | ||
| 203 | (month-day (date-ordinal-to-time year ordinal))) | ||
| 204 | (iso8601--decoded-time :year year | ||
| 205 | :month (decoded-time-month month-day) | ||
| 206 | :day (decoded-time-day month-day)))) | ||
| 207 | (t | ||
| 208 | (signal 'wrong-type-argument string)))) | ||
| 209 | |||
| 210 | (defun iso8601--adjust-year (sign year) | ||
| 211 | (save-match-data | ||
| 212 | (let ((year (if (stringp year) | ||
| 213 | (string-to-number year) | ||
| 214 | year))) | ||
| 215 | (if (string= sign "-") | ||
| 216 | ;; -0001 is 2 BCE. | ||
| 217 | (1- (- year)) | ||
| 218 | year)))) | ||
| 219 | |||
| 220 | (defun iso8601-parse-time (string) | ||
| 221 | "Parse STRING, which should be an ISO 8601 time string, and return a time value." | ||
| 222 | (if (not (iso8601--match iso8601--full-time-match string)) | ||
| 223 | (signal 'wrong-type-argument string) | ||
| 224 | (let ((time (match-string 1 string)) | ||
| 225 | (zone (match-string 2 string))) | ||
| 226 | (if (not (iso8601--match iso8601--time-match time)) | ||
| 227 | (signal 'wrong-type-argument string) | ||
| 228 | (let ((hour (string-to-number (match-string 1 time))) | ||
| 229 | (minute (and (match-string 2 time) | ||
| 230 | (string-to-number (match-string 2 time)))) | ||
| 231 | (second (and (match-string 3 time) | ||
| 232 | (string-to-number (match-string 3 time)))) | ||
| 233 | ;; Hm... | ||
| 234 | (_millisecond (and (match-string 4 time) | ||
| 235 | (string-to-number (match-string 4 time))))) | ||
| 236 | (iso8601--decoded-time :hour hour | ||
| 237 | :minute (or minute 0) | ||
| 238 | :second (or second 0) | ||
| 239 | :zone (and zone | ||
| 240 | (* 60 (iso8601-parse-zone | ||
| 241 | zone))))))))) | ||
| 242 | |||
| 243 | (defun iso8601-parse-zone (string) | ||
| 244 | "Parse STRING, which should be an ISO 8601 time zone. | ||
| 245 | Return the number of minutes." | ||
| 246 | (if (not (iso8601--match iso8601--zone-match string)) | ||
| 247 | (signal 'wrong-type-argument string) | ||
| 248 | (if (match-string 2 string) | ||
| 249 | ;; HH:MM-ish. | ||
| 250 | (let ((hour (string-to-number (match-string 3 string))) | ||
| 251 | (minute (and (match-string 4 string) | ||
| 252 | (string-to-number (match-string 4 string))))) | ||
| 253 | (* (if (equal (match-string 2 string) "-") | ||
| 254 | -1 | ||
| 255 | 1) | ||
| 256 | (+ (* hour 60) | ||
| 257 | (or minute 0)))) | ||
| 258 | ;; "Z". | ||
| 259 | 0))) | ||
| 260 | |||
| 261 | (defun iso8601-valid-p (string) | ||
| 262 | "Say whether STRING is a valid ISO 8601 representation." | ||
| 263 | (iso8601--match iso8601--combined-match string)) | ||
| 264 | |||
| 265 | (defun iso8601-parse-duration (string) | ||
| 266 | "Parse ISO 8601 durations on the form P3Y6M4DT12H30M5S." | ||
| 267 | (cond | ||
| 268 | ((and (iso8601--match iso8601--duration-full-match string) | ||
| 269 | ;; Just a "P" isn't valid; there has to be at least one | ||
| 270 | ;; element, like P1M. | ||
| 271 | (> (length (match-string 0 string)) 2)) | ||
| 272 | (iso8601--decoded-time :year (or (match-string 1 string) 0) | ||
| 273 | :month (or (match-string 2 string) 0) | ||
| 274 | :day (or (match-string 3 string) 0) | ||
| 275 | :hour (or (match-string 5 string) 0) | ||
| 276 | :minute (or (match-string 6 string) 0) | ||
| 277 | :second (or (match-string 7 string) 0))) | ||
| 278 | ;; PnW: Weeks. | ||
| 279 | ((iso8601--match iso8601--duration-week-match string) | ||
| 280 | (let ((weeks (string-to-number (match-string 1 string)))) | ||
| 281 | ;; Does this make sense? Hm... | ||
| 282 | (iso8601--decoded-time :day (* weeks 7)))) | ||
| 283 | ;; P<date>T<time> | ||
| 284 | ((iso8601--match iso8601--duration-combined-match string) | ||
| 285 | (iso8601-parse (substring string 1))) | ||
| 286 | (t | ||
| 287 | (signal 'wrong-type-argument string)))) | ||
| 288 | |||
| 289 | (defun iso8601-parse-interval (string) | ||
| 290 | "Parse ISO 8601 intervals." | ||
| 291 | (let ((bits (split-string string "/")) | ||
| 292 | start end duration) | ||
| 293 | (if (not (= (length bits) 2)) | ||
| 294 | (signal 'wrong-type-argument string) | ||
| 295 | ;; The intervals may be an explicit start/end times, or either a | ||
| 296 | ;; start or an end, and an accompanying duration. | ||
| 297 | (cond | ||
| 298 | ((and (string-match "\\`P" (car bits)) | ||
| 299 | (iso8601-valid-p (cadr bits))) | ||
| 300 | (setq duration (iso8601-parse-duration (car bits)) | ||
| 301 | end (iso8601-parse (cadr bits)))) | ||
| 302 | ((and (string-match "\\`P" (cadr bits)) | ||
| 303 | (iso8601-valid-p (car bits))) | ||
| 304 | (setq duration (iso8601-parse-duration (cadr bits)) | ||
| 305 | start (iso8601-parse (car bits)))) | ||
| 306 | ((and (iso8601-valid-p (car bits)) | ||
| 307 | (iso8601-valid-p (cadr bits))) | ||
| 308 | (setq start (iso8601-parse (car bits)) | ||
| 309 | end (iso8601-parse (cadr bits)))) | ||
| 310 | (t | ||
| 311 | (signal 'wrong-type-argument string)))) | ||
| 312 | (unless end | ||
| 313 | (setq end (decoded-time-add start duration))) | ||
| 314 | (unless start | ||
| 315 | (setq start (decoded-time-add end | ||
| 316 | ;; We negate the duration so that | ||
| 317 | ;; we get a subtraction. | ||
| 318 | (mapcar (lambda (elem) | ||
| 319 | (if (numberp elem) | ||
| 320 | (- elem) | ||
| 321 | elem)) | ||
| 322 | duration)))) | ||
| 323 | (list start end | ||
| 324 | (or duration | ||
| 325 | (decode-time (time-subtract (iso8601--encode-time end) | ||
| 326 | (iso8601--encode-time start)) | ||
| 327 | (or (decoded-time-zone end) 0)))))) | ||
| 328 | |||
| 329 | (defun iso8601--match (regexp string) | ||
| 330 | (string-match (concat "\\`" regexp "\\'") string)) | ||
| 331 | |||
| 332 | (defun iso8601--value (elem &optional default) | ||
| 333 | (if (stringp elem) | ||
| 334 | (string-to-number elem) | ||
| 335 | (or elem default))) | ||
| 336 | |||
| 337 | (cl-defun iso8601--decoded-time (&key second minute hour | ||
| 338 | day month year | ||
| 339 | dst zone) | ||
| 340 | (list (iso8601--value second) | ||
| 341 | (iso8601--value minute) | ||
| 342 | (iso8601--value hour) | ||
| 343 | (iso8601--value day) | ||
| 344 | (iso8601--value month) | ||
| 345 | (iso8601--value year) | ||
| 346 | nil | ||
| 347 | dst | ||
| 348 | zone)) | ||
| 349 | |||
| 350 | (defun iso8601--encode-time (time) | ||
| 351 | "Like `encode-time', but fill in nil values in TIME." | ||
| 352 | (setq time (copy-sequence time)) | ||
| 353 | (unless (decoded-time-second time) | ||
| 354 | (setf (decoded-time-second time) 0)) | ||
| 355 | (unless (decoded-time-minute time) | ||
| 356 | (setf (decoded-time-minute time) 0)) | ||
| 357 | (unless (decoded-time-hour time) | ||
| 358 | (setf (decoded-time-hour time) 0)) | ||
| 359 | |||
| 360 | (unless (decoded-time-day time) | ||
| 361 | (setf (decoded-time-day time) 1)) | ||
| 362 | (unless (decoded-time-month time) | ||
| 363 | (setf (decoded-time-month time) 1)) | ||
| 364 | (unless (decoded-time-year time) | ||
| 365 | (setf (decoded-time-year time) 0)) | ||
| 366 | (encode-time time)) | ||
| 367 | |||
| 368 | (provide 'iso8601) | ||
| 369 | |||
| 370 | ;;; iso8601.el ends here | ||