aboutsummaryrefslogtreecommitdiffstats
path: root/lisp/calendar
diff options
context:
space:
mode:
authorLars Ingebrigtsen2019-07-29 14:22:31 +0200
committerLars Ingebrigtsen2019-07-29 14:22:38 +0200
commitfa04c8b87e50a2e2b0d021958f637be8f475d8bc (patch)
tree304b07bc09258970ecf7c4a97fd53e85d4cddfe8 /lisp/calendar
parent6cfda69d72cb9debefc48d0d95e341d389e7303a (diff)
downloademacs-fa04c8b87e50a2e2b0d021958f637be8f475d8bc.tar.gz
emacs-fa04c8b87e50a2e2b0d021958f637be8f475d8bc.zip
Add an ISO 8601 parsing library
* doc/lispref/os.texi (Time Parsing): Document it. * lisp/calendar/iso8601.el: New file. * test/lisp/calendar/iso8601-tests.el: Test ISO8601 parsing functions.
Diffstat (limited to 'lisp/calendar')
-rw-r--r--lisp/calendar/iso8601.el370
1 files changed, 370 insertions, 0 deletions
diff --git a/lisp/calendar/iso8601.el b/lisp/calendar/iso8601.el
new file mode 100644
index 00000000000..ab0077ac58d
--- /dev/null
+++ b/lisp/calendar/iso8601.el
@@ -0,0 +1,370 @@
1;;; iso8601.el --- parse ISO 8601 date/time strings -*- lexical-binding:t -*-
2
3;; Copyright (C) 2019 Free Software Foundation, Inc.
4
5;; Keywords: dates
6
7;; This file is part of GNU Emacs.
8
9;; GNU Emacs is free software: you can redistribute it and/or modify
10;; it under the terms of the GNU General Public License as published by
11;; the Free Software Foundation, either version 3 of the License, or
12;; (at your option) any later version.
13
14;; GNU Emacs is distributed in the hope that it will be useful,
15;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17;; GNU General Public License for more details.
18
19;; You should have received a copy of the GNU General Public License
20;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
21
22;;; Commentary:
23
24;; ISO8601 times basically look like 1985-04-01T15:23:49... Or so
25;; you'd think. This is what everybody means when they say "ISO8601",
26;; but it's in reality a quite large collection of syntaxes, including
27;; week numbers, ordinal dates, durations and intervals. This package
28;; has functions for parsing them all.
29;;
30;; The interface functions are `iso8601-parse', `iso8601-parse-date',
31;; `iso8601-parse-time', `iso8601-parse-zone',
32;; `iso8601-parse-duration' and `iso8601-parse-interval'. They all
33;; return decoded time objects, except the last one, which returns a
34;; list of three of them.
35;;
36;; (iso8601-parse-interval "P1Y2M10DT2H30M/2008W32T153000-01")
37;; '((0 0 13 24 5 2007 nil nil -3600)
38;; (0 30 15 3 8 2008 nil nil -3600)
39;; (0 30 2 10 2 1 nil nil nil))
40;;
41;;
42;; The standard can be found at:
43;;
44;; http://www.loc.gov/standards/datetime/iso-tc154-wg5_n0038_iso_wd_8601-1_2016-02-16.pdf
45;;
46;; The Wikipedia page on the standard is also informative:
47;;
48;; https://en.wikipedia.org/wiki/ISO_8601
49;;
50;; RFC3339 defines the subset that everybody thinks of as "ISO8601".
51
52;;; Code:
53
54(require 'time-date)
55(require 'cl-lib)
56
57(defun iso8601--concat-regexps (regexps)
58 (mapconcat (lambda (regexp)
59 (concat "\\(?:"
60 (replace-regexp-in-string "(" "(?:" regexp)
61 "\\)"))
62 regexps "\\|"))
63
64(defconst iso8601--year-match
65 "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)")
66(defconst iso8601--full-date-match
67 "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-?\\([0-9][0-9]\\)-?\\([0-9][0-9]\\)")
68(defconst iso8601--without-day-match
69 "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-\\([0-9][0-9]\\)")
70(defconst iso8601--outdated-date-match
71 "--\\([0-9][0-9]\\)-?\\([0-9][0-9]\\)")
72(defconst iso8601--week-date-match
73 "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-?W\\([0-9][0-9]\\)-?\\([0-9]\\)?")
74(defconst iso8601--ordinal-date-match
75 "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-?\\([0-9][0-9][0-9]\\)")
76(defconst iso8601--date-match
77 (iso8601--concat-regexps
78 (list iso8601--year-match
79 iso8601--full-date-match
80 iso8601--without-day-match
81 iso8601--outdated-date-match
82 iso8601--week-date-match
83 iso8601--ordinal-date-match)))
84
85(defconst iso8601--time-match
86 "\\([0-9][0-9]\\):?\\([0-9][0-9]\\)?:?\\([0-9][0-9]\\)?\\.?\\([0-9][0-9][0-9]\\)?")
87
88(defconst iso8601--zone-match
89 "\\(Z\\|\\([-+]\\)\\([0-9][0-9]\\):?\\([0-9][0-9]\\)?\\)")
90
91(defconst iso8601--full-time-match
92 (concat "\\(" (replace-regexp-in-string "(" "(?:" iso8601--time-match) "\\)"
93 "\\(" iso8601--zone-match "\\)?"))
94
95(defconst iso8601--combined-match
96 (concat "\\(" iso8601--date-match "\\)"
97 "\\(?:T\\("
98 (replace-regexp-in-string "(" "(?:" iso8601--time-match)
99 "\\)"
100 "\\(" iso8601--zone-match "\\)?\\)?"))
101
102(defconst iso8601--duration-full-match
103 "P\\([0-9]+Y\\)?\\([0-9]+M\\)?\\([0-9]+D\\)?\\(T\\([0-9]+H\\)?\\([0-9]+M\\)?\\([0-9]+S\\)?\\)?")
104(defconst iso8601--duration-week-match
105 "P\\([0-9]+\\)W")
106(defconst iso8601--duration-combined-match
107 (concat "P" iso8601--combined-match))
108(defconst iso8601--duration-match
109 (iso8601--concat-regexps
110 (list iso8601--duration-full-match
111 iso8601--duration-week-match
112 iso8601--duration-combined-match)))
113
114(defun iso8601-parse (string)
115 "Parse an ISO 8601 date/time string and return a `decoded-time' structure.
116
117The ISO 8601 date/time strings look like \"2008-03-02T13:47:30\",
118but shorter, incomplete strings like \"2008-03-02\" are valid, as
119well as variants like \"2008W32\" (week number) and
120\"2008-234\" (ordinal day number)."
121 (if (not (iso8601-valid-p string))
122 (signal 'wrong-type-argument string)
123 (let* ((date-string (match-string 1 string))
124 (time-string (match-string 2 string))
125 (zone-string (match-string 3 string))
126 (date (iso8601-parse-date date-string)))
127 ;; The time portion is optional.
128 (when time-string
129 (let ((time (iso8601-parse-time time-string)))
130 (setf (decoded-time-hour date) (decoded-time-hour time))
131 (setf (decoded-time-minute date) (decoded-time-minute time))
132 (setf (decoded-time-second date) (decoded-time-second time))))
133 ;; The time zone is optional.
134 (when zone-string
135 (setf (decoded-time-zone date)
136 ;; The time zone in decoded times are in seconds.
137 (* (iso8601-parse-zone zone-string) 60)))
138 date)))
139
140(defun iso8601-parse-date (string)
141 "Parse STRING (which should be on ISO 8601 format) and return a time value."
142 (cond
143 ;; Just a year: [-+]YYYY.
144 ((iso8601--match iso8601--year-match string)
145 (iso8601--decoded-time
146 :year (iso8601--adjust-year (match-string 1 string)
147 (match-string 2 string))))
148 ;; Calendar dates: YYYY-MM-DD and variants.
149 ((iso8601--match iso8601--full-date-match string)
150 (iso8601--decoded-time
151 :year (iso8601--adjust-year (match-string 1 string)
152 (match-string 2 string))
153 :month (match-string 3 string)
154 :day (match-string 4 string)))
155 ;; Calendar date without day: YYYY-MM.
156 ((iso8601--match iso8601--without-day-match string)
157 (iso8601--decoded-time
158 :year (iso8601--adjust-year (match-string 1 string)
159 (match-string 2 string))
160 :month (match-string 3 string)))
161 ;; Outdated date without year: --MM-DD
162 ((iso8601--match iso8601--outdated-date-match string)
163 (iso8601--decoded-time
164 :month (match-string 1 string)
165 :day (match-string 2 string)))
166 ;; Week dates: YYYY-Www-D
167 ((iso8601--match iso8601--week-date-match string)
168 (let* ((year (iso8601--adjust-year (match-string 1 string)
169 (match-string 2 string)))
170 (week (string-to-number (match-string 3 string)))
171 (day-of-week (and (match-string 4 string)
172 (string-to-number (match-string 4 string))))
173 (jan-start (decoded-time-weekday
174 (decode-time
175 (iso8601--encode-time
176 (iso8601--decoded-time :year year
177 :month 1
178 :day 4)))))
179 (correction (+ (if (zerop jan-start) 7 jan-start)
180 3))
181 (ordinal (+ (* week 7) (or day-of-week 0) (- correction))))
182 (cond
183 ;; Monday 29 December 2008 is written "2009-W01-1".
184 ((< ordinal 1)
185 (setq year (1- year)
186 ordinal (+ ordinal (if (date-leap-year-p year)
187 366 365))))
188 ;; Sunday 3 January 2010 is written "2009-W53-7".
189 ((> ordinal (if (date-leap-year-p year)
190 366 365))
191 (setq ordinal (- ordinal (if (date-leap-year-p year)
192 366 365))
193 year (1+ year))))
194 (let ((month-day (date-ordinal-to-time year ordinal)))
195 (iso8601--decoded-time :year year
196 :month (decoded-time-month month-day)
197 :day (decoded-time-day month-day)))))
198 ;; Ordinal dates: YYYY-DDD
199 ((iso8601--match iso8601--ordinal-date-match string)
200 (let* ((year (iso8601--adjust-year (match-string 1 string)
201 (match-string 2 string)))
202 (ordinal (string-to-number (match-string 3 string)))
203 (month-day (date-ordinal-to-time year ordinal)))
204 (iso8601--decoded-time :year year
205 :month (decoded-time-month month-day)
206 :day (decoded-time-day month-day))))
207 (t
208 (signal 'wrong-type-argument string))))
209
210(defun iso8601--adjust-year (sign year)
211 (save-match-data
212 (let ((year (if (stringp year)
213 (string-to-number year)
214 year)))
215 (if (string= sign "-")
216 ;; -0001 is 2 BCE.
217 (1- (- year))
218 year))))
219
220(defun iso8601-parse-time (string)
221 "Parse STRING, which should be an ISO 8601 time string, and return a time value."
222 (if (not (iso8601--match iso8601--full-time-match string))
223 (signal 'wrong-type-argument string)
224 (let ((time (match-string 1 string))
225 (zone (match-string 2 string)))
226 (if (not (iso8601--match iso8601--time-match time))
227 (signal 'wrong-type-argument string)
228 (let ((hour (string-to-number (match-string 1 time)))
229 (minute (and (match-string 2 time)
230 (string-to-number (match-string 2 time))))
231 (second (and (match-string 3 time)
232 (string-to-number (match-string 3 time))))
233 ;; Hm...
234 (_millisecond (and (match-string 4 time)
235 (string-to-number (match-string 4 time)))))
236 (iso8601--decoded-time :hour hour
237 :minute (or minute 0)
238 :second (or second 0)
239 :zone (and zone
240 (* 60 (iso8601-parse-zone
241 zone)))))))))
242
243(defun iso8601-parse-zone (string)
244 "Parse STRING, which should be an ISO 8601 time zone.
245Return the number of minutes."
246 (if (not (iso8601--match iso8601--zone-match string))
247 (signal 'wrong-type-argument string)
248 (if (match-string 2 string)
249 ;; HH:MM-ish.
250 (let ((hour (string-to-number (match-string 3 string)))
251 (minute (and (match-string 4 string)
252 (string-to-number (match-string 4 string)))))
253 (* (if (equal (match-string 2 string) "-")
254 -1
255 1)
256 (+ (* hour 60)
257 (or minute 0))))
258 ;; "Z".
259 0)))
260
261(defun iso8601-valid-p (string)
262 "Say whether STRING is a valid ISO 8601 representation."
263 (iso8601--match iso8601--combined-match string))
264
265(defun iso8601-parse-duration (string)
266 "Parse ISO 8601 durations on the form P3Y6M4DT12H30M5S."
267 (cond
268 ((and (iso8601--match iso8601--duration-full-match string)
269 ;; Just a "P" isn't valid; there has to be at least one
270 ;; element, like P1M.
271 (> (length (match-string 0 string)) 2))
272 (iso8601--decoded-time :year (or (match-string 1 string) 0)
273 :month (or (match-string 2 string) 0)
274 :day (or (match-string 3 string) 0)
275 :hour (or (match-string 5 string) 0)
276 :minute (or (match-string 6 string) 0)
277 :second (or (match-string 7 string) 0)))
278 ;; PnW: Weeks.
279 ((iso8601--match iso8601--duration-week-match string)
280 (let ((weeks (string-to-number (match-string 1 string))))
281 ;; Does this make sense? Hm...
282 (iso8601--decoded-time :day (* weeks 7))))
283 ;; P<date>T<time>
284 ((iso8601--match iso8601--duration-combined-match string)
285 (iso8601-parse (substring string 1)))
286 (t
287 (signal 'wrong-type-argument string))))
288
289(defun iso8601-parse-interval (string)
290 "Parse ISO 8601 intervals."
291 (let ((bits (split-string string "/"))
292 start end duration)
293 (if (not (= (length bits) 2))
294 (signal 'wrong-type-argument string)
295 ;; The intervals may be an explicit start/end times, or either a
296 ;; start or an end, and an accompanying duration.
297 (cond
298 ((and (string-match "\\`P" (car bits))
299 (iso8601-valid-p (cadr bits)))
300 (setq duration (iso8601-parse-duration (car bits))
301 end (iso8601-parse (cadr bits))))
302 ((and (string-match "\\`P" (cadr bits))
303 (iso8601-valid-p (car bits)))
304 (setq duration (iso8601-parse-duration (cadr bits))
305 start (iso8601-parse (car bits))))
306 ((and (iso8601-valid-p (car bits))
307 (iso8601-valid-p (cadr bits)))
308 (setq start (iso8601-parse (car bits))
309 end (iso8601-parse (cadr bits))))
310 (t
311 (signal 'wrong-type-argument string))))
312 (unless end
313 (setq end (decoded-time-add start duration)))
314 (unless start
315 (setq start (decoded-time-add end
316 ;; We negate the duration so that
317 ;; we get a subtraction.
318 (mapcar (lambda (elem)
319 (if (numberp elem)
320 (- elem)
321 elem))
322 duration))))
323 (list start end
324 (or duration
325 (decode-time (time-subtract (iso8601--encode-time end)
326 (iso8601--encode-time start))
327 (or (decoded-time-zone end) 0))))))
328
329(defun iso8601--match (regexp string)
330 (string-match (concat "\\`" regexp "\\'") string))
331
332(defun iso8601--value (elem &optional default)
333 (if (stringp elem)
334 (string-to-number elem)
335 (or elem default)))
336
337(cl-defun iso8601--decoded-time (&key second minute hour
338 day month year
339 dst zone)
340 (list (iso8601--value second)
341 (iso8601--value minute)
342 (iso8601--value hour)
343 (iso8601--value day)
344 (iso8601--value month)
345 (iso8601--value year)
346 nil
347 dst
348 zone))
349
350(defun iso8601--encode-time (time)
351 "Like `encode-time', but fill in nil values in TIME."
352 (setq time (copy-sequence time))
353 (unless (decoded-time-second time)
354 (setf (decoded-time-second time) 0))
355 (unless (decoded-time-minute time)
356 (setf (decoded-time-minute time) 0))
357 (unless (decoded-time-hour time)
358 (setf (decoded-time-hour time) 0))
359
360 (unless (decoded-time-day time)
361 (setf (decoded-time-day time) 1))
362 (unless (decoded-time-month time)
363 (setf (decoded-time-month time) 1))
364 (unless (decoded-time-year time)
365 (setf (decoded-time-year time) 0))
366 (encode-time time))
367
368(provide 'iso8601)
369
370;;; iso8601.el ends here