diff options
| author | Magnus Henoch | 2006-10-09 20:10:13 +0000 |
|---|---|---|
| committer | Magnus Henoch | 2006-10-09 20:10:13 +0000 |
| commit | f6fb70fc69a26f42e6f12394654c769c24dfa8bb (patch) | |
| tree | fecba1723783c2deac4cd47c6f9fed5a087f524b | |
| parent | 70846e2ad81362eaee6847e5c874ff1d69f4a61d (diff) | |
| download | emacs-f6fb70fc69a26f42e6f12394654c769c24dfa8bb.tar.gz emacs-f6fb70fc69a26f42e6f12394654c769c24dfa8bb.zip | |
(url-generic-parse-url): Handle URLs with empty path component and
non-empty query component. Untangle path, query and fragment parsing
code. Add references to RFC 3986 in comments.
(url-recreate-url-attributes): Start query string with "?", not ";".
| -rw-r--r-- | lisp/url/ChangeLog | 9 | ||||
| -rw-r--r-- | lisp/url/url-parse.el | 49 |
2 files changed, 36 insertions, 22 deletions
diff --git a/lisp/url/ChangeLog b/lisp/url/ChangeLog
index a6afb0ba20f..bed12ecc459 100644
--- a/lisp/url/ChangeLog
+++ b/lisp/url/ChangeLog
| @@ -1,3 +1,12 @@ | |||
| 1 | 2006-10-09 Magnus Henoch <mange@freemail.hu> | ||
| 2 | |||
| 3 | * url-parse.el (url-generic-parse-url): Handle URLs with empty | ||
| 4 | path component and non-empty query component. Untangle path, | ||
| 5 | query and fragment parsing code. Add references to RFC 3986 in | ||
| 6 | comments. | ||
| 7 | (url-recreate-url-attributes): Start query string with "?", not | ||
| 8 | ";". | ||
| 9 | |||
| 1 | 2006-09-20 Stefan Monnier <monnier@iro.umontreal.ca> | 10 | 2006-09-20 Stefan Monnier <monnier@iro.umontreal.ca> |
| 2 | 11 | ||
| 3 | * url-dav.el (url-dav-file-attributes): Simplify. | 12 | * url-dav.el (url-dav-file-attributes): Simplify. |
diff --git a/lisp/url/url-parse.el b/lisp/url/url-parse.el
index 1e4d93a861e..2e4fc8a9f27 100644
--- a/lisp/url/url-parse.el
+++ b/lisp/url/url-parse.el
| @@ -108,7 +108,7 @@ | |||
| 108 | (defun url-recreate-url-attributes (urlobj) | 108 | (defun url-recreate-url-attributes (urlobj) |
| 109 | "Recreate the attributes of an URL string from the parsed URLOBJ." | 109 | "Recreate the attributes of an URL string from the parsed URLOBJ." |
| 110 | (when (url-attributes urlobj) | 110 | (when (url-attributes urlobj) |
| 111 | (concat ";" | 111 | (concat "?" |
| 112 | (mapconcat (lambda (x) | 112 | (mapconcat (lambda (x) |
| 113 | (if (cdr x) | 113 | (if (cdr x) |
| 114 | (concat (car x) "=" (cdr x)) | 114 | (concat (car x) "=" (cdr x)) |
| @@ -120,11 +120,16 @@ | |||
| 120 | "Return a vector of the parts of URL. | 120 | "Return a vector of the parts of URL. |
| 121 | Format is: | 121 | Format is: |
| 122 | \[TYPE USER PASSWORD HOST PORT FILE TARGET ATTRIBUTES FULL\]" | 122 | \[TYPE USER PASSWORD HOST PORT FILE TARGET ATTRIBUTES FULL\]" |
| 123 | ;; See RFC 3986. | ||
| 123 | (cond | 124 | (cond |
| 124 | ((null url) | 125 | ((null url) |
| 125 | (make-vector 9 nil)) | 126 | (make-vector 9 nil)) |
| 126 | ((or (not (string-match url-nonrelative-link url)) | 127 | ((or (not (string-match url-nonrelative-link url)) |
| 127 | (= ?/ (string-to-char url))) | 128 | (= ?/ (string-to-char url))) |
| 129 | ;; This isn't correct, as a relative URL can be a fragment link | ||
| 130 | ;; (e.g. "#foo") and many other things (see section 4.2). | ||
| 131 | ;; However, let's not fix something that isn't broken, especially | ||
| 132 | ;; when close to a release. | ||
| 128 | (let ((retval (make-vector 9 nil))) | 133 | (let ((retval (make-vector 9 nil))) |
| 129 | (url-set-filename retval url) | 134 | (url-set-filename retval url) |
| 130 | (url-set-full retval nil) | 135 | (url-set-full retval nil) |
| @@ -148,6 +153,8 @@ Format is: | |||
| 148 | (insert url) | 153 | (insert url) |
| 149 | (goto-char (point-min)) | 154 | (goto-char (point-min)) |
| 150 | (setq save-pos (point)) | 155 | (setq save-pos (point)) |
| 156 | |||
| 157 | ;; 3.1. Scheme | ||
| 151 | (if (not (looking-at "//")) | 158 | (if (not (looking-at "//")) |
| 152 | (progn | 159 | (progn |
| 153 | (skip-chars-forward "a-zA-Z+.\\-") | 160 | (skip-chars-forward "a-zA-Z+.\\-") |
| @@ -156,13 +163,13 @@ Format is: | |||
| 156 | (skip-chars-forward ":") | 163 | (skip-chars-forward ":") |
| 157 | (setq save-pos (point)))) | 164 | (setq save-pos (point)))) |
| 158 | 165 | ||
| 159 | ;; We are doing a fully specified URL, with hostname and all | 166 | ;; 3.2. Authority |
| 160 | (if (looking-at "//") | 167 | (if (looking-at "//") |
| 161 | (progn | 168 | (progn |
| 162 | (setq full t) | 169 | (setq full t) |
| 163 | (forward-char 2) | 170 | (forward-char 2) |
| 164 | (setq save-pos (point)) | 171 | (setq save-pos (point)) |
| 165 | (skip-chars-forward "^/") | 172 | (skip-chars-forward "^/\\?#") |
| 166 | (setq host (buffer-substring save-pos (point))) | 173 | (setq host (buffer-substring save-pos (point))) |
| 167 | (if (string-match "^\\([^@]+\\)@" host) | 174 | (if (string-match "^\\([^@]+\\)@" host) |
| 168 | (setq user (match-string 1 host) | 175 | (setq user (match-string 1 host) |
| @@ -170,6 +177,7 @@ Format is: | |||
| 170 | (if (and user (string-match "\\([^:]+\\):\\(.*\\)" user)) | 177 | (if (and user (string-match "\\([^:]+\\):\\(.*\\)" user)) |
| 171 | (setq pass (match-string 2 user) | 178 | (setq pass (match-string 2 user) |
| 172 | user (match-string 1 user))) | 179 | user (match-string 1 user))) |
| 180 | ;; This gives wrong results for IPv6 literal addresses. | ||
| 173 | (if (string-match ":\\([0-9+]+\\)" host) | 181 | (if (string-match ":\\([0-9+]+\\)" host) |
| 174 | (setq port (string-to-number (match-string 1 host)) | 182 | (setq port (string-to-number (match-string 1 host)) |
| 175 | host (substring host 0 (match-beginning 0)))) | 183 | host (substring host 0 (match-beginning 0)))) |
| @@ -181,29 +189,26 @@ Format is: | |||
| 181 | (if (not port) | 189 | (if (not port) |
| 182 | (setq port (url-scheme-get-property prot 'default-port))) | 190 | (setq port (url-scheme-get-property prot 'default-port))) |
| 183 | 191 | ||
| 184 | ;; Gross hack to preserve ';' in data URLs | 192 | ;; 3.3. Path |
| 185 | |||
| 186 | (setq save-pos (point)) | 193 | (setq save-pos (point)) |
| 194 | (skip-chars-forward "^#?") | ||
| 195 | (setq file (buffer-substring save-pos (point))) | ||
| 187 | 196 | ||
| 188 | (if (string= "data" prot) | 197 | ;; 3.4. Query |
| 189 | (goto-char (point-max)) | 198 | (when (looking-at "\\?") |
| 190 | ;; Now check for references | 199 | (forward-char 1) |
| 200 | (setq save-pos (point)) | ||
| 191 | (skip-chars-forward "^#") | 201 | (skip-chars-forward "^#") |
| 192 | (if (eobp) | 202 | ;; RFC 3986 specifies no general way of parsing the query |
| 193 | nil | 203 | ;; string, but `url-parse-args' seems universal enough. |
| 194 | (delete-region | 204 | (setq attr (url-parse-args (buffer-substring save-pos (point)) t) |
| 195 | (point) | 205 | attr (nreverse attr))) |
| 196 | (progn | 206 | |
| 197 | (skip-chars-forward "#") | 207 | ;; 3.5. Fragment |
| 198 | (setq refs (buffer-substring (point) (point-max))) | 208 | (when (looking-at "#") |
| 199 | (point-max)))) | 209 | (forward-char 1) |
| 200 | (goto-char save-pos) | 210 | (setq refs (buffer-substring (point) (point-max)))) |
| 201 | (skip-chars-forward "^;") | ||
| 202 | (if (not (eobp)) | ||
| 203 | (setq attr (url-parse-args (buffer-substring (point) (point-max)) t) | ||
| 204 | attr (nreverse attr)))) | ||
| 205 | 211 | ||
| 206 | (setq file (buffer-substring save-pos (point))) | ||
| 207 | (if (and host (string-match "%[0-9][0-9]" host)) | 212 | (if (and host (string-match "%[0-9][0-9]" host)) |
| 208 | (setq host (url-unhex-string host))) | 213 | (setq host (url-unhex-string host))) |
| 209 | (vector prot user pass host port file refs attr full)))))) | 214 | (vector prot user pass host port file refs attr full)))))) |