diff options
| -rw-r--r-- | lispref/processes.texi | 402 |
1 files changed, 402 insertions, 0 deletions
diff --git a/lispref/processes.texi b/lispref/processes.texi index 07a72886355..f86a844a876 100644 --- a/lispref/processes.texi +++ b/lispref/processes.texi | |||
| @@ -52,6 +52,7 @@ This function returns @code{t} if @var{object} is a process, | |||
| 52 | * Datagrams:: UDP network connections. | 52 | * Datagrams:: UDP network connections. |
| 53 | * Low-Level Network:: Lower-level but more general function | 53 | * Low-Level Network:: Lower-level but more general function |
| 54 | to create connections and servers. | 54 | to create connections and servers. |
| 55 | * Byte Packing:: Using bindat to pack and unpack binary data. | ||
| 55 | @end menu | 56 | @end menu |
| 56 | 57 | ||
| 57 | @node Subprocess Creation | 58 | @node Subprocess Creation |
| @@ -2015,6 +2016,407 @@ That particular network option is supported by | |||
| 2015 | @code{make-network-process} and @code{set-network-process-option}. | 2016 | @code{make-network-process} and @code{set-network-process-option}. |
| 2016 | @end table | 2017 | @end table |
| 2017 | 2018 | ||
| 2019 | @node Byte Packing | ||
| 2020 | @section Packing and Unpacking Byte Arrays | ||
| 2021 | |||
| 2022 | This section describes how to pack and unpack arrays of bytes, | ||
| 2023 | usually for binary network protocols. These functions convert byte arrays to | ||
| 2024 | alists, and vice versa. The byte array can be represented as a | ||
| 2025 | unibyte string or as a vector of integers, while the alist associates | ||
| 2026 | symbols either with fixed-size objects or with recursive sub-alists. | ||
| 2027 | |||
| 2028 | @cindex serializing | ||
| 2029 | @cindex deserializing | ||
| 2030 | @cindex packing | ||
| 2031 | @cindex unpacking | ||
| 2032 | Conversion from byte arrays to nested alists is also known as | ||
| 2033 | @dfn{deserializing} or @dfn{unpacking}, while going in the opposite | ||
| 2034 | direction is also known as @dfn{serializing} or @dfn{packing}. | ||
| 2035 | |||
| 2036 | @menu | ||
| 2037 | * Bindat Spec:: Describing data layout. | ||
| 2038 | * Bindat Functions:: Doing the unpacking and packing. | ||
| 2039 | * Bindat Examples:: Samples of what bindat.el can do for you! | ||
| 2040 | @end menu | ||
| 2041 | |||
| 2042 | @node Bindat Spec | ||
| 2043 | @subsection Describing Data Layout | ||
| 2044 | |||
| 2045 | To control unpacking and packing, you write a @dfn{data layout | ||
| 2046 | specification}, a special nested list describing named and typed | ||
| 2047 | @dfn{fields}. This specification controls the length of each field to be | ||
| 2048 | processed, and how to pack or unpack it. | ||
| 2049 | |||
| 2050 | @cindex endianness | ||
| 2051 | @cindex big endian | ||
| 2052 | @cindex little endian | ||
| 2053 | @cindex network byte ordering | ||
| 2054 | A field's @dfn{type} describes the size (in bytes) of the object | ||
| 2055 | that the field represents and, in the case of multibyte fields, how | ||
| 2056 | the bytes are ordered within the field. The two possible orderings | ||
| 2057 | are ``big endian'' (also known as ``network byte ordering'') and | ||
| 2058 | ``little endian''. For instance, the number @code{#x23cd} (decimal | ||
| 2059 | 9165) in big endian would be the two bytes @code{#x23} @code{#xcd}; | ||
| 2060 | and in little endian, @code{#xcd} @code{#x23}. Here are the possible | ||
| 2061 | type values: | ||
| 2062 | |||
| 2063 | @table @code | ||
| 2064 | @item u8 | ||
| 2065 | @itemx byte | ||
| 2066 | Unsigned byte, with length 1. | ||
| 2067 | |||
| 2068 | @item u16 | ||
| 2069 | @itemx word | ||
| 2070 | @itemx short | ||
| 2071 | Unsigned integer in network byte order, with length 2. | ||
| 2072 | |||
| 2073 | @item u24 | ||
| 2074 | Unsigned integer in network byte order, with length 3. | ||
| 2075 | |||
| 2076 | @item u32 | ||
| 2077 | @itemx dword | ||
| 2078 | @itemx long | ||
| 2079 | Unsigned integer in network byte order, with length 4. | ||
| 2080 | Note: These values may be limited by Emacs' integer implementation limits. | ||
| 2081 | |||
| 2082 | @item u16r | ||
| 2083 | @itemx u24r | ||
| 2084 | @itemx u32r | ||
| 2085 | Unsigned integer in little endian order, with length 2, 3 and 4, respectively. | ||
| 2086 | |||
| 2087 | @item str @var{len} | ||
| 2088 | String of length @var{len}. | ||
| 2089 | |||
| 2090 | @item strz @var{len} | ||
| 2091 | Zero-terminated string of length @var{len}. | ||
| 2092 | |||
| 2093 | @item vec @var{len} | ||
| 2094 | Vector of @var{len} bytes. | ||
| 2095 | |||
| 2096 | @item ip | ||
| 2097 | Four-byte vector representing an Internet address. For example: | ||
| 2098 | @code{[127 0 0 1]} for localhost. | ||
| 2099 | |||
| 2100 | @item bits @var{len} | ||
| 2101 | List of set bits in @var{len} bytes. The bytes are taken in big | ||
| 2102 | endian order and the bits are numbered starting with @code{8 * | ||
| 2103 | @var{len} @minus{} 1} and ending with zero. For example: @code{bits | ||
| 2104 | 2} unpacks @code{#x28} @code{#x1c} to @code{(2 3 4 11 13)} and | ||
| 2105 | @code{#x1c} @code{#x28} to @code{(3 5 10 11 12)}. | ||
| 2106 | |||
| 2107 | @item (eval @var{form}) | ||
| 2108 | @var{form} is a Lisp expression evaluated at the moment the field is | ||
| 2109 | unpacked or packed. The result of the evaluation should be one of the | ||
| 2110 | above-listed type specifications. | ||
| 2111 | @end table | ||
| 2112 | |||
| 2113 | A field specification generally has the form @code{([@var{name}] | ||
| 2114 | @var{handler})}. The square brackets indicate that @var{name} is | ||
| 2115 | optional. (Don't use names that are symbols meaningful as type | ||
| 2116 | specifications (above) or handler specifications (below), since that | ||
| 2117 | would be ambiguous.) @var{name} can be a symbol or the expression | ||
| 2118 | @code{(eval @var{form})}, in which case @var{form} should evaluate to | ||
| 2119 | a symbol. | ||
| 2120 | |||
| 2121 | @var{handler} describes how to unpack or pack the field and can be one | ||
| 2122 | of the following: | ||
| 2123 | |||
| 2124 | @table @code | ||
| 2125 | @item @var{type} | ||
| 2126 | Unpack/pack this field according to the type specification @var{type}. | ||
| 2127 | |||
| 2128 | @item eval @var{form} | ||
| 2129 | Evaluate @var{form}, a Lisp expression, for side-effect only. If the | ||
| 2130 | field name is specified, the value is bound to that field name. | ||
| 2131 | @var{form} can access and update these dynamically bound variables: | ||
| 2132 | |||
| 2133 | @table @code | ||
| 2134 | @item raw-data | ||
| 2135 | The data as a byte array. | ||
| 2136 | |||
| 2137 | @item pos | ||
| 2138 | Current position of the unpacking or packing operation. | ||
| 2139 | |||
| 2140 | @item struct | ||
| 2141 | Alist. | ||
| 2142 | |||
| 2143 | @item last | ||
| 2144 | Value of the last field processed. | ||
| 2145 | @end table | ||
| 2146 | |||
| 2147 | @item fill @var{len} | ||
| 2148 | Skip @var{len} bytes. In packing, this leaves them unchanged, | ||
| 2149 | which normally means they remain zero. In unpacking, this means | ||
| 2150 | they are ignored. | ||
| 2151 | |||
| 2152 | @item align @var{len} | ||
| 2153 | Skip to the next multiple of @var{len} bytes. | ||
| 2154 | |||
| 2155 | @item struct @var{spec-name} | ||
| 2156 | Process @var{spec-name} as a sub-specification. This describes a | ||
| 2157 | structure nested within another structure. | ||
| 2158 | |||
| 2159 | @item union @var{form} (@var{tag} @var{spec})@dots{} | ||
| 2160 | @c ??? I don't see how one would actually use this. | ||
| 2161 | @c ??? what kind of expression would be useful for @var{form}? | ||
| 2162 | Evaluate @var{form}, a Lisp expression, find the first @var{tag} | ||
| 2163 | that matches it, and process its associated data layout specification | ||
| 2164 | @var{spec}. Matching can occur in one of three ways: | ||
| 2165 | |||
| 2166 | @itemize | ||
| 2167 | @item | ||
| 2168 | If a @var{tag} has the form @code{(eval @var{expr})}, evaluate | ||
| 2169 | @var{expr} with the variable @code{tag} dynamically bound to the value | ||
| 2170 | of @var{form}. A non-@code{nil} result indicates a match. | ||
| 2171 | |||
| 2172 | @item | ||
| 2173 | @var{tag} matches if it is @code{equal} to the value of @var{form}. | ||
| 2174 | |||
| 2175 | @item | ||
| 2176 | @var{tag} matches unconditionally if it is @code{t}. | ||
| 2177 | @end itemize | ||
| 2178 | |||
| 2179 | @item repeat @var{count} @var{field-spec}@dots{} | ||
| 2180 | @var{count} may be an integer, or a list of one element naming a | ||
| 2181 | previous field. For correct operation, each @var{field-spec} must | ||
| 2182 | include a name. | ||
| 2183 | @c ??? What does it MEAN? | ||
| 2184 | @end table | ||
| 2185 | |||
| 2186 | @node Bindat Functions | ||
| 2187 | @subsection Functions to Unpack and Pack Bytes | ||
| 2188 | |||
| 2189 | In the following documentation, @var{spec} refers to a data layout | ||
| 2190 | specification, @var{raw-data} to a byte array, and @var{struct} to an | ||
| 2191 | alist representing unpacked field data. | ||
| 2192 | |||
| 2193 | @defun bindat-unpack spec raw-data &optional pos | ||
| 2194 | This function unpacks data from the byte array @var{raw-data} | ||
| 2195 | according to @var{spec}. Normally this starts unpacking at the | ||
| 2196 | beginning of the byte array, but if @var{pos} is non-@code{nil}, it | ||
| 2197 | specifies a zero-based starting position to use instead. | ||
| 2198 | |||
| 2199 | The value is an alist or nested alist in which each element describes | ||
| 2200 | one unpacked field. | ||
| 2201 | @end defun | ||
| 2202 | |||
| 2203 | @defun bindat-get-field struct &rest name | ||
| 2204 | This function selects a field's data from the nested alist | ||
| 2205 | @var{struct}. Usually @var{struct} was returned by | ||
| 2206 | @code{bindat-unpack}. If @var{name} corresponds to just one argument, | ||
| 2207 | that means to extract a top-level field value. Multiple @var{name} | ||
| 2208 | arguments specify repeated lookup of sub-structures. An integer name | ||
| 2209 | acts as an array index. | ||
| 2210 | |||
| 2211 | For example, if @var{name} is @code{(a b 2 c)}, that means to find | ||
| 2212 | field @code{c} in the third element of subfield @code{b} of field | ||
| 2213 | @code{a}. (This corresponds to @code{struct.a.b[2].c} in C.) | ||
| 2214 | @end defun | ||
| 2215 | |||
| 2216 | @defun bindat-length spec struct | ||
| 2217 | @c ??? I don't understand this at all -- rms | ||
| 2218 | This function returns the length in bytes of @var{struct}, according | ||
| 2219 | to @var{spec}. | ||
| 2220 | @end defun | ||
| 2221 | |||
| 2222 | @defun bindat-pack spec struct &optional raw-data pos | ||
| 2223 | This function returns a byte array packed according to @var{spec} from | ||
| 2224 | the data in the alist @var{struct}. Normally it creates and fills a | ||
| 2225 | new byte array starting at the beginning. However, if @var{raw-data} | ||
| 2226 | is non-@code{nil}, it specifies a pre-allocated string or vector to | ||
| 2227 | pack into. If @var{pos} is non-@code{nil}, it specifies the starting | ||
| 2228 | offset for packing into @var{raw-data}. | ||
| 2229 | |||
| 2230 | @c ??? Isn't this a bug? Shouldn't it always be unibyte? | ||
| 2231 | Note: The result is a multibyte string; use @code{string-make-unibyte} | ||
| 2232 | on it to make it unibyte if necessary. | ||
| 2233 | @end defun | ||
| 2234 | |||
| 2235 | @defun bindat-ip-to-string ip | ||
| 2236 | Convert the Internet address vector @var{ip} to a string in the usual | ||
| 2237 | dotted notation. | ||
| 2238 | |||
| 2239 | @example | ||
| 2240 | (bindat-ip-to-string [127 0 0 1]) | ||
| 2241 | @result{} "127.0.0.1" | ||
| 2242 | @end example | ||
| 2243 | @end defun | ||
| 2244 | |||
| 2245 | @node Bindat Examples | ||
| 2246 | @subsection Examples of Byte Unpacking and Packing | ||
| 2247 | |||
| 2248 | Here is a complete example of byte unpacking and packing: | ||
| 2249 | |||
| 2250 | @lisp | ||
| 2251 | (defvar fcookie-index-spec | ||
| 2252 | '((:version u32) | ||
| 2253 | (:count u32) | ||
| 2254 | (:longest u32) | ||
| 2255 | (:shortest u32) | ||
| 2256 | (:flags u32) | ||
| 2257 | (:delim u8) | ||
| 2258 | (:ignored fill 3) | ||
| 2259 | (:offset repeat (:count) | ||
| 2260 | (:foo u32))) | ||
| 2261 | "Description of a fortune cookie index file's contents.") | ||
| 2262 | |||
| 2263 | (defun fcookie (cookies &optional index) | ||
| 2264 | "Display a random fortune cookie from file COOKIES. | ||
| 2265 | Optional second arg INDEX specifies the associated index | ||
| 2266 | filename, which is by default constructed by appending | ||
| 2267 | \".dat\" to COOKIES. Display cookie text in possibly | ||
| 2268 | new buffer \"*Fortune Cookie: BASENAME*\" where BASENAME | ||
| 2269 | is COOKIES without the directory part." | ||
| 2270 | (interactive "fCookies file: ") | ||
| 2271 | (let* ((info (with-temp-buffer | ||
| 2272 | (insert-file-contents-literally | ||
| 2273 | (or index (concat cookies ".dat"))) | ||
| 2274 | (bindat-unpack fcookie-index-spec | ||
| 2275 | (buffer-string)))) | ||
| 2276 | (sel (random (bindat-get-field info :count))) | ||
| 2277 | (beg (cdar (bindat-get-field info :offset sel))) | ||
| 2278 | (end (or (cdar (bindat-get-field info :offset (1+ sel))) | ||
| 2279 | (nth 7 (file-attributes cookies))))) | ||
| 2280 | (switch-to-buffer (get-buffer-create | ||
| 2281 | (format "*Fortune Cookie: %s*" | ||
| 2282 | (file-name-nondirectory cookies)))) | ||
| 2283 | (erase-buffer) | ||
| 2284 | (insert-file-contents-literally cookies nil beg (- end 3)))) | ||
| 2285 | |||
| 2286 | (defun fcookie-create-index (cookies &optional index delim) | ||
| 2287 | "Scan file COOKIES, and write out its index file. | ||
| 2288 | Optional second arg INDEX specifies the index filename, | ||
| 2289 | which is by default constructed by appending \".dat\" to | ||
| 2290 | COOKIES. Optional third arg DELIM specifies the unibyte | ||
| 2291 | character which, when found on a line of its own in | ||
| 2292 | COOKIES, indicates the border between entries." | ||
| 2293 | (interactive "fCookies file: ") | ||
| 2294 | (setq delim (or delim ?%)) | ||
| 2295 | (let ((delim-line (format "\n%c\n" delim)) | ||
| 2296 | (count 0) | ||
| 2297 | (max 0) | ||
| 2298 | min p q len offsets) | ||
| 2299 | (unless (= 3 (string-bytes delim-line)) | ||
| 2300 | (error "Delimiter cannot be represented in one byte")) | ||
| 2301 | (with-temp-buffer | ||
| 2302 | (insert-file-contents-literally cookies) | ||
| 2303 | (while (and (setq p (point)) | ||
| 2304 | (search-forward delim-line (point-max) t) | ||
| 2305 | (setq len (- (point) 3 p))) | ||
| 2306 | (setq count (1+ count) | ||
| 2307 | max (max max len) | ||
| 2308 | min (min (or min max) len) | ||
| 2309 | offsets (cons (1- p) offsets)))) | ||
| 2310 | (with-temp-buffer | ||
| 2311 | (set-buffer-multibyte nil) | ||
| 2312 | (insert (string-make-unibyte | ||
| 2313 | (bindat-pack | ||
| 2314 | fcookie-index-spec | ||
| 2315 | `((:version . 2) | ||
| 2316 | (:count . ,count) | ||
| 2317 | (:longest . ,max) | ||
| 2318 | (:shortest . ,min) | ||
| 2319 | (:flags . 0) | ||
| 2320 | (:delim . ,delim) | ||
| 2321 | (:offset . ,(mapcar (lambda (o) | ||
| 2322 | (list (cons :foo o))) | ||
| 2323 | (nreverse offsets))))))) | ||
| 2324 | (let ((coding-system-for-write 'raw-text-unix)) | ||
| 2325 | (write-file (or index (concat cookies ".dat"))))))) | ||
| 2326 | @end lisp | ||
| 2327 | |||
| 2328 | Following is an example of defining and unpacking a complex structure. | ||
| 2329 | Consider the following C structures: | ||
| 2330 | |||
| 2331 | @example | ||
| 2332 | struct header @{ | ||
| 2333 | unsigned long dest_ip; | ||
| 2334 | unsigned long src_ip; | ||
| 2335 | unsigned short dest_port; | ||
| 2336 | unsigned short src_port; | ||
| 2337 | @}; | ||
| 2338 | |||
| 2339 | struct data @{ | ||
| 2340 | unsigned char type; | ||
| 2341 | unsigned char opcode; | ||
| 2342 | unsigned short length; /* In little endian order */ | ||
| 2343 | unsigned char id[8]; /* nul-terminated string */ | ||
| 2344 | unsigned char data[/* (length + 3) & ~3 */]; | ||
| 2345 | @}; | ||
| 2346 | |||
| 2347 | struct packet @{ | ||
| 2348 | struct header header; | ||
| 2349 | unsigned char items; | ||
| 2350 | unsigned char filler[3]; | ||
| 2351 | struct data item[/* items */]; | ||
| 2352 | |||
| 2353 | @}; | ||
| 2354 | @end example | ||
| 2355 | |||
| 2356 | The corresponding data layout specification: | ||
| 2357 | |||
| 2358 | @lisp | ||
| 2359 | (setq header-spec | ||
| 2360 | '((dest-ip ip) | ||
| 2361 | (src-ip ip) | ||
| 2362 | (dest-port u16) | ||
| 2363 | (src-port u16))) | ||
| 2364 | |||
| 2365 | (setq data-spec | ||
| 2366 | '((type u8) | ||
| 2367 | (opcode u8) | ||
| 2368 | (length u16r) ;; little endian order | ||
| 2369 | (id strz 8) | ||
| 2370 | (data vec (length)) | ||
| 2371 | (align 4))) | ||
| 2372 | |||
| 2373 | (setq packet-spec | ||
| 2374 | '((header struct header-spec) | ||
| 2375 | (items u8) | ||
| 2376 | (fill 3) | ||
| 2377 | (item repeat (items) | ||
| 2378 | (struct data-spec)))) | ||
| 2379 | @end lisp | ||
| 2380 | |||
| 2381 | A binary data representation: | ||
| 2382 | |||
| 2383 | @lisp | ||
| 2384 | (setq binary-data | ||
| 2385 | [ 192 168 1 100 192 168 1 101 01 28 21 32 2 0 0 0 | ||
| 2386 | 2 3 5 0 ?A ?B ?C ?D ?E ?F 0 0 1 2 3 4 5 0 0 0 | ||
| 2387 | 1 4 7 0 ?B ?C ?D ?E ?F ?G 0 0 6 7 8 9 10 11 12 0 ]) | ||
| 2388 | @end lisp | ||
| 2389 | |||
| 2390 | The corresponding decoded structure: | ||
| 2391 | |||
| 2392 | @lisp | ||
| 2393 | (setq decoded-structure (bindat-unpack packet-spec binary-data)) | ||
| 2394 | @result{} | ||
| 2395 | ((header | ||
| 2396 | (dest-ip . [192 168 1 100]) | ||
| 2397 | (src-ip . [192 168 1 101]) | ||
| 2398 | (dest-port . 284) | ||
| 2399 | (src-port . 5408)) | ||
| 2400 | (items . 2) | ||
| 2401 | (item ((data . [1 2 3 4 5]) | ||
| 2402 | (id . "ABCDEF") | ||
| 2403 | (length . 5) | ||
| 2404 | (opcode . 3) | ||
| 2405 | (type . 2)) | ||
| 2406 | ((data . [6 7 8 9 10 11 12]) | ||
| 2407 | (id . "BCDEFG") | ||
| 2408 | (length . 7) | ||
| 2409 | (opcode . 4) | ||
| 2410 | (type . 1)))) | ||
| 2411 | @end lisp | ||
| 2412 | |||
| 2413 | Fetching data from this structure: | ||
| 2414 | |||
| 2415 | @lisp | ||
| 2416 | (bindat-get-field decoded-structure 'item 1 'id) | ||
| 2417 | @result{} "BCDEFG" | ||
| 2418 | @end lisp | ||
| 2419 | |||
| 2018 | @ignore | 2420 | @ignore |
| 2019 | arch-tag: ba9da253-e65f-4e7f-b727-08fba0a1df7a | 2421 | arch-tag: ba9da253-e65f-4e7f-b727-08fba0a1df7a |
| 2020 | @end ignore | 2422 | @end ignore |