aboutsummaryrefslogtreecommitdiffstats
path: root/src/amazons3/S3.py
diff options
context:
space:
mode:
authorCraig Jackson2011-12-13 14:46:59 -0700
committerCraig Jackson2011-12-13 14:46:59 -0700
commit647c078f0c801eb9fa63dfc7ba3c017b30e4d467 (patch)
treea13b16dbd41bb39269c7649fcc2f2ad77137500a /src/amazons3/S3.py
parentdb95070a37f70a00f80a1a2a9ba732c55f6bee1b (diff)
downloadamazons3-py-647c078f0c801eb9fa63dfc7ba3c017b30e4d467.tar.gz
amazons3-py-647c078f0c801eb9fa63dfc7ba3c017b30e4d467.zip
Made the project available in pypi. Involved moving files and
initializing a setup.py.
Diffstat (limited to 'src/amazons3/S3.py')
-rw-r--r--src/amazons3/S3.py633
1 files changed, 633 insertions, 0 deletions
diff --git a/src/amazons3/S3.py b/src/amazons3/S3.py
new file mode 100644
index 0000000..0999f27
--- /dev/null
+++ b/src/amazons3/S3.py
@@ -0,0 +1,633 @@
1#!/usr/bin/env python
2
3# This software code is made available "AS IS" without warranties of any
4# kind. You may copy, display, modify and redistribute the software
5# code either by itself or as incorporated into your code; provided that
6# you do not remove any proprietary notices. Your use of this software
7# code is at your own risk and you waive any claim against Amazon
8# Digital Services, Inc. or its affiliates with respect to your use of
9# this software code. (c) 2006-2007 Amazon Digital Services, Inc. or its
10# affiliates.
11
12import base64
13import hmac
14import httplib
15import re
16from hashlib import sha1
17import sys
18import time
19import urllib
20import urlparse
21import xml.sax
22
23DEFAULT_HOST = 's3.amazonaws.com'
24PORTS_BY_SECURITY = { True: 443, False: 80 }
25METADATA_PREFIX = 'x-amz-meta-'
26AMAZON_HEADER_PREFIX = 'x-amz-'
27
28# generates the aws canonical string for the given parameters
29def canonical_string(method, bucket="", key="", query_args={}, headers={}, expires=None):
30 interesting_headers = {}
31 for header_key in headers:
32 lk = header_key.lower()
33 if lk in ['content-md5', 'content-type', 'date'] or lk.startswith(AMAZON_HEADER_PREFIX):
34 interesting_headers[lk] = headers[header_key].strip()
35
36 # these keys get empty strings if they don't exist
37 if not interesting_headers.has_key('content-type'):
38 interesting_headers['content-type'] = ''
39 if not interesting_headers.has_key('content-md5'):
40 interesting_headers['content-md5'] = ''
41
42 # just in case someone used this. it's not necessary in this lib.
43 if interesting_headers.has_key('x-amz-date'):
44 interesting_headers['date'] = ''
45
46 # if you're using expires for query string auth, then it trumps date
47 # (and x-amz-date)
48 if expires:
49 interesting_headers['date'] = str(expires)
50
51 sorted_header_keys = interesting_headers.keys()
52 sorted_header_keys.sort()
53
54 buf = "%s\n" % method
55 for header_key in sorted_header_keys:
56 if header_key.startswith(AMAZON_HEADER_PREFIX):
57 buf += "%s:%s\n" % (header_key, interesting_headers[header_key])
58 else:
59 buf += "%s\n" % interesting_headers[header_key]
60
61 # append the bucket if it exists
62 if bucket != "":
63 buf += "/%s" % bucket
64
65 # add the key. even if it doesn't exist, add the slash
66 buf += "/%s" % urllib.quote_plus(key.encode('utf-8'))
67
68 # handle special query string arguments
69
70 if query_args.has_key("acl"):
71 buf += "?acl"
72 elif query_args.has_key("torrent"):
73 buf += "?torrent"
74 elif query_args.has_key("logging"):
75 buf += "?logging"
76 elif query_args.has_key("location"):
77 buf += "?location"
78
79 return buf
80
81# computes the base64'ed hmac-sha hash of the canonical string and the secret
82# access key, optionally urlencoding the result
83def encode(aws_secret_access_key, str, urlencode=False):
84 b64_hmac = base64.encodestring(hmac.new(aws_secret_access_key, str, sha1).digest()).strip()
85 if urlencode:
86 return urllib.quote_plus(b64_hmac)
87 else:
88 return b64_hmac
89
90def merge_meta(headers, metadata):
91 final_headers = headers.copy()
92 for k in metadata.keys():
93 final_headers[METADATA_PREFIX + k] = metadata[k]
94
95 return final_headers
96
97# builds the query arg string
98def query_args_hash_to_string(query_args):
99 query_string = ""
100 pairs = []
101 for k, v in query_args.items():
102 piece = k
103 if v != None:
104 piece += "=%s" % urllib.quote_plus(str(v).encode('utf-8'))
105 pairs.append(piece)
106
107 return '&'.join(pairs)
108
109
110class CallingFormat:
111 PATH = 1
112 SUBDOMAIN = 2
113 VANITY = 3
114
115 def build_url_base(protocol, server, port, bucket, calling_format):
116 url_base = '%s://' % protocol
117
118 if bucket == '':
119 url_base += server
120 elif calling_format == CallingFormat.SUBDOMAIN:
121 url_base += "%s.%s" % (bucket, server)
122 elif calling_format == CallingFormat.VANITY:
123 url_base += bucket
124 else:
125 url_base += server
126
127 url_base += ":%s" % port
128
129 if (bucket != '') and (calling_format == CallingFormat.PATH):
130 url_base += "/%s" % bucket
131
132 return url_base
133
134 build_url_base = staticmethod(build_url_base)
135
136
137
138class Location:
139 DEFAULT = None
140 EU = 'EU'
141
142
143
144class AWSAuthConnection:
145 def __init__(self, aws_access_key_id, aws_secret_access_key, is_secure=True,
146 server=DEFAULT_HOST, port=None, calling_format=CallingFormat.SUBDOMAIN):
147
148 if not port:
149 port = PORTS_BY_SECURITY[is_secure]
150
151 self.aws_access_key_id = aws_access_key_id
152 self.aws_secret_access_key = aws_secret_access_key
153 self.is_secure = is_secure
154 self.server = server
155 self.port = port
156 self.calling_format = calling_format
157
158 def create_bucket(self, bucket, headers={}):
159 return Response(self._make_request('PUT', bucket, '', {}, headers))
160
161 def create_located_bucket(self, bucket, location=Location.DEFAULT, headers={}):
162 if location == Location.DEFAULT:
163 body = ""
164 else:
165 body = "<CreateBucketConstraint><LocationConstraint>" + \
166 location + \
167 "</LocationConstraint></CreateBucketConstraint>"
168 return Response(self._make_request('PUT', bucket, '', {}, headers, body))
169
170 def check_bucket_exists(self, bucket):
171 return self._make_request('HEAD', bucket, '', {}, {})
172
173 def list_bucket(self, bucket, options={}, headers={}):
174 return ListBucketResponse(self._make_request('GET', bucket, '', options, headers))
175
176 def delete_bucket(self, bucket, headers={}):
177 return Response(self._make_request('DELETE', bucket, '', {}, headers))
178
179 def put(self, bucket, key, object, headers={}):
180 if not isinstance(object, S3Object):
181 object = S3Object(object)
182
183 return Response(
184 self._make_request(
185 'PUT',
186 bucket,
187 key,
188 {},
189 headers,
190 object.data,
191 object.metadata))
192
193 def get(self, bucket, key, headers={}):
194 return GetResponse(
195 self._make_request('GET', bucket, key, {}, headers))
196
197 def delete(self, bucket, key, headers={}):
198 return Response(
199 self._make_request('DELETE', bucket, key, {}, headers))
200
201 def get_bucket_logging(self, bucket, headers={}):
202 return GetResponse(self._make_request('GET', bucket, '', { 'logging': None }, headers))
203
204 def put_bucket_logging(self, bucket, logging_xml_doc, headers={}):
205 return Response(self._make_request('PUT', bucket, '', { 'logging': None }, headers, logging_xml_doc))
206
207 def get_bucket_acl(self, bucket, headers={}):
208 return self.get_acl(bucket, '', headers)
209
210 def get_acl(self, bucket, key, headers={}):
211 return GetResponse(
212 self._make_request('GET', bucket, key, { 'acl': None }, headers))
213
214 def put_bucket_acl(self, bucket, acl_xml_document, headers={}):
215 return self.put_acl(bucket, '', acl_xml_document, headers)
216
217 def put_acl(self, bucket, key, acl_xml_document, headers={}):
218 return Response(
219 self._make_request(
220 'PUT',
221 bucket,
222 key,
223 { 'acl': None },
224 headers,
225 acl_xml_document))
226
227 def list_all_my_buckets(self, headers={}):
228 return ListAllMyBucketsResponse(self._make_request('GET', '', '', {}, headers))
229
230 def get_bucket_location(self, bucket):
231 return LocationResponse(self._make_request('GET', bucket, '', {'location' : None}))
232
233 # end public methods
234
235 def _make_request(self, method, bucket='', key='', query_args={}, headers={}, data='', metadata={}):
236
237 server = ''
238 if bucket == '':
239 server = self.server
240 elif self.calling_format == CallingFormat.SUBDOMAIN:
241 server = "%s.%s" % (bucket, self.server)
242 elif self.calling_format == CallingFormat.VANITY:
243 server = bucket
244 else:
245 server = self.server
246
247 path = ''
248
249 if (bucket != '') and (self.calling_format == CallingFormat.PATH):
250 path += "/%s" % bucket
251
252 # add the slash after the bucket regardless
253 # the key will be appended if it is non-empty
254 path += "/%s" % urllib.quote_plus(key.encode('utf-8'))
255
256
257 # build the path_argument string
258 # add the ? in all cases since
259 # signature and credentials follow path args
260 if len(query_args):
261 path += "?" + query_args_hash_to_string(query_args)
262
263 is_secure = self.is_secure
264 host = "%s:%d" % (server, self.port)
265 while True:
266 if (is_secure):
267 connection = httplib.HTTPSConnection(host)
268 else:
269 connection = httplib.HTTPConnection(host)
270
271 final_headers = merge_meta(headers, metadata)
272 # add auth header
273 self._add_aws_auth_header(final_headers, method, bucket, key, query_args)
274
275 try:
276 connection.request(method, path, data, final_headers)
277 except:
278 # Try try again
279 connection.close()
280 continue
281
282 try:
283 resp = connection.getresponse()
284 except:
285 # Sometimes the connection is reset by peer. If that happens
286 # just try it again and we'll see what happens.
287 connection.close()
288 continue
289
290 if resp.status < 300 or resp.status >= 400:
291 return resp
292 # handle redirect
293 location = resp.getheader('location')
294 if not location:
295 return resp
296 # (close connection)
297 resp.read()
298 scheme, host, path, params, query, fragment \
299 = urlparse.urlparse(location)
300 if scheme == "http":
301 is_secure = True
302 elif scheme == "https": is_secure = False
303 else: raise invalidURL("Not http/https: " + location)
304 if query:
305 path += "?" + query
306 # retry with redirect
307 connection.close()
308
309 def _add_aws_auth_header(self, headers, method, bucket, key, query_args):
310 if not headers.has_key('Date'):
311 headers['Date'] = time.strftime("%a, %d %b %Y %X GMT", time.gmtime())
312
313 c_string = canonical_string(method, bucket, key, query_args, headers)
314 headers['Authorization'] = \
315 "AWS %s:%s" % (self.aws_access_key_id, encode(self.aws_secret_access_key, c_string))
316
317
318class QueryStringAuthGenerator:
319 # by default, expire in 1 minute
320 DEFAULT_EXPIRES_IN = 60
321
322 def __init__(self, aws_access_key_id, aws_secret_access_key, is_secure=True,
323 server=DEFAULT_HOST, port=None, calling_format=CallingFormat.SUBDOMAIN):
324
325 if not port:
326 port = PORTS_BY_SECURITY[is_secure]
327
328 self.aws_access_key_id = aws_access_key_id
329 self.aws_secret_access_key = aws_secret_access_key
330 if (is_secure):
331 self.protocol = 'https'
332 else:
333 self.protocol = 'http'
334
335 self.is_secure = is_secure
336 self.server = server
337 self.port = port
338 self.calling_format = calling_format
339 self.__expires_in = QueryStringAuthGenerator.DEFAULT_EXPIRES_IN
340 self.__expires = None
341
342 # for backwards compatibility with older versions
343 self.server_name = "%s:%s" % (self.server, self.port)
344
345 def set_expires_in(self, expires_in):
346 self.__expires_in = expires_in
347 self.__expires = None
348
349 def set_expires(self, expires):
350 self.__expires = expires
351 self.__expires_in = None
352
353 def create_bucket(self, bucket, headers={}):
354 return self.generate_url('PUT', bucket, '', {}, headers)
355
356 def list_bucket(self, bucket, options={}, headers={}):
357 return self.generate_url('GET', bucket, '', options, headers)
358
359 def delete_bucket(self, bucket, headers={}):
360 return self.generate_url('DELETE', bucket, '', {}, headers)
361
362 def put(self, bucket, key, object, headers={}):
363 if not isinstance(object, S3Object):
364 object = S3Object(object)
365
366 return self.generate_url(
367 'PUT',
368 bucket,
369 key,
370 {},
371 merge_meta(headers, object.metadata))
372
373 def get(self, bucket, key, headers={}):
374 return self.generate_url('GET', bucket, key, {}, headers)
375
376 def delete(self, bucket, key, headers={}):
377 return self.generate_url('DELETE', bucket, key, {}, headers)
378
379 def get_bucket_logging(self, bucket, headers={}):
380 return self.generate_url('GET', bucket, '', { 'logging': None }, headers)
381
382 def put_bucket_logging(self, bucket, logging_xml_doc, headers={}):
383 return self.generate_url('PUT', bucket, '', { 'logging': None }, headers)
384
385 def get_bucket_acl(self, bucket, headers={}):
386 return self.get_acl(bucket, '', headers)
387
388 def get_acl(self, bucket, key='', headers={}):
389 return self.generate_url('GET', bucket, key, { 'acl': None }, headers)
390
391 def put_bucket_acl(self, bucket, acl_xml_document, headers={}):
392 return self.put_acl(bucket, '', acl_xml_document, headers)
393
394 # don't really care what the doc is here.
395 def put_acl(self, bucket, key, acl_xml_document, headers={}):
396 return self.generate_url('PUT', bucket, key, { 'acl': None }, headers)
397
398 def list_all_my_buckets(self, headers={}):
399 return self.generate_url('GET', '', '', {}, headers)
400
401 def make_bare_url(self, bucket, key=''):
402 full_url = self.generate_url(self, bucket, key)
403 return full_url[:full_url.index('?')]
404
405 def generate_url(self, method, bucket='', key='', query_args={}, headers={}):
406 expires = 0
407 if self.__expires_in != None:
408 expires = int(time.time() + self.__expires_in)
409 elif self.__expires != None:
410 expires = int(self.__expires)
411 else:
412 raise "Invalid expires state"
413
414 canonical_str = canonical_string(method, bucket, key, query_args, headers, expires)
415 encoded_canonical = encode(self.aws_secret_access_key, canonical_str)
416
417 url = CallingFormat.build_url_base(self.protocol, self.server, self.port, bucket, self.calling_format)
418
419 url += "/%s" % urllib.quote_plus(key.encode('utf-8'))
420
421 query_args['Signature'] = encoded_canonical
422 query_args['Expires'] = expires
423 query_args['AWSAccessKeyId'] = self.aws_access_key_id
424
425 url += "?%s" % query_args_hash_to_string(query_args)
426
427 return url
428
429
430class S3Object:
431 def __init__(self, data, metadata={}):
432 self.data = data
433 self.metadata = metadata
434
435class Owner:
436 def __init__(self, id='', display_name=''):
437 self.id = id
438 self.display_name = display_name
439
440class ListEntry:
441 def __init__(self, key='', last_modified=None, etag='', size=0, storage_class='', owner=None):
442 self.key = key
443 self.last_modified = last_modified
444 self.etag = etag
445 self.size = size
446 self.storage_class = storage_class
447 self.owner = owner
448
449class CommonPrefixEntry:
450 def __init(self, prefix=''):
451 self.prefix = prefix
452
453class Bucket:
454 def __init__(self, name='', creation_date=''):
455 self.name = name
456 self.creation_date = creation_date
457
458class Response:
459 def __init__(self, http_response):
460 self.http_response = http_response
461 # you have to do this read, even if you don't expect a body.
462 # otherwise, the next request fails.
463 self.body = http_response.read()
464 if http_response.status >= 300 and self.body:
465 self.message = self.body
466 else:
467 self.message = "%03d %s" % (http_response.status, http_response.reason)
468
469
470
471class ListBucketResponse(Response):
472 def __init__(self, http_response):
473 Response.__init__(self, http_response)
474 if http_response.status < 300:
475 handler = ListBucketHandler()
476 xml.sax.parseString(self.body, handler)
477 self.entries = handler.entries
478 self.common_prefixes = handler.common_prefixes
479 self.name = handler.name
480 self.marker = handler.marker
481 self.prefix = handler.prefix
482 self.is_truncated = handler.is_truncated
483 self.delimiter = handler.delimiter
484 self.max_keys = handler.max_keys
485 self.next_marker = handler.next_marker
486 else:
487 self.entries = []
488
489class ListAllMyBucketsResponse(Response):
490 def __init__(self, http_response):
491 Response.__init__(self, http_response)
492 if http_response.status < 300:
493 handler = ListAllMyBucketsHandler()
494 xml.sax.parseString(self.body, handler)
495 self.entries = handler.entries
496 else:
497 self.entries = []
498
499class GetResponse(Response):
500 def __init__(self, http_response):
501 Response.__init__(self, http_response)
502 response_headers = http_response.msg # older pythons don't have getheaders
503 metadata = self.get_aws_metadata(response_headers)
504 self.object = S3Object(self.body, metadata)
505
506 def get_aws_metadata(self, headers):
507 metadata = {}
508 for hkey in headers.keys():
509 if hkey.lower().startswith(METADATA_PREFIX):
510 metadata[hkey[len(METADATA_PREFIX):]] = headers[hkey]
511 del headers[hkey]
512
513 return metadata
514
515class LocationResponse(Response):
516 def __init__(self, http_response):
517 Response.__init__(self, http_response)
518 if http_response.status < 300:
519 handler = LocationHandler()
520 xml.sax.parseString(self.body, handler)
521 self.location = handler.location
522
523class ListBucketHandler(xml.sax.ContentHandler):
524 def __init__(self):
525 self.entries = []
526 self.curr_entry = None
527 self.curr_text = ''
528 self.common_prefixes = []
529 self.curr_common_prefix = None
530 self.name = ''
531 self.marker = ''
532 self.prefix = ''
533 self.is_truncated = False
534 self.delimiter = ''
535 self.max_keys = 0
536 self.next_marker = ''
537 self.is_echoed_prefix_set = False
538
539 def startElement(self, name, attrs):
540 if name == 'Contents':
541 self.curr_entry = ListEntry()
542 elif name == 'Owner':
543 self.curr_entry.owner = Owner()
544 elif name == 'CommonPrefixes':
545 self.curr_common_prefix = CommonPrefixEntry()
546
547
548 def endElement(self, name):
549 if name == 'Contents':
550 self.entries.append(self.curr_entry)
551 elif name == 'CommonPrefixes':
552 self.common_prefixes.append(self.curr_common_prefix)
553 elif name == 'Key':
554 self.curr_entry.key = self.curr_text
555 elif name == 'LastModified':
556 self.curr_entry.last_modified = self.curr_text
557 elif name == 'ETag':
558 self.curr_entry.etag = self.curr_text
559 elif name == 'Size':
560 self.curr_entry.size = int(self.curr_text)
561 elif name == 'ID':
562 self.curr_entry.owner.id = self.curr_text
563 elif name == 'DisplayName':
564 self.curr_entry.owner.display_name = self.curr_text
565 elif name == 'StorageClass':
566 self.curr_entry.storage_class = self.curr_text
567 elif name == 'Name':
568 self.name = self.curr_text
569 elif name == 'Prefix' and self.is_echoed_prefix_set:
570 self.curr_common_prefix.prefix = self.curr_text
571 elif name == 'Prefix':
572 self.prefix = self.curr_text
573 self.is_echoed_prefix_set = True
574 elif name == 'Marker':
575 self.marker = self.curr_text
576 elif name == 'IsTruncated':
577 self.is_truncated = self.curr_text == 'true'
578 elif name == 'Delimiter':
579 self.delimiter = self.curr_text
580 elif name == 'MaxKeys':
581 self.max_keys = int(self.curr_text)
582 elif name == 'NextMarker':
583 self.next_marker = self.curr_text
584
585 self.curr_text = ''
586
587 def characters(self, content):
588 self.curr_text += content
589
590
591class ListAllMyBucketsHandler(xml.sax.ContentHandler):
592 def __init__(self):
593 self.entries = []
594 self.curr_entry = None
595 self.curr_text = ''
596
597 def startElement(self, name, attrs):
598 if name == 'Bucket':
599 self.curr_entry = Bucket()
600
601 def endElement(self, name):
602 if name == 'Name':
603 self.curr_entry.name = self.curr_text
604 elif name == 'CreationDate':
605 self.curr_entry.creation_date = self.curr_text
606 elif name == 'Bucket':
607 self.entries.append(self.curr_entry)
608
609 def characters(self, content):
610 self.curr_text = content
611
612
613class LocationHandler(xml.sax.ContentHandler):
614 def __init__(self):
615 self.location = None
616 self.state = 'init'
617
618 def startElement(self, name, attrs):
619 if self.state == 'init':
620 if name == 'LocationConstraint':
621 self.state = 'tag_location'
622 self.location = ''
623 else: self.state = 'bad'
624 else: self.state = 'bad'
625
626 def endElement(self, name):
627 if self.state == 'tag_location' and name == 'LocationConstraint':
628 self.state = 'done'
629 else: self.state = 'bad'
630
631 def characters(self, content):
632 if self.state == 'tag_location':
633 self.location += content