#!/usr/bin/env python

#  This software code is made available "AS IS" without warranties of any
#  kind.  You may copy, display, modify and redistribute the software
#  code either by itself or as incorporated into your code; provided that
#  you do not remove any proprietary notices.  Your use of this software
#  code is at your own risk and you waive any claim against Amazon
#  Digital Services, Inc. or its affiliates with respect to your use of
#  this software code. (c) 2006 Amazon Digital Services, Inc. or its
#  affiliates.

"""Small client library for the Amazon S3 REST API.

Two entry points are provided:

* ``AWSAuthConnection`` performs signed requests over HTTP(S) and wraps the
  replies in ``Response`` objects.
* ``QueryStringAuthGenerator`` performs no I/O; it only builds pre-signed
  URLs that embed the signature and an expiry time in the query string.

The import fallbacks below let the same source run on Python 2 and Python 3.
"""

import base64
import hmac
import re
import sys
import time
import urllib
import xml.sax

try:
    import httplib
except ImportError:  # Python 3 renamed the module
    import http.client as httplib

try:
    # hashlib.sha1 is a valid hmac ``digestmod`` and avoids the deprecated
    # ``sha`` module wherever hashlib exists.
    from hashlib import sha1 as sha
except ImportError:  # very old Python 2 without hashlib
    import sha

try:
    from urllib import quote_plus as _quote_plus
except ImportError:  # Python 3 moved it into urllib.parse
    from urllib.parse import quote_plus as _quote_plus

try:
    from email.utils import formatdate as _formatdate
except ImportError:  # old Python 2 spelling of the module
    from email.Utils import formatdate as _formatdate

try:
    _text_type = unicode
except NameError:  # Python 3: str already is the text type
    _text_type = str

DEFAULT_HOST = 's3.amazonaws.com'
PORTS_BY_SECURITY = {True: 443, False: 80}
METADATA_PREFIX = 'x-amz-meta-'
AMAZON_HEADER_PREFIX = 'x-amz-'


def _to_bytes(value):
    """Encode text as UTF-8; return any other value unchanged."""
    if isinstance(value, _text_type):
        return value.encode('utf-8')
    return value


def _to_native_str(value):
    """Return ``value`` as the interpreter's native ``str`` type."""
    if isinstance(value, str):
        return value
    return value.decode('ascii')


def _object_path(bucket, key, suffix=''):
    """Build the request path for ``key`` in ``bucket`` (key is url-quoted)."""
    return '%s/%s%s' % (bucket, _quote_plus(key), suffix)


def _list_bucket_path(bucket, options):
    """Build the request path for a bucket listing with optional parameters."""
    if not options:
        return bucket
    # str() lets callers pass non-string values such as {'max-keys': 5}.
    query = '&'.join(["%s=%s" % (param, _quote_plus(str(options[param])))
                      for param in options])
    return bucket + '?' + query


# generates the aws canonical string for the given parameters
def canonical_string(method, path, headers, expires=None):
    """Return the string that gets signed for a request.

    method  -- HTTP verb, e.g. 'GET'.
    path    -- request path without the leading '/', optionally followed by
               a query string.
    headers -- mapping of request headers (None is treated as empty).
    expires -- expiry value used for query string authentication; when
               truthy it replaces the date line.
    """
    if headers is None:
        headers = {}

    # these keys get empty strings if they don't exist
    interesting_headers = {'content-type': '', 'content-md5': ''}
    for key in headers:
        lk = key.lower()
        if (lk in ('content-md5', 'content-type', 'date')
                or lk.startswith(AMAZON_HEADER_PREFIX)):
            interesting_headers[lk] = headers[key]

    # just in case someone used this.  it's not necessary in this lib.
    if 'x-amz-date' in interesting_headers:
        interesting_headers['date'] = ''

    # if you're using expires for query string auth, then it trumps date
    # (and x-amz-date)
    if expires:
        interesting_headers['date'] = str(expires)

    parts = ["%s\n" % method]
    for key in sorted(interesting_headers):
        if key.startswith(AMAZON_HEADER_PREFIX):
            parts.append("%s:%s\n" % (key, interesting_headers[key]))
        else:
            parts.append("%s\n" % interesting_headers[key])

    # don't include anything after the first ? in the resource...
    parts.append("/%s" % path.split('?')[0])

    # ...unless there is an acl or torrent parameter
    if re.search(r"[&?]acl($|=|&)", path):
        parts.append("?acl")
    elif re.search(r"[&?]torrent($|=|&)", path):
        parts.append("?torrent")

    return ''.join(parts)


# computes the base64'ed hmac-sha hash of the canonical string and the secret
# access key, optionally urlencoding the result
def encode(aws_secret_access_key, str, urlencode=False):
    """Return the base64 HMAC-SHA1 of ``str`` keyed by the secret key.

    Text arguments are encoded as UTF-8 before hashing. When ``urlencode``
    is true the result is additionally passed through ``quote_plus``.
    """
    digest = hmac.new(_to_bytes(aws_secret_access_key), _to_bytes(str),
                      sha).digest()
    # b64encode never inserts line breaks, so no strip() is needed.
    b64_hmac = _to_native_str(base64.b64encode(digest))
    if urlencode:
        return _quote_plus(b64_hmac)
    return b64_hmac


def merge_meta(headers, metadata):
    """Return a copy of ``headers`` extended with prefixed metadata entries.

    Neither argument is modified; None is treated as empty for both.
    """
    if headers is None:
        final_headers = {}
    else:
        final_headers = headers.copy()
    if metadata:
        for k in metadata:
            final_headers[METADATA_PREFIX + k] = metadata[k]
    return final_headers


class AWSAuthConnection:
    """Issues signed requests over a single HTTP(S) connection."""

    def __init__(self, aws_access_key_id, aws_secret_access_key,
                 is_secure=True, server=DEFAULT_HOST, port=None):
        if not port:
            port = PORTS_BY_SECURITY[is_secure]

        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key
        if is_secure:
            self.connection = httplib.HTTPSConnection("%s:%d" % (server, port))
        else:
            self.connection = httplib.HTTPConnection("%s:%d" % (server, port))

    def create_bucket(self, bucket, headers=None):
        return Response(self.make_request('PUT', bucket, headers))

    def list_bucket(self, bucket, options=None, headers=None):
        path = _list_bucket_path(bucket, options)
        return ListBucketResponse(self.make_request('GET', path, headers))

    def delete_bucket(self, bucket, headers=None):
        return Response(self.make_request('DELETE', bucket, headers))

    def put(self, bucket, key, object, headers=None):
        # Raw data is accepted for convenience and wrapped here.
        if not isinstance(object, S3Object):
            object = S3Object(object)

        return Response(
            self.make_request(
                'PUT',
                _object_path(bucket, key),
                headers,
                object.data,
                object.metadata))

    def get(self, bucket, key, headers=None):
        return GetResponse(
            self.make_request('GET', _object_path(bucket, key), headers))

    def delete(self, bucket, key, headers=None):
        return Response(
            self.make_request('DELETE', _object_path(bucket, key), headers))

    def get_bucket_acl(self, bucket, headers=None):
        return self.get_acl(bucket, '', headers)

    def get_acl(self, bucket, key='', headers=None):
        return GetResponse(
            self.make_request(
                'GET', _object_path(bucket, key, '?acl'), headers))

    def put_bucket_acl(self, bucket, acl_xml_document, headers=None):
        return self.put_acl(bucket, '', acl_xml_document, headers)

    def put_acl(self, bucket, key, acl_xml_document, headers=None):
        return Response(
            self.make_request(
                'PUT',
                _object_path(bucket, key, '?acl'),
                headers,
                acl_xml_document))

    def list_all_my_buckets(self, headers=None):
        return ListAllMyBucketsResponse(self.make_request('GET', '', headers))

    def make_request(self, method, path, headers=None, data='', metadata=None):
        """Sign and send one request; return the raw httplib response."""
        final_headers = merge_meta(headers, metadata)
        # add auth header
        self.add_aws_auth_header(final_headers, method, path)

        self.connection.request(method, "/%s" % path, data, final_headers)
        return self.connection.getresponse()

    def add_aws_auth_header(self, headers, method, path):
        """Add 'Date' (if absent) and 'Authorization' to ``headers`` in place."""
        if 'Date' not in headers:
            # formatdate() does not depend on the process locale, unlike
            # strftime() with %a / %b / %X.
            headers['Date'] = _formatdate(usegmt=True)

        c_string = canonical_string(method, path, headers)
        headers['Authorization'] = "AWS %s:%s" % (
            self.aws_access_key_id,
            encode(self.aws_secret_access_key, c_string))


class QueryStringAuthGenerator:
    """Builds pre-signed URLs; never touches the network."""

    # by default, expire in 1 minute
    DEFAULT_EXPIRES_IN = 60

    def __init__(self, aws_access_key_id, aws_secret_access_key,
                 is_secure=True, server=DEFAULT_HOST, port=None):
        if not port:
            port = PORTS_BY_SECURITY[is_secure]

        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key
        if is_secure:
            self.protocol = 'https'
        else:
            self.protocol = 'http'

        self.server_name = "%s:%d" % (server, port)
        # Exactly one of the two is non-None at any time.
        self.__expires_in = QueryStringAuthGenerator.DEFAULT_EXPIRES_IN
        self.__expires = None

    def set_expires_in(self, expires_in):
        """Expire generated URLs ``expires_in`` seconds after generation."""
        self.__expires_in = expires_in
        self.__expires = None

    def set_expires(self, expires):
        """Expire generated URLs at the absolute time ``expires``."""
        self.__expires = expires
        self.__expires_in = None

    def create_bucket(self, bucket, headers=None):
        return self.generate_url('PUT', bucket, headers)

    def list_bucket(self, bucket, options=None, headers=None):
        path = _list_bucket_path(bucket, options)
        return self.generate_url('GET', path, headers)

    def delete_bucket(self, bucket, headers=None):
        return self.generate_url('DELETE', bucket, headers)

    def put(self, bucket, key, object, headers=None):
        if not isinstance(object, S3Object):
            object = S3Object(object)

        return self.generate_url(
            'PUT',
            _object_path(bucket, key),
            merge_meta(headers, object.metadata))

    def get(self, bucket, key, headers=None):
        return self.generate_url('GET', _object_path(bucket, key), headers)

    def delete(self, bucket, key, headers=None):
        return self.generate_url('DELETE', _object_path(bucket, key), headers)

    def get_bucket_acl(self, bucket, headers=None):
        return self.get_acl(bucket, '', headers)

    def get_acl(self, bucket, key='', headers=None):
        return self.generate_url(
            'GET', _object_path(bucket, key, '?acl'), headers)

    def put_bucket_acl(self, bucket, acl_xml_document, headers=None):
        return self.put_acl(bucket, '', acl_xml_document, headers)

    # don't really care what the doc is here.
    def put_acl(self, bucket, key, acl_xml_document, headers=None):
        return self.generate_url(
            'PUT', _object_path(bucket, key, '?acl'), headers)

    def list_all_my_buckets(self, headers=None):
        return self.generate_url('GET', '', headers)

    def make_bare_url(self, bucket, key=''):
        """Return the unsigned URL; ``key`` is inserted as given (not quoted)."""
        return self.protocol + '://' + self.server_name + '/' + bucket + '/' + key

    def generate_url(self, method, path, headers):
        """Return a signed URL for ``method`` on ``path``.

        Raises ValueError when neither a relative nor an absolute expiry
        is configured.
        """
        if self.__expires_in is not None:
            expires = int(time.time() + self.__expires_in)
        elif self.__expires is not None:
            expires = int(self.__expires)
        else:
            raise ValueError("Invalid expires state")

        canonical_str = canonical_string(method, path, headers, expires)
        encoded_canonical = encode(
            self.aws_secret_access_key, canonical_str, True)

        # Append to an existing query string if the path already has one.
        if '?' in path:
            arg_div = '&'
        else:
            arg_div = '?'

        query_part = "Signature=%s&Expires=%d&AWSAccessKeyId=%s" % (
            encoded_canonical, expires, self.aws_access_key_id)

        return (self.protocol + '://' + self.server_name + '/' + path
                + arg_div + query_part)


class S3Object:
    """Object payload together with its user metadata mapping."""

    def __init__(self, data, metadata=None):
        self.data = data
        # A fresh dict per instance; a shared default would leak mutations
        # from one object into every other one.
        if metadata is None:
            metadata = {}
        self.metadata = metadata


class Owner:
    def __init__(self, id='', display_name=''):
        self.id = id
        self.display_name = display_name


class ListEntry:
    def __init__(self, key='', last_modified=None, etag='', size=0,
                 storage_class='', owner=None):
        self.key = key
        self.last_modified = last_modified
        self.etag = etag
        self.size = size
        self.storage_class = storage_class
        self.owner = owner


class Bucket:
    def __init__(self, name='', creation_date=''):
        self.name = name
        self.creation_date = creation_date


class Response:
    """Wraps an HTTP response and eagerly reads its body."""

    def __init__(self, http_response):
        self.http_response = http_response
        # you have to do this read, even if you don't expect a body.
        # otherwise, the next request fails.
        self.body = http_response.read()


class ListBucketResponse(Response):
    """Response whose ``entries`` are the ListEntry items of a listing."""

    def __init__(self, http_response):
        Response.__init__(self, http_response)
        if http_response.status < 300:
            handler = ListBucketHandler()
            xml.sax.parseString(self.body, handler)
            self.entries = handler.entries
        else:
            self.entries = []


class ListAllMyBucketsResponse(Response):
    """Response whose ``entries`` are the Bucket items of the account."""

    def __init__(self, http_response):
        Response.__init__(self, http_response)
        if http_response.status < 300:
            handler = ListAllMyBucketsHandler()
            xml.sax.parseString(self.body, handler)
            self.entries = handler.entries
        else:
            self.entries = []


class GetResponse(Response):
    """Response exposing the body and metadata as ``self.object``."""

    def __init__(self, http_response):
        Response.__init__(self, http_response)
        response_headers = http_response.msg   # older pythons don't have getheaders
        metadata = self.get_aws_metadata(response_headers)
        self.object = S3Object(self.body, metadata)

    def get_aws_metadata(self, headers):
        """Move metadata headers out of ``headers`` into a new dict.

        Matching entries are removed from ``headers``; the returned keys
        have the metadata prefix stripped.
        """
        metadata = {}
        # Iterate over a snapshot because entries are deleted in the loop.
        for hkey in list(headers.keys()):
            if hkey.lower().startswith(METADATA_PREFIX):
                metadata[hkey[len(METADATA_PREFIX):]] = headers[hkey]
                del headers[hkey]

        return metadata


class ListBucketHandler(xml.sax.ContentHandler):
    """SAX handler collecting one ListEntry per 'Contents' element."""

    def __init__(self):
        xml.sax.ContentHandler.__init__(self)
        self.entries = []
        self.curr_entry = None
        self.curr_text = ''

    def startElement(self, name, attrs):
        # Drop text seen before this tag (e.g. indentation) so it cannot
        # end up in the value of the element that is just opening.
        self.curr_text = ''
        if name == 'Contents':
            self.curr_entry = ListEntry()
        elif name == 'Owner':
            self.curr_entry.owner = Owner()

    def endElement(self, name):
        if name == 'Contents':
            self.entries.append(self.curr_entry)
        elif name == 'Key':
            self.curr_entry.key = self.curr_text
        elif name == 'LastModified':
            self.curr_entry.last_modified = self.curr_text
        elif name == 'ETag':
            self.curr_entry.etag = self.curr_text
        elif name == 'Size':
            self.curr_entry.size = int(self.curr_text)
        elif name == 'ID':
            self.curr_entry.owner.id = self.curr_text
        elif name == 'DisplayName':
            self.curr_entry.owner.display_name = self.curr_text
        elif name == 'StorageClass':
            self.curr_entry.storage_class = self.curr_text

        self.curr_text = ''

    def characters(self, content):
        self.curr_text += content


class ListAllMyBucketsHandler(xml.sax.ContentHandler):
    """SAX handler collecting one Bucket per 'Bucket' element."""

    def __init__(self):
        xml.sax.ContentHandler.__init__(self)
        self.entries = []
        self.curr_entry = None
        self.curr_text = ''

    def startElement(self, name, attrs):
        self.curr_text = ''
        if name == 'Bucket':
            self.curr_entry = Bucket()

    def endElement(self, name):
        if name == 'Name':
            self.curr_entry.name = self.curr_text
        elif name == 'CreationDate':
            self.curr_entry.creation_date = self.curr_text
        elif name == 'Bucket':
            self.entries.append(self.curr_entry)

        self.curr_text = ''

    def characters(self, content):
        # The parser may deliver one text node in several chunks, so the
        # pieces have to be accumulated rather than overwritten.
        self.curr_text += content