
Commit ccc5e07

Revert "Always use raw response data. (#87)"
This reverts commit 2b9ffc8.
1 parent 073360d · commit ccc5e07

8 files changed: +248 additions, -178 deletions


google/resumable_media/_download.py

Lines changed: 13 additions & 34 deletions
@@ -349,41 +349,20 @@ def _process_response(self, response):
             return

         _helpers.require_status_code(
-            response,
-            _ACCEPTABLE_STATUS_CODES,
-            self._get_status_code,
-            callback=self._make_invalid,
-        )
-        headers = self._get_headers(response)
+            response, _ACCEPTABLE_STATUS_CODES,
+            self._get_status_code, callback=self._make_invalid)
+        content_length = _helpers.header_required(
+            response, u'content-length', self._get_headers,
+            callback=self._make_invalid)
+        num_bytes = int(content_length)
+        _, end_byte, total_bytes = get_range_info(
+            response, self._get_headers, callback=self._make_invalid)
         response_body = self._get_body(response)
-
-        start_byte, end_byte, total_bytes = get_range_info(
-            response, self._get_headers, callback=self._make_invalid
-        )
-
-        transfer_encoding = headers.get(u"transfer-encoding")
-
-        if transfer_encoding is None:
-            content_length = _helpers.header_required(
-                response,
-                u"content-length",
-                self._get_headers,
-                callback=self._make_invalid,
-            )
-            num_bytes = int(content_length)
-            if len(response_body) != num_bytes:
-                self._make_invalid()
-                raise common.InvalidResponse(
-                    response,
-                    u"Response is different size than content-length",
-                    u"Expected",
-                    num_bytes,
-                    u"Received",
-                    len(response_body),
-                )
-        else:
-            # 'content-length' header not allowed with chunked encoding.
-            num_bytes = end_byte - start_byte + 1
+        if len(response_body) != num_bytes:
+            self._make_invalid()
+            raise common.InvalidResponse(
+                response, u'Response is different size than content-length',
+                u'Expected', num_bytes, u'Received', len(response_body))

         # First update ``bytes_downloaded``.
         self._bytes_downloaded += num_bytes
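
With the revert, ``_process_response`` requires a ``content-length`` header and insists the body match it byte for byte; the removed branch had also handled chunked transfer-encoding, where that header is absent and the size comes from the range header instead. A standalone sketch of the restored check (hypothetical helper, not part of the library):

from google.resumable_media import common


def _check_body_size(response, body, content_length_header):
    """Hypothetical helper mirroring the reverted size check."""
    num_bytes = int(content_length_header)
    if len(body) != num_bytes:
        raise common.InvalidResponse(
            response, u'Response is different size than content-length',
            u'Expected', num_bytes, u'Received', len(body))
    return num_bytes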

google/resumable_media/requests/_helpers.py

Lines changed: 1 addition & 7 deletions
@@ -25,7 +25,6 @@


 _DEFAULT_RETRY_STRATEGY = common.RetryStrategy()
-_SINGLE_GET_CHUNK_SIZE = 8192
 # The number of seconds to wait to establish a connection
 # (connect() call on socket). Avoid setting this to a multiple of 3 to not
 # Align with TCP Retransmission timing. (typically 2.5-3s)
@@ -76,12 +75,7 @@ def _get_body(response):
         Returns:
             bytes: The body of the ``response``.
         """
-        if response._content is False:
-            response._content = b"".join(
-                response.raw.stream(_SINGLE_GET_CHUNK_SIZE, decode_content=False)
-            )
-            response._content_consumed = True
-        return response._content
+        return response.content


 def http_request(
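
The revert returns to ``response.content``, which is the payload after ``requests`` undoes any ``Content-Encoding`` such as gzip; the removed branch filled the body from the raw socket stream instead. A minimal sketch of the two behaviours (not from the repository; the URL is a placeholder):

import requests

url = u'https://example.com/some-object'  # placeholder

response = requests.get(url, stream=True)

# Reverted behaviour: ``requests`` transparently decodes gzip/deflate,
# so this is the decompressed payload.
payload = response.content

# Behaviour being reverted, shown for contrast: read straight from the
# underlying urllib3 stream without decoding, so the bytes match what was
# sent on the wire. Only one of the two reads works per response object.
# payload = b''.join(response.raw.stream(8192, decode_content=False))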

google/resumable_media/requests/download.py

Lines changed: 78 additions & 26 deletions
@@ -18,13 +18,16 @@
 import hashlib
 import logging

+import urllib3.response
+
 from google.resumable_media import _download
 from google.resumable_media import common
 from google.resumable_media.requests import _helpers


 _LOGGER = logging.getLogger(__name__)
-_HASH_HEADER = u"x-goog-hash"
+_SINGLE_GET_CHUNK_SIZE = 8192
+_HASH_HEADER = u'x-goog-hash'
 _MISSING_MD5 = u"""\
 No MD5 checksum was returned from the service while downloading {}
 (which happens for composite objects), so client-side content integrity
@@ -113,13 +116,12 @@ def _write_to_stream(self, response):
         with response:
             # NOTE: This might "donate" ``md5_hash`` to the decoder and replace
             # it with a ``_DoNothingHash``.
-            body_iter = response.raw.stream(
-                _helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False
-            )
+            local_hash = _add_decoder(response.raw, md5_hash)
+            body_iter = response.iter_content(
+                chunk_size=_SINGLE_GET_CHUNK_SIZE, decode_unicode=False)
             for chunk in body_iter:
                 self._stream.write(chunk)
-                md5_hash.update(chunk)
-            response._content_consumed = True
+                local_hash.update(chunk)

         if expected_md5_hash is None:
             return
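
Outside the library, the ``iter_content`` pattern this hunk switches back to looks roughly like the sketch below (not from the repository; the URL and destination path are placeholders):

import requests

url = u'https://example.com/some-object'  # placeholder

response = requests.get(url, stream=True)
with open(u'/tmp/downloaded-object', u'wb') as file_obj:
    # ``iter_content`` yields decoded chunks (gzip/deflate already undone),
    # which is why the commit hashes the compressed bytes via the patched
    # urllib3 decoder rather than hashing these chunks.
    for chunk in response.iter_content(chunk_size=8192, decode_unicode=False):
        file_obj.write(chunk)
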
@@ -155,15 +157,16 @@ def consume(self, transport):
         """
         method, url, payload, headers = self._prepare_request()
         # NOTE: We assume "payload is None" but pass it along anyway.
-        response = _helpers.http_request(
-            transport,
-            method,
-            url,
-            data=payload,
-            headers=headers,
-            retry_strategy=self._retry_strategy,
-            stream=True,
-        )
+        request_kwargs = {
+            u'data': payload,
+            u'headers': headers,
+            u'retry_strategy': self._retry_strategy,
+        }
+        if self._stream is not None:
+            request_kwargs[u'stream'] = True
+
+        result = _helpers.http_request(
+            transport, method, url, **request_kwargs)

         self._process_response(response)

@@ -216,17 +219,11 @@ def consume_next_chunk(self, transport):
         """
         method, url, payload, headers = self._prepare_request()
         # NOTE: We assume "payload is None" but pass it along anyway.
-        response = _helpers.http_request(
-            transport,
-            method,
-            url,
-            data=payload,
-            headers=headers,
-            retry_strategy=self._retry_strategy,
-            stream=True,
-        )
-        self._process_response(response)
-        return response
+        result = _helpers.http_request(
+            transport, method, url, data=payload, headers=headers,
+            retry_strategy=self._retry_strategy)
+        self._process_response(result)
+        return result


 def _parse_md5_header(header_value, response):
@@ -294,3 +291,58 @@ def update(self, unused_chunk):
         Args:
             unused_chunk (bytes): A chunk of data.
         """
+
+
+def _add_decoder(response_raw, md5_hash):
+    """Patch the ``_decoder`` on a ``urllib3`` response.
+
+    This is so that we can intercept the compressed bytes before they are
+    decoded.
+
+    Only patches if the content encoding is ``gzip``.
+
+    Args:
+        response_raw (urllib3.response.HTTPResponse): The raw response for
+            an HTTP request.
+        md5_hash (Union[_DoNothingHash, hashlib.md5]): A hash function which
+            will get updated when it encounters compressed bytes.
+
+    Returns:
+        Union[_DoNothingHash, hashlib.md5]: Either the original ``md5_hash``
+        if ``_decoder`` is not patched. Otherwise, returns a ``_DoNothingHash``
+        since the caller will no longer need to hash to decoded bytes.
+    """
+    encoding = response_raw.headers.get(u'content-encoding', u'').lower()
+    if encoding != u'gzip':
+        return md5_hash
+
+    response_raw._decoder = _GzipDecoder(md5_hash)
+    return _DoNothingHash()
+
+
+class _GzipDecoder(urllib3.response.GzipDecoder):
+    """Custom subclass of ``urllib3`` decoder for ``gzip``-ed bytes.
+
+    Allows an MD5 hash function to see the compressed bytes before they are
+    decoded. This way the hash of the compressed value can be computed.
+
+    Args:
+        md5_hash (Union[_DoNothingHash, hashlib.md5]): A hash function which
+            will get updated when it encounters compressed bytes.
+    """
+
+    def __init__(self, md5_hash):
+        super(_GzipDecoder, self).__init__()
+        self._md5_hash = md5_hash
+
+    def decompress(self, data):
+        """Decompress the bytes.
+
+        Args:
+            data (bytes): The compressed bytes to be decompressed.
+
+        Returns:
+            bytes: The decompressed bytes from ``data``.
+        """
+        self._md5_hash.update(data)
+        return super(_GzipDecoder, self).decompress(data)
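
The added ``_GzipDecoder`` lets the MD5 hash see the compressed bytes while ``requests`` still hands decompressed chunks to the caller. A rough round-trip sketch of that idea (not from the repository's tests; assumes a Python 3 interpreter with ``gzip.compress`` and that the private class is importable as shown):

import gzip
import hashlib

from google.resumable_media.requests import download as download_mod

compressed = gzip.compress(b'hello world')

# Decompressing through the patched decoder hashes the *compressed* bytes
# as a side effect, while still yielding the original payload.
decoder = download_mod._GzipDecoder(hashlib.md5())
assert decoder.decompress(compressed) == b'hello world'
assert decoder._md5_hash.hexdigest() == hashlib.md5(compressed).hexdigest()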

noxfile.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@
 GOOGLE_AUTH = 'google-auth >= 0.10.0'


-@nox.session(python=['2.7', '3.4', '3.5', '3.6', '3.7'])
+@nox.session(python=['2,7', '3.4', '3.5', '3.6', '3.7'])
 def unit_tests(session):
     """Run the unit test suite."""


tests/system/requests/test_download.py

Lines changed: 36 additions & 5 deletions
@@ -25,9 +25,8 @@
 from six.moves import http_client

 from google import resumable_media
-from google.resumable_media import requests as resumable_requests
-from google.resumable_media.requests import download as download_mod
-from google.resumable_media.requests import _helpers
+import google.resumable_media.requests as resumable_requests
+import google.resumable_media.requests.download as download_mod
 from tests.system import utils


@@ -57,6 +56,7 @@
             slice(-256, None, None),  # obj[-256:]
             slice(262144, None, None),  # obj[262144:]
         ),
+<<<<<<< HEAD
     },
     {
         u"path": os.path.realpath(os.path.join(DATA_DIR, u"file.txt")),
@@ -70,6 +70,23 @@
         u"checksum": u"KHRs/+ZSrc/FuuR4qz/PZQ==",
         u"slices": (),
         u"metadata": {u"contentEncoding": u"gzip"},
+=======
+    }, {
+        u'path': os.path.realpath(os.path.join(DATA_DIR, u'file.txt')),
+        u'content_type': PLAIN_TEXT,
+        u'checksum': u'KHRs/+ZSrc/FuuR4qz/PZQ==',
+        u'slices': (),
+    }, {
+        u'path': os.path.realpath(os.path.join(DATA_DIR, u'gzipped.txt.gz')),
+        u'uncompressed':
+            os.path.realpath(os.path.join(DATA_DIR, u'gzipped.txt')),
+        u'content_type': PLAIN_TEXT,
+        u'checksum': u'KHRs/+ZSrc/FuuR4qz/PZQ==',
+        u'slices': (),
+        u'metadata': {
+            u'contentEncoding': u'gzip',
+        },
+>>>>>>> parent of 2b9ffc8... Always use raw response data. (#87)
     },
 )
 ENCRYPTED_ERR = b"The target object is encrypted by a customer-supplied encryption key."
@@ -126,13 +143,22 @@ def _get_contents_for_upload(info):


 def _get_contents(info):
+<<<<<<< HEAD
     full_path = info[u"path"]
     with open(full_path, u"rb") as file_obj:
+=======
+    full_path = info.get(u'uncompressed', info[u'path'])
+    with open(full_path, u'rb') as file_obj:
+>>>>>>> parent of 2b9ffc8... Always use raw response data. (#87)
         return file_obj.read()


 def _get_blob_name(info):
+<<<<<<< HEAD
     full_path = info[u"path"]
+=======
+    full_path = info.get(u'uncompressed', info[u'path'])
+>>>>>>> parent of 2b9ffc8... Always use raw response data. (#87)
     return os.path.basename(full_path)


@@ -179,12 +205,15 @@ def check_tombstoned(download, transport):
         assert exc_info.match(u"Download has finished.")


+<<<<<<< HEAD
 def read_raw_content(response):
     return b"".join(
         response.raw.stream(_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False)
     )


+=======
+>>>>>>> parent of 2b9ffc8... Always use raw response data. (#87)
 def test_download_full(add_files, authorized_transport):
     for info in ALL_FILES:
         actual_contents = _get_contents(info)
@@ -196,7 +225,7 @@ def test_download_full(add_files, authorized_transport):
         # Consume the resource.
         response = download.consume(authorized_transport)
         assert response.status_code == http_client.OK
-        assert read_raw_content(response) == actual_contents
+        assert response.content == actual_contents
         check_tombstoned(download, authorized_transport)


@@ -221,6 +250,7 @@ def test_download_to_stream(add_files, authorized_transport):
         check_tombstoned(download, authorized_transport)


+@pytest.mark.xfail  # See: #76
 def test_corrupt_download(add_files, corrupting_transport):
     for info in ALL_FILES:
         blob_name = _get_blob_name(info)
@@ -396,7 +426,8 @@ def consume_chunks(download, authorized_transport, total_bytes, actual_contents)
     return num_responses, response


-def test_chunked_download_full(add_files, authorized_transport):
+@pytest.mark.xfail  # See issue #56
+def test_chunked_download(add_files, authorized_transport):
     for info in ALL_FILES:
         actual_contents = _get_contents(info)
         blob_name = _get_blob_name(info)

tests/unit/requests/test__helpers.py

Lines changed: 10 additions & 0 deletions
@@ -27,6 +27,7 @@ def test__get_status_code(self):
         assert status_code == _helpers.RequestsMixin._get_status_code(response)

     def test__get_headers(self):
+<<<<<<< HEAD
         headers = {u"fruit": u"apple"}
         response = mock.Mock(headers=headers, spec=["headers"])
         assert headers == _helpers.RequestsMixin._get_headers(response)
@@ -44,6 +45,15 @@ def test__get_body_wo_content_consumed(self):
     def test__get_body_w_content_consumed(self):
         body = b"This is the payload."
         response = mock.Mock(_content=body, spec=["_content"])
+=======
+        headers = {u'fruit': u'apple'}
+        response = mock.Mock(headers=headers, spec=[u'headers'])
+        assert headers == _helpers.RequestsMixin._get_headers(response)
+
+    def test__get_body(self):
+        body = b'This is the payload.'
+        response = mock.Mock(content=body, spec=[u'content'])
+>>>>>>> parent of 2b9ffc8... Always use raw response data. (#87)
         assert body == _helpers.RequestsMixin._get_body(response)

