-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Expand file tree
/
Copy pathbase.py
More file actions
2570 lines (2248 loc) · 102 KB
/
base.py
File metadata and controls
2570 lines (2248 loc) · 102 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Copyright (c) 2010-2016 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NOTE: swift_conn
# You'll see swift_conn passed around a few places in this file. This is the
# source bufferedhttp connection of whatever it is attached to.
# It is used when early termination of reading from the connection should
# happen, such as when a range request is satisfied but there's still more the
# source connection would like to send. To prevent having to read all the data
# that could be left, the source connection can be .close() and then reads
# commence to empty out any buffers.
# These shenanigans are to ensure all related objects can be garbage
# collected. We've seen objects hang around forever otherwise.
from urllib.parse import quote
import time
import json
import functools
import inspect
import itertools
import operator
import random
from copy import deepcopy
from types import SimpleNamespace
from eventlet.timeout import Timeout
from swift.common.memcached import MemcacheConnectionError
from swift.common.wsgi import make_pre_authed_env, make_pre_authed_request
from swift.common.utils import Timestamp, WatchdogTimeout, config_true_value, \
public, split_path, list_from_csv, GreenthreadSafeIterator, \
GreenAsyncPile, quorum_size, parse_content_type, drain_and_close, \
document_iters_to_http_response_body, cache_from_env, \
CooperativeIterator, NamespaceBoundList, Namespace, ClosingMapper
from swift.common.bufferedhttp import http_connect
from swift.common import constraints
from swift.common.exceptions import ChunkReadTimeout, ChunkWriteTimeout, \
ConnectionTimeout, RangeAlreadyComplete, ShortReadError
from swift.common.header_key_dict import HeaderKeyDict
from swift.common.http import is_informational, is_success, is_redirection, \
is_server_error, HTTP_OK, HTTP_PARTIAL_CONTENT, HTTP_MULTIPLE_CHOICES, \
HTTP_BAD_REQUEST, HTTP_NOT_FOUND, HTTP_UNAUTHORIZED, HTTP_CONTINUE, \
HTTP_GONE, HTTP_REQUESTED_RANGE_NOT_SATISFIABLE
from swift.common.swob import Request, Response, Range, \
HTTPException, HTTPRequestedRangeNotSatisfiable, HTTPServiceUnavailable, \
status_map, wsgi_to_str, str_to_wsgi, wsgi_quote, wsgi_unquote, \
normalize_etag
from swift.common.request_helpers import strip_sys_meta_prefix, \
strip_user_meta_prefix, is_user_meta, is_sys_meta, is_sys_or_user_meta, \
http_response_to_document_iters, is_object_transient_sysmeta, \
strip_object_transient_sysmeta_prefix, get_ip_port, get_user_meta_prefix, \
get_sys_meta_prefix, is_use_replication_network
from swift.common.storage_policy import POLICIES
# Default cache lifetimes. The account/container existence values are the
# fallbacks set_info_cache() uses when a backend response carries no
# X-Backend-Recheck-Account-Existence / X-Backend-Recheck-Container-Existence
# header.
# NOTE(review): the two shard-range values are presumably cache lifetimes for
# shard range data; their use is not visible in this part of the file.
DEFAULT_RECHECK_ACCOUNT_EXISTENCE = 60 # seconds
DEFAULT_RECHECK_CONTAINER_EXISTENCE = 60 # seconds
DEFAULT_RECHECK_UPDATING_SHARD_RANGES = 3600 # seconds
DEFAULT_RECHECK_LISTING_SHARD_RANGES = 600 # seconds
def update_headers(response, headers):
    """
    Copy headers onto a response, filtering out ones that must not be
    overwritten.

    ``date``, ``content-length``, ``content-type``, ``connection``,
    ``x-put-timestamp`` and ``x-delete-after`` are never copied. Double
    quotes are removed from the value of an ``etag`` header before it is
    copied.

    :param response: swob.Response object
    :param headers: dictionary of headers, or an iterable of
                    (name, value) pairs
    """
    skipped = ('date', 'content-length', 'content-type',
               'connection', 'x-put-timestamp', 'x-delete-after')
    pairs = headers.items() if hasattr(headers, 'items') else headers
    for name, value in pairs:
        lowered = name.lower()
        if lowered in skipped:
            continue
        if lowered == 'etag':
            value = value.replace('"', '')
        response.headers[name] = value
def delay_denial(func):
    """
    Decorator marking a method as one whose swift.authorize call should be
    delayed, so the method can first load the Request object with any extra
    information the authorization system may need.

    The function is tagged in place with ``delay_denial = True`` and
    returned unchanged otherwise.

    :param func: function for which authorization will be delayed
    :returns: the same function object
    """
    setattr(func, 'delay_denial', True)
    return func
def _prep_headers_to_info(headers, server_type):
    """
    Walk a dict of headers once, lower-casing every key and sorting each
    entry into one of three buckets: user metadata, system metadata or
    everything else. Metadata keys have their prefix stripped.

    :param headers: the headers to split up
    :param server_type: the server type whose metadata prefixes apply
    :returns: a tuple of dicts (other headers, user metadata,
              system metadata)
    """
    user_meta, system_meta, remaining = {}, {}, {}
    for raw_key, raw_val in dict(headers).items():
        name = wsgi_to_str(raw_key).lower()
        # only string values are converted; anything else is kept as-is
        value = wsgi_to_str(raw_val) if isinstance(raw_val, str) else raw_val
        if is_user_meta(server_type, name):
            bucket = user_meta
            name = strip_user_meta_prefix(server_type, name)
        elif is_sys_meta(server_type, name):
            bucket = system_meta
            name = strip_sys_meta_prefix(server_type, name)
        else:
            bucket = remaining
        bucket[name] = value
    return remaining, user_meta, system_meta
def headers_to_account_info(headers, status_int=HTTP_OK):
    """
    Construct a cacheable dict of account info based on response headers.

    :param headers: the account response headers
    :param status_int: the response status to record in the info
    :returns: a dict of account info; ``account_really_exists`` is only
              included when ``status_int`` is a success status
    """
    headers, meta, sysmeta = _prep_headers_to_info(headers, 'account')

    def policy_stat(policy, stat):
        # _prep_headers_to_info() lower-cased every header key, so the
        # policy name must be lower-cased as well; otherwise a policy name
        # containing upper-case characters would never match and the stat
        # would always be reported as 0.
        key = 'x-account-storage-policy-{}-{}'.format(
            policy.name.lower(), stat)
        return int(headers.get(key, 0))

    account_info = {
        'status': status_int,
        # 'container_count' anomaly:
        # Previous code sometimes expects an int sometimes a string
        # Current code aligns to str and None, yet translates to int in
        # deprecated functions as needed
        'container_count': headers.get('x-account-container-count'),
        'total_object_count': headers.get('x-account-object-count'),
        'bytes': headers.get('x-account-bytes-used'),
        'storage_policies': {
            policy.idx: {
                'container_count': policy_stat(policy, 'container-count'),
                'object_count': policy_stat(policy, 'object-count'),
                'bytes': policy_stat(policy, 'bytes-used'),
            }
            for policy in POLICIES
        },
        'meta': meta,
        'sysmeta': sysmeta,
    }
    if is_success(status_int):
        account_info['account_really_exists'] = not config_true_value(
            headers.get('x-backend-fake-account-listing'))
    return account_info
def headers_to_container_info(headers, status_int=HTTP_OK):
    """
    Build a cacheable dict of container info from response headers.

    :param headers: the container response headers
    :param status_int: the response status to record in the info
    :returns: a dict of container info
    """
    other, meta, sysmeta = _prep_headers_to_info(headers, 'container')
    fetch = other.get

    info = {'status': status_int}
    for field, header in (
            ('read_acl', 'x-container-read'),
            ('write_acl', 'x-container-write'),
            ('sync_to', 'x-container-sync-to'),
            ('sync_key', 'x-container-sync-key'),
            ('object_count', 'x-container-object-count'),
            ('bytes', 'x-container-bytes-used'),
            ('versions', 'x-versions-location')):
        info[field] = fetch(header)
    info['storage_policy'] = fetch('x-backend-storage-policy-index', '0')
    # CORS settings live in user metadata
    info['cors'] = {
        'allow_origin': meta.get('access-control-allow-origin'),
        'expose_headers': meta.get('access-control-expose-headers'),
        'max_age': meta.get('access-control-max-age'),
    }
    info['meta'] = meta
    info['sysmeta'] = sysmeta
    info['sharding_state'] = fetch('x-backend-sharding-state', 'unsharded')
    # the 'internal' format version of timestamps is cached since the
    # normal format can be derived from this when required
    for field, header in (
            ('created_at', 'x-backend-timestamp'),
            ('put_timestamp', 'x-backend-put-timestamp'),
            ('delete_timestamp', 'x-backend-delete-timestamp'),
            ('status_changed_at', 'x-backend-status-changed-at')):
        info[field] = fetch(header)
    return info
def headers_from_container_info(info):
    """
    Construct a HeaderKeyDict from a container info dict.

    Required values that are missing or None cause None to be returned;
    optional and CORS values that are missing or None are simply left out.

    :param info: a dict of container metadata
    :returns: a HeaderKeyDict or None if info is None or any required headers
              could not be constructed
    """
    if not info:
        return None

    required = (
        ('x-backend-timestamp', 'created_at'),
        ('x-backend-put-timestamp', 'put_timestamp'),
        ('x-backend-delete-timestamp', 'delete_timestamp'),
        ('x-backend-status-changed-at', 'status_changed_at'),
        ('x-backend-storage-policy-index', 'storage_policy'),
        ('x-container-object-count', 'object_count'),
        ('x-container-bytes-used', 'bytes'),
        ('x-backend-sharding-state', 'sharding_state'),
    )
    required_normal_format_timestamps = (
        ('x-timestamp', 'created_at'),
        ('x-put-timestamp', 'put_timestamp'),
    )
    optional = (
        ('x-container-read', 'read_acl'),
        ('x-container-write', 'write_acl'),
        ('x-container-sync-key', 'sync_key'),
        ('x-container-sync-to', 'sync_to'),
        ('x-versions-location', 'versions'),
    )
    cors_optional = (
        ('access-control-allow-origin', 'allow_origin'),
        ('access-control-expose-headers', 'expose_headers'),
        ('access-control-max-age', 'max_age')
    )

    def lookup(info, key):
        # raises KeyError or ValueError
        val = info[key]
        if val is None:
            raise ValueError
        return val

    # note: required headers may be missing from info for example during
    # upgrade when stale info is still in cache
    headers = HeaderKeyDict()
    for hdr, key in required:
        try:
            headers[hdr] = lookup(info, key)
        except (KeyError, ValueError):
            return None

    for hdr, key in required_normal_format_timestamps:
        try:
            headers[hdr] = Timestamp(lookup(info, key)).normal
        except (KeyError, ValueError):
            return None

    for hdr, key in optional:
        try:
            headers[hdr] = lookup(info, key)
        except (KeyError, ValueError):
            pass

    policy_index = info.get('storage_policy')
    headers['x-storage-policy'] = POLICIES[int(policy_index)].name
    prefix = get_user_meta_prefix('container')
    headers.update(
        (prefix + k, v)
        for k, v in info.get('meta', {}).items())
    # 'cors' may be absent (or None) in the info; treat that as "no CORS
    # settings" rather than letting lookup() raise an uncaught TypeError
    # when it subscripts None.
    cors_info = info.get('cors') or {}
    for hdr, key in cors_optional:
        try:
            headers[prefix + hdr] = lookup(cors_info, key)
        except (KeyError, ValueError):
            pass
    prefix = get_sys_meta_prefix('container')
    headers.update(
        (prefix + k, v)
        for k, v in info.get('sysmeta', {}).items())
    return headers
def headers_to_object_info(headers, status_int=HTTP_OK):
    """
    Build a cacheable dict of object info from response headers.

    :param headers: the object response headers
    :param status_int: the response status to record in the info
    :returns: a dict with the keys status, length, type, etag, meta,
              sysmeta and transient_sysmeta
    """
    other, meta, sysmeta = _prep_headers_to_info(headers, 'object')
    # transient sysmeta is picked out of the headers that are neither user
    # nor system metadata, with its prefix stripped
    transient_sysmeta = {
        strip_object_transient_sysmeta_prefix(name.lower()): value
        for name, value in other.items()
        if is_object_transient_sysmeta(name)
    }
    return {
        'status': status_int,
        'length': other.get('content-length'),
        'type': other.get('content-type'),
        'etag': other.get('etag'),
        'meta': meta,
        'sysmeta': sysmeta,
        'transient_sysmeta': transient_sysmeta,
    }
def cors_validation(func):
    """
    Decorator to check if the request is a CORS request and if so, if it's
    valid; valid CORS responses are given the appropriate
    Access-Control-* headers.

    The wrapped callable must be invoked with the controller as its first
    positional argument and the request as its second.

    :param func: function to check
    :returns: the wrapping function
    """
    @functools.wraps(func)
    def wrapped(*a, **kw):
        controller = a[0]
        req = a[1]

        # The logic here was interpreted from
        #    http://www.w3.org/TR/cors/#resource-requests

        # Is this a CORS request?
        req_origin = req.headers.get('Origin', None)
        if not req_origin:
            # Not a CORS request so make the call as normal
            return func(*a, **kw)

        # Yes, this is a CORS request so test if the origin is allowed
        container_info = controller.container_info(
            controller.account_name, controller.container_name, req)
        # The container info may have no (or a None) 'cors' entry; treat
        # that as "no CORS settings" so the lookups below cannot raise.
        cors_info = container_info.get('cors') or {}

        # Call through to the decorated method
        resp = func(*a, **kw)

        if controller.app.strict_cors_mode and \
                not controller.is_origin_allowed(cors_info, req_origin):
            return resp

        # Expose,
        #  - simple response headers,
        #    http://www.w3.org/TR/cors/#simple-response-header
        #  - swift specific: etag, x-timestamp, x-trans-id
        #  - headers provided by the operator in cors_expose_headers
        #  - user metadata headers
        #  - headers provided by the user in
        #    x-container-meta-access-control-expose-headers
        if 'Access-Control-Expose-Headers' not in resp.headers:
            expose_headers = {
                'cache-control', 'content-language', 'content-type',
                'expires', 'last-modified', 'pragma', 'etag',
                'x-timestamp', 'x-trans-id', 'x-openstack-request-id'}
            expose_headers.update(controller.app.cors_expose_headers)
            for header in resp.headers:
                if header.startswith(
                        ('X-Container-Meta', 'X-Object-Meta')):
                    expose_headers.add(header.lower())
            if cors_info.get('expose_headers'):
                expose_headers = expose_headers.union(
                    [header_line.strip().lower()
                     for header_line in
                     cors_info['expose_headers'].split(' ')
                     if header_line.strip()])
            resp.headers['Access-Control-Expose-Headers'] = \
                ', '.join(expose_headers)

        # The user agent won't process the response if the Allow-Origin
        # header isn't included
        if 'Access-Control-Allow-Origin' not in resp.headers:
            # .get() rather than indexing: cors_info may be empty
            allow_origin = cors_info.get('allow_origin')
            if allow_origin and allow_origin.strip() == '*':
                resp.headers['Access-Control-Allow-Origin'] = '*'
            else:
                resp.headers['Access-Control-Allow-Origin'] = req_origin
                if 'Vary' in resp.headers:
                    resp.headers['Vary'] += ', Origin'
                else:
                    resp.headers['Vary'] = 'Origin'

        return resp
    return wrapped
def get_object_info(env, app, path=None, swift_source=None):
    """
    Get the info structure for an object, based on env and app.
    This is useful to middlewares.

    The object path is taken from ``path`` when given, otherwise from
    ``env['PATH_INFO']``. When no info is available, an info dict with
    status 0 is returned. ``length`` is always an int in the result.

    .. note::

        This call bypasses auth. Success does not imply that the request has
        authorization to the object.
    """
    target = path or env['PATH_INFO']
    _version, account, container, obj = split_path(target, 4, 4, True)
    found = _get_object_info(app, env, account, container, obj,
                             swift_source=swift_source)
    # work on a copy so the caller cannot alter the original info
    info = deepcopy(found) if found else headers_to_object_info({}, 0)

    length = info.get('length')
    info['length'] = 0 if length is None else int(length)
    return info
def _record_ac_info_cache_metrics(
app, cache_state, container=None, resp=None):
"""
Record a single cache operation by account or container lookup into its
corresponding metrics.
:param app: the application object
:param cache_state: the state of this cache operation, includes
infocache_hit, memcache hit, miss, error, skip, force_skip
and disabled.
:param container: the container name
:param resp: the response from either backend or cache hit.
"""
try:
proxy_app = app._pipeline_final_app
except AttributeError:
logger = None
else:
logger = proxy_app.logger
server_type = 'container' if container else 'account'
if logger:
record_cache_op_metrics(logger, server_type, 'info', cache_state, resp)
def get_container_info(env, app, swift_source=None, cache_only=False):
    """
    Get the info structure for a container, based on env and app.
    This is useful to middlewares.

    The request-environment cache and memcache are consulted first. On a
    cache miss (and unless ``cache_only`` is set) the account is checked and
    a backend request for the container is made, after which the caches are
    populated. If the container's sysmeta names a versions container, the
    bytes used by that container are added to the returned ``bytes``.

    :param env: the environment used by the current request
    :param app: the application object
    :param swift_source: Used to mark the request as originating out of
                         middleware. Will be logged in proxy logs.
    :param cache_only: If true, indicates that caller doesn't want to HEAD the
                       backend container when cache miss.
    :returns: the container info; its ``status`` is 0 when the API version
              is not valid, when the account lookup fails, or on a cache
              miss with ``cache_only`` set, and 503 when no info could be
              obtained from the backend

    .. note::

        This call bypasses auth. Success does not imply that the request has
        authorization to the container.
    """
    (version, wsgi_account, wsgi_container, unused) = \
        split_path(env['PATH_INFO'], 3, 4, True)

    if not constraints.valid_api_version(version):
        # Not a valid Swift request; return 0 like we do
        # if there's an account failure
        return headers_to_container_info({}, 0)

    account = wsgi_to_str(wsgi_account)
    container = wsgi_to_str(wsgi_container)

    # Try to cut through all the layers to the proxy app
    # (while also preserving logging)
    try:
        logged_app = app._pipeline_request_logging_app
        proxy_app = app._pipeline_final_app
    except AttributeError:
        # no pipeline shortcuts available; use the given app for everything
        logged_app = proxy_app = app
    # Check in environment cache and in memcache (in that order)
    info, cache_state = _get_info_from_caches(
        proxy_app, env, account, container)

    # resp stays None unless a backend request is actually made below
    resp = None
    if not info and not cache_only:
        # Cache miss; go HEAD the container and populate the caches
        env.setdefault('swift.infocache', {})
        # Before checking the container, make sure the account exists.
        #
        # If it is an autocreateable account, just assume it exists; don't
        # HEAD the account, as a GET or HEAD response for an autocreateable
        # account is successful whether the account actually has .db files
        # on disk or not.
        is_autocreate_account = account.startswith(
            constraints.AUTO_CREATE_ACCOUNT_PREFIX)
        if not is_autocreate_account:
            account_info = get_account_info(env, logged_app, swift_source)
            if not account_info or not is_success(account_info['status']):
                # record the cache operation before bailing out early
                _record_ac_info_cache_metrics(
                    logged_app, cache_state, container)
                return headers_to_container_info({}, 0)

        req = _prepare_pre_auth_info_request(
            env, ("/%s/%s/%s" % (version, wsgi_account, wsgi_container)),
            (swift_source or 'GET_CONTAINER_INFO'))
        # *Always* allow reserved names for get-info requests -- it's on the
        # caller to keep the result private-ish
        req.headers['X-Backend-Allow-Reserved-Names'] = 'true'
        resp = req.get_response(logged_app)
        drain_and_close(resp)
        # Check in infocache to see if the proxy (or anyone else) already
        # populated the cache for us. If they did, just use what's there.
        #
        # See similar comment in get_account_info() for justification.
        info = _get_info_from_infocache(env, account, container)
        if info is None:
            info = set_info_cache(env, account, container, resp)

    if info:
        info = deepcopy(info)  # avoid mutating what's in swift.infocache
    else:
        # nothing usable: status 0 if the backend was never asked, else 503
        status_int = 0 if cache_only else 503
        info = headers_to_container_info({}, status_int)

    # Old data format in memcache immediately after a Swift upgrade; clean
    # it up so consumers of get_container_info() aren't exposed to it.
    if 'object_count' not in info and 'container_size' in info:
        info['object_count'] = info.pop('container_size')

    # normalise the numeric fields to ints, defaulting missing ones to 0
    for field in ('storage_policy', 'bytes', 'object_count'):
        if info.get(field) is None:
            info[field] = 0
        else:
            info[field] = int(info[field])

    if info.get('sharding_state') is None:
        info['sharding_state'] = 'unsharded'

    versions_cont = info.get('sysmeta', {}).get('versions-container', '')
    if versions_cont:
        # only the first path segment is used as the versions container name
        versions_cont = wsgi_unquote(str_to_wsgi(
            versions_cont)).split('/')[0]
        versions_req = _prepare_pre_auth_info_request(
            env, ("/%s/%s/%s" % (version, wsgi_account, versions_cont)),
            (swift_source or 'GET_CONTAINER_INFO'))
        versions_req.headers['X-Backend-Allow-Reserved-Names'] = 'true'
        versions_info = get_container_info(versions_req.environ, app)
        # report the combined usage of the container and its versions
        info['bytes'] = info['bytes'] + versions_info['bytes']

    _record_ac_info_cache_metrics(logged_app, cache_state, container, resp)
    return info
def get_account_info(env, app, swift_source=None):
    """
    Get the info structure for an account, based on env and app.
    This is useful to middlewares.

    The request-environment cache and memcache are consulted first; on a
    miss a backend request is made and the caches are populated. The
    ``container_count``, ``bytes`` and ``total_object_count`` fields of the
    result are always ints.

    .. note::

        This call bypasses auth. Success does not imply that the request has
        authorization to the account.

    :param env: the environment used by the current request
    :param app: the application object
    :param swift_source: used to mark the backend request; defaults to
                         'GET_ACCOUNT_INFO'
    :returns: the account info; its ``status`` is 0 when the API version is
              not valid and 503 when no info could be obtained
    :raises ValueError: when path doesn't contain an account
    """
    (version, wsgi_account, _junk) = split_path(env['PATH_INFO'], 2, 3, True)

    if not constraints.valid_api_version(version):
        return headers_to_account_info({}, 0)

    account = wsgi_to_str(wsgi_account)

    # Try to cut through all the layers to the proxy app
    # (while also preserving logging)
    try:
        app = app._pipeline_request_logging_app
    except AttributeError:
        pass
    # Check in environment cache and in memcache (in that order)
    info, cache_state = _get_info_from_caches(app, env, account)

    # Cache miss; go HEAD the account and populate the caches
    if info:
        resp = None
    else:
        env.setdefault('swift.infocache', {})
        req = _prepare_pre_auth_info_request(
            env, "/%s/%s" % (version, wsgi_account),
            (swift_source or 'GET_ACCOUNT_INFO'))
        # *Always* allow reserved names for get-info requests -- it's on the
        # caller to keep the result private-ish
        req.headers['X-Backend-Allow-Reserved-Names'] = 'true'
        resp = req.get_response(app)
        drain_and_close(resp)
        # Check in infocache to see if the proxy (or anyone else) already
        # populated the cache for us. If they did, just use what's there.
        #
        # The point of this is to avoid setting the value in memcached
        # twice. Otherwise, we're needlessly sending requests across the
        # network.
        #
        # If the info didn't make it into the cache, we'll compute it from
        # the response and populate the cache ourselves.
        #
        # Note that this is taking "exists in infocache" to imply "exists in
        # memcache". That's because we're trying to avoid superfluous
        # network traffic, and checking in memcache prior to setting in
        # memcache would defeat the purpose.
        info = _get_info_from_infocache(env, account)
        if info is None:
            info = set_info_cache(env, account, None, resp)

    if info:
        # Deep copy, as get_container_info() does: a shallow copy would
        # leave the nested 'meta', 'sysmeta' and 'storage_policies' dicts
        # shared with swift.infocache, so a caller changing them would
        # silently change the cached entry too.
        info = deepcopy(info)
    else:
        info = headers_to_account_info({}, 503)

    for field in ('container_count', 'bytes', 'total_object_count'):
        if info.get(field) is None:
            info[field] = 0
        else:
            info[field] = int(info[field])

    _record_ac_info_cache_metrics(app, cache_state, container=None, resp=resp)
    return info
def get_cache_key(account, container=None, obj=None, shard=None):
    """
    Build the key under which info about an account, container, object or
    container shard ranges is cached. The same key is used for memcache and
    for env['swift.infocache'].

    Each kind of entity gets its own key prefix, so account and container
    info can be cached side by side, and an env copied to form a new
    request cannot accidentally pick up info cached for a different
    account or container.

    :param account: The name of the account
    :param container: The name of the container (or None if account)
    :param obj: The name of the object (or None if account or container)
    :param shard: Sharding state for the container query; typically 'updating'
                  or 'listing' (Requires account and container; cannot use
                  with obj)
    :returns: a (native) string cache_key
    :raises ValueError: if a required parent name is missing, or if both
                        shard and obj are given
    """
    def to_native(value):
        # None and native strings pass through; anything else is decoded
        if value is None or isinstance(value, str):
            return value
        return value.decode('utf8', 'surrogateescape')

    account, container, obj = (
        to_native(part) for part in (account, container, obj))

    if shard:
        if not account or not container:
            raise ValueError('Shard cache key requires account and container')
        if obj:
            raise ValueError('Shard cache key cannot have obj')
        return 'shard-%s-v2/%s/%s' % (shard, account, container)

    if obj:
        if not account or not container:
            raise ValueError('Object cache key requires account and container')
        return 'object/%s/%s/%s' % (account, container, obj)

    if container:
        if not account:
            raise ValueError('Container cache key requires account')
        return 'container/%s/%s' % (account, container)

    return 'account/%s' % account
def set_info_cache(env, account, container, resp):
    """
    Store account or container info in both memcache and the request
    environment cache.

    A 404 or 410 response is cached for a tenth of the normal lifetime.
    Any other unsuccessful response leaves the caches untouched.

    :param env: the WSGI request environment
    :param account: the unquoted account name
    :param container: the unquoted container name or None
    :param resp: the response received or None if info cache should be cleared
    :returns: the info that was placed into the cache, or None if the
              request status was not in (404, 410, 2xx).
    """
    key = get_cache_key(account, container)
    env_cache = env.setdefault('swift.infocache', {})
    memcache_client = cache_from_env(env, True)

    if resp is None:
        clear_info_cache(env, account, container)
        return None

    if container:
        ttl_header = 'X-Backend-Recheck-Container-Existence'
        ttl_default = DEFAULT_RECHECK_CONTAINER_EXISTENCE
        to_info = headers_to_container_info
    else:
        ttl_header = 'X-Backend-Recheck-Account-Existence'
        ttl_default = DEFAULT_RECHECK_ACCOUNT_EXISTENCE
        to_info = headers_to_account_info
    ttl = int(resp.headers.get(ttl_header, ttl_default))

    status = resp.status_int
    if status in (HTTP_NOT_FOUND, HTTP_GONE):
        ttl *= 0.1
    elif not is_success(status):
        # If we got a response, it was unsuccessful, and it wasn't an
        # "authoritative" failure, bail without touching caches.
        return None

    info = to_info(resp.headers, status)
    if memcache_client:
        memcache_client.set(key, info, time=ttl)
    env_cache[key] = info
    return info
def set_object_info_cache(app, env, account, container, obj, resp):
    """
    Cache object info in the WSGI environment, but not in memcache. Caching
    in memcache would lead to cache pressure and mass evictions due to the
    large number of objects in a typical Swift cluster. This is a
    per-request cache only.

    :param app: the application object
    :param env: the environment used by the current request
    :param account: the unquoted account name
    :param container: the unquoted container name
    :param obj: the unquoted object name
    :param resp: a GET or HEAD response received from an object server, or
                 None if info cache should be cleared
    :returns: the object info, or None if the cache entry was cleared
    """
    cache_key = get_cache_key(account, container, obj)

    if not resp:
        # Clearing was requested. Return even when the request has no
        # infocache yet, rather than falling through and reading
        # ``resp.headers`` from a missing response.
        if 'swift.infocache' in env:
            env['swift.infocache'].pop(cache_key, None)
        return None

    info = headers_to_object_info(resp.headers, resp.status_int)
    env.setdefault('swift.infocache', {})[cache_key] = info
    return info
def clear_info_cache(env, account, container=None, shard=None):
    """
    Remove cached info from both memcache and the request environment
    cache.

    :param  env: the WSGI request environment
    :param  account: the account name
    :param  container: the container name if clearing info for containers, or
                None
    :param  shard: the sharding state if clearing info for container shard
                ranges, or None
    """
    key = get_cache_key(account, container, shard=shard)
    env_cache = env.setdefault('swift.infocache', {})
    memcache_client = cache_from_env(env, True)

    env_cache.pop(key, None)
    if not memcache_client:
        return
    memcache_client.delete(key)
def _get_info_from_infocache(env, account, container=None):
    """
    Look up account or container info in the request-environment cache
    (swift.infocache).

    :param  env: the environment used by the current request
    :param  account: the account name
    :param  container: the container name
    :returns: a dictionary of cached info on cache hit, None on miss
    """
    key = get_cache_key(account, container)
    try:
        return env['swift.infocache'][key]
    except KeyError:
        # either there is no infocache yet or it has no entry for this key
        return None
def record_cache_op_metrics(
        logger, server_type, op_type, cache_state, resp=None):
    """
    Increment the metric that corresponds to a single cache operation.

    :param  logger: the metrics logger
    :param  server_type: 'account' or 'container'; it is lower-cased before
        being used in the metric name
    :param  op_type: the name of the operation type, for example
        'shard_listing' or 'shard_updating'
    :param  cache_state: the state of this cache operation. For
        'infocache_hit' and memcache 'hit' the lookup succeeded and 'resp'
        is expected to be None; for every other state, such as memcache
        'miss' or 'skip', a backend request is normally made and 'resp'
        carries its response.
    :param  resp: the response from the backend; when truthy its status is
        appended to the metric name for non-hit states.
    """
    kind = server_type.lower()
    if cache_state == 'infocache_hit':
        metric = '%s.%s.infocache.hit' % (kind, op_type)
    elif cache_state == 'hit':
        # memcache hits.
        metric = '%s.%s.cache.hit' % (kind, op_type)
    elif resp:
        # memcache miss, error, skip, force_skip or disabled, followed by a
        # backend request
        metric = '%s.%s.cache.%s.%d' % (
            kind, op_type, cache_state, resp.status_int)
    else:
        # In some situation, we choose not to lookup backend after cache
        # miss.
        metric = '%s.%s.cache.%s' % (kind, op_type, cache_state)
    logger.increment(metric)
def _get_info_from_memcache(app, env, account, container=None):
    """
    Look up account or container info in memcache, copying any hit into
    the request-environment cache.

    :param  app: the application object
    :param  env: the environment used by the current request
    :param  account: the account name
    :param  container: the container name
    :returns: a tuple of two values, the first is a dictionary of cached info
      on cache hit, None on miss or if memcache is not in use; the second is
      cache state, one of 'disabled', 'skip', 'hit' or 'miss'.
    """
    memcache_client = cache_from_env(env, True)
    if not memcache_client:
        return None, 'disabled'

    try:
        final_app = app._pipeline_final_app
    except AttributeError:
        # Only the middleware entry-points get a reference to the
        # proxy-server app; if a middleware composes itself as multiple
        # filters, we'll just have to choose a reasonable default
        skip_chance = 0.0
    else:
        skip_chance = (final_app.container_existence_skip_cache
                       if container
                       else final_app.account_existence_skip_cache)

    key = get_cache_key(account, container)
    # randomly bypass memcache with probability skip_chance
    if skip_chance and random.random() < skip_chance:
        return None, 'skip'

    cached = memcache_client.get(key)
    if not cached:
        return cached, 'miss'
    env.setdefault('swift.infocache', {})[key] = cached
    return cached, 'hit'
def _get_info_from_caches(app, env, account, container=None):
    """
    Fetch cached info, trying the request environment first and memcache
    (if used) second. Used for both account and container info.

    :param  app: the application object
    :param  env: the environment used by the current request
    :param  account: the account name
    :param  container: the container name, or None for account info
    :returns: a tuple of (the cached info or None if not cached, cache state)
    """
    cached = _get_info_from_infocache(env, account, container)
    if cached:
        return cached, 'infocache_hit'
    cached, state = _get_info_from_memcache(app, env, account, container)
    return cached, state
def namespace_bounds_to_list(bounds):
    """
    Wrap a list of namespace bounds in a ``NamespaceBoundList``.

    :param bounds: a list of namespaces bounds(tuple of lower and name).
    :returns: a ``NamespaceBoundList`` built from ``bounds``; None if
        ``bounds`` is None or empty.
    """
    return NamespaceBoundList(bounds) if bounds else None
def namespace_list_to_bounds(ns_bound_list):
    """
    Extract the namespace bounds from a ``NamespaceBoundList``.

    :param ns_bound_list: an object instance of ``NamespaceBoundList``.
    :returns: the ``bounds`` attribute of ``ns_bound_list``, a list of
        namespaces bounds(tuple of lower and name); None if
        ``ns_bound_list`` is None or empty.
    """
    return ns_bound_list.bounds if ns_bound_list else None
def get_namespaces_from_cache(req, cache_key, skip_chance):
    """
    Look up cached namespaces, first in infocache and then in memcache.

    Note that the memcache client obtained from the request environment is
    used without checking that it is available.

    :param req: a :class:`swift.common.swob.Request` object.
    :param cache_key: the cache key for both infocache and memcache.
    :param skip_chance: the probability of skipping the memcache look-up.
    :return: a tuple of (value, cache state). Value is an instance of
        :class:`swift.common.utils.NamespaceBoundList` if a non-empty list is
        found in infocache or memcache. Otherwise value is ``None``, for
        example if memcache look-up was skipped, or no value was found, or an
        empty list was found, or memcache raised a connection error. Cache
        state is one of ``'infocache_hit'``, ``'skip'``, ``'hit'``,
        ``'miss'`` or ``'error'``.
    """
    # the per-request infocache takes precedence over memcache
    infocache = req.environ.setdefault('swift.infocache', {})
    cached = infocache.get(cache_key)
    if cached:
        return cached, 'infocache_hit'

    # fall back to memcache, unless this look-up is randomly skipped
    memcache = cache_from_env(req.environ, True)
    if skip_chance and random.random() < skip_chance:
        return None, 'skip'

    try:
        bounds = memcache.get(cache_key, raise_on_error=True)
    except MemcacheConnectionError:
        bounds, cache_state = None, 'error'
    else:
        cache_state = 'hit' if bounds else 'miss'

    # the result (possibly None) is recorded in infocache for this request
    ns_bound_list = namespace_bounds_to_list(bounds)
    infocache[cache_key] = ns_bound_list
    return ns_bound_list, cache_state
def set_namespaces_in_cache(req, cache_key, ns_bound_list, time):
    """
    Store a list of namespace bounds in infocache and, if memcache is
    available, in memcache.

    :param req: a :class:`swift.common.swob.Request` object.
    :param cache_key: the cache key for both infocache and memcache.
    :param ns_bound_list: a :class:`swift.common.utils.NamespaceBoundList`;
        should NOT be None nor empty.
    :param time: how long the namespaces should remain in memcache.
    :raises ValueError: if ``cache_key`` starts with ``shard-updating``.
    :return: the cache_state: ``'set'`` if the memcache set succeeded,
        ``'set_error'`` if it raised a connection error, or ``'disabled'``
        if memcache is not in use.
    """
    if cache_key.startswith('shard-updating'):
        raise ValueError('shard-updating cache should use '
                         'CooperativeNamespaceCachePopulator')

    # infocache is always updated, regardless of memcache availability
    req.environ.setdefault('swift.infocache', {})[cache_key] = ns_bound_list

    memcache = cache_from_env(req.environ, True)
    if not memcache:
        # N.B. get_namespaces_from_cache is used for both types of namespace
        # cache objects (updating and listing), and both code paths only call
        # that helper if memcache is enabled. But this function is now only
        # used to set cache *listing* namespace objects over in
        # ContainerController, so if that code path learns to not call it
        # when memcache is disabled this func could also drop
        # cache_state=disabled
        return 'disabled'

    bounds = namespace_list_to_bounds(ns_bound_list)
    try:
        memcache.set(cache_key, bounds, time=time, raise_on_error=True)
    except MemcacheConnectionError:
        return 'set_error'
    return 'set'
def _prepare_pre_auth_info_request(env, path, swift_source):
    """
    Build a pre-authed HEAD request that can be used to obtain info.

    :param env: the environment used by the current request
    :param path: The unquoted, WSGI-str request path
    :param swift_source: value for swift.source in WSGI environment
    :returns: the pre authed request
    """
    # The pre-authed environment is created with an empty query string
    head_env = make_pre_authed_env(
        env, 'HEAD', path, agent='Swift', query_string='',
        swift_source=swift_source)

    # This is a sub request for container metadata - drop the Origin header
    # from the request so that it is not treated as a CORS request.
    head_env.pop('HTTP_ORIGIN', None)

    # ACLs are only shown to account owners, so make this request look like
    # it came from the account owner.
    head_env['swift_owner'] = True

    # Request.blank expects a quoted path, but ``path`` is unquoted
    quoted_path = wsgi_quote(path)
    return Request.blank(quoted_path, environ=head_env)
def get_info(app, env, account, container=None, swift_source=None):
"""
Get info about accounts or containers
Note: This call bypasses auth. Success does not imply that the
request has authorization to the info.
:param app: the application object
:param env: the environment used by the current request
:param account: The unquoted name of the account
:param container: The unquoted name of the container (or None if account)
:param swift_source: swift source logged for any subrequests made while
retrieving the account or container info
:returns: information about the specified entity in a dictionary. See
get_account_info and get_container_info for details on what's in the
dictionary.