Skip to content

Commit 2074c8a

Browse files
gh-142889: Restructure PyDictKeysObject memory layout for simpler entry access
Restructure dict keys allocation to store dk_indices before the PyDictKeysObject header and keep dk_entries after the header. Update dict index access and related allocation/free/clone paths, adjust gdb dict entry location logic, and add layout coverage tests. Local dict microbenchmarks showed about a 1.4% overall improvement, with most operations around 1-2% faster.
1 parent 2be2dd5 commit 2074c8a

File tree

6 files changed

+138
-45
lines changed

6 files changed

+138
-45
lines changed

Include/internal/pycore_dict.h

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,6 @@ struct _dictkeysobject {
198198
/* Number of used entries in dk_entries. */
199199
Py_ssize_t dk_nentries;
200200

201-
202201
/* Actual hash table of dk_size entries. It holds indices in dk_entries,
203202
or DKIX_EMPTY(-1) or DKIX_DUMMY(-2).
204203
@@ -243,10 +242,21 @@ struct _dictvalues {
243242
#define DK_SIZE(dk) (1<<DK_LOG_SIZE(dk))
244243
#endif
245244

245+
/* Return the address of the keys header as an untyped, non-const pointer.
 * The indices table is laid out below this address, so it doubles as the
 * "end" of that table. The const qualifier on dk is dropped by the cast. */
static inline void* _DK_INDICES_END(const PyDictKeysObject *dk) {
    const void *header_addr = dk;
    return (void *)header_addr;
}
248+
249+
static inline void* _DK_INDICES_BASE(const PyDictKeysObject *dk) {
250+
size_t indices_size = (size_t)1 << dk->dk_log2_index_bytes;
251+
return (char *)dk - indices_size;
252+
}
253+
254+
/* Return the first byte of the keys allocation. This is the same address as
 * the base of the indices table, because the table precedes the header. */
static inline void* _DK_ALLOC_BASE(PyDictKeysObject *dk) {
    void *first_byte = _DK_INDICES_BASE(dk);
    return first_byte;
}
257+
246258
static inline void* _DK_ENTRIES(PyDictKeysObject *dk) {
247-
int8_t *indices = (int8_t*)(dk->dk_indices);
248-
size_t index = (size_t)1 << dk->dk_log2_index_bytes;
249-
return (&indices[index]);
259+
return (void *)(&dk->dk_indices[0]);
250260
}
251261

252262
static inline PyDictKeyEntry* DK_ENTRIES(PyDictKeysObject *dk) {

Lib/test/test_dict.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1711,6 +1711,21 @@ def __hash__(self):
17111711
self.assertEqual(dict_getitem_knownhash(d, k1, hash(k1)), 1)
17121712
self.assertRaises(Exc, dict_getitem_knownhash, d, k2, hash(k2))
17131713

1714+
@support.cpython_only
def test_indices_layout(self):
    # Ask the internal C helper to confirm the keys-object layout for the
    # empty dict and for int-keyed dicts of several sizes.
    capi = import_helper.import_module('_testinternalcapi')
    layout_ok = capi.dict_check_indices_layout

    samples = [{}]
    samples.extend({i: i for i in range(count)}
                   for count in (10, 200, 2000, 70000))
    for sample in samples:
        with self.subTest(size=len(sample)):
            self.assertTrue(layout_ok(sample))
17141729

17151730
from test import mapping_tests
17161731

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
gh-issue-142889: Restructure ``PyDictKeysObject`` memory layout so the indices array is stored
2+
before the object header, and update dict index access accordingly. In local
3+
dict-operation microbenchmarks this was about 1.4% faster overall, with most
4+
operations improving by roughly 1-2%.

Modules/_testinternalcapi.c

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1903,6 +1903,53 @@ dict_getitem_knownhash(PyObject *self, PyObject *args)
19031903
return Py_XNewRef(result);
19041904
}
19051905

1906+
static size_t
1907+
dict_index_bytes_for_keys(PyDictKeysObject *keys)
1908+
{
1909+
int index_shift = keys->dk_log2_index_bytes - DK_LOG_SIZE(keys);
1910+
if (index_shift == 0) {
1911+
return 1;
1912+
}
1913+
if (index_shift == 1) {
1914+
return 2;
1915+
}
1916+
#if SIZEOF_VOID_P > 4
1917+
if (index_shift == 3) {
1918+
return 8;
1919+
}
1920+
#endif
1921+
assert(index_shift == 2);
1922+
return 4;
1923+
}
1924+
1925+
/* Test helper: check where the indices table, the keys header and the entries
 * array of a dict sit relative to one another.
 *
 * Returns True when all of the following hold, False otherwise:
 *   - the header starts exactly one indices table after the allocation base;
 *   - the entries start sizeof(PyDictKeysObject) bytes after the header;
 *   - the slot for index 0 is the last slot of the indices table.
 * Sets TypeError and returns NULL if `arg` fails PyAnyDict_Check().
 */
static PyObject*
dict_check_indices_layout(PyObject *self, PyObject *arg)
{
    if (!PyAnyDict_Check(arg)) {
        PyErr_SetString(PyExc_TypeError, "expected a dict");
        return NULL;
    }

    PyDictKeysObject *dk = ((PyDictObject *)arg)->ma_keys;
    const size_t table_bytes = (size_t)1 << dk->dk_log2_index_bytes;

    char *alloc_start = (char *)_DK_ALLOC_BASE(dk);
    char *hdr = (char *)dk;
    char *first_entry = (char *)_DK_ENTRIES(dk);

    bool header_ok = (hdr == alloc_start + table_bytes);
    bool entries_ok = (first_entry == hdr + sizeof(PyDictKeysObject));

    const size_t slot_bytes = dict_index_bytes_for_keys(dk);
    char *table_start = (char *)_DK_INDICES_BASE(dk);
    /* Index 0 is stored immediately before the header. */
    char *slot0 = (char *)_DK_INDICES_END(dk) - (ptrdiff_t)slot_bytes;
    bool slot0_ok =
        (slot0 == table_start + table_bytes - (ptrdiff_t)slot_bytes);

    return PyBool_FromLong(header_ok && entries_ok && slot0_ok);
}
19061953

19071954
static int
19081955
_init_interp_config_from_object(PyInterpreterConfig *config, PyObject *obj)
@@ -2902,6 +2949,7 @@ static PyMethodDef module_functions[] = {
29022949
{"get_object_dict_values", get_object_dict_values, METH_O},
29032950
{"hamt", new_hamt, METH_NOARGS},
29042951
{"dict_getitem_knownhash", dict_getitem_knownhash, METH_VARARGS},
2952+
{"dict_check_indices_layout", dict_check_indices_layout, METH_O},
29052953
{"create_interpreter", _PyCFunction_CAST(create_interpreter),
29062954
METH_VARARGS | METH_KEYWORDS},
29072955
{"destroy_interpreter", _PyCFunction_CAST(destroy_interpreter),

Objects/dictobject.c

Lines changed: 57 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ As of Python 3.6, this is compact and ordered. Basic idea is described here:
1616
1717
layout:
1818
19-
+---------------------+
2019
| dk_refcnt |
2120
| dk_log2_size |
2221
| dk_log2_index_bytes |
@@ -176,8 +175,8 @@ ASSERT_DICT_LOCKED(PyObject *op)
176175

177176
#define IS_DICT_SHARED(mp) _PyObject_GC_IS_SHARED(mp)
178177
#define SET_DICT_SHARED(mp) _PyObject_GC_SET_SHARED(mp)
179-
#define LOAD_INDEX(keys, size, idx) _Py_atomic_load_int##size##_relaxed(&((const int##size##_t*)keys->dk_indices)[idx]);
180-
#define STORE_INDEX(keys, size, idx, value) _Py_atomic_store_int##size##_relaxed(&((int##size##_t*)keys->dk_indices)[idx], (int##size##_t)value);
178+
#define LOAD_INDEX(keys, size, idx) _Py_atomic_load_int##size##_relaxed(&((const int##size##_t*)_DK_INDICES_END(keys))[-1 - (idx)]);
179+
#define STORE_INDEX(keys, size, idx, value) _Py_atomic_store_int##size##_relaxed(&((int##size##_t*)_DK_INDICES_END(keys))[-1 - (idx)], (int##size##_t)value);
181180
#define ASSERT_OWNED_OR_SHARED(mp) \
182181
assert(_Py_IsOwnedByCurrentThread((PyObject *)mp) || IS_DICT_SHARED(mp));
183182

@@ -256,8 +255,8 @@ static inline void split_keys_entry_added(PyDictKeysObject *keys)
256255
#define UNLOCK_KEYS_IF_SPLIT(keys, kind)
257256
#define IS_DICT_SHARED(mp) (false)
258257
#define SET_DICT_SHARED(mp)
259-
#define LOAD_INDEX(keys, size, idx) ((const int##size##_t*)(keys->dk_indices))[idx]
260-
#define STORE_INDEX(keys, size, idx, value) ((int##size##_t*)(keys->dk_indices))[idx] = (int##size##_t)value
258+
#define LOAD_INDEX(keys, size, idx) ((const int##size##_t*)_DK_INDICES_END(keys))[-1 - (idx)]
259+
#define STORE_INDEX(keys, size, idx, value) ((int##size##_t*)_DK_INDICES_END(keys))[-1 - (idx)] = (int##size##_t)value
261260

262261
static inline void split_keys_entry_added(PyDictKeysObject *keys)
263262
{
@@ -513,14 +512,14 @@ dictkeys_get_index(const PyDictKeysObject *keys, Py_ssize_t i)
513512
int log2size = DK_LOG_SIZE(keys);
514513
Py_ssize_t ix;
515514

516-
if (log2size < 8) {
515+
if (keys->dk_log2_index_bytes == log2size) {
517516
ix = LOAD_INDEX(keys, 8, i);
518517
}
519-
else if (log2size < 16) {
518+
else if (keys->dk_log2_index_bytes == log2size + 1) {
520519
ix = LOAD_INDEX(keys, 16, i);
521520
}
522521
#if SIZEOF_VOID_P > 4
523-
else if (log2size >= 32) {
522+
else if (keys->dk_log2_index_bytes == log2size + 3) {
524523
ix = LOAD_INDEX(keys, 64, i);
525524
}
526525
#endif
@@ -540,16 +539,16 @@ dictkeys_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
540539
assert(ix >= DKIX_DUMMY);
541540
assert(keys->dk_version == 0);
542541

543-
if (log2size < 8) {
542+
if (keys->dk_log2_index_bytes == log2size) {
544543
assert(ix <= 0x7f);
545544
STORE_INDEX(keys, 8, i, ix);
546545
}
547-
else if (log2size < 16) {
546+
else if (keys->dk_log2_index_bytes == log2size + 1) {
548547
assert(ix <= 0x7fff);
549548
STORE_INDEX(keys, 16, i, ix);
550549
}
551550
#if SIZEOF_VOID_P > 4
552-
else if (log2size >= 32) {
551+
else if (keys->dk_log2_index_bytes == log2size + 3) {
553552
STORE_INDEX(keys, 64, i, ix);
554553
}
555554
#endif
@@ -626,7 +625,15 @@ estimate_log2_keysize(Py_ssize_t n)
626625
* See https://github.com/python/cpython/pull/127568#discussion_r1868070614
627626
* for the rationale of using dk_log2_index_bytes=3 instead of 0.
628627
*/
629-
static PyDictKeysObject empty_keys_struct = {
628+
typedef struct {
629+
int8_t indices[8];
630+
PyDictKeysObject keys;
631+
} _PyDict_EmptyKeysStorage;
632+
633+
static _PyDict_EmptyKeysStorage empty_keys_storage = {
634+
{DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY,
635+
DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY},
636+
{
630637
_Py_DICT_IMMORTAL_INITIAL_REFCNT, /* dk_refcnt */
631638
0, /* dk_log2_size */
632639
3, /* dk_log2_index_bytes */
@@ -637,11 +644,14 @@ static PyDictKeysObject empty_keys_struct = {
637644
1, /* dk_version */
638645
0, /* dk_usable (immutable) */
639646
0, /* dk_nentries */
640-
{DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY,
641-
DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}, /* dk_indices */
647+
{},
648+
}
642649
};
643650

644-
#define Py_EMPTY_KEYS &empty_keys_struct
651+
static_assert(offsetof(_PyDict_EmptyKeysStorage, keys) == 8,
652+
"empty_keys_storage layout mismatch");
653+
654+
#define Py_EMPTY_KEYS (&empty_keys_storage.keys)
645655

646656
/* Uncomment to check the dict content in _PyDict_CheckConsistency() */
647657
// #define DEBUG_PYDICT
@@ -809,18 +819,27 @@ new_keys_object(uint8_t log2_size, bool unicode)
809819
}
810820

811821
PyDictKeysObject *dk = NULL;
822+
size_t indices_size = (size_t)1 << log2_bytes;
823+
void *base = NULL;
824+
812825
if (log2_size == PyDict_LOG_MINSIZE && unicode) {
813-
dk = _Py_FREELIST_POP_MEM(dictkeys);
826+
base = _Py_FREELIST_POP_MEM(dictkeys);
827+
if (base != NULL) {
828+
dk = (PyDictKeysObject *)((char *)base + indices_size);
829+
}
814830
}
815-
if (dk == NULL) {
816-
dk = PyMem_Malloc(sizeof(PyDictKeysObject)
817-
+ ((size_t)1 << log2_bytes)
818-
+ entry_size * usable);
819-
if (dk == NULL) {
831+
832+
if (base == NULL) {
833+
base = PyMem_Malloc(indices_size
834+
+ sizeof(PyDictKeysObject)
835+
+ entry_size * usable);
836+
if (base == NULL) {
820837
PyErr_NoMemory();
821838
return NULL;
822839
}
840+
dk = (PyDictKeysObject *)((char *)base + indices_size);
823841
}
842+
824843
#ifdef Py_REF_DEBUG
825844
_Py_IncRefTotal(_PyThreadState_GET());
826845
#endif
@@ -834,25 +853,28 @@ new_keys_object(uint8_t log2_size, bool unicode)
834853
dk->dk_nentries = 0;
835854
dk->dk_usable = usable;
836855
dk->dk_version = 0;
837-
memset(&dk->dk_indices[0], 0xff, ((size_t)1 << log2_bytes));
838-
memset(&dk->dk_indices[(size_t)1 << log2_bytes], 0, entry_size * usable);
856+
memset(_DK_INDICES_BASE(dk), 0xff, indices_size);
857+
memset(&dk->dk_indices[0], 0, entry_size * usable);
839858
return dk;
840859
}
841860

842861
static void
843862
free_keys_object(PyDictKeysObject *keys, bool use_qsbr)
844863
{
864+
void *base = _DK_ALLOC_BASE(keys);
865+
845866
#ifdef Py_GIL_DISABLED
846867
if (use_qsbr) {
847-
_PyMem_FreeDelayed(keys, _PyDict_KeysSize(keys));
868+
_PyMem_FreeDelayed(base, _PyDict_KeysSize(keys));
848869
return;
849870
}
850871
#endif
872+
851873
if (DK_LOG_SIZE(keys) == PyDict_LOG_MINSIZE && keys->dk_kind == DICT_KEYS_UNICODE) {
852-
_Py_FREELIST_FREE(dictkeys, keys, PyMem_Free);
874+
_Py_FREELIST_FREE(dictkeys, base, PyMem_Free);
853875
}
854876
else {
855-
PyMem_Free(keys);
877+
PyMem_Free(base);
856878
}
857879
}
858880

@@ -950,14 +972,19 @@ clone_combined_dict_keys(PyDictObject *orig)
950972

951973
ASSERT_DICT_LOCKED(orig);
952974

953-
size_t keys_size = _PyDict_KeysSize(orig->ma_keys);
954-
PyDictKeysObject *keys = PyMem_Malloc(keys_size);
955-
if (keys == NULL) {
975+
PyDictKeysObject *orig_keys = orig->ma_keys;
976+
size_t keys_size = _PyDict_KeysSize(orig_keys);
977+
size_t indices_size = (size_t)1 << orig_keys->dk_log2_index_bytes;
978+
979+
void *base = PyMem_Malloc(keys_size);
980+
if (base == NULL) {
956981
PyErr_NoMemory();
957982
return NULL;
958983
}
959984

960-
memcpy(keys, orig->ma_keys, keys_size);
985+
PyDictKeysObject *keys = (PyDictKeysObject *)((char *)base + indices_size);
986+
987+
memcpy(base, _DK_ALLOC_BASE(orig_keys), keys_size);
961988

962989
/* After copying key/value pairs, we need to incref all
963990
keys and values and they are about to be co-owned by a

Tools/gdb/libpython.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -835,19 +835,8 @@ def write_repr(self, out, visited):
835835
@staticmethod
836836
def _get_entries(keys):
837837
dk_nentries = int(keys['dk_nentries'])
838-
dk_size = 1<<int(keys['dk_log2_size'])
839-
840-
if dk_size <= 0xFF:
841-
offset = dk_size
842-
elif dk_size <= 0xFFFF:
843-
offset = 2 * dk_size
844-
elif dk_size <= 0xFFFFFFFF:
845-
offset = 4 * dk_size
846-
else:
847-
offset = 8 * dk_size
848838

849839
ent_addr = keys['dk_indices'].address
850-
ent_addr = ent_addr.cast(_type_unsigned_char_ptr()) + offset
851840
if int(keys['dk_kind']) == 0: # DICT_KEYS_GENERAL
852841
ent_ptr_t = gdb.lookup_type('PyDictKeyEntry').pointer()
853842
else:

0 commit comments

Comments
 (0)