heremaps · VeaaC · Apr 23, 2026 · Apr 23, 2026 · Apr 23, 2026 · Apr 23, 2026
diff --git a/.github/workflows/py.yml b/.github/workflows/py.yml
@@ -19,7 +19,9 @@ jobs:
       - name: Run tests
         run: |
           cd flatdata-py
-          uv run --with pytest --with ../flatdata-generator pytest -v
-          pip install .
-          flatdata-inspector --help
+          uv venv
+          uv pip install ../flatdata-generator
+          uv pip install ".[inspector]" pytest
+          .venv/bin/pytest -v
+          .venv/bin/flatdata-inspector --help
 
diff --git a/flatdata-generator/flatdata/generator/templates/py/python.jinja2 b/flatdata-generator/flatdata/generator/templates/py/python.jinja2
@@ -10,6 +10,7 @@ import flatdata.lib as flatdata
 {{ struct.doc|to_python_doc}}
 class {{ tree.namespace_path(struct, "_") }}_{{ struct.name }}(flatdata.structure.Structure):
     """{{ struct.doc|safe_py_string_line }}"""
+    __slots__ = ()
     _SCHEMA = """{{ tree.schema(struct) }}"""
     _NAME = "{{ tree.namespace_path(struct, "_") }}_{{ struct.name }}"
     _SIZE_IN_BITS = {{ struct.size_in_bits }}

diff --git a/flatdata-generator/pyproject.toml b/flatdata-generator/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "flatdata-generator"
-version = "0.4.10"
+version = "0.4.11"
 description = "Generate source code for C++, Rust, Go or Python from a Flatdata schema file"
 readme = "README.md"
 authors = [

diff --git a/flatdata-generator/tests/generators/py_expectations/archives/multivector.py b/flatdata-generator/tests/generators/py_expectations/archives/multivector.py
@@ -1,5 +1,6 @@
 class n_S(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct S
 {
@@ -20,6 +21,7 @@ class n_S(flatdata.structure.Structure):
 
 class n_T(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct T
 {
@@ -40,6 +42,7 @@ class n_T(flatdata.structure.Structure):
 #  Builtin type to for MultiVector index 
 class n__builtin_multivector_IndexType8(flatdata.structure.Structure):
     """/** Builtin type to for MultiVector index */"""
+    __slots__ = ()
     _SCHEMA = """"""
     _NAME = "n__builtin_multivector_IndexType8"
     _SIZE_IN_BITS = 8
@@ -53,6 +56,7 @@ class n__builtin_multivector_IndexType8(flatdata.structure.Structure):
 #  Builtin type to for MultiVector index 
 class n__builtin_multivector_IndexType16(flatdata.structure.Structure):
     """/** Builtin type to for MultiVector index */"""
+    __slots__ = ()
     _SCHEMA = """"""
     _NAME = "n__builtin_multivector_IndexType16"
     _SIZE_IN_BITS = 16
@@ -66,6 +70,7 @@ class n__builtin_multivector_IndexType16(flatdata.structure.Structure):
 #  Builtin type to for MultiVector index 
 class n__builtin_multivector_IndexType64(flatdata.structure.Structure):
     """/** Builtin type to for MultiVector index */"""
+    __slots__ = ()
     _SCHEMA = """"""
     _NAME = "n__builtin_multivector_IndexType64"
     _SIZE_IN_BITS = 64

diff --git a/flatdata-generator/tests/generators/py_expectations/archives/namespaces.py b/flatdata-generator/tests/generators/py_expectations/archives/namespaces.py
@@ -1,5 +1,6 @@
 class n_S(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct S
 {
@@ -92,6 +93,7 @@ def __init__(self, resource_storage):
 
 class m_S(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace m {
 struct S
 {
@@ -184,6 +186,7 @@ def __init__(self, resource_storage):
 #  Builtin type to for MultiVector index 
 class a__builtin_multivector_IndexType32(flatdata.structure.Structure):
     """/** Builtin type to for MultiVector index */"""
+    __slots__ = ()
     _SCHEMA = """"""
     _NAME = "a__builtin_multivector_IndexType32"
     _SIZE_IN_BITS = 32

diff --git a/flatdata-generator/tests/generators/py_expectations/archives/ranges.py b/flatdata-generator/tests/generators/py_expectations/archives/ranges.py
@@ -1,5 +1,6 @@
 class n_S(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct S
 {

diff --git a/flatdata-generator/tests/generators/py_expectations/archives/struct.py b/flatdata-generator/tests/generators/py_expectations/archives/struct.py
@@ -1,5 +1,6 @@
 class n_S(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct S
 {

diff --git a/flatdata-generator/tests/generators/py_expectations/archives/vector.py b/flatdata-generator/tests/generators/py_expectations/archives/vector.py
@@ -1,5 +1,6 @@
 class n_S(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct S
 {

diff --git a/flatdata-generator/tests/generators/py_expectations/structs/comments.py.1 b/flatdata-generator/tests/generators/py_expectations/structs/comments.py.1
@@ -1,4 +1,5 @@
 #  This is a comment about Foo
 class n_Foo(flatdata.structure.Structure):
     """// This is a comment about Foo"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
diff --git a/flatdata-generator/tests/generators/py_expectations/structs/comments.py.2 b/flatdata-generator/tests/generators/py_expectations/structs/comments.py.2
@@ -1,3 +1,4 @@
 #  This is a comment about Foo
 class n_Foo(flatdata.structure.Structure):
-    """// This is a comment about Foo"""
+    """// This is a comment about Foo"""
+    __slots__ = ()
diff --git a/flatdata-generator/tests/generators/py_expectations/structs/integers.py b/flatdata-generator/tests/generators/py_expectations/structs/integers.py
@@ -12,6 +12,7 @@
 
 class n_U8(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct U8
 {
@@ -32,6 +33,7 @@ class n_U8(flatdata.structure.Structure):
 
 class n_I8(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct I8
 {
@@ -52,6 +54,7 @@ class n_I8(flatdata.structure.Structure):
 
 class n_U16(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct U16
 {
@@ -72,6 +75,7 @@ class n_U16(flatdata.structure.Structure):
 
 class n_I16(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct I16
 {
@@ -92,6 +96,7 @@ class n_I16(flatdata.structure.Structure):
 
 class n_U32(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct U32
 {
@@ -112,6 +117,7 @@ class n_U32(flatdata.structure.Structure):
 
 class n_I32(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct I32
 {
@@ -132,6 +138,7 @@ class n_I32(flatdata.structure.Structure):
 
 class n_U64(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct U64
 {
@@ -152,6 +159,7 @@ class n_U64(flatdata.structure.Structure):
 
 class n_I64(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct I64
 {

diff --git a/flatdata-generator/tests/generators/py_expectations/structs/namespaces.py b/flatdata-generator/tests/generators/py_expectations/structs/namespaces.py
@@ -1,5 +1,6 @@
 class n_Foo(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct Foo
 {
@@ -20,6 +21,7 @@ class n_Foo(flatdata.structure.Structure):
 
 class m_Foo(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace m {
 struct Foo
 {

diff --git a/flatdata-generator/tests/generators/py_expectations/structs/unaligned.py b/flatdata-generator/tests/generators/py_expectations/structs/unaligned.py
@@ -1,5 +1,6 @@
 class n_U8(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct U8
 {
@@ -23,6 +24,7 @@ class n_U8(flatdata.structure.Structure):
 
 class n_I8(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct I8
 {
@@ -46,6 +48,7 @@ class n_I8(flatdata.structure.Structure):
 
 class n_U16(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct U16
 {
@@ -69,6 +72,7 @@ class n_U16(flatdata.structure.Structure):
 
 class n_I16(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct I16
 {
@@ -92,6 +96,7 @@ class n_I16(flatdata.structure.Structure):
 
 class n_U32(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct U32
 {
@@ -115,6 +120,7 @@ class n_U32(flatdata.structure.Structure):
 
 class n_I32(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct I32
 {
@@ -138,6 +144,7 @@ class n_I32(flatdata.structure.Structure):
 
 class n_U64(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct U64
 {
@@ -161,6 +168,7 @@ class n_U64(flatdata.structure.Structure):
 
 class n_I64(flatdata.structure.Structure):
     """"""
+    __slots__ = ()
     _SCHEMA = """namespace n {
 struct I64
 {

diff --git a/flatdata-py/README.md b/flatdata-py/README.md
@@ -18,6 +18,37 @@ Once you have [created a flatdata schema file](../README.md#creating-a-schema),
 flatdata-generator --gen py --schema locations.flatdata --output-file locations.py
 ```
 
+## Performance tips
+
+`flatdata-py` supports two data access patterns with very different performance characteristics on large archives.
+
+Iterating over a vector yields one Python object per element. Each field access unpacks bits from the underlying memory-mapped data. This is fine for accessing individual elements or small ranges, but has significant per-element overhead for bulk operations:
+
+```python
+count = sum(1 for x in archive.links if x.speed_limit > 100)
+```
+
+For bulk operations, use the vectorized access methods that read fields directly into NumPy arrays:
+
+```python
+# single column access, returns a pandas DataFrame
+df = archive.links.speed_limit
+count = len(df[df['speed_limit'] > 100])
+
+# full NumPy structured array with all fields
+arr = archive.links.to_numpy()
+count = int(np.sum(arr['speed_limit'] > 100))
+
+# slices work too
+arr = archive.links[1000:2000].to_numpy()
+df = archive.links[::10].to_data_frame()
+```
+
+* Use `vector.field_name` (column access) when you only need one or a few fields.
+* Use `vector.to_numpy()` or `vector.to_data_frame()` when you need all fields at once.
+* Use `vector[i].field_name` for random access to individual elements.
+* The underlying data is memory-mapped; the OS pages it from disk on demand. Vectorized results are materialized as NumPy arrays in RAM.
+
 ## Using the inspector
 
 `flatdata-py` comes with a handy tool called the `flatdata-inspector` to inspect the contents of an archive:

diff --git a/flatdata-py/flatdata/lib/archive.py b/flatdata-py/flatdata/lib/archive.py
@@ -39,9 +39,9 @@ def __init__(self, resource_storage):
             self.__getattr__(name)
 
     def __getattr__(self, name):
-        if name not in list(self._RESOURCES.keys()):
+        if name not in self._RESOURCES:  # membership test on the mapping itself
             raise AttributeError("Resource %s not defined in archive." % name)
-        if name not in list(self._loaded_resources.keys()):
+        if name not in self._loaded_resources:  # first access: open and cache below
             self._loaded_resources[name] = self._open_resource(name)
         return self._loaded_resources[name]
 

diff --git a/flatdata-py/flatdata/lib/data_access.py b/flatdata-py/flatdata/lib/data_access.py
@@ -3,6 +3,8 @@
  See the LICENSE file in the root of this project for license details.
 '''
 
+import numpy as np
+
 # Sign bits cache for the value reading.
 _SIGN_BITS = [0] + [(1 << (bits - 1)) for bits in range(1, 65)]
 
@@ -20,7 +22,7 @@ def read_value(data, offset_bits, num_bits, is_signed):
         remainder = data[offset_bytes + total_bytes]
         result |= remainder << (total_bytes * 8 - offset_extra_bits)
 
-    if num_bits < 64:
+    if num_bits < 64 or offset_extra_bits > 0:
         result = result & ((1 << num_bits) - 1)
 
     if not is_signed:
@@ -62,3 +64,49 @@ def write_value(data, offset_bits, num_bits, is_signed, value):
         surrounding_bits = data[offset_bytes + byte_idx] & ~((1 << offset_bits) - 1)
         data[offset_bytes + byte_idx] = value_in_little_endian[byte_idx] & ((1 << (8 - (bits_written % 8))) - 1)
         data[offset_bytes + byte_idx] |= surrounding_bits
+
+
+def read_field_vectorized(raw_bytes_2d, field_offset_bits, field_width_bits, is_signed):
+    """Read a bit-packed field from all elements at once, returning a numpy array.
+
+    :param raw_bytes_2d: numpy uint8 array shaped (num_elements, struct_size_bytes)
+    :param field_offset_bits: bit offset of the field within each element
+    :param field_width_bits: width of the field in bits (max 64)
+    :param is_signed: whether to sign-extend the result
+    :return: numpy array of field values (uint64 if unsigned, int64 if signed)
+    """
+    byte_start = field_offset_bits // 8
+    bit_shift = field_offset_bits % 8
+
+    # Fast path for unsigned single-bit fields; signed ones need the sign
+    # extension below so that a set bit reads as -1.
+    if field_width_bits == 1 and not is_signed:
+        column = raw_bytes_2d[:, byte_start].astype(np.uint64)
+        return (column >> np.uint64(bit_shift)) & np.uint64(1)
+
+    bytes_needed = (bit_shift + field_width_bits + 7) // 8
+    low_bytes = min(bytes_needed, 8)
+
+    # Assemble up to 8 little-endian bytes into one uint64 per element.
+    result = np.zeros(raw_bytes_2d.shape[0], dtype=np.uint64)
+    for b in range(low_bytes):
+        result |= raw_bytes_2d[:, byte_start + b].astype(np.uint64) << np.uint64(b * 8)
+    result >>= np.uint64(bit_shift)
+
+    # An unaligned field can spill into a ninth byte; merge its low bits on top.
+    bits_so_far = 8 * low_bytes - bit_shift
+    if bits_so_far < field_width_bits and bytes_needed > 8:
+        extra = raw_bytes_2d[:, byte_start + 8].astype(np.uint64)
+        result |= extra << np.uint64(bits_so_far)
+
+    if field_width_bits < 64:
+        result &= np.uint64((1 << field_width_bits) - 1)
+    if not is_signed:
+        return result
+    if field_width_bits == 64:
+        return result.view(np.int64)
+
+    # Two's complement: subtract 2**width wherever the sign bit is set.
+    sign_bit = np.uint64(1 << (field_width_bits - 1))
+    as_int = result.astype(np.int64)
+    return np.where(result & sign_bit, as_int + np.int64(-(1 << field_width_bits)), as_int)