diff --git a/kolibri/core/auth/models.py b/kolibri/core/auth/models.py index 4135c98d34a..e3eb2c281ae 100644 --- a/kolibri/core/auth/models.py +++ b/kolibri/core/auth/models.py @@ -1435,7 +1435,7 @@ def __str__(self): user=self.user, collection=self.collection ) - def save(self, *args, **kwargs): + def validate_membership(self): if self.collection.kind == collection_kinds.FACILITY: raise InvalidMembershipError( "Cannot create membership objects for facilities, as should already be a member by facility attribute" @@ -1452,6 +1452,9 @@ def save(self, *args, **kwargs): raise InvalidMembershipError( "Cannot create membership for a user in a LearnerGroup or AdHocGroup when they are not a member of the parent Classrooom" ) + + def save(self, *args, **kwargs): + self.validate_membership() return super().save(*args, **kwargs) def delete(self, **kwargs): @@ -1528,7 +1531,7 @@ def __str__(self): user=self.user, kind=self.kind, collection=self.collection ) - def save(self, *args, **kwargs): + def validate_role(self): if ( self.collection.kind == collection_kinds.LEARNERGROUP or self.collection.kind == collection_kinds.ADHOCLEARNERSGROUP @@ -1537,21 +1540,28 @@ def save(self, *args, **kwargs): raise InvalidRoleKind( "Cannot assign roles to Learner Groups or AdHoc Groups" ) + if self.collection.kind == collection_kinds.CLASSROOM: + # We only support coaches to be assigned at the classroom level currently + if self.kind != role_kinds.COACH: + raise InvalidRoleKind("Can only assign Coach roles to Classrooms") + + def ensure_coach_role_at_facility(self): + if self.collection.kind == collection_kinds.CLASSROOM: + if not Role.objects.filter( + user=self.user, collection_id=self.collection.parent_id + ).exists(): + # If the user doesn't already have a facility role, then create the assignable coach role for the user + # at the facility level. + Role.objects.create( + user=self.user, + collection_id=self.collection.parent_id, + kind=role_kinds.ASSIGNABLE_COACH, + ) + + def save(self, *args, **kwargs): + self.validate_role() with transaction.atomic(): - if self.collection.kind == collection_kinds.CLASSROOM: - # We only support coaches to be assigned at the classroom level currently - if self.kind != role_kinds.COACH: - raise InvalidRoleKind("Can only assign Coach roles to Classrooms") - if not Role.objects.filter( - user=self.user, collection_id=self.collection.parent_id - ).exists(): - # If the user doesn't already have a facility role, then create the assignable coach role for the user - # at the facility level. - Role.objects.create( - user=self.user, - collection_id=self.collection.parent_id, - kind=role_kinds.ASSIGNABLE_COACH, - ) + self.ensure_coach_role_at_facility() return super().save(*args, **kwargs) def delete(self, **kwargs): diff --git a/kolibri/core/auth/serializers.py b/kolibri/core/auth/serializers.py index 608da1ceb05..9d6259e575e 100644 --- a/kolibri/core/auth/serializers.py +++ b/kolibri/core/auth/serializers.py @@ -2,15 +2,20 @@ from django.core.exceptions import ValidationError as DjangoValidationError from django.core.validators import MinLengthValidator +from django.db import connections from django.db import transaction +from morango.sync.backends.utils import calculate_max_sqlite_variables from rest_framework import serializers from rest_framework.exceptions import ParseError from rest_framework.validators import UniqueTogetherValidator +from .constants import collection_kinds from .constants import facility_presets +from .constants import role_kinds from .errors import IncompatibleDeviceSettingError from .errors import InvalidCollectionHierarchy from .errors import InvalidMembershipError +from .errors import InvalidRoleKind from .models import Classroom from .models import Facility from .models import FacilityDataset @@ -27,16 +32,100 @@ logger = logging.getLogger(__name__) +def _prepare_for_bulk_create(instance): + """ + Prepare a morango SyncableModel instance for bulk_create by manually + setting the fields that would normally be set during save(). + """ + instance.pre_save() + instance.id = instance.calculate_uuid() + instance._morango_dirty_bit = True + + +def _get_batch_size(Model): + """ + Calculate a safe batch_size for bulk_create to avoid SQLite variable limits. + Cap at 500 to prevent 'too many terms in compound SELECT' errors. + Same pattern as kolibri.core.auth.utils.migrate._batch_save. + """ + vendor = connections[Model.objects.db].vendor + if vendor == "sqlite": + return min(calculate_max_sqlite_variables() // len(Model._meta.fields), 500) + return 750 + + class RoleListSerializer(serializers.ListSerializer): - def create(self, validated_data): - created_objects = [] + def validate(self, attrs): + for item in attrs: + instance = Role(**item) + try: + instance.validate_role() + except InvalidRoleKind as e: + raise serializers.ValidationError(str(e)) + return attrs + def create(self, validated_data): + objects_to_create = [] for model_data in validated_data: - obj, created = Role.objects.get_or_create(**model_data) - if created: - created_objects.append(obj) + instance = Role(**model_data) + _prepare_for_bulk_create(instance) + objects_to_create.append(instance) + + batch_size = _get_batch_size(Role) + + with transaction.atomic(): + # Filter out already-existing roles by their deterministic morango UUID + existing_ids = set( + Role.objects.filter( + id__in=[obj.id for obj in objects_to_create] + ).values_list("id", flat=True) + ) + new_objects = [ + obj for obj in objects_to_create if obj.id not in existing_ids + ] + + if new_objects: + Role.objects.bulk_create( + new_objects, + batch_size=batch_size, + ignore_conflicts=True, + ) - return created_objects + # Handle ASSIGNABLE_COACH side effect for classroom coach roles + classroom_roles = [ + obj + for obj in new_objects + if obj.collection.kind == collection_kinds.CLASSROOM + ] + if classroom_roles: + user_ids = {obj.user_id for obj in classroom_roles} + facility_ids = {obj.collection.parent_id for obj in classroom_roles} + users_with_facility_role = set( + Role.objects.filter( + collection_id__in=facility_ids, + user_id__in=user_ids, + ).values_list("user_id", "collection_id") + ) + assignable_roles = [] + for obj in classroom_roles: + pair = (obj.user_id, obj.collection.parent_id) + if pair not in users_with_facility_role: + instance = Role( + user=obj.user, + collection_id=obj.collection.parent_id, + kind=role_kinds.ASSIGNABLE_COACH, + ) + _prepare_for_bulk_create(instance) + assignable_roles.append(instance) + users_with_facility_role.add(pair) + if assignable_roles: + Role.objects.bulk_create( + assignable_roles, + batch_size=batch_size, + ignore_conflicts=True, + ) + + return new_objects class RoleSerializer(serializers.ModelSerializer): @@ -141,15 +230,68 @@ def validate(self, attrs): class MembershipListSerializer(serializers.ListSerializer): - def create(self, validated_data): - created_objects = [] + def validate(self, attrs): + lg_items = [] + for item in attrs: + collection = item["collection"] + if collection.kind == collection_kinds.FACILITY: + raise serializers.ValidationError( + "Cannot create membership objects for facilities, " + "as should already be a member by facility attribute" + ) + if collection.kind in ( + collection_kinds.LEARNERGROUP, + collection_kinds.ADHOCLEARNERSGROUP, + ): + lg_items.append(item) + + if lg_items: + # Batch check parent classroom memberships with a single query + needed_pairs = { + (item["collection"].parent_id, item["user"].id) for item in lg_items + } + existing_memberships = set( + Membership.objects.filter( + collection_id__in={p[0] for p in needed_pairs}, + user_id__in={p[1] for p in needed_pairs}, + ).values_list("collection_id", "user_id") + ) + for item in lg_items: + pair = (item["collection"].parent_id, item["user"].id) + if pair not in existing_memberships: + raise serializers.ValidationError( + "Cannot create membership for a user in a " + "LearnerGroup or AdHocGroup when they are not a " + "member of the parent Classroom" + ) + return attrs + def create(self, validated_data): + objects_to_create = [] for model_data in validated_data: - obj, created = Membership.objects.get_or_create(**model_data) - if created: - created_objects.append(obj) + instance = Membership(**model_data) + _prepare_for_bulk_create(instance) + objects_to_create.append(instance) + + with transaction.atomic(): + # Filter out already-existing memberships by their deterministic morango UUID + existing_ids = set( + Membership.objects.filter( + id__in=[obj.id for obj in objects_to_create] + ).values_list("id", flat=True) + ) + new_objects = [ + obj for obj in objects_to_create if obj.id not in existing_ids + ] + + if new_objects: + Membership.objects.bulk_create( + new_objects, + batch_size=_get_batch_size(Membership), + ignore_conflicts=True, + ) - return created_objects + return new_objects class MembershipSerializer(serializers.ModelSerializer): diff --git a/kolibri/core/auth/test/test_api.py b/kolibri/core/auth/test/test_api.py index 057e744ee63..431f592ff84 100644 --- a/kolibri/core/auth/test/test_api.py +++ b/kolibri/core/auth/test/test_api.py @@ -8,7 +8,9 @@ import factory from django.conf import settings +from django.db import connection from django.db.models.signals import pre_delete +from django.test.utils import CaptureQueriesContext from django.urls import reverse from django.utils import timezone from mock import patch @@ -31,6 +33,7 @@ from ..constants import role_kinds from ..constants.facility_presets import mappings from ..models import Facility +from ..serializers import _prepare_for_bulk_create from .helpers import create_superuser from .helpers import DUMMY_PASSWORD from .helpers import provision_device @@ -2243,6 +2246,10 @@ def setUpTestData(cls): # create other user memberships models.Membership.objects.create(collection=cls.classroom, user=cls.other_user) models.Membership.objects.create(collection=cls.lg, user=cls.other_user) + # users for bulk create tests (no pre-existing memberships) + cls.bulk_user1 = FacilityUserFactory.create(facility=cls.facility) + cls.bulk_user2 = FacilityUserFactory.create(facility=cls.facility) + cls.bulk_user3 = FacilityUserFactory.create(facility=cls.facility) def login_superuser(self): self.client.login( @@ -2353,6 +2360,120 @@ def test_delete_does_not_affect_other_user_memberships(self): expected_count, ) + def test_bulk_create_memberships(self): + self.login_superuser() + url = reverse("kolibri:core:membership-list") + data = [ + {"user": self.bulk_user1.id, "collection": self.classroom.id}, + {"user": self.bulk_user2.id, "collection": self.classroom.id}, + ] + response = self.client.post(url, data, format="json") + self.assertEqual(response.status_code, 201) + self.assertEqual(len(response.data), 2) + self.assertTrue( + models.Membership.objects.filter( + user=self.bulk_user1, collection=self.classroom + ).exists() + ) + self.assertTrue( + models.Membership.objects.filter( + user=self.bulk_user2, collection=self.classroom + ).exists() + ) + + def test_bulk_create_memberships_have_valid_morango_fields(self): + self.login_superuser() + url = reverse("kolibri:core:membership-list") + data = [ + {"user": self.bulk_user1.id, "collection": self.classroom.id}, + {"user": self.bulk_user2.id, "collection": self.classroom.id}, + ] + response = self.client.post(url, data, format="json") + self.assertEqual(response.status_code, 201) + for m_data in response.data: + membership = models.Membership.objects.get(id=m_data["id"]) + self.assertEqual(len(membership.id), 32) + self.assertIsNotNone(membership._morango_partition) + self.assertIsNotNone(membership._morango_source_id) + self.assertTrue(membership._morango_dirty_bit) + + def test_bulk_create_memberships_idempotent(self): + self.login_superuser() + models.Membership.objects.create( + user=self.bulk_user1, collection=self.classroom + ) + url = reverse("kolibri:core:membership-list") + data = [ + {"user": self.bulk_user1.id, "collection": self.classroom.id}, + {"user": self.bulk_user2.id, "collection": self.classroom.id}, + ] + response = self.client.post(url, data, format="json") + self.assertEqual(response.status_code, 201) + self.assertEqual(len(response.data), 1) + self.assertEqual(response.data[0]["user"], self.bulk_user2.id) + + def test_bulk_create_learnergroup_membership_requires_classroom_membership(self): + self.login_superuser() + url = reverse("kolibri:core:membership-list") + data = [ + {"user": self.bulk_user3.id, "collection": self.lg.id}, + ] + response = self.client.post(url, data, format="json") + self.assertEqual(response.status_code, 400) + + def test_bulk_create_facility_membership_rejected(self): + self.login_superuser() + url = reverse("kolibri:core:membership-list") + data = [ + {"user": self.bulk_user1.id, "collection": self.facility.id}, + ] + response = self.client.post(url, data, format="json") + self.assertEqual(response.status_code, 400) + + def test_bulk_create_memberships_query_count_does_not_scale(self): + """Verify our serializer's validate/create use O(1) queries, not O(N).""" + self.login_superuser() + url = reverse("kolibri:core:membership-list") + + # Batch of 1 + user_a = FacilityUserFactory.create(facility=self.facility) + data_1 = [ + {"user": user_a.id, "collection": self.classroom.id}, + ] + with CaptureQueriesContext(connection) as ctx_1: + response = self.client.post(url, data_1, format="json") + self.assertEqual(response.status_code, 201) + + # Batch of 3 + user_b = FacilityUserFactory.create(facility=self.facility) + user_c = FacilityUserFactory.create(facility=self.facility) + user_d = FacilityUserFactory.create(facility=self.facility) + data_3 = [ + {"user": user_b.id, "collection": self.classroom.id}, + {"user": user_c.id, "collection": self.classroom.id}, + {"user": user_d.id, "collection": self.classroom.id}, + ] + with CaptureQueriesContext(connection) as ctx_3: + response = self.client.post(url, data_3, format="json") + self.assertEqual(response.status_code, 201) + + # DRF's PrimaryKeyRelatedField adds O(N) queries for field resolution + # and permissions/serialization add further per-item overhead, + # but our serializer's validate() and create() should use a fixed number. + # The key assertion is that query growth is linear (not quadratic). + actual_diff = len(ctx_3) - len(ctx_1) + extra_items = 2 # 3-item batch has 2 more items than 1-item batch + max_per_item_overhead = 10 + self.assertLessEqual(actual_diff, max_per_item_overhead * extra_items) + + def test_prepare_for_bulk_create_sets_morango_fields(self): + membership = models.Membership(user=self.bulk_user1, collection=self.classroom) + _prepare_for_bulk_create(membership) + self.assertEqual(len(membership.id), 32) + self.assertIsNotNone(membership._morango_partition) + self.assertIsNotNone(membership._morango_source_id) + self.assertTrue(membership._morango_dirty_bit) + class GroupMembership(APITestCase): databases = "__all__" @@ -2790,3 +2911,252 @@ def test_register_enqueues_automatic_sync( format="json", ) mock_enqueue_sync.assert_called_once_with(self.facility) + + +class RoleAPITestCase(APITestCase): + databases = "__all__" + + @classmethod + def setUpTestData(cls): + provision_device() + cls.facility = FacilityFactory.create() + cls.superuser = create_superuser(cls.facility) + cls.classroom = ClassroomFactory.create(parent=cls.facility) + cls.lg = LearnerGroupFactory.create(parent=cls.classroom) + cls.user1 = FacilityUserFactory.create(facility=cls.facility) + cls.user2 = FacilityUserFactory.create(facility=cls.facility) + + def setUp(self): + self.client.login( + username=self.superuser.username, + password=DUMMY_PASSWORD, + facility=self.facility, + ) + + def test_bulk_create_multiple_roles(self): + url = reverse("kolibri:core:role-list") + data = [ + { + "user": self.user1.id, + "collection": self.classroom.id, + "kind": role_kinds.COACH, + }, + { + "user": self.user2.id, + "collection": self.classroom.id, + "kind": role_kinds.COACH, + }, + ] + response = self.client.post(url, data, format="json") + self.assertEqual(response.status_code, 201) + self.assertEqual(len(response.data), 2) + self.assertTrue( + models.Role.objects.filter( + user=self.user1, collection=self.classroom, kind=role_kinds.COACH + ).exists() + ) + self.assertTrue( + models.Role.objects.filter( + user=self.user2, collection=self.classroom, kind=role_kinds.COACH + ).exists() + ) + + def test_bulk_create_duplicate_roles_idempotent(self): + models.Role.objects.create( + user=self.user1, collection=self.classroom, kind=role_kinds.COACH + ) + url = reverse("kolibri:core:role-list") + data = [ + { + "user": self.user1.id, + "collection": self.classroom.id, + "kind": role_kinds.COACH, + }, + { + "user": self.user2.id, + "collection": self.classroom.id, + "kind": role_kinds.COACH, + }, + ] + response = self.client.post(url, data, format="json") + self.assertEqual(response.status_code, 201) + # Only the new role should be in the response + self.assertEqual(len(response.data), 1) + self.assertEqual(response.data[0]["user"], self.user2.id) + + def test_bulk_create_classroom_coach_creates_assignable_coach_role(self): + url = reverse("kolibri:core:role-list") + data = [ + { + "user": self.user1.id, + "collection": self.classroom.id, + "kind": role_kinds.COACH, + }, + ] + response = self.client.post(url, data, format="json") + self.assertEqual(response.status_code, 201) + self.assertTrue( + models.Role.objects.filter( + user=self.user1, + collection=self.facility, + kind=role_kinds.ASSIGNABLE_COACH, + ).exists() + ) + + def test_bulk_create_classroom_coach_no_duplicate_assignable_coach(self): + # User already has a facility role + models.Role.objects.create( + user=self.user1, collection=self.facility, kind=role_kinds.ADMIN + ) + url = reverse("kolibri:core:role-list") + data = [ + { + "user": self.user1.id, + "collection": self.classroom.id, + "kind": role_kinds.COACH, + }, + ] + response = self.client.post(url, data, format="json") + self.assertEqual(response.status_code, 201) + self.assertFalse( + models.Role.objects.filter( + user=self.user1, + collection=self.facility, + kind=role_kinds.ASSIGNABLE_COACH, + ).exists() + ) + + def test_bulk_create_facility_admin_roles(self): + url = reverse("kolibri:core:role-list") + data = [ + { + "user": self.user1.id, + "collection": self.facility.id, + "kind": role_kinds.ADMIN, + }, + { + "user": self.user2.id, + "collection": self.facility.id, + "kind": role_kinds.ADMIN, + }, + ] + response = self.client.post(url, data, format="json") + self.assertEqual(response.status_code, 201) + self.assertEqual(len(response.data), 2) + self.assertTrue( + models.Role.objects.filter( + user=self.user1, collection=self.facility, kind=role_kinds.ADMIN + ).exists() + ) + self.assertTrue( + models.Role.objects.filter( + user=self.user2, collection=self.facility, kind=role_kinds.ADMIN + ).exists() + ) + + def test_bulk_create_roles_have_valid_morango_fields(self): + url = reverse("kolibri:core:role-list") + data = [ + { + "user": self.user1.id, + "collection": self.classroom.id, + "kind": role_kinds.COACH, + }, + { + "user": self.user2.id, + "collection": self.classroom.id, + "kind": role_kinds.COACH, + }, + ] + response = self.client.post(url, data, format="json") + self.assertEqual(response.status_code, 201) + for role_data in response.data: + role = models.Role.objects.get(id=role_data["id"]) + self.assertEqual(len(role.id), 32) + self.assertIsNotNone(role._morango_partition) + self.assertIsNotNone(role._morango_source_id) + self.assertTrue(role._morango_dirty_bit) + + def test_bulk_create_learnergroup_role_rejected(self): + url = reverse("kolibri:core:role-list") + data = [ + { + "user": self.user1.id, + "collection": self.lg.id, + "kind": role_kinds.COACH, + }, + ] + response = self.client.post(url, data, format="json") + self.assertEqual(response.status_code, 400) + + def test_bulk_create_query_count_does_not_scale(self): + """Verify our serializer's validate/create use O(1) queries, not O(N).""" + url = reverse("kolibri:core:role-list") + + # Batch of 1 + user_a = FacilityUserFactory.create(facility=self.facility) + data_1 = [ + { + "user": user_a.id, + "collection": self.classroom.id, + "kind": role_kinds.COACH, + }, + ] + with CaptureQueriesContext(connection) as ctx_1: + response = self.client.post(url, data_1, format="json") + self.assertEqual(response.status_code, 201) + + # Batch of 3 + user_b = FacilityUserFactory.create(facility=self.facility) + user_c = FacilityUserFactory.create(facility=self.facility) + user_d = FacilityUserFactory.create(facility=self.facility) + data_3 = [ + { + "user": user_b.id, + "collection": self.classroom.id, + "kind": role_kinds.COACH, + }, + { + "user": user_c.id, + "collection": self.classroom.id, + "kind": role_kinds.COACH, + }, + { + "user": user_d.id, + "collection": self.classroom.id, + "kind": role_kinds.COACH, + }, + ] + with CaptureQueriesContext(connection) as ctx_3: + response = self.client.post(url, data_3, format="json") + self.assertEqual(response.status_code, 201) + + # DRF's PrimaryKeyRelatedField adds O(N) queries for field resolution + # and permissions/serialization add further per-item overhead, + # but our serializer's validate() and create() should use a fixed number. + # The key assertion is that query growth is linear (not quadratic). + actual_diff = len(ctx_3) - len(ctx_1) + extra_items = 2 # 3-item batch has 2 more items than 1-item batch + max_per_item_overhead = 10 + self.assertLessEqual(actual_diff, max_per_item_overhead * extra_items) + + def test_prepare_for_bulk_create_sets_morango_fields(self): + role = models.Role( + user=self.user1, collection=self.classroom, kind=role_kinds.COACH + ) + _prepare_for_bulk_create(role) + self.assertEqual(len(role.id), 32) + self.assertIsNotNone(role._morango_partition) + self.assertIsNotNone(role._morango_source_id) + self.assertTrue(role._morango_dirty_bit) + + def test_prepare_for_bulk_create_uuid_is_deterministic(self): + role1 = models.Role( + user=self.user1, collection=self.classroom, kind=role_kinds.COACH + ) + role2 = models.Role( + user=self.user1, collection=self.classroom, kind=role_kinds.COACH + ) + _prepare_for_bulk_create(role1) + _prepare_for_bulk_create(role2) + self.assertEqual(role1.id, role2.id)