Skip to content

Commit 0f90a7f

Browse files
feat: Count implicit tags (#133)
This updates our "get tag counts" API so that the tag counts can optionally include implicit (ancestor) tags.
1 parent 1d9c459 commit 0f90a7f

5 files changed

Lines changed: 112 additions & 10 deletions

File tree

openedx_tagging/core/tagging/api.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
from .data import TagDataQuerySet
2323
from .models import ObjectTag, Tag, Taxonomy
24-
from .models.utils import ConcatNull
24+
from .models.utils import ConcatNull, StringAgg
2525

2626
# Export this as part of the API
2727
TagDoesNotExist = Tag.DoesNotExist
@@ -196,7 +196,7 @@ def get_object_tags(
196196
return tags
197197

198198

199-
def get_object_tag_counts(object_id_pattern: str) -> dict[str, int]:
199+
def get_object_tag_counts(object_id_pattern: str, count_implicit=False) -> dict[str, int]:
200200
"""
201201
Given an object ID, a "starts with" glob pattern like
202202
"course-v1:foo+bar+baz@*", or a list of "comma,separated,IDs", return a
@@ -217,8 +217,36 @@ def get_object_tag_counts(object_id_pattern: str) -> dict[str, int]:
217217
qs = qs.exclude(taxonomy_id=None) # The whole taxonomy was deleted
218218
qs = qs.exclude(taxonomy__enabled=False) # The whole taxonomy is disabled
219219
qs = qs.exclude(tag_id=None, taxonomy__allow_free_text=False) # The taxonomy exists but the tag is deleted
220-
qs = qs.values("object_id").annotate(num_tags=models.Count("id")).order_by("object_id")
221-
return {row["object_id"]: row["num_tags"] for row in qs}
220+
if count_implicit:
221+
# Counting the implicit tags is tricky, because if two "grandchild" tags have the same implicit parent tag, we
222+
# need to count that parent tag only once. To do that, we collect all the ancestor tag IDs into an aggregate
223+
# string, and then count the unique values using python
224+
qs = qs.values("object_id").annotate(
225+
num_tags=models.Count("id"),
226+
tag_ids_str_1=StringAgg("tag_id"),
227+
tag_ids_str_2=StringAgg("tag__parent_id"),
228+
tag_ids_str_3=StringAgg("tag__parent__parent_id"),
229+
tag_ids_str_4=StringAgg("tag__parent__parent__parent_id"),
230+
).order_by("object_id")
231+
result = {}
232+
for row in qs:
233+
# ObjectTags for free text taxonomies will be included in "num_tags" count, but not "tag_ids_str_1" since
234+
# they have no tag ID. We can compute how many free text tags each object has now:
235+
if row["tag_ids_str_1"]:
236+
num_free_text_tags = row["num_tags"] - len(row["tag_ids_str_1"].split(","))
237+
else:
238+
num_free_text_tags = row["num_tags"]
239+
# Then we count the total number of *unique* Tags for this object, both implicit and explicit:
240+
other_tag_ids = set()
241+
for field in ("tag_ids_str_1", "tag_ids_str_2", "tag_ids_str_3", "tag_ids_str_4"):
242+
if row[field] is not None:
243+
for tag_id in row[field].split(","):
244+
other_tag_ids.add(int(tag_id))
245+
result[row["object_id"]] = num_free_text_tags + len(other_tag_ids)
246+
return result
247+
else:
248+
qs = qs.values("object_id").annotate(num_tags=models.Count("id")).order_by("object_id")
249+
return {row["object_id"]: row["num_tags"] for row in qs}
222250

223251

224252
def delete_object_tags(object_id: str):

openedx_tagging/core/tagging/models/utils.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
Utilities for tagging and taxonomy models
33
"""
4-
4+
from django.db.models import Aggregate, CharField
55
from django.db.models.expressions import Func
66

77

@@ -22,3 +22,23 @@ def as_sqlite(self, compiler, connection, **extra_context):
2222
arg_joiner=" || ",
2323
**extra_context,
2424
)
25+
26+
27+
class StringAgg(Aggregate):  # pylint: disable=abstract-method
    """
    Aggregate function that collects the values of some column across all rows,
    and creates a string by concatenating those values, with "," as a separator.

    This is the same as Django's django.contrib.postgres.aggregates.StringAgg,
    but this version works with MySQL and SQLite.

    Arguments:
        expression: the field name (or expression) whose values are concatenated.
        distinct: if true, render ``GROUP_CONCAT(DISTINCT ...)`` instead of
            ``GROUP_CONCAT(...)``.
        **extra: passed through unchanged to ``Aggregate.__init__``.
    """
    function = 'GROUP_CONCAT'
    template = '%(function)s(%(distinct)s%(expressions)s)'
    # Django's Aggregate.__init__ raises TypeError for any truthy ``distinct``
    # unless the subclass explicitly opts in, so this must be set for
    # ``distinct=True`` to be usable at all.
    allow_distinct = True

    def __init__(self, expression, distinct=False, **extra):
        # Hand Django the boolean flag itself: Aggregate.as_sql() is what turns
        # it into the "DISTINCT " prefix substituted into ``template``.
        super().__init__(
            expression,
            distinct=bool(distinct),
            output_field=CharField(),
            **extra,
        )

openedx_tagging/core/tagging/rest_api/v1/views.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -532,9 +532,11 @@ class ObjectTagCountsView(
532532
**Retrieve Parameters**
533533
* object_id_pattern (required): - The Object ID to retrieve ObjectTags for. Can contain '*' at the end
534534
for wildcard matching, or use ',' to separate multiple object IDs.
535+
* count_implicit (optional): If present, implicit parent/grandparent tags will be included in the counts
535536
536537
**Retrieve Example Requests**
537538
GET api/tagging/v1/object_tag_counts/:object_id_pattern
539+
GET api/tagging/v1/object_tag_counts/:object_id_pattern?count_implicit
538540
539541
**Retrieve Query Returns**
540542
* 200 - Success
@@ -553,8 +555,9 @@ def retrieve(self, request, *args, **kwargs) -> Response:
553555
"""
554556
# This API does NOT bother doing any permission checks as the # of tags is not considered sensitive information.
555557
object_id_pattern = self.kwargs["object_id_pattern"]
558+
count_implicit = "count_implicit" in request.query_params
556559
try:
557-
return Response(get_object_tag_counts(object_id_pattern))
560+
return Response(get_object_tag_counts(object_id_pattern, count_implicit=count_implicit))
558561
except ValueError as err:
559562
raise ValidationError(err.args[0]) from err
560563

tests/openedx_tagging/core/tagging/test_api.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,36 @@ def test_get_object_tag_counts(self) -> None:
704704
assert tagging_api.get_object_tag_counts(f"{obj1},{obj2}") == {obj1: 1, obj2: 2}
705705
assert tagging_api.get_object_tag_counts("object_*") == {obj1: 1, obj2: 2}
706706

707+
def test_get_object_tag_counts_implicit(self) -> None:
    """
    Check get_object_tag_counts when implicit (ancestor) tags are counted too.

    Tag lineages used below:
    - "DPANN" is "Archaea > DPANN" (2 tags, 1 implicit)
    - "Chordata" is "Eukaryota > Animalia > Chordata" (3 tags, 2 implicit)
    - "Arthropoda" is "Eukaryota > Animalia > Arthropoda" (3 tags, 2 implicit)
    """
    def implicit_counts(pattern: str):
        # Every assertion below asks for the same thing: counts with implicit tags included.
        return tagging_api.get_object_tag_counts(pattern, count_implicit=True)

    self.taxonomy.allow_multiple = True
    self.taxonomy.save()

    first = "object_id1"
    second = "object_id2"
    third = "object_id3"
    unrelated = "other_object"

    # Each object gets one or two explicit tags:
    tagging_api.tag_object(object_id=first, taxonomy=self.taxonomy, tags=["DPANN"])
    tagging_api.tag_object(object_id=second, taxonomy=self.taxonomy, tags=["Chordata"])
    tagging_api.tag_object(object_id=second, taxonomy=self.free_text_taxonomy, tags=["has a notochord"])
    tagging_api.tag_object(object_id=third, taxonomy=self.taxonomy, tags=["Chordata", "Arthropoda"])
    tagging_api.tag_object(object_id=unrelated, taxonomy=self.free_text_taxonomy, tags=["other"])

    assert implicit_counts(first) == {first: 2}
    assert implicit_counts(second) == {second: 4}
    assert implicit_counts(f"{first},{second}") == {first: 2, second: 4}

    # third has 2 explicit tags plus 2 implicit ones; the implicit ancestors are shared by
    # both explicit tags, so they are counted once (total 4, not 6).
    expected_for_glob = {first: 2, second: 4, third: 4}
    assert implicit_counts("object_*") == expected_for_glob

    assert implicit_counts(unrelated) == {unrelated: 1}
736+
707737
def test_get_object_tag_counts_deleted_disabled(self) -> None:
708738
"""
709739
Test that get_object_tag_counts doesn't "count" disabled taxonomies or
@@ -726,6 +756,9 @@ def test_get_object_tag_counts_deleted_disabled(self) -> None:
726756
self.free_text_taxonomy.enabled = False
727757
self.free_text_taxonomy.save()
728758
assert tagging_api.get_object_tag_counts("object_*") == {obj1: 1, obj2: 1}
759+
# Also check the result with count_implicit:
760+
# "English" has no implicit tags but "Chordata" has two, so we expect these totals:
761+
assert tagging_api.get_object_tag_counts("object_*", count_implicit=True) == {obj1: 1, obj2: 3}
729762

730763
# But, by the way, if we re-enable the taxonomy and restore the tag, the counts return:
731764
self.free_text_taxonomy.enabled = True

tests/openedx_tagging/core/tagging/test_views.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,10 +1029,14 @@ def test_get_counts(self):
10291029
# Course 7 Unit 2
10301030
api.tag_object(object_id="course07-unit02-problem01", taxonomy=self.free_text_taxonomy, tags=["b"])
10311031
api.tag_object(object_id="course07-unit02-problem02", taxonomy=self.free_text_taxonomy, tags=["c", "d"])
1032-
api.tag_object(object_id="course07-unit02-problem03", taxonomy=self.free_text_taxonomy, tags=["N", "M", "x"])
1033-
1034-
def check(object_id_pattern: str):
1035-
result = self.client.get(OBJECT_TAG_COUNTS_URL.format(object_id_pattern=object_id_pattern))
1032+
api.tag_object(object_id="course07-unit02-problem03", taxonomy=self.free_text_taxonomy, tags=["N", "M"])
1033+
api.tag_object(object_id="course07-unit02-problem03", taxonomy=self.taxonomy, tags=["Mammalia"])
1034+
1035+
def check(object_id_pattern: str, count_implicit=False):
1036+
url = OBJECT_TAG_COUNTS_URL.format(object_id_pattern=object_id_pattern)
1037+
if count_implicit:
1038+
url += "?count_implicit"
1039+
result = self.client.get(url)
10361040
assert result.status_code == status.HTTP_200_OK
10371041
return result.data
10381042

@@ -1045,6 +1049,20 @@ def check(object_id_pattern: str):
10451049
"course07-unit01-problem01": 3,
10461050
"course07-unit01-problem02": 2,
10471051
}
1052+
with self.assertNumQueries(1):
1053+
assert check(object_id_pattern="course07-unit02-*") == {
1054+
"course07-unit02-problem01": 1,
1055+
"course07-unit02-problem02": 2,
1056+
"course07-unit02-problem03": 3,
1057+
}
1058+
with self.assertNumQueries(1):
1059+
assert check(object_id_pattern="course07-unit02-*", count_implicit=True) == {
1060+
"course07-unit02-problem01": 1,
1061+
"course07-unit02-problem02": 2,
1062+
# "Mammalia" includes 1 explicit + 3 implicit tags: "Eukaryota > Animalia > Chordata > Mammalia"
1063+
# so problem03 has 2 free text tags and "4" life on earth tags:
1064+
"course07-unit02-problem03": 6,
1065+
}
10481066
with self.assertNumQueries(1):
10491067
assert check(object_id_pattern="course07-unit*") == {
10501068
"course07-unit01-problem01": 3,

0 commit comments

Comments
 (0)