Skip to content

Commit 18bcd11

Browse files
Witiko authored and mpenkov committed
Make termsim matrix positive definite even with negative similarities (#2397)
* Make termsim matrix positive definite even with negative similarities
* Test that termsim matrix with negative similarities is positive definite
* Use `or` instead of `{...}` for documenting a union of types
1 parent ce0af20 commit 18bcd11

File tree

2 files changed

+12
-11
lines changed

2 files changed

+12
-11
lines changed

gensim/similarities/termsim.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -168,7 +168,7 @@ class SparseTermSimilarityMatrix(SaveLoad):
168168
strict column diagonal dominance. Positive definiteness is a necessary precondition if you
169169
later wish to derive a change-of-basis matrix from the term similarity matrix using Cholesky
170170
factorization.
171-
nonzero_limit : {int, None}, optional
171+
nonzero_limit : int or None, optional
172172
The maximum number of non-zero elements outside the diagonal in a single column of the
173173
sparse term similarity matrix. If None, then no limit will be imposed.
174174
dtype : numpy.dtype, optional
@@ -242,11 +242,11 @@ def __init__(self, source, dictionary=None, tfidf=None, symmetric=True, positive
242242
key=lambda x: (lambda term_index, _: (tfidf.idfs[term_index], -term_index))(*x), reverse=True)
243243

244244
for row_number, (t2_index, similarity) in zip(range(num_rows), rows):
245-
if positive_definite and column_sum[t1_index] + similarity >= 1.0:
245+
if positive_definite and column_sum[t1_index] + abs(similarity) >= 1.0:
246246
break
247247
if symmetric:
248248
if column_nonzero[t2_index] <= nonzero_limit \
249-
and (not positive_definite or column_sum[t2_index] + similarity < 1.0) \
249+
and (not positive_definite or column_sum[t2_index] + abs(similarity) < 1.0) \
250250
and not (t1_index, t2_index) in matrix:
251251
matrix[t1_index, t2_index] = similarity
252252
column_nonzero[t1_index] += 1

gensim/test/test_similarities.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -800,21 +800,22 @@ def test_symmetric(self):
800800

801801
def test_positive_definite(self):
802802
"""Test the positive_definite parameter of the matrix constructor."""
803+
negative_index = UniformTermSimilarityIndex(self.dictionary, term_similarity=-0.5)
803804
matrix = SparseTermSimilarityMatrix(
804-
self.index, self.dictionary, nonzero_limit=2).matrix.todense()
805+
negative_index, self.dictionary, nonzero_limit=2).matrix.todense()
805806
expected_matrix = numpy.array([
806-
[1.0, 0.5, 0.5, 0.0, 0.0],
807-
[0.5, 1.0, 0.0, 0.5, 0.0],
808-
[0.5, 0.0, 1.0, 0.0, 0.0],
809-
[0.0, 0.5, 0.0, 1.0, 0.0],
807+
[1.0, -.5, -.5, 0.0, 0.0],
808+
[-.5, 1.0, 0.0, -.5, 0.0],
809+
[-.5, 0.0, 1.0, 0.0, 0.0],
810+
[0.0, -.5, 0.0, 1.0, 0.0],
810811
[0.0, 0.0, 0.0, 0.0, 1.0]])
811812
self.assertTrue(numpy.all(expected_matrix == matrix))
812813

813814
matrix = SparseTermSimilarityMatrix(
814-
self.index, self.dictionary, nonzero_limit=2, positive_definite=True).matrix.todense()
815+
negative_index, self.dictionary, nonzero_limit=2, positive_definite=True).matrix.todense()
815816
expected_matrix = numpy.array([
816-
[1.0, 0.5, 0.0, 0.0, 0.0],
817-
[0.5, 1.0, 0.0, 0.0, 0.0],
817+
[1.0, -.5, 0.0, 0.0, 0.0],
818+
[-.5, 1.0, 0.0, 0.0, 0.0],
818819
[0.0, 0.0, 1.0, 0.0, 0.0],
819820
[0.0, 0.0, 0.0, 1.0, 0.0],
820821
[0.0, 0.0, 0.0, 0.0, 1.0]])

0 commit comments

Comments
 (0)