Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions NEWS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ Release Notes
Version 0.7.6dev: Unreleased
-------------------------------------------------------------------------------

FilterSeq:

+ Reverted the average quality score calculation to use the mean of Phred
quality scores instead of the mean of the incorrect base call probabilities.
The latter gave higher weight to bases with low quality scores.

Version 0.7.5: July 28, 2025
-------------------------------------------------------------------------------

Expand Down
12 changes: 5 additions & 7 deletions presto/Sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
from presto.Multiprocessing import SeqResult

# Constants
qual_to_prob = tuple(10 ** (-qual / 10) for qual in range(128))
# default_dna_matrix = getDNAScoreDict(mask_score=(0, 1), gap_score=(0, 0))
# default_aa_matrix = getAAScoreDict(mask_score=(0, 1), gap_score=(0, 0))

Expand Down Expand Up @@ -1278,13 +1277,13 @@ def filterRepeats(data, max_repeat=default_filter_max_repeat, include_missing=Fa

return result

def meanQuality(qual, prob=qual_to_prob):
def meanQuality(qual):
"""
Calculate mean quality score
Calculate mean of quality scores.
Note: this is preferred over the mean of the incorrect base call probabilities, which gives higher weight to low quality scores.

Arguments:
qual (list): numeric Phred quality scores.
prob (list): mapping of Phred score (index) to probability values

Returns:
int: floor of the mean Phred quality score.
Expand All @@ -1294,10 +1293,9 @@ def meanQuality(qual, prob=qual_to_prob):
return 0

qual_sum = 0.0
for q in qual: qual_sum += prob[q]
p = qual_sum / len(qual)
for q in qual: qual_sum += q

return math.floor(-10 * math.log10(p))
return math.floor(qual_sum / len(qual))


def filterQuality(data, min_qual=default_consensus_min_qual, inner=True,
Expand Down
6 changes: 2 additions & 4 deletions tests/test_Sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,8 @@ def setUp(self):
# Mean quality
self.qual_mean = list()
for qual in qual_dna:
p = [10 ** (-q/10) for q in qual]
m = sum(p) / len(qual)
q = math.floor(-10 * math.log10(m))
self.qual_mean.append(q)
m = math.floor(sum(qual) / len(qual))
self.qual_mean.append(m)

# Make sequence pairs
self.seq_pairs = list(combinations(self.records_dna, 2))
Expand Down