Skip to content

Commit 75320a7

Browse files
committed
working on quarterly measurements
Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>
1 parent 521c43c commit 75320a7

File tree

2 files changed

+36
-4
lines changed

2 files changed

+36
-4
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
s सेकंड
2+
hr घंटा
3+
h घंटे
4+
min मिनट
5+
doz दर्जन
6+
yr साल
7+
yr वर्ष
8+
hp हॉर्सपॉवर
9+
d दिन
10+
month महीना
11+
months महीने
12+
हफ़्ते हफ़्ते

nemo_text_processing/text_normalization/hi/taggers/measure.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,18 +53,24 @@ def __init__(self, cardinal: GraphFst, decimal: GraphFst):
5353
decimal_integers = pynutil.insert("integer_part: \"") + cardinal_graph + pynutil.insert("\"")
5454
decimal_graph = decimal_integers + point + insert_space + decimal.graph_fractional
5555
unit_graph = pynini.string_file(get_abs_path("data/measure/unit.tsv"))
56+
quarterly_units_graph = pynini.string_file(get_abs_path("data/measure/quarterly_units.tsv"))
5657

5758
optional_graph_negative = pynini.closure(
5859
pynutil.insert("negative: ") + pynini.cross("-", "\"true\"") + insert_space, 0, 1,
5960
)
6061

62+
# Define the half-value measurements (named "quarter" below): .५ -> साढ़े, १.५ -> डेढ़, २.५ -> ढाई
63+
quarter = pynini.string_map([(".५", "साढ़े"), ("१.५", "डेढ़"), ("२.५", "ढाई"),])
64+
quarter_graph = pynutil.insert("integer_part: \"") + quarter + pynutil.insert("\" ")
65+
6166
# Define the unit handling
6267
unit = pynutil.insert("units: \"") + unit_graph + pynutil.insert("\" ")
68+
units = pynutil.insert(" units: \"") + quarterly_units_graph + pynutil.insert("\" ")
6369

6470
# Handling symbols like x, X, *
6571
symbol_graph = pynini.string_map([("x", "बाई"), ("X", "बाई"), ("*", "बाई"),])
6672

67-
graph_measurements = (
73+
graph_decimal = (
6874
pynutil.insert("decimal { ")
6975
+ optional_graph_negative
7076
+ decimal_graph
@@ -73,7 +79,16 @@ def __init__(self, cardinal: GraphFst, decimal: GraphFst):
7379
+ unit
7480
)
7581

76-
graph_measurements |= (
82+
graph_quarter = (
83+
pynutil.insert("decimal { ")
84+
+ optional_graph_negative
85+
+ quarter_graph
86+
+ pynutil.insert(" }")
87+
+ delete_space
88+
+ units
89+
)
90+
91+
graph_cardinal = (
7792
pynutil.insert("cardinal { ")
7893
+ optional_graph_negative
7994
+ pynutil.insert("integer: \"")
@@ -85,7 +100,7 @@ def __init__(self, cardinal: GraphFst, decimal: GraphFst):
85100
)
86101

87102
# Handling cardinal clubbed with symbol as single token
88-
graph_measurements |= (
103+
graph_exceptions = (
89104
pynutil.insert("cardinal { ")
90105
+ optional_graph_negative
91106
+ pynutil.insert("integer: \"")
@@ -104,7 +119,12 @@ def __init__(self, cardinal: GraphFst, decimal: GraphFst):
104119
+ pynutil.insert("\"")
105120
)
106121

107-
graph = graph_measurements
122+
graph = (
123+
pynutil.add_weight(graph_decimal, 0.01)
124+
| pynutil.add_weight(graph_quarter, 0.001)
125+
| pynutil.add_weight(graph_cardinal, 0.01)
126+
| pynutil.add_weight(graph_exceptions, 0.01)
127+
)
108128
self.graph = graph.optimize()
109129

110130
final_graph = self.add_tokens(graph)

0 commit comments

Comments
 (0)