1212import jasperpath
1313import yaml
1414
15+
1516class PhonetisaurusG2P (object ):
16- PATTERN = re .compile (r'^(?P<word>.+)\t(?P<precision>\d+\.\d+)\t<s> (?P<pronounciation>.*) </s>' , re .MULTILINE )
17+ PATTERN = re .compile (r'^(?P<word>.+)\t(?P<precision>\d+\.\d+)\t<s> ' +
18+ r'(?P<pronounciation>.*) </s>' , re .MULTILINE )
1719
1820 @classmethod
1921 def executable_found (cls ):
@@ -33,75 +35,81 @@ def execute(cls, fst_model, input, is_file=False, nbest=None):
3335 '--model=%s' % fst_model ,
3436 '--input=%s' % input ,
3537 '--words' ]
36-
38+
3739 if is_file :
3840 cmd .append ('--isfile' )
3941
4042 if nbest is not None :
4143 cmd .extend (['--nbest=%d' % nbest ])
4244
4345 cmd = [str (x ) for x in cmd ]
44- with tempfile .SpooledTemporaryFile () as err_f :
45- try :
46- # FIXME: We can't just use subprocess.call and redirect stdout
47- # and stderr, because it looks like Phonetisaurus can't open
48- # an already opened file descriptor a second time. This is why
49- # we have to use this somehow hacky subprocess.Popen approach.
50- proc = subprocess .Popen (cmd , stdout = subprocess .PIPE , stderr = subprocess .PIPE )
51- stdoutdata , stderrdata = proc .communicate ()
52- returncode = proc .returncode
53- if returncode != 0 :
54- logger .warning ("Command '%s' return with exit status %d" , ' ' .join (cmd ), returncode )
55- except OSError :
56- logger .error ("Error occured while executing command '%s'" , ' ' .join (cmd ), exc_info = True )
57- stdoutdata , stderrdata = None , None
58- if stderrdata is not None :
59- for line in stderrdata .splitlines ():
60- message = line .strip ()
61- if message :
62- logger .debug (message )
46+ try :
47+ # FIXME: We can't just use subprocess.call and redirect stdout
48+ # and stderr, because it looks like Phonetisaurus can't open
49+ # an already opened file descriptor a second time. This is why
50+ # we have to use this somehow hacky subprocess.Popen approach.
51+ proc = subprocess .Popen (cmd , stdout = subprocess .PIPE ,
52+ stderr = subprocess .PIPE )
53+ stdoutdata , stderrdata = proc .communicate ()
54+ returncode = proc .returncode
55+ if returncode != 0 :
56+ logger .warning ("Command '%s' return with exit status %d" ,
57+ ' ' .join (cmd ), returncode )
58+ except OSError :
59+ logger .error ("Error occured while executing command '%s'" ,
60+ ' ' .join (cmd ), exc_info = True )
61+ stdoutdata , stderrdata = None , None
62+ if stderrdata is not None :
63+ for line in stderrdata .splitlines ():
64+ message = line .strip ()
65+ if message :
66+ logger .debug (message )
6367
6468 result = {}
6569 if stdoutdata is not None :
66- for word , precision , pronounciation in cls .PATTERN .findall (stdoutdata ):
70+ for word , precision , pron in cls .PATTERN .findall (stdoutdata ):
6771 if word not in result :
6872 result [word ] = []
69- result [word ].append (pronounciation )
73+ result [word ].append (pron )
7074 return result
7175
@classmethod
def get_config(cls):
    """Return the G2P configuration as a dict.

    The result always contains 'fst_model', which defaults to
    ``<jasperpath.APP_PATH>/../phonetisaurus/g014b2b.fst``. If a readable
    'profile.yml' sits next to this module and has a 'pocketsphinx'
    mapping, its 'fst_model' value overrides the default and its 'nbest'
    value (converted with int()) is added under 'nbest'.

    An unreadable, empty or otherwise unusable profile is not an error:
    the defaults are returned unchanged.
    """
    # FIXME: Replace this as soon as pull request
    # jasperproject/jasper-client#128 has been merged

    conf = {'fst_model': os.path.join(jasperpath.APP_PATH, os.pardir,
                                      'phonetisaurus', 'g014b2b.fst')}
    # Try to get fst_model from config
    profile_path = os.path.join(os.path.dirname(__file__), 'profile.yml')
    if not os.access(profile_path, os.R_OK):
        return conf
    with open(profile_path, 'r') as f:
        profile = yaml.safe_load(f)

    # yaml.safe_load() yields None for an empty document, and an empty
    # 'pocketsphinx:' key also maps to None, so make sure we really have
    # mappings before running membership tests on them.
    section = None
    if isinstance(profile, dict):
        section = profile.get('pocketsphinx')
    if not isinstance(section, dict):
        return conf

    if 'fst_model' in section:
        conf['fst_model'] = section['fst_model']
    if 'nbest' in section:
        conf['nbest'] = int(section['nbest'])
    return conf
8995
90-
def __new__(cls, fst_model=None, *args, **kwargs):
    """Check the prerequisites, then allocate a new instance.

    Arguments:
        fst_model -- path to the FST model file; it must be readable
        *args, **kwargs -- accepted so the signature matches the
                           constructor call; they are not used here

    Raises OSError if cls.executable_found() is false, or if fst_model
    is None or not readable.
    """
    if not cls.executable_found():
        raise OSError("Can't find command 'phonetisaurus-g2p'! Please " +
                      "check if Phonetisaurus is installed and in your " +
                      "$PATH.")
    if fst_model is None or not os.access(fst_model, os.R_OK):
        # Format with a 1-tuple so that a tuple-valued fst_model cannot be
        # mistaken for the format arguments themselves.
        raise OSError(("FST model '%r' does not exist! Can't create " +
                       "instance.") % (fst_model,))
    # object.__new__() must be given the class only: on Python 3 it raises
    # TypeError when extra arguments are passed from an overridden
    # __new__. The constructor arguments still reach __init__, because
    # type.__call__ forwards them there itself.
    return object.__new__(cls)
98106
99107 def __init__ (self , fst_model = None , nbest = None ):
100108 self ._logger = logging .getLogger (__name__ )
101-
109+
102110 self .fst_model = os .path .abspath (fst_model )
103111 self ._logger .debug ("Using FST model: '%s'" , self .fst_model )
104-
112+
105113 self .nbest = nbest
106114 if self .nbest is not None :
107115 self ._logger .debug ("Will use the %d best results." , self .nbest )
@@ -117,37 +125,42 @@ def _translate_words(self, words):
117125 for word in words :
118126 f .write ("%s\n " % word )
119127 tmp_fname = f .name
120- output = self .execute (self .fst_model , tmp_fname , is_file = True , nbest = self .nbest )
128+ output = self .execute (self .fst_model , tmp_fname , is_file = True ,
129+ nbest = self .nbest )
121130 os .remove (tmp_fname )
122131 return output
123132
def translate(self, words):
    """Convert one word or a sequence of words to phonemes.

    Arguments:
        words -- a single word given as a string, or a sequence of words

    A string, or a sequence holding exactly one item, is passed to
    self._translate_word(); anything else goes to
    self._translate_words(). The result of that call is returned
    unchanged.
    """
    # isinstance() rather than an exact type comparison: a str subclass is
    # still one word and must not be iterated character by character.
    single_string = isinstance(words, str)
    if single_string or len(words) == 1:
        self._logger.debug('Converting single word to phonemes')
        word = words if single_string else words[0]
        output = self._translate_word(word)
    else:
        self._logger.debug('Converting %d words to phonemes', len(words))
        output = self._translate_words(words)
    self._logger.debug('G2P conversion returned phonemes for %d words',
                       len(output))
    return output
133144
if __name__ == "__main__":
    # Small manual check: convert a fixed word list with the FST model
    # named on the command line and pretty-print the resulting dict.
    import argparse
    import pprint

    parser = argparse.ArgumentParser(description='Phonetisaurus G2P module')
    parser.add_argument('fst_model', action='store',
                        help='Path to the FST Model')
    parser.add_argument('--debug', action='store_true',
                        help='Show debug messages')
    args = parser.parse_args()

    logging.basicConfig()
    # Keep this bound to the module-level name 'logger': it is assigned at
    # module scope here, so other code in this file that uses a bare
    # 'logger' sees the root logger when run as a script.
    logger = logging.getLogger()
    if args.debug:
        logger.setLevel(logging.DEBUG)

    g2pconv = PhonetisaurusG2P(args.fst_model, nbest=3)
    output = g2pconv.translate(['THIS', 'IS', 'A', 'TEST'])

    pprint.pprint(output, indent=2)
0 commit comments