diff --git a/src/python/handler/py_CT.cpp b/src/python/handler/py_CT.cpp index a15826e..88df7ef 100644 --- a/src/python/handler/py_CT.cpp +++ b/src/python/handler/py_CT.cpp @@ -58,9 +58,10 @@ py::UniqueObj CTModelObject::getCorrelations(PyObject* topicId) const py::UniqueObj CTModelObject::getPriorCov() const { auto* inst = getInst(); + auto cov = inst->getPriorCov(); + if (cov.empty()) return py::buildPyValue(nullptr); float* ptr; auto ret = py::newEmptyArray(ptr, inst->getK(), inst->getK()); - auto cov = inst->getPriorCov(); memcpy(ptr, cov.data(), sizeof(float) * inst->getK() * inst->getK()); return ret; } diff --git a/test/unit_test.py b/test/unit_test.py index 3544cd8..fde7aa4 100644 --- a/test/unit_test.py +++ b/test/unit_test.py @@ -4,6 +4,22 @@ curpath = os.path.dirname(os.path.realpath(__file__)) print(curpath) +def test_concat(): + tokenizer = tp.utils.SimpleTokenizer() + corpus = tp.utils.Corpus(tokenizer=tokenizer) + corpus.process([ + 'a b 0 d e', + 'a b 1 d e', + 'a b d e 2', + 'a b 3 d e', + 'a b 4 d e', + ]) + cands = corpus.extract_ngrams(min_cf=5, min_df=5, normalized=True, min_score=0.5) + print(cands) + corpus.concat_ngrams(cands) + for doc in corpus: + print(doc.words, doc.span) + model_cases = [ (tp.LDAModel, curpath + '/sample.txt', 0, None, {'k':40}, None), (tp.LLDAModel, curpath + '/sample_with_md.txt', 1, lambda x:x, {'k':5}, None), @@ -65,6 +81,23 @@ (tp.PTModel, curpath + '/sample.txt', 0, None, {'k':10, 'p':100}, [tp.ParallelScheme.PARTITION]), ] +def properties(cls, inputFile, mdFields, f, kargs, ps): + print('Test properties') + tw = 0 + print('Initialize model %s with TW=%s ...' % (str(cls), ['one', 'idf', 'pmi'][tw])) + mdl = cls(tw=tw, min_df=2, rm_top=2, **kargs) + all_attributes = [attr for attr in dir(mdl) if not attr.startswith('_')] + ignore_properties = {'CTModel.alpha', 'DTModel.eta'} + for attr in all_attributes: + if '{}.{}'.format(cls.__name__, attr) in ignore_properties: + print('Skipping property {}.{}'.format(cls.__name__, attr)) + continue + try: + print(attr, getattr(mdl, attr), sep=': ') + except Exception as e: + print('Error accessing attribute {}: {}'.format(attr, e)) + raise + def null_doc(cls, inputFile, mdFields, f, kargs, ps): tw = 0 print('Initialize model %s with TW=%s ...' % (str(cls), ['one', 'idf', 'pmi'][tw])) @@ -648,6 +681,13 @@ def test_purge_dead_topics(): mdl.train(100) print('Iteration: {}\tLog-likelihood: {}\tNum. of topics: {}\tNum. of tables: {}'.format(i, mdl.ll_per_word, mdl.live_k, mdl.num_tables)) +for model_case in model_cases: + pss = model_case[5] + if not pss: pss = [tp.ParallelScheme.DEFAULT] + for ps in pss[:1]: + for func in [properties]: + locals()['test_{}_{}_{}'.format(model_case[0].__name__, func.__name__, ps.name)] = (lambda f, mc, ps: lambda: f(*(mc + (ps,))))(func, model_case[:-1], ps) + for model_case in model_cases: pss = model_case[5] if not pss: pss = [tp.ParallelScheme.COPY_MERGE, tp.ParallelScheme.PARTITION] diff --git a/tomotopy/models.py b/tomotopy/models.py index b1cbdcb..4f1115a 100644 --- a/tomotopy/models.py +++ b/tomotopy/models.py @@ -456,9 +456,9 @@ def set_word_prior(self, word, prior) -> None: @classmethod def _summary_extract_param_desc(cls:type): - doc_string = cls.__doc__ or cls.__init__.__doc__ + doc_string = cls.__init__.__doc__ if not doc_string: return {} - ps = doc_string.split('\nParameters\n')[1].split('\n') + ps = doc_string.split('Parameters\n')[1].split('\n') param_name = re.compile(r'^([a-zA-Z0-9_]+)\s*:\s*') directive = re.compile(r'^\s*\.\.') descriptive = re.compile(r'\s+([^\s].*)') @@ -503,7 +503,10 @@ def _summary_training_info(self, file): print('| Log-likelihood per word: {:.5f}'.format(self.ll_per_word), file=file) def _summary_initial_params_info(self, file): - param_desc = self._summary_extract_param_desc() + try: + param_desc = self._summary_extract_param_desc() + except: + param_desc = {} if hasattr(self, 'init_params'): for k, v in self.init_params.items(): if type(v) is float: fmt = ':.5' @@ -1390,7 +1393,7 @@ def get_topic_word_dist(self, topic_id, normalize=True) -> List[float]: @property def k_g(self) -> int: '''the hyperparameter k_g (read-only)''' - return self._k_g + return self._k @property def k_l(self) -> int: diff --git a/tomotopy/viewer/template.html b/tomotopy/viewer/template.html index da002e6..891ef97 100644 --- a/tomotopy/viewer/template.html +++ b/tomotopy/viewer/template.html @@ -444,17 +444,46 @@

{{get_topic_label(topic, prefix="Topic ", id_suffix=True)}}: {{", ".j var category_labels = {{categorical_metadata}};