Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/Python/doc/source/reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Function Reference
******************

.. py:class:: Somoclu(n_columns, n_rows, initialcodebook=None, kerneltype=0, maptype='planar', gridtype='rectangular', compactsupport=False, neighborhood='gaussian', std_coeff=0.5, initialization=None)
.. py:class:: Somoclu(n_columns, n_rows, initialcodebook=None, kerneltype=0, maptype='planar', gridtype='rectangular', compactsupport=False, neighborhood='gaussian', vect_distance='euclidean', std_coeff=0.5, initialization=None)

Class for training and visualizing a self-organizing map.

Expand Down Expand Up @@ -40,6 +40,11 @@ Function Reference

* "gaussian": Gaussian neighborhood (default)
* "bubble": bubble neighborhood function
:param vect_distance: Optional parameter to specify the vector distance:

* "euclidean": Euclidean distance (default)
* "norm-inf": Infinity-norm distance (max among components)
* "norm-p": p-norm (p-th root of summed differences raised to the power of p), works only if kerneltype is 0
:type neighborhood: str.
:param std_coeff: Optional parameter to set the coefficient in the Gaussian
neighborhood function exp(-||x-y||^2/(2*(coeff*radius)^2))
Expand Down
2 changes: 1 addition & 1 deletion src/Python/somoclu/somoclu.i
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@ void train(float *data, int data_length,
float std_coeff, unsigned int verbose,
float* codebook, int codebook_size,
int* globalBmus, int globalBmus_size,
float* uMatrix, int uMatrix_size);
float* uMatrix, int uMatrix_size, string vect_distance);
25 changes: 23 additions & 2 deletions src/Python/somoclu/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@
"when you are trying to import it. Please refer to the "
"documentation to see your options.")

def is_pos_real(s):
""" Returns True if s is a positive real.
"""
try:
return (float(s) > 0)
except ValueError:
return False

class Somoclu(object):
"""Class for training and visualizing a self-organizing map.
Expand Down Expand Up @@ -82,6 +89,12 @@ class Somoclu(object):
* "gaussian": Gaussian neighborhood (default)
* "bubble": bubble neighborhood function
:type neighborhood: str.
:param vect_distance: Optional parameter to specify the vector distance function:

* "euclidean": Euclidean (default)
* "norm-inf": infinite norm (max absolute distance among components)
* "norm-p": p-th root of sum of absolute differences ^ p (only supported by kerneltype 0)
:type vect_distance: str.
:param std_coeff: Optional parameter to set the coefficient in the Gaussian
neighborhood function exp(-||x-y||^2/(2*(coeff*radius)^2))
Default: 0.5
Expand All @@ -100,7 +113,7 @@ class Somoclu(object):
def __init__(self, n_columns, n_rows, initialcodebook=None,
kerneltype=0, maptype="planar", gridtype="rectangular",
compactsupport=True, neighborhood="gaussian", std_coeff=0.5,
initialization=None, data=None, verbose=0):
initialization=None, data=None, verbose=0, vect_distance="euclidean"):
"""Constructor for the class.
"""
self._n_columns, self._n_rows = n_columns, n_rows
Expand All @@ -109,6 +122,7 @@ def __init__(self, n_columns, n_rows, initialcodebook=None,
self._grid_type = gridtype
self._compact_support = compactsupport
self._neighborhood = neighborhood
self._vect_distance = vect_distance
self._std_coeff = std_coeff
self._verbose = verbose
self._check_parameters()
Expand Down Expand Up @@ -225,7 +239,7 @@ def train(self, data=None, epochs=10, radius0=0, radiusN=1,
self._kernel_type, self._map_type, self._grid_type,
self._compact_support, self._neighborhood == "gaussian",
self._std_coeff, self._verbose, self.codebook, self.bmus,
self.umatrix)
self.umatrix, self._vect_distance)
self.umatrix.shape = (self._n_rows, self._n_columns)
self.bmus.shape = (self.n_vectors, 2)
self.codebook.shape = (self._n_rows, self._n_columns, self.n_dim)
Expand Down Expand Up @@ -475,6 +489,13 @@ def _check_parameters(self):
if self._neighborhood != "gaussian" and self._neighborhood != "bubble":
raise Exception("Invalid parameter for neighborhood: " +
self._neighborhood)
if not (self._vect_distance == "euclidean" or self._vect_distance == "norm-inf"
or (self._vect_distance[:5] == "norm-" and is_pos_real(self._vect_distance[5:]))):
raise Exception("Invalid parameter for vect_distance: " +
self._vect_distance)
if (self._vect_distance[:5] == "norm-" and self._kernel_type != 0):
raise Exception("Invalid parameter for vect_distance: " +
self._vect_distance + " when using kernel_type: " + self._kernel_type)
if self._kernel_type != 0 and self._kernel_type != 1:
raise Exception("Invalid parameter for kernelTye: " +
self._kernel_type)
Expand Down
4 changes: 2 additions & 2 deletions src/R/R/Rsomoclu.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Rsomoclu.train <-
scaleCooling,
kernelType=0, mapType="planar", gridType="rectangular",
compactSupport=TRUE, neighborhood="gaussian", stdCoeff=0.5,
codebook=NULL) {
codebook=NULL, vectDistance="euclidean") {
if (is.null(codebook)) {
codebook <- matrix(data = 0, nrow = nSomX * nSomY, ncol = dim(input_data)[2])
codebook[1, 1] <- 1000
Expand All @@ -21,7 +21,7 @@ Rsomoclu.train <-
radiusCooling, scale0, scaleN,
scaleCooling, kernelType, mapType,
gridType, compactSupport, neighborhood, stdCoeff,
codebook)
codebook, vectDistance)
res
}

Expand Down
6 changes: 4 additions & 2 deletions src/R/src/Rsomoclu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ RcppExport SEXP Rtrain(SEXP data_p,
SEXP gridType_p, SEXP compactSupport_p,
SEXP neighborhood_p,
SEXP stdCoeff_p,
SEXP codebook_p) {
SEXP codebook_p,
SEXP vectDistance_p) {
Rcpp::NumericMatrix dataMatrix(data_p);
Rcpp::NumericMatrix codebookMatrix(codebook_p);
int nVectors = dataMatrix.rows();
Expand All @@ -36,6 +37,7 @@ RcppExport SEXP Rtrain(SEXP data_p,
bool compactSupport = as<bool>(compactSupport_p);
string mapType = as<string>(mapType_p);
string gridType = as<string>(gridType_p);
string vectDistance = as<string>(vectDistance_p);
string neighborhood = as<string>(neighborhood_p);
int data_length = nVectors * nDimensions;
float* data = new float[data_length];
Expand Down Expand Up @@ -63,7 +65,7 @@ RcppExport SEXP Rtrain(SEXP data_p,
gridType, compactSupport, neighborhood == "gaussian",
std_coeff, 0,
codebook, codebook_size, globalBmus, globalBmus_size,
uMatrix, uMatrix_size);
uMatrix, uMatrix_size, vectDistance);
Rcpp::NumericMatrix globalBmusMatrix(nVectors, 2);
Rcpp::NumericMatrix uMatrixMatrix(nSomX, nSomY);
if(codebook != NULL) {
Expand Down
4 changes: 2 additions & 2 deletions src/R/src/Rsomoclu_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
*/

/* .Call calls */
extern SEXP Rtrain(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP Rtrain(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);

static const R_CallMethodDef CallEntries[] = {
{"Rtrain", (DL_FUNC) &Rtrain, 17},
{"Rtrain", (DL_FUNC) &Rtrain, 18},
{NULL, NULL, 0}
};

Expand Down
20 changes: 20 additions & 0 deletions src/denseCpuKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,26 @@ float EuclideanDistance::operator()(float* vec1, float* vec2) const{
return sqrt(distance);
}

float NormPDistance::operator()(float* vec1, float* vec2) const{
unsigned int nDimensions = Dim();
double distance = 0.0f;
for (unsigned int d = 0; d < nDimensions; ++d) {
distance += pow(fabs(vec1[d] - vec2[d]), p);
}
return pow(distance, 1.0/p);
}

float NormInfDistance::operator()(float* vec1, float* vec2) const{
unsigned int nDimensions = Dim();
double distance = 0.0f;
for (unsigned int d = 0; d < nDimensions; ++d) {
float tmp = fabs(vec1[d] - vec2[d]);
if (tmp > distance)
distance = tmp;
}
return distance;
}

/** Get node coords for the best matching unit (BMU)
* @param coords - BMU coords
* @param n - row num in the input feature file
Expand Down
33 changes: 28 additions & 5 deletions src/somoclu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ void printUsage() {
"Arguments:\n" \
" -c FILENAME Specify an initial map.codebook for the map.\n" \
" -d NUMBER Coefficient in the Gaussian neighborhood function exp(-||x-y||^2/(2*(coeff*radius)^2)) (default: 0.5)\n" \
" -D NORM Norm to be used among vectors: euclidean, norm-inf or norm-p with any real p > 0 (default: euclidean)\n" \
" -e NUMBER Maximum number of epochs (default: " << N_EPOCH << ")\n" \
" -g TYPE Grid type: rectangular or hexagonal (default: rectangular)\n"\
" -h, --help This help text\n" \
Expand Down Expand Up @@ -92,7 +93,7 @@ void processCommandLine(int argc, char** argv, string *inFilename,
unsigned int *kernelType, string *mapType,
unsigned int *snapshots,
string *gridType, unsigned int *compactSupport,
unsigned int *gaussian, float *std_coeff,
unsigned int *gaussian, string *vect_distance, float *std_coeff,
unsigned int *verbose,
string *initialCodebookFilename) {

Expand All @@ -112,6 +113,7 @@ void processCommandLine(int argc, char** argv, string *inFilename,
*gridType = "rectangular";
*compactSupport = 1;
*gaussian = 1;
*vect_distance = "euclidean";
*std_coeff = 0.5;
*verbose = 0;
string neighborhood_function = "gaussian";
Expand All @@ -124,7 +126,7 @@ void processCommandLine(int argc, char** argv, string *inFilename,
int c;
extern int optind, optopt;
int option_index = 0;
while ((c = getopt_long (argc, argv, "hx:y:d:e:g:k:l:m:n:p:r:s:t:v:c:L:R:T:",
while ((c = getopt_long (argc, argv, "hx:y:d:D:e:g:k:l:m:n:p:r:s:t:v:c:L:R:T:",
long_options, &option_index)) != -1) {
switch (c) {
case 'c':
Expand Down Expand Up @@ -162,6 +164,13 @@ void processCommandLine(int argc, char** argv, string *inFilename,
cli_abort("The argument of option -n should be either bubble or Gaussian.");
}
break;
case 'D':
*vect_distance = optarg;
float p;
if (!(*vect_distance == "euclidean" || *vect_distance == "norm-inf" || (vect_distance->substr(0, 5) == "norm-" && sscanf(vect_distance->c_str() + 5, "%f", &p) == 1 && p > 0))) {
cli_abort("The argument of option -D should be either euclidean or norm-p (with p being a float > 0) or norm-inf.");
}
break;
case 'p':
*compactSupport = atoi(optarg);
if (*compactSupport != 0 && *compactSupport != 1) {
Expand Down Expand Up @@ -294,6 +303,7 @@ int main(int argc, char** argv)
string gridType;
unsigned int compactSupport;
unsigned int gaussian;
string vect_distance;
float radius0 = 0;
float radiusN = 0;
string radiusCooling;
Expand All @@ -312,7 +322,7 @@ int main(int argc, char** argv)
&scale0, &scaleN, &scaleCooling,
&nSomX, &nSomY,
&kernelType, &mapType, &snapshots,
&gridType, &compactSupport, &gaussian, &std_coeff,
&gridType, &compactSupport, &gaussian, &vect_distance, &std_coeff,
&verbose,
&initialCodebookFilename);
#ifndef CUDA
Expand Down Expand Up @@ -419,14 +429,27 @@ int main(int argc, char** argv)
cout << endl;
}
}
som map = {
Distance *d = 0;
if (vect_distance == "norm-inf") {
d = new NormInfDistance(nDimensions);
} else if (strncmp(vect_distance.c_str(), "norm-", 5) == 0) {
float p;
if (sscanf("%f", vect_distance.c_str() + 5, &p) == 1 && p > 0) {
d = new NormPDistance(nDimensions, p);
} else {
cerr << "norm-p needs a positive p value (falling back to Euclidean)" << endl;
}
}
if (d == 0)
d = new EuclideanDistance(nDimensions);
som map = {
nSomX,
nSomY,
nDimensions,
nVectors,
mapType,
gridType,
*(new EuclideanDistance(nDimensions)),
*d,
NULL,
new float[nSomY * nSomX * nDimensions],
NULL };
Expand Down
18 changes: 17 additions & 1 deletion src/somoclu.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,22 @@ class EuclideanDistance: public Distance{
virtual float operator()(float* vec1, float* vec2) const;
};

class NormPDistance: public Distance{
private:
float p;
public:
NormPDistance(unsigned int d, float p):Distance(d), p(p){}
virtual ~NormPDistance(){}
virtual float operator()(float* vec1, float* vec2) const;
};

class NormInfDistance: public Distance{
public:
NormInfDistance(unsigned int d):Distance(d){}
virtual ~NormInfDistance(){}
virtual float operator()(float* vec1, float* vec2) const;
};

/// Som parameters
struct som {
unsigned int nSomX;
Expand Down Expand Up @@ -130,7 +146,7 @@ void train(float *data, int data_length,
float std_coeff, unsigned int verbose,
float* codebook, int codebook_size,
int* globalBmus, int globalBmus_size,
float* uMatrix, int uMatrix_size);
float* uMatrix, int uMatrix_size, string vect_distance = "euclidean");
void train(int itask, float *data, svm_node **sparseData,
som map, unsigned int nVectorsPerRank, unsigned int nEpoch,
float radius0, float radiusN,
Expand Down
15 changes: 13 additions & 2 deletions src/training.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include <cmath>
#include <cstdlib>
#include <cstring>
#ifndef HAVE_MPI
#include <stdexcept>
#endif
Expand Down Expand Up @@ -89,7 +90,7 @@ void train(float *data, int data_length, unsigned int nEpoch,
float std_coeff, unsigned int verbose,
float *codebook, int codebook_size,
int *globalBmus, int globalBmus_size,
float *uMatrix, int uMatrix_size) {
float *uMatrix, int uMatrix_size, string vect_distance) {
#ifdef HAVE_R
#ifndef CUDA
if(kernelType == DENSE_GPU){
Expand All @@ -98,14 +99,24 @@ void train(float *data, int data_length, unsigned int nEpoch,
}
#endif // CUDA
#endif // HAVE_R
Distance *d = 0;
float p;
if (vect_distance == "norm-inf")
d = new NormInfDistance(nDimensions);
else if (sscanf(vect_distance.c_str(), "norm-%f", &p) == 1 && p > 0)
d = new NormPDistance(nDimensions, p);
else if (vect_distance != "euclidean")
fprintf(stderr, "Warning: incorrect vect_distance: %s (falling back to euclidean)\n", vect_distance.c_str());
if (d == 0)
d = new EuclideanDistance(nDimensions);
som map = {
nSomX,
nSomY,
nDimensions,
nVectors,
mapType,
gridType,
*(new EuclideanDistance(nDimensions)),
*d,
uMatrix,
codebook,
globalBmus};
Expand Down