Machine Learning Library
Functions
CDatasetAlgorithm.h File Reference
#include "CDenseVector.h"
#include "CMatrix.h"
#include "CMetric.h"
#include "CDataset.h"
#include "CDatasetItem.h"
#include <vector>
#include <algorithm>
#include <ctime>
#include <iostream>
#include "CObject.h"

Go to the source code of this file.

Functions

template<class Type >
CDenseVector< Type > minElements (const CDataset< Type > &tDs)
 
template<class Type >
CDenseVector< Type > maxElements (const CDataset< Type > &tDs)
 
template<class Type >
CDataset< Type > centerInput (const CDataset< Type > &tDs, CDenseVector< Type > &tMean)
 
template<class Type >
CDataset< Type > zscore (const CDataset< Type > &tDs, CDenseVector< Type > &tMean, CDenseVector< Type > &tVariance)
 
template<class Type >
CDataset< Type > applyZScore (const CDataset< Type > &tDs, const CDenseVector< Type > &tMean, const CDenseVector< Type > &tVariance)
 
template<class Type >
CDenseVector< Type > applyZScore (const CDenseVector< Type > &tDs, const CDenseVector< Type > &tMean, const CDenseVector< Type > &tVariance)
 
template<class Type >
CDataset< Type > scaleRange (const CDataset< Type > &tDs, const CDenseVector< Type > tNewMin, const CDenseVector< Type > tNewMax, CDenseVector< Type > &tShift, CDenseVector< Type > &tScale)
 
template<class Type >
CDataset< Type > applyScaleRange (const CDataset< Type > &tDs, const CDenseVector< Type > &tShift, const CDenseVector< Type > &tScale)
 
template<class Type >
CDenseVector< Type > applyScaleRange (const CDenseVector< Type > &tDs, const CDenseVector< Type > &tShift, const CDenseVector< Type > &tScale)
 
template<class Type >
CDataset< Type > normalize (const CDataset< Type > &rtDs, const CMetric< Type > &rtMetric)
 
template<class Type >
CDataset< Type > removeProjection (const CDataset< Type > &rtDataset, const CDenseVector< Type > &rtVec)
 
template<class Type >
void applyWeights (CDataset< Type > &rtDataset, const CDenseVector< Type > &rtWeights)
 
template<class Type >
void normalizeInputDataEuclidean (CDataset< Type > &rtDs)
 

Function Documentation

template<class Type >
CDataset<Type> applyScaleRange ( const CDataset< Type > &  tDs,
const CDenseVector< Type > &  tShift,
const CDenseVector< Type > &  tScale 
)

Scales each component of the input data to the range given by tNewMin and tNewMax. In contrast to scaleRange(..), this function uses the shift and scale given by the function's parameters.

Parameters
tDsDataset
tShiftShift vector
tScaleScale vector
Returns
dataset
template<class Type >
CDenseVector<Type> applyScaleRange ( const CDenseVector< Type > &  tDs,
const CDenseVector< Type > &  tShift,
const CDenseVector< Type > &  tScale 
)

Scales each component of the input vector to the range given by tNewMin and tNewMax. In contrast to scaleRange(..), this function uses the shift and scale given by the function's parameters.

Parameters
tDs Vector
tShift Shift vector
tScale Scale vector
Returns
Scaled vector
template<class Type >
void applyWeights ( CDataset< Type > &  rtDataset,
const CDenseVector< Type > &  rtWeights 
)

Applies a weight vector by scaling each (input) component with the desired factor.

Parameters
rtDatasetReference to a dataset object.
rtWeightsweight vector.
template<class Type >
CDataset<Type> applyZScore ( const CDataset< Type > &  tDs,
const CDenseVector< Type > &  tMean,
const CDenseVector< Type > &  tVariance 
)

Normalizes data to zero mean and unit variance. In contrast to zscore(..), this function uses the mean and variance given by the function's parameters.

Parameters
tDsDataset
tMeanMean vector
tVarianceVariance vector
Returns
dataset
template<class Type >
CDenseVector<Type> applyZScore ( const CDenseVector< Type > &  tDs,
const CDenseVector< Type > &  tMean,
const CDenseVector< Type > &  tVariance 
)

Normalizes a vector to zero mean and unit variance. In contrast to zscore(..), this function uses the mean and variance given by the function's parameters.

Parameters
tDsVector
tMeanMean vector
tVarianceVariance vector
Returns
Normalized vector
template<class Type >
CDataset<Type> centerInput ( const CDataset< Type > &  tDs,
CDenseVector< Type > &  tMean 
)

Normalizes data to zero mean

Parameters
tDsDataset
tMean To be filled with the subtracted mean
Returns
Centered dataset
template<class Type >
CDenseVector<Type> maxElements ( const CDataset< Type > &  tDs)

Finds the maximal element of each component of the input vectors.

Parameters
tDs Dataset
Returns
Vector with maximal elements
template<class Type >
CDenseVector<Type> minElements ( const CDataset< Type > &  tDs)

Finds the minimal element of each component of the input vectors.

Parameters
tDs Dataset
Returns
Vector with minimal elements
template<class Type >
CDataset<Type> normalize ( const CDataset< Type > &  rtDs,
const CMetric< Type > &  rtMetric 
)

Normalizes the length of each (input) vector according to a given metric (using a vector with zero components).

Parameters
rtDs Reference to a dataset object.
rtMetricReference to a metric object.
template<class Type >
void normalizeInputDataEuclidean ( CDataset< Type > &  rtDs)

Normalizes the euclidean length of each (input) vector

template<class Type >
CDataset<Type> removeProjection ( const CDataset< Type > &  rtDataset,
const CDenseVector< Type > &  rtVec 
)

Removes the projection of rtVec from all vectors in the dataset.

Parameters
rtDatasetReference to a dataset object.
rtVec Reference to the vector whose projection should be eliminated.
template<class Type >
CDataset<Type> scaleRange ( const CDataset< Type > &  tDs,
const CDenseVector< Type >  tNewMin,
const CDenseVector< Type >  tNewMax,
CDenseVector< Type > &  tShift,
CDenseVector< Type > &  tScale 
)

Scales each component of the input data to the range given by tNewMin and tNewMax

Parameters
tDsDataset
tNewMinNew min values of the input components
tNewMaxNew max values of the input components
tShift To be filled with the shift vector
tScale To be filled with the scale vector
Returns
dataset
template<class Type >
CDataset<Type> zscore ( const CDataset< Type > &  tDs,
CDenseVector< Type > &  tMean,
CDenseVector< Type > &  tVariance 
)

Normalizes data to zero mean and unit variance.

Parameters
tDsDataset
tMean To be filled with the subtracted mean
tVarianceTo be filled with variance
Returns
Z-score-normalized dataset