36 #ifndef VIGRA_SAMPLING_HXX 
   37 #define VIGRA_SAMPLING_HXX 
   39 #include "array_vector.hxx" 
   67     double sample_proportion;
 
   68     unsigned int sample_size;
 
   69     bool   sample_with_replacement;
 
   70     bool   stratified_sampling;
 
   73     : sample_proportion(1.0),
 
   75       sample_with_replacement(
true),
 
   76       stratified_sampling(
false)
 
   85         sample_with_replacement = in;
 
   95         sample_with_replacement = !in;
 
  125         vigra_precondition(proportion >= 0.0,
 
  126                "SamplerOptions::sampleProportion(): argument must not be negative.");
 
  127         sample_proportion = proportion;
 
  143         stratified_sampling = in;
 
  231 template<
class Random = MersenneTwister >
 
  249     typedef std::map<IndexType, IndexArrayType> StrataIndicesType;
 
  250     typedef std::map<IndexType, int>            StrataSizesType;
 
  254     static const int        oobInvalid = -1;
 
  256     int                     total_count_, sample_size_;
 
  257     mutable int             current_oob_count_;
 
  258     StrataIndicesType       strata_indices_;
 
  259     StrataSizesType         strata_sample_size_;
 
  263     Random                  default_random_;
 
  264     Random 
const &          random_;
 
  267     void initStrataCount()
 
  272         int strata_total_count = strata_sample_size * 
strataCount();
 
  274         for(StrataIndicesType::iterator i = strata_indices_.begin();
 
  275              i != strata_indices_.end(); ++i)
 
  277             if(strata_total_count > sample_size_)
 
  279                 strata_sample_size_[i->first] = strata_sample_size - 1;
 
  280                 --strata_total_count;
 
  284                 strata_sample_size_[i->first] = strata_sample_size;
 
  298             Random 
const * rnd = 0)
 
  299     : total_count_(totalCount),
 
  300       sample_size_(opt.sample_size == 0
 
  301                    ? static_cast<int>((std::
ceil(total_count_ * opt.sample_proportion)))
 
  303       current_oob_count_(oobInvalid),
 
  304       current_sample_(sample_size_),
 
  305       current_oob_sample_(total_count_),
 
  306       is_used_(total_count_),
 
  307       default_random_(RandomSeed),
 
  308       random_(rnd ? *rnd : default_random_),
 
  311         vigra_precondition(opt.sample_with_replacement || sample_size_ <= total_count_,
 
  312           "Sampler(): Cannot draw without replacement when data size is smaller than sample count.");
 
  314         vigra_precondition(!opt.stratified_sampling,
 
  315           "Sampler(): Stratified sampling requested, but no strata given.");
 
  318         strata_indices_[0].resize(total_count_);
 
  319         for(
int i=0; i<total_count_; ++i)
 
  320             strata_indices_[0][i] = i;
 
  336     template <
class Iterator>
 
  338             Random 
const * rnd = 0)
 
  339     : total_count_(strataEnd - strataBegin),
 
  340       sample_size_(opt.sample_size == 0
 
  341                    ? static_cast<int>((std::
ceil(total_count_ * opt.sample_proportion)))
 
  343       current_oob_count_(oobInvalid),
 
  344       current_sample_(sample_size_),
 
  345       current_oob_sample_(total_count_),
 
  346       is_used_(total_count_),
 
  347       default_random_(RandomSeed),
 
  348       random_(rnd ? *rnd : default_random_),
 
  351         vigra_precondition(opt.sample_with_replacement || sample_size_ <= total_count_,
 
  352           "Sampler(): Cannot draw without replacement when data size is smaller than sample count.");
 
  355         if(opt.stratified_sampling)
 
  357             for(
int i = 0; strataBegin != strataEnd; ++i, ++strataBegin)
 
  359                 strata_indices_[*strataBegin].push_back(i);
 
  364             strata_indices_[0].resize(total_count_);
 
  365             for(
int i=0; i<total_count_; ++i)
 
  366                 strata_indices_[0][i] = i;
 
  369         vigra_precondition(sample_size_ >= static_cast<int>(strata_indices_.size()),
 
  370             "Sampler(): Requested sample count must be at least as large as the number of strata.");
 
  381         return current_sample_[k];
 
  415         return strata_indices_.size();
 
  423         return options_.stratified_sampling;
 
  430         return options_.sample_with_replacement;
 
  437         return current_sample_;
 
  445         if(current_oob_count_ == oobInvalid)
 
  447             current_oob_count_ = 0;
 
  448             for(
int i = 0; i<total_count_; ++i)
 
  452                     current_oob_sample_[current_oob_count_] = i;
 
  453                     ++current_oob_count_;
 
  457         return current_oob_sample_.
subarray(0, current_oob_count_);
 
  459     IsUsedArrayType 
const & is_used()
 const 
  466 template<
class Random>
 
  469     current_oob_count_ = oobInvalid;
 
  470     is_used_.init(
false);
 
  472     if(options_.sample_with_replacement)
 
  476         StrataIndicesType::iterator iter;
 
  477         for(iter = strata_indices_.begin(); iter != strata_indices_.end(); ++iter)
 
  480             int stratum_size = iter->second.size();
 
  481             for(
int i = 0; i < static_cast<int>(strata_sample_size_[iter->first]); ++i, ++j)
 
  483                 current_sample_[j] = iter->second[random_.uniformInt(stratum_size)];
 
  484                 is_used_[current_sample_[j]] = 
true;
 
  492         StrataIndicesType::iterator iter;
 
  493         for(iter = strata_indices_.begin(); iter != strata_indices_.end(); ++iter)
 
  496             int stratum_size = iter->second.size();
 
  497             for(
int i = 0; i < static_cast<int>(strata_sample_size_[iter->first]); ++i, ++j)
 
  499                 std::swap(iter->second[i], iter->second[i+ random_.uniformInt(stratum_size - i)]);
 
  500                 current_sample_[j] = iter->second[i];
 
  501                 is_used_[current_sample_[j]] = 
true;
 
  507 template<
class Random =RandomTT800 >
 
  512     typedef Int32                               IndexType;
 
  514     IndexArrayType        used_indices_;
 
  519     PoissonSampler(
double lambda,IndexType minIndex,IndexType maxIndex)
 
  527         used_indices_.clear();
 
  529         for(i=minIndex;i<maxIndex;++i)
 
  534             double L=
exp(-lambda);
 
  538                 p*=randfloat.uniform53();
 
  545                 used_indices_.push_back(i);
 
  551     IndexType 
const & operator[](
int in)
 const 
  553         return used_indices_[in];
 
  556     int numOfSamples()
 const 
  558         return used_indices_.size();
 
ArrayVectorView< IndexType > IndexArrayViewType
Definition: sampling.hxx:246
IndexType operator[](int k) const 
Definition: sampling.hxx:379
int strataCount() const 
Definition: sampling.hxx:413
Sampler(UInt32 totalCount, SamplerOptions const &opt=SamplerOptions(), Random const *rnd=0)
Definition: sampling.hxx:297
Create random samples from a sequence of indices. 
Definition: sampling.hxx:232
SamplerOptions & sampleProportion(double proportion)
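Determine the number of samples to draw as a proportion of the total number. That is, when no absolute sample size has been set (sample size 0), ceil(totalCount * proportion) samples are drawn.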
Definition: sampling.hxx:123
void sample()
Definition: sampling.hxx:467
linalg::TemporaryMatrix< T > exp(MultiArrayView< 2, T, C > const &v)
bool withReplacement() const 
Definition: sampling.hxx:428
Sampler(Iterator strataBegin, Iterator strataEnd, SamplerOptions const &opt=SamplerOptions(), Random const *rnd=0)
Definition: sampling.hxx:337
SamplerOptions & sampleSize(unsigned int size)
Draw the given number of samples. If stratifiedSampling is true, the size is equally distributed across all strata (subject to rounding, so some strata may receive one sample fewer than others).
Definition: sampling.hxx:106
int sampleSize() const 
Definition: sampling.hxx:397
int size() const 
Definition: sampling.hxx:404
bool stratifiedSampling() const 
Definition: sampling.hxx:421
detail::SelectIntegerType< 32, detail::SignedIntTypes >::type Int32
32-bit signed int 
Definition: sized_int.hxx:175
IndexArrayViewType sampledIndices() const 
Definition: sampling.hxx:435
int totalCount() const 
Definition: sampling.hxx:390
this_type subarray(size_type begin, size_type end) const 
Definition: array_vector.hxx:200
Int32 IndexType
Definition: sampling.hxx:239
SamplerOptions & stratified(bool in=true)
Draw equally many samples from each "stratum". A stratum is a group of like entities, e.g. pixels belonging to the same object class. This is useful to create balanced samples when the class probabilities are very unbalanced (e.g. when there are many background and few foreground pixels). Stratified sampling thus avoids that a trained classifier is biased towards the majority class. 
Definition: sampling.hxx:141
SamplerOptions & withReplacement(bool in=true)
Sample from training population with replacement. 
Definition: sampling.hxx:83
detail::SelectIntegerType< 32, detail::UnsignedIntTypes >::type UInt32
32-bit unsigned int 
Definition: sized_int.hxx:183
int ceil(FixedPoint< IntBits, FracBits > v)
rounding up. 
Definition: fixedpoint.hxx:675
Options object for the Sampler class. 
Definition: sampling.hxx:63
IndexArrayViewType oobIndices() const 
Definition: sampling.hxx:443
SamplerOptions & withoutReplacement(bool in=true)
Sample from training population without replacement. 
Definition: sampling.hxx:93