21 #include <unordered_map>    24 #include "UTF8StringSlice.hpp"    30   typedef UTF8StringSlice::LengthType LengthType;
    38   void Extract(
const string& text) {
    42     CalculateSuffixEntropy();
    45     CalculatePrefixEntropy();
    47     ExtractWordCandidates();
    52   void SetFullText(
const string& fullText) {
    56   void SetFullText(
const char* fullText) {
    60   void SetFullText(
const UTF8StringSlice& fullText) { utf8FullText = fullText; }
    62   void SetWordMinLength(
const LengthType _wordMinLength) {
    63     wordMinLength = _wordMinLength;
    66   void SetWordMaxLength(
const LengthType _wordMaxLength) {
    67     wordMaxLength = _wordMaxLength;
    70   void SetPrefixSetLength(
const LengthType _prefixSetLength) {
    71     prefixSetLength = _prefixSetLength;
    74   void SetSuffixSetLength(
const LengthType _suffixSetLength) {
    75     suffixSetLength = _suffixSetLength;
    79   void SetPreCalculationFilter(
const std::function<
    81     preCalculationFilter = filter;
    84   void SetPostCalculationFilter(
const std::function<
    86     postCalculationFilter = filter;
    89   void ReleaseSuffixes() { vector<UTF8StringSlice8Bit>().swap(suffixes); }
    91   void ReleasePrefixes() { vector<UTF8StringSlice8Bit>().swap(prefixes); }
    93   const vector<UTF8StringSlice8Bit>& Words()
 const { 
return words; }
    95   const vector<UTF8StringSlice8Bit>& WordCandidates()
 const {
    96     return wordCandidates;
   102     double suffixEntropy;
   103     double prefixEntropy;
   124   void ExtractSuffixes();
   126   void ExtractPrefixes();
   128   void ExtractWordCandidates();
   130   void CalculateFrequency();
   132   void CalculateCohesions();
   134   void CalculateSuffixEntropy();
   136   void CalculatePrefixEntropy();
   158   double CalculateEntropy(
const std::unordered_map<
   161   LengthType wordMinLength;
   162   LengthType wordMaxLength;
   163   LengthType prefixSetLength;
   164   LengthType suffixSetLength;
   165   std::function<bool(const PhraseExtract&, const UTF8StringSlice8Bit&)>
   166       preCalculationFilter;
   167   std::function<bool(const PhraseExtract&, const UTF8StringSlice8Bit&)>
   168       postCalculationFilter;
   170   bool prefixesExtracted;
   171   bool suffixesExtracted;
   172   bool frequenciesCalculated;
   173   bool wordCandidatesExtracted;
   174   bool cohesionsCalculated;
   175   bool prefixEntropiesCalculated;
   176   bool suffixEntropiesCalculated;
   180   size_t totalOccurrence;
   181   double logTotalOccurrence;
   182   vector<UTF8StringSlice8Bit> prefixes;
   183   vector<UTF8StringSlice8Bit> suffixes;
   184   vector<UTF8StringSlice8Bit> wordCandidates;
   185   vector<UTF8StringSlice8Bit> words;
   188   friend class PhraseExtractTest;
 
Definition: UTF8StringSlice.hpp:200
Definition: BinaryDict.hpp:24
Definition: UTF8StringSlice.hpp:52