-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Calculate various string metrics efficiently
--   
--   Calculate various string metrics efficiently.
@package text-metrics
@version 0.2.0


-- | The module provides efficient implementations of various string
--   metrics. It works with strict <a>Text</a> values and returns either
--   <a>Natural</a> numbers (because the metrics cannot be negative), or
--   <tt><a>Ratio</a> <a>Natural</a></tt> values because returned values
--   are rational non-negative numbers by definition.
--   
--   The functions provided here are the fastest implementations available
--   for use in Haskell programs. In fact the functions are implemented in
--   C for maximal efficiency, but this leads to a minor flaw. When we work
--   with <a>Text</a> values in C, they are represented as UTF-16 encoded
--   strings of two-byte values. The algorithms treat the strings as if a
--   character corresponds to one element in such strings, which is true
--   for almost all modern text data. However, there are characters that
--   are represented by two adjoined elements in UTF-16: emoji, historic
--   scripts, less used Chinese ideographs, and some more. If the input
--   <a>Text</a> passed to the functions contains such characters, the
--   functions may return slightly incorrect results. Decide for yourself
--   if this is acceptable for your use case, but chances are you will
--   never run into situations where the functions produce incorrect
--   results.
module Data.Text.Metrics

-- | Return Levenshtein distance between two <a>Text</a> values. Classic
--   Levenshtein distance between two strings is the minimal number of
--   operations necessary to transform one string into another. For
--   Levenshtein distance, the allowed operations are: deletion, insertion,
--   and substitution.
--   
--   See also: <a>https://en.wikipedia.org/wiki/Levenshtein_distance</a>.
levenshtein :: Text -> Text -> Natural

-- | Return normalized Levenshtein distance between two <a>Text</a> values.
--   Result is a non-negative rational number (represented as
--   <tt><a>Ratio</a> <a>Natural</a></tt>), where 0 signifies no similarity
--   between the strings, while 1 means exact match. The operation is
--   virtually as fast as <a>levenshtein</a>.
--   
--   See also: <a>https://en.wikipedia.org/wiki/Levenshtein_distance</a>.
levenshteinNorm :: Text -> Text -> Ratio Natural

-- | Return Damerau-Levenshtein distance between two <a>Text</a> values.
--   The function works like <a>levenshtein</a>, but the collection of
--   allowed operations also includes transposition of two <i>adjacent</i>
--   characters. The function is about 20% slower than <a>levenshtein</a>,
--   but still pretty fast.
--   
--   See also:
--   <a>https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance</a>.
damerauLevenshtein :: Text -> Text -> Natural

-- | Return normalized Damerau-Levenshtein distance between two <a>Text</a>
--   values. Result is a non-negative rational number (represented as
--   <tt><a>Ratio</a> <a>Natural</a></tt>), where 0 signifies no similarity
--   between the strings, while 1 means exact match. The operation is
--   virtually as fast as <a>damerauLevenshtein</a>.
--   
--   See also:
--   <a>https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance</a>.
damerauLevenshteinNorm :: Text -> Text -> Ratio Natural

-- | <i>O(n)</i> Return Hamming distance between two <a>Text</a> values.
--   Hamming distance is defined as the number of positions at which the
--   corresponding symbols are different. The input <a>Text</a> values
--   should be of equal length or <a>Nothing</a> will be returned.
--   
--   See also: <a>https://en.wikipedia.org/wiki/Hamming_distance</a>.
hamming :: Text -> Text -> Maybe Natural

-- | Return Jaro distance between two <a>Text</a> values. Returned value is
--   in range from 0 (no similarity) to 1 (exact match).
--   
--   While the algorithm is pretty clear for artificial examples (like
--   those from the linked Wikipedia article), for <i>arbitrary</i>
--   strings, it may be hard to decide which of two strings should be
--   considered as one having “reference” order of characters (since order
--   of matching characters is an essential part of the definition of the
--   algorithm). This makes us consider the first string the “reference”
--   string (with correct order of characters). Thus generally,
--   
--   <pre>
--   jaro a b ≠ jaro b a
--   </pre>
--   
--   As far as I know, this asymmetry can be found in all implementations
--   of the algorithm on the internet.
--   
--   See also:
--   <a>https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance</a>.
jaro :: Text -> Text -> Ratio Natural

-- | Return Jaro-Winkler distance between two <a>Text</a> values. Returned
--   value is in range from 0 (no similarity) to 1 (exact match).
--   
--   See also:
--   <a>https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance</a>.
jaroWinkler :: Text -> Text -> Ratio Natural
