-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Bindings to the ICU library
--   
--   Haskell bindings to the International Components for Unicode (ICU)
--   libraries. These libraries provide robust and full-featured Unicode
--   services on a wide variety of platforms.
--   
--   Features include:
--   
--   <ul>
--   <li>Both pure and impure bindings, to allow for fine control over
--   efficiency and ease of use.</li>
--   <li>Breaking of strings on character, word, sentence, and line
--   boundaries.</li>
--   <li>Access to the Unicode Character Database (UCD) of character
--   metadata.</li>
--   <li>String collation functions, for locales where the conventions for
--   lexicographic ordering differ from the simple numeric ordering of
--   character codes.</li>
--   <li>Character set conversion functions, allowing conversion between
--   Unicode and over 220 character encodings.</li>
--   <li>Unicode normalization. (When implementations keep strings in a
--   normalized form, they can be assured that equivalent strings have a
--   unique binary representation.)</li>
--   <li>Regular expression search and replace.</li>
--   </ul>
@package text-icu
@version 0.7.0.1


-- | Character set normalization functions for Unicode, implemented as
--   bindings to the International Components for Unicode (ICU) libraries.
module Data.Text.ICU.Normalize

-- | Normalization modes.
data NormalizationMode

-- | No decomposition/composition.
None :: NormalizationMode

-- | Canonical decomposition.
NFD :: NormalizationMode

-- | Compatibility decomposition.
NFKD :: NormalizationMode

-- | Canonical decomposition followed by canonical composition.
NFC :: NormalizationMode

-- | Compatibility decomposition followed by canonical composition.
NFKC :: NormalizationMode

-- | "Fast C or D" form.
FCD :: NormalizationMode

-- | Normalize a string according to the specified normalization mode.
normalize :: NormalizationMode -> Text -> Text

-- | Perform an efficient check on a string, to quickly determine if the
--   string is in a particular normalization form.
--   
--   A <a>Nothing</a> result indicates that a definite answer could not be
--   determined quickly, and a more thorough check is required, e.g. with
--   <a>isNormalized</a>. The user may have to convert the string to its
--   normalized form and compare the results.
--   
--   A result of <a>Just</a> <a>True</a> or <a>Just</a> <a>False</a>
--   indicates that the string definitely is, or is not, in the given
--   normalization form.
quickCheck :: NormalizationMode -> Text -> Maybe Bool

-- | Indicate whether a string is in a given normalization form.
--   
--   Unlike <a>quickCheck</a>, this function returns a definitive result.
--   For <a>NFD</a>, <a>NFKD</a>, and <a>FCD</a> normalization forms, both
--   functions work in exactly the same way. For <a>NFC</a> and
--   <a>NFKC</a> forms, where <a>quickCheck</a> may return <a>Nothing</a>,
--   this function will perform further tests to arrive at a definitive
--   result.
isNormalized :: NormalizationMode -> Text -> Bool

-- | Options to <a>compare</a>.
data CompareOption

-- | The caller knows that both strings fulfill the <a>FCD</a> conditions.
--   If <i>not</i> set, <a>compare</a> will <a>quickCheck</a> for
--   <a>FCD</a> and normalize if necessary.
InputIsFCD :: CompareOption

-- | Compare strings case-insensitively using case folding, instead of
--   case-sensitively. If set, then the following case folding options are
--   used.
CompareIgnoreCase :: CompareOption

-- | When case folding, exclude the special I character. For use with
--   Turkic (Turkish/Azerbaijani) text data.
FoldCaseExcludeSpecialI :: CompareOption

-- | Compare two strings for canonical equivalence. Further options include
--   case-insensitive comparison and code point order (as opposed to code
--   unit order).
--   
--   Canonical equivalence between two strings is defined as their
--   normalized forms (<a>NFD</a> or <a>NFC</a>) being identical. This
--   function compares strings incrementally instead of normalizing (and
--   optionally case-folding) both strings entirely, improving performance
--   significantly.
--   
--   Bulk normalization is only necessary if the strings do not fulfill the
--   <a>FCD</a> conditions. Only in this case, and only if the strings are
--   relatively long, is memory allocated temporarily. For <a>FCD</a>
--   strings and short non-<a>FCD</a> strings there is no memory
--   allocation.
compare :: [CompareOption] -> Text -> Text -> Ordering
instance GHC.Enum.Enum Data.Text.ICU.Normalize.NormalizationMode
instance GHC.Show.Show Data.Text.ICU.Normalize.NormalizationMode
instance GHC.Classes.Eq Data.Text.ICU.Normalize.NormalizationMode
instance GHC.Enum.Enum Data.Text.ICU.Normalize.CompareOption
instance GHC.Show.Show Data.Text.ICU.Normalize.CompareOption
instance GHC.Classes.Eq Data.Text.ICU.Normalize.CompareOption


-- | Types for use when manipulating Unicode text, using the bindings to
--   the International Components for Unicode (ICU) libraries.
module Data.Text.ICU.Types

-- | The name of a locale.
data LocaleName

-- | The root locale. For a description of resource bundles and the root
--   resource, see
--   <a>http://userguide.icu-project.org/locale/resources</a>.
Root :: LocaleName

-- | A specific locale.
Locale :: String -> LocaleName

-- | The program's current locale.
Current :: LocaleName

-- | Detailed information about parsing errors. Used by ICU parsing engines
--   that parse long rules, patterns, or programs, where the text being
--   parsed is long enough that more information than an <a>ICUError</a> is
--   needed to localize the error.
data ParseError


module Data.Text.ICU.Error

-- | ICU error type. This is an instance of the <a>Exception</a> type
--   class. A value of this type may be thrown as an exception by most ICU
--   functions.
data ICUError

-- | Detailed information about parsing errors. Used by ICU parsing engines
--   that parse long rules, patterns, or programs, where the text being
--   parsed is long enough that more information than an <a>ICUError</a> is
--   needed to localize the error.
data ParseError

-- | Indicate whether the given error code is a success.
isSuccess :: ICUError -> Bool

-- | Indicate whether the given error code is a failure.
isFailure :: ICUError -> Bool

-- | Return a string representing the name of the given error code.
errorName :: ICUError -> String
isRegexError :: ICUError -> Bool
u_USING_FALLBACK_WARNING :: ICUError
u_USING_DEFAULT_WARNING :: ICUError
u_SAFECLONE_ALLOCATED_WARNING :: ICUError
u_STATE_OLD_WARNING :: ICUError
u_STRING_NOT_TERMINATED_WARNING :: ICUError
u_SORT_KEY_TOO_SHORT_WARNING :: ICUError
u_AMBIGUOUS_ALIAS_WARNING :: ICUError
u_DIFFERENT_UCA_VERSION :: ICUError
u_ILLEGAL_ARGUMENT_ERROR :: ICUError
u_MISSING_RESOURCE_ERROR :: ICUError
u_INVALID_FORMAT_ERROR :: ICUError
u_FILE_ACCESS_ERROR :: ICUError
u_INTERNAL_PROGRAM_ERROR :: ICUError
u_MESSAGE_PARSE_ERROR :: ICUError
u_MEMORY_ALLOCATION_ERROR :: ICUError
u_INDEX_OUTOFBOUNDS_ERROR :: ICUError
u_PARSE_ERROR :: ICUError
u_INVALID_CHAR_FOUND :: ICUError
u_TRUNCATED_CHAR_FOUND :: ICUError
u_ILLEGAL_CHAR_FOUND :: ICUError
u_INVALID_TABLE_FORMAT :: ICUError
u_INVALID_TABLE_FILE :: ICUError
u_BUFFER_OVERFLOW_ERROR :: ICUError
u_UNSUPPORTED_ERROR :: ICUError
u_RESOURCE_TYPE_MISMATCH :: ICUError
u_ILLEGAL_ESCAPE_SEQUENCE :: ICUError
u_UNSUPPORTED_ESCAPE_SEQUENCE :: ICUError
u_NO_SPACE_AVAILABLE :: ICUError
u_CE_NOT_FOUND_ERROR :: ICUError
u_PRIMARY_TOO_LONG_ERROR :: ICUError
u_STATE_TOO_OLD_ERROR :: ICUError
u_TOO_MANY_ALIASES_ERROR :: ICUError
u_ENUM_OUT_OF_SYNC_ERROR :: ICUError
u_INVARIANT_CONVERSION_ERROR :: ICUError
u_INVALID_STATE_ERROR :: ICUError
u_COLLATOR_VERSION_MISMATCH :: ICUError
u_USELESS_COLLATOR_ERROR :: ICUError
u_NO_WRITE_PERMISSION :: ICUError
u_BAD_VARIABLE_DEFINITION :: ICUError
u_MALFORMED_RULE :: ICUError
u_MALFORMED_SET :: ICUError
u_MALFORMED_UNICODE_ESCAPE :: ICUError
u_MALFORMED_VARIABLE_DEFINITION :: ICUError
u_MALFORMED_VARIABLE_REFERENCE :: ICUError
u_MISPLACED_CURSOR_OFFSET :: ICUError
u_MISPLACED_QUANTIFIER :: ICUError
u_MISSING_OPERATOR :: ICUError
u_MULTIPLE_ANTE_CONTEXTS :: ICUError
u_MULTIPLE_CURSORS :: ICUError
u_MULTIPLE_POST_CONTEXTS :: ICUError
u_TRAILING_BACKSLASH :: ICUError
u_UNDEFINED_SEGMENT_REFERENCE :: ICUError
u_UNDEFINED_VARIABLE :: ICUError
u_UNQUOTED_SPECIAL :: ICUError
u_UNTERMINATED_QUOTE :: ICUError
u_RULE_MASK_ERROR :: ICUError
u_MISPLACED_COMPOUND_FILTER :: ICUError
u_MULTIPLE_COMPOUND_FILTERS :: ICUError
u_INVALID_RBT_SYNTAX :: ICUError
u_MALFORMED_PRAGMA :: ICUError
u_UNCLOSED_SEGMENT :: ICUError
u_VARIABLE_RANGE_EXHAUSTED :: ICUError
u_VARIABLE_RANGE_OVERLAP :: ICUError
u_ILLEGAL_CHARACTER :: ICUError
u_INTERNAL_TRANSLITERATOR_ERROR :: ICUError
u_INVALID_ID :: ICUError
u_INVALID_FUNCTION :: ICUError
u_UNEXPECTED_TOKEN :: ICUError
u_MULTIPLE_DECIMAL_SEPARATORS :: ICUError
u_MULTIPLE_EXPONENTIAL_SYMBOLS :: ICUError
u_MALFORMED_EXPONENTIAL_PATTERN :: ICUError
u_MULTIPLE_PERCENT_SYMBOLS :: ICUError
u_MULTIPLE_PERMILL_SYMBOLS :: ICUError
u_MULTIPLE_PAD_SPECIFIERS :: ICUError
u_PATTERN_SYNTAX_ERROR :: ICUError
u_ILLEGAL_PAD_POSITION :: ICUError
u_UNMATCHED_BRACES :: ICUError
u_ARGUMENT_TYPE_MISMATCH :: ICUError
u_DUPLICATE_KEYWORD :: ICUError
u_UNDEFINED_KEYWORD :: ICUError
u_DEFAULT_KEYWORD_MISSING :: ICUError
u_BRK_INTERNAL_ERROR :: ICUError
u_BRK_HEX_DIGITS_EXPECTED :: ICUError
u_BRK_SEMICOLON_EXPECTED :: ICUError
u_BRK_RULE_SYNTAX :: ICUError
u_BRK_UNCLOSED_SET :: ICUError
u_BRK_ASSIGN_ERROR :: ICUError
u_BRK_VARIABLE_REDFINITION :: ICUError
u_BRK_MISMATCHED_PAREN :: ICUError
u_BRK_NEW_LINE_IN_QUOTED_STRING :: ICUError
u_BRK_UNDEFINED_VARIABLE :: ICUError
u_BRK_INIT_ERROR :: ICUError
u_BRK_RULE_EMPTY_SET :: ICUError
u_BRK_UNRECOGNIZED_OPTION :: ICUError
u_BRK_MALFORMED_RULE_TAG :: ICUError
u_REGEX_INTERNAL_ERROR :: ICUError
u_REGEX_RULE_SYNTAX :: ICUError
u_REGEX_INVALID_STATE :: ICUError
u_REGEX_BAD_ESCAPE_SEQUENCE :: ICUError
u_REGEX_PROPERTY_SYNTAX :: ICUError
u_REGEX_UNIMPLEMENTED :: ICUError
u_REGEX_MISMATCHED_PAREN :: ICUError
u_REGEX_NUMBER_TOO_BIG :: ICUError
u_REGEX_BAD_INTERVAL :: ICUError
u_REGEX_MAX_LT_MIN :: ICUError
u_REGEX_INVALID_BACK_REF :: ICUError
u_REGEX_INVALID_FLAG :: ICUError
u_REGEX_SET_CONTAINS_STRING :: ICUError
u_REGEX_OCTAL_TOO_BIG :: ICUError
u_REGEX_INVALID_RANGE :: ICUError
u_REGEX_STACK_OVERFLOW :: ICUError
u_REGEX_TIME_OUT :: ICUError
u_REGEX_STOPPED_BY_CALLER :: ICUError
u_IDNA_PROHIBITED_ERROR :: ICUError
u_IDNA_UNASSIGNED_ERROR :: ICUError
u_IDNA_CHECK_BIDI_ERROR :: ICUError
u_IDNA_STD3_ASCII_RULES_ERROR :: ICUError
u_IDNA_ACE_PREFIX_ERROR :: ICUError
u_IDNA_VERIFICATION_ERROR :: ICUError
u_IDNA_LABEL_TOO_LONG_ERROR :: ICUError
u_IDNA_ZERO_LENGTH_LABEL_ERROR :: ICUError
u_IDNA_DOMAIN_NAME_TOO_LONG_ERROR :: ICUError


-- | Regular expression support for Unicode, implemented as bindings to the
--   International Components for Unicode (ICU) libraries.
--   
--   The syntax and behaviour of ICU regular expressions are Perl-like. For
--   complete details, see the ICU User Guide entry at
--   <a>http://userguide.icu-project.org/strings/regexp</a>.
--   
--   <i>Note</i>: The functions in this module are not thread safe. For
--   thread safe use, see <a>clone</a> below, or use the pure functions in
--   <a>Data.Text.ICU</a>.
module Data.Text.ICU.Regex

-- | Options for controlling matching behaviour.
data MatchOption

-- | Enable case insensitive matching.
CaseInsensitive :: MatchOption

-- | Allow comments and white space within patterns.
Comments :: MatchOption

-- | If set, <tt>'.'</tt> matches line terminators. Otherwise <tt>'.'</tt>
--   matching stops at line end.
DotAll :: MatchOption

-- | If set, treat the entire pattern as a literal string. Metacharacters
--   or escape sequences in the input sequence will be given no special
--   meaning.
--   
--   The option <a>CaseInsensitive</a> retains its meaning on matching
--   when used in conjunction with this option. Other options become
--   superfluous.
Literal :: MatchOption

-- | Control behaviour of <tt>'$'</tt> and <tt>'^'</tt>. If set, recognize
--   line terminators within string. Otherwise, match only at start and end
--   of input string.
Multiline :: MatchOption

-- | Haskell-only line endings. When this mode is enabled, only
--   <tt>'\n'</tt> is recognized as a line ending in the behavior of
--   <tt>'.'</tt>, <tt>'^'</tt>, and <tt>'$'</tt>.
HaskellLines :: MatchOption

-- | Unicode word boundaries. If set, <tt>'\\b'</tt> uses the Unicode TR 29
--   definition of word boundaries.
--   
--   <i>Warning</i>: Unicode word boundaries are quite different from
--   traditional regular expression word boundaries. See
--   <a>http://unicode.org/reports/tr29/#Word_Boundaries</a>.
UnicodeWord :: MatchOption

-- | Throw an error on unrecognized backslash escapes. If set, fail with an
--   error on patterns that contain backslash-escaped ASCII letters without
--   a known special meaning. If this flag is not set, these escaped
--   letters represent themselves.
ErrorOnUnknownEscapes :: MatchOption

-- | Set a processing limit for match operations.
--   
--   Some patterns, when matching certain strings, can run in exponential
--   time. For practical purposes, the match operation may appear to be in
--   an infinite loop. When a limit is set a match operation will fail with
--   an error if the limit is exceeded.
--   
--   The units of the limit are steps of the match engine. Correspondence
--   with actual processor time will depend on the speed of the processor
--   and the details of the specific pattern, but will typically be on the
--   order of milliseconds.
--   
--   By default, the matching time is not limited.
WorkLimit :: Int -> MatchOption

-- | Set the amount of heap storage available for use by the match
--   backtracking stack.
--   
--   ICU uses a backtracking regular expression engine, with the backtrack
--   stack maintained on the heap. This function sets the limit to the
--   amount of memory that can be used for this purpose. A backtracking
--   stack overflow will result in an error from the match operation that
--   caused it.
--   
--   A limit is desirable because a malicious or poorly designed pattern
--   can use excessive memory, potentially crashing the process. A limit is
--   enabled by default.
StackLimit :: Int -> MatchOption

-- | Detailed information about parsing errors. Used by ICU parsing engines
--   that parse long rules, patterns, or programs, where the text being
--   parsed is long enough that more information than an <a>ICUError</a> is
--   needed to localize the error.
data ParseError

-- | A compiled regular expression.
--   
--   <a>Regex</a> values are usually constructed using the <a>regex</a> or
--   <tt>regex'</tt> functions. This type is also an instance of
--   <tt>IsString</tt>, so if you have the <tt>OverloadedStrings</tt>
--   language extension enabled, you can construct a <a>Regex</a> by simply
--   writing the pattern in quotes (though this does not allow you to
--   specify any <tt>Option</tt>s).
data Regex

-- | Compile a regular expression with the given options. This function
--   throws a <tt>ParseError</tt> if the pattern is invalid.
--   
--   The <a>Regex</a> is initialized with empty text to search against.
regex :: [MatchOption] -> Text -> IO Regex

-- | Compile a regular expression with the given options. This is safest to
--   use when the pattern is constructed at run time.
regex' :: [MatchOption] -> Text -> IO (Either ParseError Regex)

-- | Make a copy of a compiled regular expression. Cloning a regular
--   expression is faster than opening a second instance from the source
--   form of the expression, and requires less memory.
--   
--   Note that the current input string and the position of any matched
--   text within it are not cloned; only the pattern itself and the
--   match mode flags are copied.
--   
--   Cloning can be particularly useful to threaded applications that
--   perform multiple match operations in parallel. Each concurrent RE
--   operation requires its own instance of a <a>Regex</a>.
clone :: Regex -> IO Regex

-- | Set the subject text string upon which the regular expression will
--   look for matches. This function may be called any number of times,
--   allowing the regular expression pattern to be applied to different
--   strings.
setText :: Regex -> Text -> IO ()

-- | Get the subject text that is currently associated with this regular
--   expression object.
getText :: Regex -> IO (ForeignPtr Word16, I16)

-- | Return the source form of the pattern used to construct this regular
--   expression or match.
pattern :: Regex -> Text

-- | Find the first matching substring of the input string that matches the
--   pattern.
--   
--   If <i>n</i> is non-negative, the search for a match begins at the
--   specified index, and any match region is reset.
--   
--   If <i>n</i> is -1, the search begins at the start of the input region,
--   or at the start of the full string if no region has been specified.
--   
--   If a match is found, <a>start</a>, <a>end</a>, and <tt>group</tt> will
--   provide more information regarding the match.
find :: Regex -> I16 -> IO Bool

-- | Find the next pattern match in the input string. Begin searching the
--   input at the location following the end of the previous match, or at
--   the start of the string (or region) if there is no previous match.
--   
--   If a match is found, <a>start</a>, <a>end</a>, and <tt>group</tt> will
--   provide more information regarding the match.
findNext :: Regex -> IO Bool

-- | Return the number of capturing groups in this regular expression's
--   pattern.
groupCount :: Regex -> IO Int

-- | Returns the index in the input string of the start of the text matched
--   by the specified capture group during the previous match operation.
--   Returns <a>Nothing</a> if the capture group was not part of the last
--   match.
start :: Regex -> Int -> IO (Maybe I16)

-- | Returns the index in the input string of the end of the text matched
--   by the specified capture group during the previous match operation.
--   Returns <a>Nothing</a> if the capture group was not part of the last
--   match.
end :: Regex -> Int -> IO (Maybe I16)

-- | Returns the index in the input string of the start of the text matched
--   by the specified capture group during the previous match operation.
--   Returns <tt>-1</tt> if the capture group was not part of the last
--   match.
start_ :: Regex -> Int -> IO I16

-- | Returns the index in the input string of the end of the text matched
--   by the specified capture group during the previous match operation.
--   Returns <tt>-1</tt> if the capture group was not part of the last
--   match.
end_ :: Regex -> Int -> IO I16
instance GHC.Show.Show Data.Text.ICU.Regex.Internal.Regex


-- | Character set conversion functions for Unicode, implemented as
--   bindings to the International Components for Unicode (ICU) libraries.
module Data.Text.ICU.Convert

-- | Character set converter type. <i>Note</i>: this structure is not
--   thread safe. It is <i>not</i> safe to use a value of this type
--   simultaneously from multiple threads.
data Converter

-- | Create a <a>Converter</a> with the name of a coded character set
--   specified as a string. The actual name will be resolved with the alias
--   file using a case-insensitive string comparison that ignores leading
--   zeroes and all non-alphanumeric characters. E.g., the names
--   <tt>"UTF8"</tt>, <tt>"utf-8"</tt>, <tt>"u*T@f08"</tt> and <tt>"Utf
--   8"</tt> are all equivalent (see also <a>compareNames</a>). If an empty
--   string is passed for the converter name, it will create one with the
--   <a>getDefaultName</a> return value.
--   
--   A converter name may contain options like a locale specification to
--   control the specific behavior of the newly instantiated converter. The
--   meaning of the options depends on the particular converter. If an
--   option is not defined for or recognized by a given converter, then it
--   is ignored.
--   
--   Options are appended to the converter name string, with a comma
--   between the name and the first option and also between adjacent
--   options.
--   
--   If the alias is ambiguous, then the preferred converter is used.
--   
--   The conversion behavior and names can vary between platforms. ICU may
--   convert some characters differently from other platforms. Details on
--   this topic are in the ICU User's Guide at
--   <a>http://icu-project.org/userguide/conversion.html</a>. Aliases
--   starting with a <tt>"cp"</tt> prefix have no specific meaning other
--   than being an alias starting with the letters <tt>"cp"</tt>. Please do
--   not associate any meaning to these aliases.
open :: String -> Maybe Bool -> IO Converter

-- | Encode a Unicode string into a codepage string using the given
--   converter.
fromUnicode :: Converter -> Text -> ByteString

-- | Decode an encoded string into a Unicode string using the given
--   converter.
toUnicode :: Converter -> ByteString -> Text

-- | Gets the internal, canonical name of the converter.
getName :: Converter -> String

-- | Determines whether the converter uses fallback mappings or not. This
--   flag has restrictions. Regardless of this flag, the converter will
--   always use fallbacks from Unicode Private Use code points, as well as
--   reverse fallbacks (to Unicode). For details see ".ucm File Format" in
--   the Conversion Data chapter of the ICU User Guide:
--   <a>http://www.icu-project.org/userguide/conversion-data.html#ucmformat</a>
usesFallback :: Converter -> Bool

-- | Indicates whether the converter contains ambiguous mappings of the
--   same character or not.
isAmbiguous :: Converter -> Bool

-- | Returns the current default converter name. If you want to <a>open</a>
--   a default converter, you do not need to use this function. It is
--   faster to pass the empty string to <a>open</a> the default converter.
getDefaultName :: IO String

-- | Sets the current default converter name. If this function needs to be
--   called, it should be called during application initialization. Most of
--   the time, the results from <a>getDefaultName</a> or <a>open</a> with
--   an empty string argument are sufficient for your application.
--   
--   <i>Note</i>: this function is not thread safe. <i>Do not</i> call this
--   function when <i>any</i> ICU function is being used from more than one
--   thread!
setDefaultName :: String -> IO ()

-- | Do a fuzzy compare of two converter/alias names. The comparison is
--   case-insensitive, ignores leading zeroes if they are not followed by
--   further digits, and ignores all but letters and digits. Thus the
--   strings <tt>"UTF-8"</tt>, <tt>"utf_8"</tt>, <tt>"u*T@f08"</tt> and
--   <tt>"Utf 8"</tt> are exactly equivalent. See section 1.4, Charset
--   Alias Matching in Unicode Technical Standard #22 at
--   <a>http://www.unicode.org/reports/tr22/</a>
compareNames :: String -> String -> Ordering

-- | Return the aliases for a given converter or alias name.
aliases :: String -> [String]

-- | A list of the canonical names of all available converters.
converterNames :: [String]

-- | The list of supported standard names.
standardNames :: [String]


-- | String collation functions for Unicode, implemented as bindings to the
--   International Components for Unicode (ICU) libraries.
module Data.Text.ICU.Collate

-- | String collator type.
data MCollator
data Attribute

-- | Direction of secondary weights, used in French. <a>True</a> results
--   in secondary weights being considered backwards, while <a>False</a>
--   treats secondary weights in the order in which they appear.
French :: Bool -> Attribute

-- | For handling variable elements. <a>NonIgnorable</a> is default.
AlternateHandling :: AlternateHandling -> Attribute

-- | Control the ordering of upper and lower case letters. <a>Nothing</a>
--   (the default) orders upper and lower case letters in accordance with
--   their tertiary weights.
CaseFirst :: (Maybe CaseFirst) -> Attribute

-- | Controls whether an extra case level (positioned before the third
--   level) is generated or not. When <a>False</a> (default), case level is
--   not generated; when <a>True</a>, the case level is generated. Contents
--   of the case level are affected by the value of the <a>CaseFirst</a>
--   attribute. A simple way to ignore accent differences in a string is to
--   set the strength to <a>Primary</a> and enable case level.
CaseLevel :: Bool -> Attribute

-- | Controls whether the normalization check and necessary normalizations
--   are performed. When <a>False</a> (default) no normalization check is
--   performed. The correctness of the result is guaranteed only if the
--   input data is in so-called <tt>FCD</tt> form (see the user's manual for
--   more info). When <a>True</a>, an incremental check is performed to see
--   whether the input data is in <tt>FCD</tt> form. If the data is not in
--   <tt>FCD</tt> form, incremental <tt>NFD</tt> normalization is
--   performed.
NormalizationMode :: Bool -> Attribute
Strength :: Strength -> Attribute

-- | When turned on, this attribute positions Hiragana before all
--   non-ignorables on quaternary level. This is a sneaky way to produce
--   JIS sort order.
HiraganaQuaternaryMode :: Bool -> Attribute

-- | When enabled, this attribute generates a collation key for the numeric
--   value of substrings of digits. This is a way to get '100' to sort
--   <i>after</i> '2'.
Numeric :: Bool -> Attribute

-- | Control the handling of variable weight elements.
data AlternateHandling

-- | Treat all codepoints with non-ignorable primary weights in the same
--   way.
NonIgnorable :: AlternateHandling

-- | Cause codepoints with primary weights that are equal to or below the
--   variable top value to be ignored on primary level and moved to the
--   quaternary level.
Shifted :: AlternateHandling

-- | Control the ordering of upper and lower case letters.
data CaseFirst

-- | Force upper case letters to sort before lower case.
UpperFirst :: CaseFirst

-- | Force lower case letters to sort before upper case.
LowerFirst :: CaseFirst

-- | The strength attribute. The usual strength for most locales (except
--   Japanese) is tertiary. Quaternary strength is useful when combined
--   with shifted setting for alternate handling attribute and for JIS X
--   4061 collation, when it is used to distinguish between Katakana and
--   Hiragana (this is achieved by setting <a>HiraganaQuaternaryMode</a>
--   mode to <a>True</a>). Otherwise, quaternary level is affected only by
--   the number of non-ignorable code points in the string. Identical
--   strength is rarely useful, as it amounts to codepoints of the
--   <tt>NFD</tt> form of the string.
data Strength
Primary :: Strength
Secondary :: Strength
Tertiary :: Strength
Quaternary :: Strength
Identical :: Strength

-- | Open an <a>MCollator</a> for comparing strings.
open :: LocaleName -> IO MCollator

-- | Compare two strings.
collate :: MCollator -> Text -> Text -> IO Ordering

-- | Compare two <a>CharIterator</a>s.
--   
--   If either iterator was constructed from a <a>ByteString</a>, it does
--   not need to be copied or converted internally, so this function can be
--   quite cheap.
collateIter :: MCollator -> CharIterator -> CharIterator -> IO Ordering

-- | Get the value of an <a>MCollator</a> attribute.
--   
--   It is safe to provide a dummy argument to an <a>Attribute</a>
--   constructor when using this function, so the following will work:
--   
--   <pre>
--   getAttribute mcol (NormalizationMode undefined)
--   </pre>
getAttribute :: MCollator -> Attribute -> IO Attribute

-- | Set the value of an <a>MCollator</a> attribute.
setAttribute :: MCollator -> Attribute -> IO ()

-- | Create a key for sorting the <a>Text</a> using the given
--   <a>Collator</a>. The result of comparing two <a>ByteString</a>s that
--   have been transformed with <a>sortKey</a> will be the same as the
--   result of <a>collate</a> on the two untransformed <a>Text</a>s.
sortKey :: MCollator -> Text -> IO ByteString

-- | Make a copy of a mutable <a>MCollator</a>. Subsequent changes to the
--   input <a>MCollator</a> will not affect the state of the returned
--   <a>MCollator</a>.
clone :: MCollator -> IO MCollator

-- | Make a safe copy of a mutable <a>MCollator</a> for use in pure code.
--   Subsequent changes to the <a>MCollator</a> will not affect the state
--   of the returned <a>Collator</a>.
freeze :: MCollator -> IO Collator
instance GHC.Show.Show Data.Text.ICU.Collate.Attribute
instance GHC.Classes.Eq Data.Text.ICU.Collate.Attribute
instance GHC.Show.Show Data.Text.ICU.Collate.Strength
instance GHC.Enum.Enum Data.Text.ICU.Collate.Strength
instance GHC.Enum.Bounded Data.Text.ICU.Collate.Strength
instance GHC.Classes.Eq Data.Text.ICU.Collate.Strength
instance GHC.Show.Show Data.Text.ICU.Collate.CaseFirst
instance GHC.Enum.Enum Data.Text.ICU.Collate.CaseFirst
instance GHC.Enum.Bounded Data.Text.ICU.Collate.CaseFirst
instance GHC.Classes.Eq Data.Text.ICU.Collate.CaseFirst
instance GHC.Show.Show Data.Text.ICU.Collate.AlternateHandling
instance GHC.Enum.Enum Data.Text.ICU.Collate.AlternateHandling
instance GHC.Enum.Bounded Data.Text.ICU.Collate.AlternateHandling
instance GHC.Classes.Eq Data.Text.ICU.Collate.AlternateHandling
instance Control.DeepSeq.NFData Data.Text.ICU.Collate.AlternateHandling
instance Control.DeepSeq.NFData Data.Text.ICU.Collate.CaseFirst
instance Control.DeepSeq.NFData Data.Text.ICU.Collate.Strength
instance Control.DeepSeq.NFData Data.Text.ICU.Collate.Attribute


-- | Access to the Unicode Character Database, implemented as bindings to
--   the International Components for Unicode (ICU) libraries.
--   
--   Unicode assigns each code point (not just assigned characters) values
--   for many properties. Most are simple boolean flags, or constants from
--   a small enumerated list. For some, values are relatively more complex
--   types.
--   
--   For more information see "About the Unicode Character Database"
--   <a>http://www.unicode.org/ucd/</a> and the ICU User Guide chapter on
--   Properties <a>http://icu-project.org/userguide/properties.html</a>.
module Data.Text.ICU.Char
class Property p v | p -> v
data BidiClass_
BidiClass :: BidiClass_
data Block_
Block :: Block_
data Bool_
Alphabetic :: Bool_

-- | 0-9, A-F, a-f
ASCIIHexDigit :: Bool_

-- | Format controls which have specific functions in the Bidi Algorithm.
BidiControl :: Bool_

-- | Characters that may change display in RTL text.
BidiMirrored :: Bool_

-- | Variations of dashes.
Dash :: Bool_

-- | Ignorable in most processing.
DefaultIgnorable :: Bool_

-- | The usage of deprecated characters is strongly discouraged.
Deprecated :: Bool_

-- | Characters that linguistically modify the meaning of another character
--   to which they apply.
Diacritic :: Bool_

-- | Extend the value or shape of a preceding alphabetic character, e.g.
--   length and iteration marks.
Extender :: Bool_
FullCompositionExclusion :: Bool_

-- | For programmatic determination of grapheme cluster boundaries.
GraphemeBase :: Bool_

-- | For programmatic determination of grapheme cluster boundaries.
GraphemeExtend :: Bool_

-- | For programmatic determination of grapheme cluster boundaries.
GraphemeLink :: Bool_

-- | Characters commonly used for hexadecimal numbers.
HexDigit :: Bool_

-- | Dashes used to mark connections between pieces of words, plus the
--   Katakana middle dot.
Hyphen :: Bool_

-- | Characters that can continue an identifier.
IDContinue :: Bool_

-- | Characters that can start an identifier.
IDStart :: Bool_

-- | CJKV ideographs.
Ideographic :: Bool_

-- | For programmatic determination of Ideographic Description Sequences.
IDSBinaryOperator :: Bool_
IDSTrinaryOperator :: Bool_

-- | Format controls for cursive joining and ligation.
JoinControl :: Bool_

-- | Characters that do not use logical order and require special handling
--   in most processing.
LogicalOrderException :: Bool_
Lowercase :: Bool_
Math :: Bool_

-- | Code points that are explicitly defined as illegal for the encoding of
--   characters.
NonCharacter :: Bool_
QuotationMark :: Bool_

-- | For programmatic determination of Ideographic Description Sequences.
Radical :: Bool_

-- | Characters with a "soft dot", like i or j. An accent placed on these
--   characters causes the dot to disappear.
SoftDotted :: Bool_

-- | Punctuation characters that generally mark the end of textual units.
TerminalPunctuation :: Bool_

-- | For programmatic determination of Ideographic Description Sequences.
UnifiedIdeograph :: Bool_
Uppercase :: Bool_
WhiteSpace :: Bool_

-- | <a>IDContinue</a> modified to allow closure under normalization forms
--   NFKC and NFKD.
XidContinue :: Bool_

-- | <a>IDStart</a> modified to allow closure under normalization forms
--   NFKC and NFKD.
XidStart :: Bool_

-- | Either the source of a case mapping or <i>in</i> the target of a case
--   mapping. Not the same as the general category <tt>Cased_Letter</tt>.
CaseSensitive :: Bool_

-- | Sentence Terminal. Used in UAX #29: Text Boundaries
--   <a>http://www.unicode.org/reports/tr29/</a>.
STerm :: Bool_

-- | Indicates all those characters that qualify as Variation Selectors.
--   For details on the behavior of these characters, see
--   <a>http://unicode.org/Public/UNIDATA/StandardizedVariants.html</a> and
--   section 15.6, Variation Selectors, of the Unicode Standard.
VariationSelector :: Bool_

-- | ICU-specific property for characters that are inert under NFD, i.e.
--   they do not interact with adjacent characters. Used for example in
--   normalizing transforms in incremental mode to find the boundary of
--   safely normalizable text despite possible text additions.
NFDInert :: Bool_

-- | ICU-specific property for characters that are inert under NFKD, i.e.
--   they do not interact with adjacent characters.
NFKDInert :: Bool_

-- | ICU-specific property for characters that are inert under NFC, i.e.
--   they do not interact with adjacent characters.
NFCInert :: Bool_

-- | ICU-specific property for characters that are inert under NFKC, i.e.
--   they do not interact with adjacent characters.
NFKCInert :: Bool_

-- | ICU-specific property for characters that are starters in terms of
--   Unicode normalization and combining character sequences.
SegmentStarter :: Bool_

-- | See UAX #31 Identifier and Pattern Syntax
--   <a>http://www.unicode.org/reports/tr31/</a>.
PatternSyntax :: Bool_

-- | See UAX #31 Identifier and Pattern Syntax
--   <a>http://www.unicode.org/reports/tr31/</a>.
PatternWhiteSpace :: Bool_

-- | Alphanumeric character class.
POSIXAlNum :: Bool_

-- | Blank character class.
POSIXBlank :: Bool_

-- | Graph character class.
POSIXGraph :: Bool_

-- | Printable character class.
POSIXPrint :: Bool_

-- | Hex digit character class.
POSIXXDigit :: Bool_
data Decomposition_
Decomposition :: Decomposition_
data EastAsianWidth_
EastAsianWidth :: EastAsianWidth_
data GeneralCategory_
GeneralCategory :: GeneralCategory_
data HangulSyllableType_
HangulSyllableType :: HangulSyllableType_
data JoiningGroup_
JoiningGroup :: JoiningGroup_
data JoiningType_
JoiningType :: JoiningType_
data NumericType_
NumericType :: NumericType_
data CanonicalCombiningClass_
CanonicalCombiningClass :: CanonicalCombiningClass_
data LeadCanonicalCombiningClass_
LeadCanonicalCombiningClass :: LeadCanonicalCombiningClass_
data TrailingCanonicalCombiningClass_
TrailingCanonicalCombiningClass :: TrailingCanonicalCombiningClass_
data NFCQuickCheck_
NFCQuickCheck :: NFCQuickCheck_
data NFDQuickCheck_
NFDQuickCheck :: NFDQuickCheck_
data NFKCQuickCheck_
NFKCQuickCheck :: NFKCQuickCheck_
data NFKDQuickCheck_
NFKDQuickCheck :: NFKDQuickCheck_
data GraphemeClusterBreak_
GraphemeClusterBreak :: GraphemeClusterBreak_
data LineBreak_
LineBreak :: LineBreak_
data SentenceBreak_
SentenceBreak :: SentenceBreak_
data WordBreak_
WordBreak :: WordBreak_

-- | Descriptions of Unicode blocks.
data BlockCode
NoBlock :: BlockCode
BasicLatin :: BlockCode
Latin1Supplement :: BlockCode
LatinExtendedA :: BlockCode
LatinExtendedB :: BlockCode
IPAExtensions :: BlockCode
SpacingModifierLetters :: BlockCode
CombiningDiacriticalMarks :: BlockCode
GreekAndCoptic :: BlockCode
Cyrillic :: BlockCode
Armenian :: BlockCode
Hebrew :: BlockCode
Arabic :: BlockCode
Syriac :: BlockCode
Thaana :: BlockCode
Devanagari :: BlockCode
Bengali :: BlockCode
Gurmukhi :: BlockCode
Gujarati :: BlockCode
Oriya :: BlockCode
Tamil :: BlockCode
Telugu :: BlockCode
Kannada :: BlockCode
Malayalam :: BlockCode
Sinhala :: BlockCode
Thai :: BlockCode
Lao :: BlockCode
Tibetan :: BlockCode
Myanmar :: BlockCode
Georgian :: BlockCode
HangulJamo :: BlockCode
Ethiopic :: BlockCode
Cherokee :: BlockCode
UnifiedCanadianAboriginalSyllabics :: BlockCode
Ogham :: BlockCode
Runic :: BlockCode
Khmer :: BlockCode
Mongolian :: BlockCode
LatinExtendedAdditional :: BlockCode
GreekExtended :: BlockCode
GeneralPunctuation :: BlockCode
SuperscriptsAndSubscripts :: BlockCode
CurrencySymbols :: BlockCode
CombiningDiacriticalMarksForSymbols :: BlockCode
LetterlikeSymbols :: BlockCode
NumberForms :: BlockCode
Arrows :: BlockCode
MathematicalOperators :: BlockCode
MiscellaneousTechnical :: BlockCode
ControlPictures :: BlockCode
OpticalCharacterRecognition :: BlockCode
EnclosedAlphanumerics :: BlockCode
BoxDrawing :: BlockCode
BlockElements :: BlockCode
GeometricShapes :: BlockCode
MiscellaneousSymbols :: BlockCode
Dingbats :: BlockCode
BraillePatterns :: BlockCode
CJKRadicalsSupplement :: BlockCode
KangxiRadicals :: BlockCode
IdeographicDescriptionCharacters :: BlockCode
CJKSymbolsAndPunctuation :: BlockCode
Hiragana :: BlockCode
Katakana :: BlockCode
Bopomofo :: BlockCode
HangulCompatibilityJamo :: BlockCode
Kanbun :: BlockCode
BopomofoExtended :: BlockCode
EnclosedCJKLettersAndMonths :: BlockCode
CJKCompatibility :: BlockCode
CJKUnifiedIdeographsExtensionA :: BlockCode
CJKUnifiedIdeographs :: BlockCode
YiSyllables :: BlockCode
YiRadicals :: BlockCode
HangulSyllables :: BlockCode
HighSurrogates :: BlockCode
HighPrivateUseSurrogates :: BlockCode
LowSurrogates :: BlockCode
PrivateUseArea :: BlockCode
CJKCompatibilityIdeographs :: BlockCode
AlphabeticPresentationForms :: BlockCode
ArabicPresentationFormsA :: BlockCode
CombiningHalfMarks :: BlockCode
CJKCompatibilityForms :: BlockCode
SmallFormVariants :: BlockCode
ArabicPresentationFormsB :: BlockCode
Specials :: BlockCode
HalfwidthAndFullwidthForms :: BlockCode
OldItalic :: BlockCode
Gothic :: BlockCode
Deseret :: BlockCode
ByzantineMusicalSymbols :: BlockCode
MusicalSymbols :: BlockCode
MathematicalAlphanumericSymbols :: BlockCode
CJKUnifiedIdeographsExtensionB :: BlockCode
CJKCompatibilityIdeographsSupplement :: BlockCode
Tags :: BlockCode
CyrillicSupplement :: BlockCode
Tagalog :: BlockCode
Hanunoo :: BlockCode
Buhid :: BlockCode
Tagbanwa :: BlockCode
MiscellaneousMathematicalSymbolsA :: BlockCode
SupplementalArrowsA :: BlockCode
SupplementalArrowsB :: BlockCode
MiscellaneousMathematicalSymbolsB :: BlockCode
SupplementalMathematicalOperators :: BlockCode
KatakanaPhoneticExtensions :: BlockCode
VariationSelectors :: BlockCode
SupplementaryPrivateUseAreaA :: BlockCode
SupplementaryPrivateUseAreaB :: BlockCode
Limbu :: BlockCode
TaiLe :: BlockCode
KhmerSymbols :: BlockCode
PhoneticExtensions :: BlockCode
MiscellaneousSymbolsAndArrows :: BlockCode
YijingHexagramSymbols :: BlockCode
LinearBSyllabary :: BlockCode
LinearBIdeograms :: BlockCode
AegeanNumbers :: BlockCode
Ugaritic :: BlockCode
Shavian :: BlockCode
Osmanya :: BlockCode
CypriotSyllabary :: BlockCode
TaiXuanJingSymbols :: BlockCode
VariationSelectorsSupplement :: BlockCode
AncientGreekMusicalNotation :: BlockCode
AncientGreekNumbers :: BlockCode
ArabicSupplement :: BlockCode
Buginese :: BlockCode
CJKStrokes :: BlockCode
CombiningDiacriticalMarksSupplement :: BlockCode
Coptic :: BlockCode
EthiopicExtended :: BlockCode
EthiopicSupplement :: BlockCode
GeorgianSupplement :: BlockCode
Glagolitic :: BlockCode
Kharoshthi :: BlockCode
ModifierToneLetters :: BlockCode
NewTaiLue :: BlockCode
OldPersian :: BlockCode
PhoneticExtensionsSupplement :: BlockCode
SupplementalPunctuation :: BlockCode
SylotiNagri :: BlockCode
Tifinagh :: BlockCode
VerticalForms :: BlockCode
N'Ko :: BlockCode
Balinese :: BlockCode
LatinExtendedC :: BlockCode
LatinExtendedD :: BlockCode
PhagsPa :: BlockCode
Phoenician :: BlockCode
Cuneiform :: BlockCode
CuneiformNumbersAndPunctuation :: BlockCode
CountingRodNumerals :: BlockCode
Sundanese :: BlockCode
Lepcha :: BlockCode
OlChiki :: BlockCode
CyrillicExtendedA :: BlockCode
Vai :: BlockCode
CyrillicExtendedB :: BlockCode
Saurashtra :: BlockCode
KayahLi :: BlockCode
Rejang :: BlockCode
Cham :: BlockCode
AncientSymbols :: BlockCode
PhaistosDisc :: BlockCode
Lycian :: BlockCode
Carian :: BlockCode
Lydian :: BlockCode
MahjongTiles :: BlockCode
DominoTiles :: BlockCode
Samaritan :: BlockCode
UnifiedCanadianAboriginalSyllabicsExtended :: BlockCode
TaiTham :: BlockCode
VedicExtensions :: BlockCode
Lisu :: BlockCode
Bamum :: BlockCode
CommonIndicNumberForms :: BlockCode
DevanagariExtended :: BlockCode
HangulJamoExtendedA :: BlockCode
Javanese :: BlockCode
MyanmarExtendedA :: BlockCode
TaiViet :: BlockCode
MeeteiMayek :: BlockCode
HangulJamoExtendedB :: BlockCode
ImperialAramaic :: BlockCode
OldSouthArabian :: BlockCode
Avestan :: BlockCode
InscriptionalParthian :: BlockCode
InscriptionalPahlavi :: BlockCode
OldTurkic :: BlockCode
RumiNumeralSymbols :: BlockCode
Kaithi :: BlockCode
EgyptianHieroglyphs :: BlockCode
EnclosedAlphanumericSupplement :: BlockCode
EnclosedIdeographicSupplement :: BlockCode
CJKUnifiedIdeographsExtensionC :: BlockCode
Mandaic :: BlockCode
Batak :: BlockCode
EthiopicExtendedA :: BlockCode
Brahmi :: BlockCode
BamumSupplement :: BlockCode
KanaSupplement :: BlockCode
PlayingCards :: BlockCode
MiscellaneousSymbolsAndPictographs :: BlockCode
Emoticons :: BlockCode
TransportAndMapSymbols :: BlockCode
AlchemicalSymbols :: BlockCode
CJKUnifiedIdeographsExtensionD :: BlockCode
ArabicExtendedA :: BlockCode
ArabicMathematicalAlphabeticSymbols :: BlockCode
Chakma :: BlockCode
MeeteiMayekExtensions :: BlockCode
MeroiticCursive :: BlockCode
MeroiticHieroglyphs :: BlockCode
Miao :: BlockCode
Sharada :: BlockCode
SoraSompeng :: BlockCode
SundaneseSupplement :: BlockCode
Takri :: BlockCode

-- | The language directional property of a character set.
data Direction
LeftToRight :: Direction
RightToLeft :: Direction
EuropeanNumber :: Direction
EuropeanNumberSeparator :: Direction
EuropeanNumberTerminator :: Direction
ArabicNumber :: Direction
CommonNumberSeparator :: Direction
BlockSeparator :: Direction
SegmentSeparator :: Direction
WhiteSpaceNeutral :: Direction
OtherNeutral :: Direction
LeftToRightEmbedding :: Direction
LeftToRightOverride :: Direction
RightToLeftArabic :: Direction
RightToLeftEmbedding :: Direction
RightToLeftOverride :: Direction
PopDirectionalFormat :: Direction
DirNonSpacingMark :: Direction
BoundaryNeutral :: Direction
data Decomposition
Canonical :: Decomposition
Compat :: Decomposition
Circle :: Decomposition
Final :: Decomposition
Font :: Decomposition
Fraction :: Decomposition
Initial :: Decomposition
Isolated :: Decomposition
Medial :: Decomposition
Narrow :: Decomposition
NoBreak :: Decomposition
Small :: Decomposition
Square :: Decomposition
Sub :: Decomposition
Super :: Decomposition
Vertical :: Decomposition
Wide :: Decomposition
Count :: Decomposition
data EastAsianWidth
EANeutral :: EastAsianWidth
EAAmbiguous :: EastAsianWidth
EAHalf :: EastAsianWidth
EAFull :: EastAsianWidth
EANarrow :: EastAsianWidth
EAWide :: EastAsianWidth
EACount :: EastAsianWidth
data GeneralCategory
GeneralOtherType :: GeneralCategory
UppercaseLetter :: GeneralCategory
LowercaseLetter :: GeneralCategory
TitlecaseLetter :: GeneralCategory
ModifierLetter :: GeneralCategory
OtherLetter :: GeneralCategory
NonSpacingMark :: GeneralCategory
EnclosingMark :: GeneralCategory
CombiningSpacingMark :: GeneralCategory
DecimalDigitNumber :: GeneralCategory
LetterNumber :: GeneralCategory
OtherNumber :: GeneralCategory
SpaceSeparator :: GeneralCategory
LineSeparator :: GeneralCategory
ParagraphSeparator :: GeneralCategory
ControlChar :: GeneralCategory
FormatChar :: GeneralCategory
PrivateUseChar :: GeneralCategory
Surrogate :: GeneralCategory
DashPunctuation :: GeneralCategory
StartPunctuation :: GeneralCategory
EndPunctuation :: GeneralCategory
ConnectorPunctuation :: GeneralCategory
OtherPunctuation :: GeneralCategory
MathSymbol :: GeneralCategory
CurrencySymbol :: GeneralCategory
ModifierSymbol :: GeneralCategory
OtherSymbol :: GeneralCategory
InitialPunctuation :: GeneralCategory
FinalPunctuation :: GeneralCategory
data HangulSyllableType
LeadingJamo :: HangulSyllableType
VowelJamo :: HangulSyllableType
TrailingJamo :: HangulSyllableType
LVSyllable :: HangulSyllableType
LVTSyllable :: HangulSyllableType
data JoiningGroup
Ain :: JoiningGroup
Alaph :: JoiningGroup
Alef :: JoiningGroup
Beh :: JoiningGroup
Beth :: JoiningGroup
Dal :: JoiningGroup
DalathRish :: JoiningGroup
E :: JoiningGroup
Feh :: JoiningGroup
FinalSemkath :: JoiningGroup
Gaf :: JoiningGroup
Gamal :: JoiningGroup
Hah :: JoiningGroup
HamzaOnHehGoal :: JoiningGroup
He :: JoiningGroup
Heh :: JoiningGroup
HehGoal :: JoiningGroup
Heth :: JoiningGroup
Kaf :: JoiningGroup
Kaph :: JoiningGroup
KnottedHeh :: JoiningGroup
Lam :: JoiningGroup
Lamadh :: JoiningGroup
Meem :: JoiningGroup
Mim :: JoiningGroup
Noon :: JoiningGroup
Nun :: JoiningGroup
Pe :: JoiningGroup
Qaf :: JoiningGroup
Qaph :: JoiningGroup
Reh :: JoiningGroup
ReversedPe :: JoiningGroup
Sad :: JoiningGroup
Sadhe :: JoiningGroup
Seen :: JoiningGroup
Semkath :: JoiningGroup
Shin :: JoiningGroup
SwashKaf :: JoiningGroup
SyriacWaw :: JoiningGroup
Tah :: JoiningGroup
Taw :: JoiningGroup
TehMarbuta :: JoiningGroup
Teth :: JoiningGroup
Waw :: JoiningGroup
Yeh :: JoiningGroup
YehBarree :: JoiningGroup
YehWithTail :: JoiningGroup
Yudh :: JoiningGroup
YudhHe :: JoiningGroup
Zain :: JoiningGroup
Fe :: JoiningGroup
Khaph :: JoiningGroup
Zhain :: JoiningGroup
BurushaskiYehBarree :: JoiningGroup
data JoiningType
JoinCausing :: JoiningType
DualJoining :: JoiningType
LeftJoining :: JoiningType
RightJoining :: JoiningType
Transparent :: JoiningType
data NumericType
NTDecimal :: NumericType
NTDigit :: NumericType
NTNumeric :: NumericType
data GraphemeClusterBreak
Control :: GraphemeClusterBreak
CR :: GraphemeClusterBreak
Extend :: GraphemeClusterBreak
L :: GraphemeClusterBreak
LF :: GraphemeClusterBreak
LV :: GraphemeClusterBreak
LVT :: GraphemeClusterBreak
T :: GraphemeClusterBreak
V :: GraphemeClusterBreak
SpacingMark :: GraphemeClusterBreak
Prepend :: GraphemeClusterBreak
data LineBreak
Ambiguous :: LineBreak
LBAlphabetic :: LineBreak
BreakBoth :: LineBreak
BreakAfter :: LineBreak
BreakBefore :: LineBreak
MandatoryBreak :: LineBreak
ContingentBreak :: LineBreak
ClosePunctuation :: LineBreak
CombiningMark :: LineBreak
CarriageReturn :: LineBreak
Exclamation :: LineBreak
Glue :: LineBreak
LBHyphen :: LineBreak
LBIdeographic :: LineBreak
Inseparable :: LineBreak
InfixNumeric :: LineBreak
LineFeed :: LineBreak
Nonstarter :: LineBreak
Numeric :: LineBreak
OpenPunctuation :: LineBreak
PostfixNumeric :: LineBreak
PrefixNumeric :: LineBreak
Quotation :: LineBreak
ComplexContext :: LineBreak
LBSurrogate :: LineBreak
Space :: LineBreak
BreakSymbols :: LineBreak
Zwspace :: LineBreak
NextLine :: LineBreak
WordJoiner :: LineBreak
H2 :: LineBreak
H3 :: LineBreak
JL :: LineBreak
JT :: LineBreak
JV :: LineBreak
data SentenceBreak
SBATerm :: SentenceBreak
SBClose :: SentenceBreak
SBFormat :: SentenceBreak
SBLower :: SentenceBreak
SBNumeric :: SentenceBreak
SBOLetter :: SentenceBreak
SBSep :: SentenceBreak
SBSP :: SentenceBreak
SBSTerm :: SentenceBreak
SBUpper :: SentenceBreak
SBCR :: SentenceBreak
SBExtend :: SentenceBreak
SBLF :: SentenceBreak
SBSContinue :: SentenceBreak
data WordBreak
WBALetter :: WordBreak
WBFormat :: WordBreak
WBKatakana :: WordBreak
WBMidLetter :: WordBreak
WBMidNum :: WordBreak
WBNumeric :: WordBreak
WBExtendNumLet :: WordBreak
WBCR :: WordBreak
WBExtend :: WordBreak
WBLF :: WordBreak
WBMidNumLet :: WordBreak
WBNewline :: WordBreak

-- | Return the Unicode allocation block that contains the given character.
blockCode :: Char -> BlockCode

-- | Return the full name of a Unicode character.
--   
--   Compared to <a>charName</a>, this function gives each Unicode code
--   point a unique extended name. Extended names are lowercase followed by
--   an uppercase hexadecimal number, within angle brackets.
charFullName :: Char -> String

-- | Return the name of a Unicode character.
--   
--   The names of all unassigned characters are empty.
--   
--   The name contains only "invariant" characters like A-Z, 0-9, space,
--   and '-'.
charName :: Char -> String

-- | Find a Unicode character by its full or extended name, and return its
--   code point value.
--   
--   The name is matched exactly and completely.
--   
--   A Unicode 1.0 name is matched only if it differs from the modern name.
--   
--   Compared to <a>charFromName</a>, this function gives each Unicode code
--   point a unique extended name. Extended names are lowercase followed by
--   an uppercase hexadecimal number, within angle brackets.
charFromFullName :: String -> Maybe Char

-- | Find a Unicode character by its full name, and return its code point
--   value.
--   
--   The name is matched exactly and completely.
--   
--   A Unicode 1.0 name is matched only if it differs from the modern name.
--   Unicode names are all uppercase.
charFromName :: String -> Maybe Char
combiningClass :: Char -> Int

-- | Return the bidirectional category value for the code point, which is
--   used in the Unicode bidirectional algorithm (UAX #9
--   <a>http://www.unicode.org/reports/tr9/</a>).
direction :: Char -> Direction
property :: Property p v => p -> Char -> v

-- | Determine whether the code point has the <a>BidiMirrored</a> property.
--   This property is set for characters that are commonly used in
--   Right-To-Left contexts and need to be displayed with a "mirrored"
--   glyph.
isMirrored :: Char -> Bool
mirror :: Char -> Char

-- | Return the decimal digit value of a decimal digit character. Such
--   characters have the general category <tt>Nd</tt> (decimal digit
--   numbers) and a <a>NumericType</a> of <a>NTDecimal</a>.
--   
--   No digit values are returned for any Han characters, because Han
--   number characters are often used with a special Chinese-style number
--   format (with characters for powers of 10 in between) instead of in
--   decimal-positional notation. Unicode 4 explicitly assigns Han number
--   characters a <a>NumericType</a> of <a>NTNumeric</a> instead of
--   <a>NTDecimal</a>.
digitToInt :: Char -> Maybe Int

-- | Return the numeric value for a Unicode code point as defined in the
--   Unicode Character Database.
--   
--   A <a>Double</a> return type is necessary because some numeric values
--   are fractions, negative, or too large to fit in a fixed-width integral
--   type.
numericValue :: Char -> Maybe Double
instance GHC.Show.Show Data.Text.ICU.Char.WordBreak
instance GHC.Enum.Enum Data.Text.ICU.Char.WordBreak
instance GHC.Classes.Eq Data.Text.ICU.Char.WordBreak
instance GHC.Show.Show Data.Text.ICU.Char.WordBreak_
instance GHC.Show.Show Data.Text.ICU.Char.SentenceBreak
instance GHC.Enum.Enum Data.Text.ICU.Char.SentenceBreak
instance GHC.Classes.Eq Data.Text.ICU.Char.SentenceBreak
instance GHC.Show.Show Data.Text.ICU.Char.SentenceBreak_
instance GHC.Show.Show Data.Text.ICU.Char.GraphemeClusterBreak
instance GHC.Enum.Enum Data.Text.ICU.Char.GraphemeClusterBreak
instance GHC.Classes.Eq Data.Text.ICU.Char.GraphemeClusterBreak
instance GHC.Show.Show Data.Text.ICU.Char.GraphemeClusterBreak_
instance GHC.Show.Show Data.Text.ICU.Char.TrailingCanonicalCombiningClass_
instance GHC.Show.Show Data.Text.ICU.Char.LeadCanonicalCombiningClass_
instance GHC.Show.Show Data.Text.ICU.Char.NFKDQuickCheck_
instance GHC.Show.Show Data.Text.ICU.Char.NFKCQuickCheck_
instance GHC.Show.Show Data.Text.ICU.Char.NFDQuickCheck_
instance GHC.Show.Show Data.Text.ICU.Char.NFCQuickCheck_
instance GHC.Show.Show Data.Text.ICU.Char.HangulSyllableType
instance GHC.Enum.Enum Data.Text.ICU.Char.HangulSyllableType
instance GHC.Classes.Eq Data.Text.ICU.Char.HangulSyllableType
instance GHC.Show.Show Data.Text.ICU.Char.HangulSyllableType_
instance GHC.Show.Show Data.Text.ICU.Char.NumericType
instance GHC.Enum.Enum Data.Text.ICU.Char.NumericType
instance GHC.Classes.Eq Data.Text.ICU.Char.NumericType
instance GHC.Show.Show Data.Text.ICU.Char.NumericType_
instance GHC.Show.Show Data.Text.ICU.Char.LineBreak
instance GHC.Enum.Enum Data.Text.ICU.Char.LineBreak
instance GHC.Classes.Eq Data.Text.ICU.Char.LineBreak
instance GHC.Show.Show Data.Text.ICU.Char.LineBreak_
instance GHC.Show.Show Data.Text.ICU.Char.JoiningType
instance GHC.Enum.Enum Data.Text.ICU.Char.JoiningType
instance GHC.Classes.Eq Data.Text.ICU.Char.JoiningType
instance GHC.Show.Show Data.Text.ICU.Char.JoiningType_
instance GHC.Show.Show Data.Text.ICU.Char.JoiningGroup
instance GHC.Enum.Enum Data.Text.ICU.Char.JoiningGroup
instance GHC.Classes.Eq Data.Text.ICU.Char.JoiningGroup
instance GHC.Show.Show Data.Text.ICU.Char.JoiningGroup_
instance GHC.Show.Show Data.Text.ICU.Char.GeneralCategory
instance GHC.Enum.Enum Data.Text.ICU.Char.GeneralCategory
instance GHC.Classes.Eq Data.Text.ICU.Char.GeneralCategory
instance GHC.Show.Show Data.Text.ICU.Char.GeneralCategory_
instance GHC.Show.Show Data.Text.ICU.Char.EastAsianWidth
instance GHC.Enum.Enum Data.Text.ICU.Char.EastAsianWidth
instance GHC.Classes.Eq Data.Text.ICU.Char.EastAsianWidth
instance GHC.Show.Show Data.Text.ICU.Char.EastAsianWidth_
instance GHC.Show.Show Data.Text.ICU.Char.Decomposition
instance GHC.Enum.Enum Data.Text.ICU.Char.Decomposition
instance GHC.Classes.Eq Data.Text.ICU.Char.Decomposition
instance GHC.Show.Show Data.Text.ICU.Char.Decomposition_
instance GHC.Show.Show Data.Text.ICU.Char.CanonicalCombiningClass_
instance GHC.Show.Show Data.Text.ICU.Char.BidiClass_
instance GHC.Show.Show Data.Text.ICU.Char.Bool_
instance GHC.Enum.Enum Data.Text.ICU.Char.Bool_
instance GHC.Classes.Eq Data.Text.ICU.Char.Bool_
instance GHC.Show.Show Data.Text.ICU.Char.BlockCode
instance GHC.Enum.Bounded Data.Text.ICU.Char.BlockCode
instance GHC.Enum.Enum Data.Text.ICU.Char.BlockCode
instance GHC.Classes.Eq Data.Text.ICU.Char.BlockCode
instance GHC.Show.Show Data.Text.ICU.Char.Direction
instance GHC.Enum.Enum Data.Text.ICU.Char.Direction
instance GHC.Classes.Eq Data.Text.ICU.Char.Direction
instance Control.DeepSeq.NFData Data.Text.ICU.Char.Direction
instance Control.DeepSeq.NFData Data.Text.ICU.Char.BlockCode
instance Control.DeepSeq.NFData Data.Text.ICU.Char.Bool_
instance Control.DeepSeq.NFData Data.Text.ICU.Char.BidiClass_
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.BidiClass_ Data.Text.ICU.Char.Direction
instance Control.DeepSeq.NFData Data.Text.ICU.Char.Block_
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.Block_ Data.Text.ICU.Char.BlockCode
instance Control.DeepSeq.NFData Data.Text.ICU.Char.CanonicalCombiningClass_
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.CanonicalCombiningClass_ GHC.Types.Int
instance Control.DeepSeq.NFData Data.Text.ICU.Char.Decomposition_
instance Control.DeepSeq.NFData Data.Text.ICU.Char.Decomposition
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.Decomposition_ (GHC.Base.Maybe Data.Text.ICU.Char.Decomposition)
instance Control.DeepSeq.NFData Data.Text.ICU.Char.EastAsianWidth_
instance Control.DeepSeq.NFData Data.Text.ICU.Char.EastAsianWidth
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.EastAsianWidth_ Data.Text.ICU.Char.EastAsianWidth
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.Bool_ GHC.Types.Bool
instance Control.DeepSeq.NFData Data.Text.ICU.Char.GeneralCategory_
instance Control.DeepSeq.NFData Data.Text.ICU.Char.GeneralCategory
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.GeneralCategory_ Data.Text.ICU.Char.GeneralCategory
instance Control.DeepSeq.NFData Data.Text.ICU.Char.JoiningGroup_
instance Control.DeepSeq.NFData Data.Text.ICU.Char.JoiningGroup
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.JoiningGroup_ (GHC.Base.Maybe Data.Text.ICU.Char.JoiningGroup)
instance Control.DeepSeq.NFData Data.Text.ICU.Char.JoiningType_
instance Control.DeepSeq.NFData Data.Text.ICU.Char.JoiningType
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.JoiningType_ (GHC.Base.Maybe Data.Text.ICU.Char.JoiningType)
instance Control.DeepSeq.NFData Data.Text.ICU.Char.LineBreak_
instance Control.DeepSeq.NFData Data.Text.ICU.Char.LineBreak
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.LineBreak_ (GHC.Base.Maybe Data.Text.ICU.Char.LineBreak)
instance Control.DeepSeq.NFData Data.Text.ICU.Char.NumericType_
instance Control.DeepSeq.NFData Data.Text.ICU.Char.NumericType
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.NumericType_ (GHC.Base.Maybe Data.Text.ICU.Char.NumericType)
instance Control.DeepSeq.NFData Data.Text.ICU.Char.HangulSyllableType_
instance Control.DeepSeq.NFData Data.Text.ICU.Char.HangulSyllableType
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.HangulSyllableType_ (GHC.Base.Maybe Data.Text.ICU.Char.HangulSyllableType)
instance Control.DeepSeq.NFData Data.Text.ICU.Char.NFCQuickCheck_
instance Control.DeepSeq.NFData Data.Text.ICU.Char.NFDQuickCheck_
instance Control.DeepSeq.NFData Data.Text.ICU.Char.NFKCQuickCheck_
instance Control.DeepSeq.NFData Data.Text.ICU.Char.NFKDQuickCheck_
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.NFCQuickCheck_ (GHC.Base.Maybe GHC.Types.Bool)
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.NFDQuickCheck_ (GHC.Base.Maybe GHC.Types.Bool)
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.NFKCQuickCheck_ (GHC.Base.Maybe GHC.Types.Bool)
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.NFKDQuickCheck_ (GHC.Base.Maybe GHC.Types.Bool)
instance Control.DeepSeq.NFData Data.Text.ICU.Char.LeadCanonicalCombiningClass_
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.LeadCanonicalCombiningClass_ GHC.Types.Int
instance Control.DeepSeq.NFData Data.Text.ICU.Char.TrailingCanonicalCombiningClass_
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.TrailingCanonicalCombiningClass_ GHC.Types.Int
instance Control.DeepSeq.NFData Data.Text.ICU.Char.GraphemeClusterBreak_
instance Control.DeepSeq.NFData Data.Text.ICU.Char.GraphemeClusterBreak
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.GraphemeClusterBreak_ (GHC.Base.Maybe Data.Text.ICU.Char.GraphemeClusterBreak)
instance Control.DeepSeq.NFData Data.Text.ICU.Char.SentenceBreak_
instance Control.DeepSeq.NFData Data.Text.ICU.Char.SentenceBreak
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.SentenceBreak_ (GHC.Base.Maybe Data.Text.ICU.Char.SentenceBreak)
instance Control.DeepSeq.NFData Data.Text.ICU.Char.WordBreak_
instance Control.DeepSeq.NFData Data.Text.ICU.Char.WordBreak
instance Data.Text.ICU.Char.Property Data.Text.ICU.Char.WordBreak_ (GHC.Base.Maybe Data.Text.ICU.Char.WordBreak)


-- | String breaking functions for Unicode, implemented as bindings to the
--   International Components for Unicode (ICU) libraries.
--   
--   The text boundary positions are found according to the rules described
--   in Unicode Standard Annex #29, Text Boundaries, and Unicode Standard
--   Annex #14, Line Breaking Properties. These are available at
--   <a>http://www.unicode.org/reports/tr14/</a> and
--   <a>http://www.unicode.org/reports/tr29/</a>.
module Data.Text.ICU.Break
data BreakIterator a

-- | Line break status.
data Line

-- | A soft line break is a position at which a line break is acceptable,
--   but not required.
Soft :: Line
Hard :: Line

-- | Word break status.
data Word

-- | A "word" that does not fit into another category. Includes spaces and
--   most punctuation.
Uncategorized :: Word

-- | A word that appears to be a number.
Number :: Word

-- | A word containing letters, excluding hiragana, katakana or ideographic
--   characters.
Letter :: Word

-- | A word containing kana characters.
Kana :: Word

-- | A word containing ideographic characters.
Ideograph :: Word

-- | Break a string on character boundaries.
--   
--   Character boundary analysis identifies the boundaries of "Extended
--   Grapheme Clusters", which are groupings of codepoints that should be
--   treated as character-like units for many text operations. Please see
--   Unicode Standard Annex #29, Unicode Text Segmentation,
--   <a>http://www.unicode.org/reports/tr29/</a> for additional information
--   on grapheme clusters and guidelines on their use.
breakCharacter :: LocaleName -> Text -> IO (BreakIterator ())

-- | Break a string on line boundaries.
--   
--   Line boundary analysis determines where a text string can be broken
--   when line wrapping. The mechanism correctly handles punctuation and
--   hyphenated words.
breakLine :: LocaleName -> Text -> IO (BreakIterator Line)

-- | Break a string on sentence boundaries.
--   
--   Sentence boundary analysis allows selection with correct
--   interpretation of periods within numbers and abbreviations, and
--   trailing punctuation marks such as quotation marks and parentheses.
breakSentence :: LocaleName -> Text -> IO (BreakIterator ())

-- | Break a string on word boundaries.
--   
--   Word boundary analysis is used by search and replace functions, as
--   well as within text editing applications that allow the user to select
--   words with a double click. Word selection provides correct
--   interpretation of punctuation marks within and following words.
--   Characters that are not part of a word, such as symbols or punctuation
--   marks, have word breaks on both sides.
breakWord :: LocaleName -> Text -> IO (BreakIterator Word)

-- | Thread-safe cloning operation. This is substantially faster than
--   creating a new <a>BreakIterator</a> from scratch.
clone :: BreakIterator a -> IO (BreakIterator a)

-- | Point an existing <a>BreakIterator</a> at a new piece of text.
setText :: BreakIterator a -> Text -> IO ()

-- | Return the character index most recently returned by <a>next</a>,
--   <a>previous</a>, <a>first</a>, or <a>last</a>.
current :: BreakIterator a -> IO (Maybe I16)

-- | Reset the breaker to the beginning of the text to be scanned.
first :: BreakIterator a -> IO I16

-- | Reset the breaker to the end of the text to be scanned.
last :: BreakIterator a -> IO I16

-- | Advance the iterator and break at the text boundary that follows the
--   current text boundary.
next :: BreakIterator a -> IO (Maybe I16)

-- | Move the iterator backward and break at the text boundary that
--   precedes the current text boundary.
previous :: BreakIterator a -> IO (Maybe I16)

-- | Determine the text boundary preceding the specified offset.
preceding :: BreakIterator a -> Int -> IO (Maybe I16)

-- | Determine the text boundary following the specified offset.
following :: BreakIterator a -> Int -> IO (Maybe I16)

-- | Determine whether the specified position is a boundary position. As a
--   side effect, leaves the iterator pointing to the first boundary
--   position at or after the given offset.
isBoundary :: BreakIterator a -> Int -> IO Bool

-- | Return the status from the break rule that determined the most
--   recently returned break position. For rules that do not specify a
--   status, a default value of <tt>()</tt> is returned.
getStatus :: BreakIterator a -> IO a

-- | Return statuses from all of the break rules that determined the most
--   recently returned break position.
getStatuses :: BreakIterator a -> IO [a]

-- | Locales for which text breaking information is available. A
--   <a>BreakIterator</a> in a locale in this list will perform the correct
--   text breaking for the locale.
available :: [LocaleName]
instance GHC.Enum.Enum Data.Text.ICU.Break.Word
instance GHC.Show.Show Data.Text.ICU.Break.Word
instance GHC.Classes.Eq Data.Text.ICU.Break.Word
instance GHC.Enum.Enum Data.Text.ICU.Break.Line
instance GHC.Show.Show Data.Text.ICU.Break.Line
instance GHC.Classes.Eq Data.Text.ICU.Break.Line
instance Control.DeepSeq.NFData Data.Text.ICU.Break.Line
instance Control.DeepSeq.NFData Data.Text.ICU.Break.Word


-- | Commonly used functions for Unicode, implemented as bindings to the
--   International Components for Unicode (ICU) libraries.
--   
--   This module contains only the most commonly used types and functions.
--   Other modules in this package expose richer interfaces.
module Data.Text.ICU

-- | The name of a locale.
data LocaleName

-- | The root locale. For a description of resource bundles and the root
--   resource, see
--   <a>http://userguide.icu-project.org/locale/resources</a>.
Root :: LocaleName

-- | A specific locale.
Locale :: String -> LocaleName

-- | The program's current locale.
Current :: LocaleName

-- | A boundary analyser.
data Breaker a

-- | A break in a string.
data Break a

-- | Prefix of the current break.
brkPrefix :: Break a -> Text

-- | Text of the current break.
brkBreak :: Break a -> Text

-- | Suffix of the current break.
brkSuffix :: Break a -> Text

-- | Status of the current break (only meaningful if <a>Line</a> or
--   <a>Word</a>).
brkStatus :: Break a -> a

-- | Line break status.
data Line

-- | A soft line break is a position at which a line break is acceptable,
--   but not required.
Soft :: Line

-- | A hard line break is a position at which a line break is required.
Hard :: Line

-- | Word break status.
data Word

-- | A "word" that does not fit into another category. Includes spaces and
--   most punctuation.
Uncategorized :: Word

-- | A word that appears to be a number.
Number :: Word

-- | A word containing letters, excluding hiragana, katakana or ideographic
--   characters.
Letter :: Word

-- | A word containing kana characters.
Kana :: Word

-- | A word containing ideographic characters.
Ideograph :: Word

-- | Break a string on character boundaries.
--   
--   Character boundary analysis identifies the boundaries of "Extended
--   Grapheme Clusters", which are groupings of codepoints that should be
--   treated as character-like units for many text operations. Please see
--   Unicode Standard Annex #29, Unicode Text Segmentation,
--   <a>http://www.unicode.org/reports/tr29/</a> for additional information
--   on grapheme clusters and guidelines on their use.
breakCharacter :: LocaleName -> Breaker ()

-- | Break a string on line boundaries.
--   
--   Line boundary analysis determines where a text string can be broken
--   when line wrapping. The mechanism correctly handles punctuation and
--   hyphenated words.
breakLine :: LocaleName -> Breaker Line

-- | Break a string on sentence boundaries.
--   
--   Sentence boundary analysis allows selection with correct
--   interpretation of periods within numbers and abbreviations, and
--   trailing punctuation marks such as quotation marks and parentheses.
breakSentence :: LocaleName -> Breaker ()

-- | Break a string on word boundaries.
--   
--   Word boundary analysis is used by search and replace functions, as
--   well as within text editing applications that allow the user to select
--   words with a double click. Word selection provides correct
--   interpretation of punctuation marks within and following words.
--   Characters that are not part of a word, such as symbols or punctuation
--   marks, have word breaks on both sides.
breakWord :: LocaleName -> Breaker Word

-- | Return a list of all breaks in a string, from left to right.
breaks :: Breaker a -> Text -> [Break a]

-- | Return a list of all breaks in a string, from right to left.
breaksRight :: Breaker a -> Text -> [Break a]

-- | Case-fold the characters in a string.
--   
--   Case folding is locale independent and not context sensitive, but
--   there is an option for treating the letter I specially for Turkic
--   languages. The result may be longer or shorter than the original.
toCaseFold :: Bool -> Text -> Text

-- | Lowercase the characters in a string.
--   
--   Casing is locale dependent and context sensitive. The result may be
--   longer or shorter than the original.
toLower :: LocaleName -> Text -> Text

-- | Uppercase the characters in a string.
--   
--   Casing is locale dependent and context sensitive. The result may be
--   longer or shorter than the original.
toUpper :: LocaleName -> Text -> Text

-- | A type that supports efficient iteration over Unicode characters.
--   
--   As an example of where this may be useful, a function using this type
--   may be able to iterate over a UTF-8 <a>ByteString</a> directly, rather
--   than first copying and converting it to an intermediate form. This
--   type also allows e.g. comparison between <a>Text</a> and
--   <a>ByteString</a>, with minimal overhead.
data CharIterator

-- | Construct a <a>CharIterator</a> from a Unicode string.
fromString :: String -> CharIterator

-- | Construct a <a>CharIterator</a> from a Unicode string.
fromText :: Text -> CharIterator

-- | Construct a <a>CharIterator</a> from a Unicode string encoded as a
--   UTF-8 <a>ByteString</a>. The validity of the encoded string is <i>not</i>
--   checked.
fromUtf8 :: ByteString -> CharIterator

-- | Normalization modes.
data NormalizationMode

-- | No decomposition/composition.
None :: NormalizationMode

-- | Canonical decomposition.
NFD :: NormalizationMode

-- | Compatibility decomposition.
NFKD :: NormalizationMode

-- | Canonical decomposition followed by canonical composition.
NFC :: NormalizationMode

-- | Compatibility decomposition followed by canonical composition.
NFKC :: NormalizationMode

-- | "Fast C or D" form.
FCD :: NormalizationMode

-- | Normalize a string according to the specified normalization mode.
normalize :: NormalizationMode -> Text -> Text

-- | Perform an efficient check on a string, to quickly determine if the
--   string is in a particular normalization form.
--   
--   A <a>Nothing</a> result indicates that a definite answer could not be
--   determined quickly, and a more thorough check is required, e.g. with
--   <a>isNormalized</a>. The user may have to convert the string to its
--   normalized form and compare the results.
--   
--   A result of <a>Just</a> <a>True</a> or <a>Just</a> <a>False</a>
--   indicates that the string definitely is, or is not, in the given
--   normalization form.
quickCheck :: NormalizationMode -> Text -> Maybe Bool

-- | Indicate whether a string is in a given normalization form.
--   
--   Unlike <a>quickCheck</a>, this function returns a definitive result.
--   For <a>NFD</a>, <a>NFKD</a>, and <a>FCD</a> normalization forms, both
--   functions work in exactly the same way. For <a>NFC</a> and
--   <a>NFKC</a> forms, where <a>quickCheck</a> may return <a>Nothing</a>,
--   this function will perform further tests to arrive at a definitive
--   result.
isNormalized :: NormalizationMode -> Text -> Bool

-- | Options to <a>compare</a>.
data CompareOption

-- | The caller knows that both strings fulfill the <a>FCD</a> conditions.
--   If <i>not</i> set, <a>compare</a> will <a>quickCheck</a> for
--   <a>FCD</a> and normalize if necessary.
InputIsFCD :: CompareOption

-- | Compare strings case-insensitively using case folding, instead of
--   case-sensitively. If set, then the following case folding options are
--   used.
CompareIgnoreCase :: CompareOption

-- | When case folding, exclude the special I character. For use with
--   Turkic (Turkish/Azerbaijani) text data.
FoldCaseExcludeSpecialI :: CompareOption

-- | Compare two strings for canonical equivalence. Further options include
--   case-insensitive comparison and code point order (as opposed to code
--   unit order).
--   
--   Canonical equivalence between two strings is defined as their
--   normalized forms (<a>NFD</a> or <a>NFC</a>) being identical. This
--   function compares strings incrementally instead of normalizing (and
--   optionally case-folding) both strings entirely, improving performance
--   significantly.
--   
--   Bulk normalization is only necessary if the strings do not fulfill the
--   <a>FCD</a> conditions. Only in this case, and only if the strings are
--   relatively long, is memory allocated temporarily. For <a>FCD</a>
--   strings and short non-<a>FCD</a> strings there is no memory
--   allocation.
compare :: [CompareOption] -> Text -> Text -> Ordering

-- | String collator type. <a>Collator</a>s are considered equal if they
--   will sort strings identically.
data Collator

-- | Create an immutable <a>Collator</a> for comparing strings.
--   
--   If <a>Root</a> is passed as the locale, UCA collation rules will be
--   used.
collator :: LocaleName -> Collator

-- | Create an immutable <a>Collator</a> with the given
--   <tt>Attribute</tt>s.
collatorWith :: LocaleName -> [Attribute] -> Collator

-- | Compare two strings.
collate :: Collator -> Text -> Text -> Ordering

-- | Compare two <a>CharIterator</a>s.
--   
--   If either iterator was constructed from a <a>ByteString</a>, it does
--   not need to be copied or converted beforehand, so this function can be
--   quite cheap.
collateIter :: Collator -> CharIterator -> CharIterator -> Ordering

-- | Create a key for sorting the <a>Text</a> using the given
--   <a>Collator</a>. The result of comparing two <a>ByteString</a>s that
--   have been transformed with <a>sortKey</a> will be the same as the
--   result of <a>collate</a> on the two untransformed <a>Text</a>s.
sortKey :: Collator -> Text -> ByteString

-- | A <a>Collator</a> that uses the Unicode Collation Algorithm (UCA).
uca :: Collator

-- | Options for controlling matching behaviour.
data MatchOption

-- | Enable case insensitive matching.
CaseInsensitive :: MatchOption

-- | Allow comments and white space within patterns.
Comments :: MatchOption

-- | If set, <tt>'.'</tt> matches line terminators. Otherwise <tt>'.'</tt>
--   matching stops at line end.
DotAll :: MatchOption

-- | If set, treat the entire pattern as a literal string. Metacharacters
--   or escape sequences in the input sequence will be given no special
--   meaning.
--   
--   The option <a>CaseInsensitive</a> retains its meaning on matching
--   when used in conjunction with this option. Other options become
--   superfluous.
Literal :: MatchOption

-- | Control behaviour of <tt>'$'</tt> and <tt>'^'</tt>. If set, recognize
--   line terminators within the string. Otherwise, match only at the start
--   and end of the input string.
Multiline :: MatchOption

-- | Haskell-only line endings. When this mode is enabled, only
--   <tt>'\n'</tt> is recognized as a line ending in the behavior of
--   <tt>'.'</tt>, <tt>'^'</tt>, and <tt>'$'</tt>.
HaskellLines :: MatchOption

-- | Unicode word boundaries. If set, <tt>'\\b'</tt> uses the Unicode TR 29
--   definition of word boundaries.
--   
--   <i>Warning</i>: Unicode word boundaries are quite different from
--   traditional regular expression word boundaries. See
--   <a>http://unicode.org/reports/tr29/#Word_Boundaries</a>.
UnicodeWord :: MatchOption

-- | Throw an error on unrecognized backslash escapes. If set, fail with an
--   error on patterns that contain backslash-escaped ASCII letters without
--   a known special meaning. If this flag is not set, these escaped
--   letters represent themselves.
ErrorOnUnknownEscapes :: MatchOption

-- | Set a processing limit for match operations.
--   
--   Some patterns, when matching certain strings, can run in exponential
--   time. For practical purposes, the match operation may appear to be in
--   an infinite loop. When a limit is set a match operation will fail with
--   an error if the limit is exceeded.
--   
--   The units of the limit are steps of the match engine. Correspondence
--   with actual processor time will depend on the speed of the processor
--   and the details of the specific pattern, but will typically be on the
--   order of milliseconds.
--   
--   By default, the matching time is not limited.
WorkLimit :: Int -> MatchOption

-- | Set the amount of heap storage available for use by the match
--   backtracking stack.
--   
--   ICU uses a backtracking regular expression engine, with the backtrack
--   stack maintained on the heap. This function sets the limit to the
--   amount of memory that can be used for this purpose. A backtracking
--   stack overflow will result in an error from the match operation that
--   caused it.
--   
--   A limit is desirable because a malicious or poorly designed pattern
--   can use excessive memory, potentially crashing the process. A limit is
--   enabled by default.
StackLimit :: Int -> MatchOption

-- | Detailed information about parsing errors. Used by ICU parsing engines
--   that parse long rules, patterns, or programs, where the text being
--   parsed is long enough that more information than an <a>ICUError</a> is
--   needed to localize the error.
data ParseError

-- | A match for a regular expression.
data Match

-- | A compiled regular expression.
--   
--   <a>Regex</a> values are usually constructed using the <a>regex</a> or
--   <a>regex'</a> functions. This type is also an instance of
--   <a>IsString</a>, so if you have the <tt>OverloadedStrings</tt>
--   language extension enabled, you can construct a <a>Regex</a> by simply
--   writing the pattern in quotes (though this does not allow you to
--   specify any <tt>Option</tt>s).
data Regex

-- | A typeclass for functions common to both <a>Match</a> and <a>Regex</a>
--   types.
class Regular r where regFp = Internal.reRe . regRe

-- | Compile a regular expression with the given options. This function
--   throws a <a>ParseError</a> if the pattern is invalid, so it is best
--   for use when the pattern is statically known.
regex :: [MatchOption] -> Text -> Regex

-- | Compile a regular expression with the given options. This is safest to
--   use when the pattern is constructed at run time.
regex' :: [MatchOption] -> Text -> Either ParseError Regex

-- | Return the source form of the pattern used to construct this regular
--   expression or match.
pattern :: Regular r => r -> Text

-- | Find the first match for the regular expression in the given text.
find :: Regex -> Text -> Maybe Match

-- | Lazily find all matches for the regular expression in the given text.
findAll :: Regex -> Text -> [Match]

-- | Return the number of capturing groups in this regular expression or
--   match's pattern.
groupCount :: Regular r => r -> Int

-- | A combinator for returning a list of all capturing groups on a
--   <a>Match</a>.
unfold :: (Int -> Match -> Maybe Text) -> Match -> [Text]

-- | Return the span of text between the end of the previous match and the
--   beginning of the current match.
span :: Match -> Text

-- | Return the <i>n</i>th capturing group in a match, or <a>Nothing</a> if
--   <i>n</i> is out of bounds.
group :: Int -> Match -> Maybe Text

-- | Return the prefix of the <i>n</i>th capturing group in a match (the
--   text from the start of the string to the start of the match), or
--   <a>Nothing</a> if <i>n</i> is out of bounds.
prefix :: Int -> Match -> Maybe Text

-- | Return the suffix of the <i>n</i>th capturing group in a match (the
--   text from the end of the match to the end of the string), or
--   <a>Nothing</a> if <i>n</i> is out of bounds.
suffix :: Int -> Match -> Maybe Text
