ICU 68.2  68.2
translit.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1999-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * Date Name Description
9 * 11/17/99 aliu Creation.
10 **********************************************************************
11 */
12 #ifndef TRANSLIT_H
13 #define TRANSLIT_H
14 
15 #include "unicode/utypes.h"
16 
17 #if U_SHOW_CPLUSPLUS_API
18 
24 #if !UCONFIG_NO_TRANSLITERATION
25 
26 #include "unicode/uobject.h"
27 #include "unicode/unistr.h"
28 #include "unicode/parseerr.h"
29 #include "unicode/utrans.h" // UTransPosition, UTransDirection
30 #include "unicode/strenum.h"
31 
32 U_NAMESPACE_BEGIN
33 
34 class UnicodeFilter;
35 class UnicodeSet;
36 class TransliteratorParser;
37 class NormalizationTransliterator;
38 class TransliteratorIDParser;
39 
491 
492 private:
493 
497  UnicodeString ID;
498 
505  UnicodeFilter* filter;
506 
507  int32_t maximumContextLength;
508 
509  public:
510 
// A context integer or pointer for a factory function, passed by value.
// Both members share storage (this is a union), so only the member that
// was last written should be read.
516  union Token {
// This token, interpreted as a 32-bit integer.
521  int32_t integer;
// This token, interpreted as a native pointer.
526  void* pointer;
527  };
528 
529 #ifndef U_HIDE_INTERNAL_API
530 
535  inline static Token integerToken(int32_t);
536 
542  inline static Token pointerToken(void*);
543 #endif /* U_HIDE_INTERNAL_API */
544 
560  typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context);
561 
562 protected:
563 
573  Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter);
574 
580 
586 
599  const UnicodeString* canon);
600 
601  friend class TransliteratorParser; // for parseID()
602  friend class TransliteratorIDParser; // for createBasicInstance()
603  friend class TransliteratorAlias; // for setID()
604 
605 public:
606 
611  virtual ~Transliterator();
612 
627  virtual Transliterator* clone() const;
628 
644  virtual int32_t transliterate(Replaceable& text,
645  int32_t start, int32_t limit) const;
646 
652  virtual void transliterate(Replaceable& text) const;
653 
718  virtual void transliterate(Replaceable& text, UTransPosition& index,
719  const UnicodeString& insertion,
720  UErrorCode& status) const;
721 
737  virtual void transliterate(Replaceable& text, UTransPosition& index,
738  UChar32 insertion,
739  UErrorCode& status) const;
740 
754  virtual void transliterate(Replaceable& text, UTransPosition& index,
755  UErrorCode& status) const;
756 
768  virtual void finishTransliteration(Replaceable& text,
769  UTransPosition& index) const;
770 
771 private:
772 
788  void _transliterate(Replaceable& text,
789  UTransPosition& index,
790  const UnicodeString* insertion,
791  UErrorCode &status) const;
792 
793 protected:
794 
874  virtual void handleTransliterate(Replaceable& text,
875  UTransPosition& pos,
876  UBool incremental) const = 0;
877 
878 public:
890  virtual void filteredTransliterate(Replaceable& text,
891  UTransPosition& index,
892  UBool incremental) const;
893 
894 private:
895 
923  virtual void filteredTransliterate(Replaceable& text,
924  UTransPosition& index,
925  UBool incremental,
926  UBool rollback) const;
927 
928 public:
929 
943  int32_t getMaximumContextLength(void) const;
944 
945 protected:
946 
953  void setMaximumContextLength(int32_t maxContextLength);
954 
955 public:
956 
967  virtual const UnicodeString& getID(void) const;
968 
978  static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
979  UnicodeString& result);
980 
1002  static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
1003  const Locale& inLocale,
1004  UnicodeString& result);
1005 
1013  const UnicodeFilter* getFilter(void) const;
1014 
1025 
1036  void adoptFilter(UnicodeFilter* adoptedFilter);
1037 
1058 
1075  static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
1076  UTransDirection dir,
1077  UParseError& parseError,
1078  UErrorCode& status);
1079 
1090  static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
1091  UTransDirection dir,
1092  UErrorCode& status);
1093 
1111  static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID,
1112  const UnicodeString& rules,
1113  UTransDirection dir,
1114  UParseError& parseError,
1115  UErrorCode& status);
1116 
1129  UBool escapeUnprintable) const;
1130 
1143  int32_t countElements() const;
1144 
1164  const Transliterator& getElement(int32_t index, UErrorCode& ec) const;
1165 
1182 
1197  virtual void handleGetSourceSet(UnicodeSet& result) const;
1198 
1212  virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
1213 
1214 public:
1215 
1232  static void U_EXPORT2 registerFactory(const UnicodeString& id,
1233  Factory factory,
1234  Token context);
1235 
1257  static void U_EXPORT2 registerInstance(Transliterator* adoptedObj);
1258 
1273  static void U_EXPORT2 registerAlias(const UnicodeString& aliasID,
1274  const UnicodeString& realID);
1275 
1276 protected:
1277 
1278 #ifndef U_HIDE_INTERNAL_API
1279 
1288  static void _registerFactory(const UnicodeString& id,
1289  Factory factory,
1290  Token context);
1291 
1295  static void _registerInstance(Transliterator* adoptedObj);
1296 
1300  static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID);
1301 
1335  static void _registerSpecialInverse(const UnicodeString& target,
1336  const UnicodeString& inverseTarget,
1337  UBool bidirectional);
1338 #endif /* U_HIDE_INTERNAL_API */
1339 
1340 public:
1341 
1359  static void U_EXPORT2 unregister(const UnicodeString& ID);
1360 
1361 public:
1362 
1373 
1379  static int32_t U_EXPORT2 countAvailableSources(void);
1380 
1390  static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index,
1391  UnicodeString& result);
1392 
1401  static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source);
1402 
1414  static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index,
1415  const UnicodeString& source,
1416  UnicodeString& result);
1417 
1425  static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source,
1426  const UnicodeString& target);
1427 
1441  static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index,
1442  const UnicodeString& source,
1443  const UnicodeString& target,
1444  UnicodeString& result);
1445 
1446 protected:
1447 
1448 #ifndef U_HIDE_INTERNAL_API
1449 
1453  static int32_t _countAvailableSources(void);
1454 
1459  static UnicodeString& _getAvailableSource(int32_t index,
1460  UnicodeString& result);
1461 
1466  static int32_t _countAvailableTargets(const UnicodeString& source);
1467 
1472  static UnicodeString& _getAvailableTarget(int32_t index,
1473  const UnicodeString& source,
1474  UnicodeString& result);
1475 
1480  static int32_t _countAvailableVariants(const UnicodeString& source,
1481  const UnicodeString& target);
1482 
1487  static UnicodeString& _getAvailableVariant(int32_t index,
1488  const UnicodeString& source,
1489  const UnicodeString& target,
1490  UnicodeString& result);
1491 #endif /* U_HIDE_INTERNAL_API */
1492 
1493 protected:
1494 
1501  void setID(const UnicodeString& id);
1502 
1503 public:
1504 
1515  static UClassID U_EXPORT2 getStaticClassID(void);
1516 
1532  virtual UClassID getDynamicClassID(void) const = 0;
1533 
1534 private:
1535  static UBool initializeRegistry(UErrorCode &status);
1536 
1537 public:
1538 #ifndef U_HIDE_OBSOLETE_API
1539 
1546  static int32_t U_EXPORT2 countAvailableIDs(void);
1547 
1560  static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index);
1561 #endif /* U_HIDE_OBSOLETE_API */
1562 };
1563 
// Inline accessor: returns the current value of the private
// maximumContextLength member. Does not modify the object (const).
1564 inline int32_t Transliterator::getMaximumContextLength(void) const {
1565  return maximumContextLength;
1566 }
1567 
// Replaces this transliterator's ID member with a copy of `id`.
// @param id  the new identifier; it is copied by assignment into ID.
1568 inline void Transliterator::setID(const UnicodeString& id) {
1569  ID = id;
1570  // NUL-terminate the ID string, which is a non-aliased copy.
// The NUL code unit is appended and then immediately truncated off again,
// so ID.length() ends up the same as after the assignment above.
// NOTE(review): presumably the NUL stays in the underlying buffer just past
// the logical end -- confirm against the UnicodeString implementation.
1571  ID.append((char16_t)0);
1572  ID.truncate(ID.length()-1);
1573 }
1574 
1575 #ifndef U_HIDE_INTERNAL_API
// Builds a Token whose `integer` member is set to `i` and returns it by value.
// Only compiled when U_HIDE_INTERNAL_API is not defined.
// @param i  the 32-bit integer to store in the token.
// @return   a Token holding `i` in its `integer` member.
1576 inline Transliterator::Token Transliterator::integerToken(int32_t i) {
1577  Token t;
1578  t.integer = i;
1579  return t;
1580 }
1581 
// Builds a Token whose `pointer` member is set to `p` and returns it by value.
// Only compiled when U_HIDE_INTERNAL_API is not defined.
// @param p  the pointer to store in the token; it is stored as-is, with no
//           copy of the pointed-to data made here.
// @return   a Token holding `p` in its `pointer` member.
1582 inline Transliterator::Token Transliterator::pointerToken(void* p) {
1583  Token t;
1584  t.pointer = p;
1585  return t;
1586 }
1587 #endif /* U_HIDE_INTERNAL_API */
1588 
1589 U_NAMESPACE_END
1590 
1591 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
1592 
1593 #endif /* U_SHOW_CPLUSPLUS_API */
1594 
1595 #endif
icu::Transliterator::transliterate
virtual void transliterate(Replaceable &text, UTransPosition &index, const UnicodeString &insertion, UErrorCode &status) const
Transliterates the portion of the text buffer that can be transliterated unambiguously after new text ...
icu::Transliterator::transliterate
virtual int32_t transliterate(Replaceable &text, int32_t start, int32_t limit) const
Transliterates a segment of a string, with optional filtering.
UTransPosition
Position structure for utrans_transIncremental() incremental transliteration.
Definition: utrans.h:125
icu::Transliterator::Token::integer
int32_t integer
This token, interpreted as a 32-bit integer.
Definition: translit.h:521
parseerr.h
C API: Parse Error Information.
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
icu::Transliterator::Token::pointer
void * pointer
This token, interpreted as a native pointer.
Definition: translit.h:526
icu::UnicodeSet
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:281
U_I18N_API
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition: utypes.h:301
icu::Transliterator::_countAvailableSources
static int32_t _countAvailableSources(void)
Non-mutexed internal method.
UBool
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:269
icu::Replaceable
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:77
icu::Transliterator::~Transliterator
virtual ~Transliterator()
Destructor.
icu::Transliterator::_getAvailableVariant
static UnicodeString & _getAvailableVariant(int32_t index, const UnicodeString &source, const UnicodeString &target, UnicodeString &result)
Non-mutexed internal method.
icu::Transliterator::getDynamicClassID
virtual UClassID getDynamicClassID(void) const =0
Returns a unique class ID polymorphically.
icu::UnicodeString::append
UnicodeString & append(const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Append the characters in srcText in the range [srcStart, srcStart + srcLength) to the UnicodeString o...
Definition: unistr.h:4622
icu::Transliterator::orphanFilter
UnicodeFilter * orphanFilter(void)
Returns the filter used by this transliterator, or NULL if this transliterator uses no filter.
icu::Transliterator::countAvailableIDs
static int32_t countAvailableIDs(void)
Return the number of IDs currently registered with the system.
icu::Transliterator::countAvailableTargets
static int32_t countAvailableTargets(const UnicodeString &source)
Return the number of registered target specifiers for a given source specifier.
icu::Transliterator::unregister
static void unregister(const UnicodeString &ID)
Unregisters a transliterator or class.
icu::Transliterator::_registerFactory
static void _registerFactory(const UnicodeString &id, Factory factory, Token context)
icu::Transliterator::getDisplayName
static UnicodeString & getDisplayName(const UnicodeString &ID, UnicodeString &result)
Returns a name for this transliterator that is appropriate for display to the user in the default loc...
icu::Transliterator::getAvailableVariant
static UnicodeString & getAvailableVariant(int32_t index, const UnicodeString &source, const UnicodeString &target, UnicodeString &result)
Return a registered variant specifier for a given source-target pair.
UParseError
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
icu::Transliterator::handleTransliterate
virtual void handleTransliterate(Replaceable &text, UTransPosition &pos, UBool incremental) const =0
Abstract method that concrete subclasses define to implement their transliteration algorithm.
icu::UnicodeString
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:295
icu::Transliterator::registerAlias
static void registerAlias(const UnicodeString &aliasID, const UnicodeString &realID)
Registers an ID string as an alias of another ID string.
icu::Transliterator::_registerInstance
static void _registerInstance(Transliterator *adoptedObj)
UChar32
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:467
icu::UObject
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
icu::Transliterator::getDisplayName
static UnicodeString & getDisplayName(const UnicodeString &ID, const Locale &inLocale, UnicodeString &result)
Returns a name for this transliterator that is appropriate for display to the user in the given local...
icu::Transliterator::Token
A context integer or pointer for a factory function, passed by value.
Definition: translit.h:516
icu::Transliterator::getFilter
const UnicodeFilter * getFilter(void) const
Returns the filter used by this transliterator, or NULL if this transliterator uses no filter.
UClassID
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
UErrorCode
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
icu::Transliterator::getElement
const Transliterator & getElement(int32_t index, UErrorCode &ec) const
Return an element that makes up this transliterator.
icu::StringEnumeration
Base class for 'pure' C++ implementations of uenum api.
Definition: strenum.h:61
icu::Transliterator::_countAvailableTargets
static int32_t _countAvailableTargets(const UnicodeString &source)
Non-mutexed internal method.
icu::Transliterator::transliterate
virtual void transliterate(Replaceable &text, UTransPosition &index, UErrorCode &status) const
Transliterates the portion of the text buffer that can be transliterated unambiguously.
icu::Transliterator::setMaximumContextLength
void setMaximumContextLength(int32_t maxContextLength)
Method for subclasses to use to set the maximum context length.
icu::Transliterator::operator=
Transliterator & operator=(const Transliterator &)
Assignment operator.
icu::Transliterator::createBasicInstance
static Transliterator * createBasicInstance(const UnicodeString &id, const UnicodeString *canon)
Create a transliterator from a basic ID.
icu::Transliterator::getAvailableID
static const UnicodeString & getAvailableID(int32_t index)
Return the index-th available ID.
icu::Transliterator::transliterate
virtual void transliterate(Replaceable &text, UTransPosition &index, UChar32 insertion, UErrorCode &status) const
Transliterates the portion of the text buffer that can be transliterated unambiguously after a new cha...
UTransDirection
UTransDirection
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules o...
Definition: utrans.h:83
icu::Transliterator::createInstance
static Transliterator * createInstance(const UnicodeString &ID, UTransDirection dir, UErrorCode &status)
Returns a Transliterator object given its ID.
icu::Transliterator::Factory
Transliterator *(* Factory)(const UnicodeString &ID, Token context)
A function that creates and returns a Transliterator.
Definition: translit.h:560
icu::Transliterator
Transliterator is an abstract class that transliterates text from one format to another.
Definition: translit.h:490
icu::Transliterator::Transliterator
Transliterator(const Transliterator &)
Copy constructor.
icu::Transliterator::toRules
virtual UnicodeString & toRules(UnicodeString &result, UBool escapeUnprintable) const
Create a rule string that can be passed to createFromRules() to recreate this transliterator.
icu::Transliterator::getStaticClassID
static UClassID getStaticClassID(void)
Return the class ID for this class.
icu::Transliterator::_countAvailableVariants
static int32_t _countAvailableVariants(const UnicodeString &source, const UnicodeString &target)
Non-mutexed internal method.
icu::Transliterator::createInstance
static Transliterator * createInstance(const UnicodeString &ID, UTransDirection dir, UParseError &parseError, UErrorCode &status)
Returns a Transliterator object given its ID.
icu::Transliterator::getTargetSet
virtual UnicodeSet & getTargetSet(UnicodeSet &result) const
Returns the set of all characters that may be generated as replacement text by this transliterator.
icu::Transliterator::registerFactory
static void registerFactory(const UnicodeString &id, Factory factory, Token context)
Registers a factory function that creates transliterators of a given ID.
icu::Transliterator::getAvailableSource
static UnicodeString & getAvailableSource(int32_t index, UnicodeString &result)
Return a registered source specifier.
icu::Transliterator::createFromRules
static Transliterator * createFromRules(const UnicodeString &ID, const UnicodeString &rules, UTransDirection dir, UParseError &parseError, UErrorCode &status)
Returns a Transliterator object constructed from the given rule string.
icu::Transliterator::getAvailableTarget
static UnicodeString & getAvailableTarget(int32_t index, const UnicodeString &source, UnicodeString &result)
Return a registered target specifier for a given source.
icu::Transliterator::_registerSpecialInverse
static void _registerSpecialInverse(const UnicodeString &target, const UnicodeString &inverseTarget, UBool bidirectional)
Register two targets as being inverses of one another.
icu::Transliterator::adoptFilter
void adoptFilter(UnicodeFilter *adoptedFilter)
Changes the filter used by this transliterator.
icu::Transliterator::getID
virtual const UnicodeString & getID(void) const
Returns a programmatic identifier for this transliterator.
icu::Transliterator::transliterate
virtual void transliterate(Replaceable &text) const
Transliterates an entire string in place.
icu::Transliterator::handleGetSourceSet
virtual void handleGetSourceSet(UnicodeSet &result) const
Framework method that returns the set of all characters that may be modified in the input text by thi...
icu::Transliterator::getSourceSet
UnicodeSet & getSourceSet(UnicodeSet &result) const
Returns the set of all characters that may be modified in the input text by this Transliterator.
icu::Transliterator::_registerAlias
static void _registerAlias(const UnicodeString &aliasID, const UnicodeString &realID)
icu::Transliterator::Transliterator
Transliterator(const UnicodeString &ID, UnicodeFilter *adoptedFilter)
Default constructor.
icu::Transliterator::countElements
int32_t countElements() const
Return the number of elements that make up this transliterator.
strenum.h
C++ API: String Enumeration.
icu::Transliterator::clone
virtual Transliterator * clone() const
Implements Cloneable.
utrans.h
C API: Transliterator.
icu::Transliterator::finishTransliteration
virtual void finishTransliteration(Replaceable &text, UTransPosition &index) const
Finishes any pending transliterations that were waiting for more characters.
icu::UnicodeString::truncate
UBool truncate(int32_t targetLength)
Truncate this UnicodeString to the targetLength.
Definition: unistr.h:4730
icu::Transliterator::createInverse
Transliterator * createInverse(UErrorCode &status) const
Returns this transliterator's inverse.
uobject.h
C++ API: Common ICU base class UObject.
icu::Transliterator::countAvailableVariants
static int32_t countAvailableVariants(const UnicodeString &source, const UnicodeString &target)
Return the number of registered variant specifiers for a given source-target pair.
icu::Transliterator::_getAvailableTarget
static UnicodeString & _getAvailableTarget(int32_t index, const UnicodeString &source, UnicodeString &result)
Non-mutexed internal method.
icu::Transliterator::registerInstance
static void registerInstance(Transliterator *adoptedObj)
Registers an instance obj of a subclass of Transliterator with the system.
icu::UnicodeFilter
UnicodeFilter defines a protocol for selecting a subset of the full range (U+0000 to U+10FFFF) of Uni...
Definition: unifilt.h:65
icu::Transliterator::countAvailableSources
static int32_t countAvailableSources(void)
Return the number of registered source specifiers.
icu::Locale
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:195
icu::Transliterator::getAvailableIDs
static StringEnumeration * getAvailableIDs(UErrorCode &ec)
Return a StringEnumeration over the IDs available at the time of the call, including user-registered ...
unistr.h
C++ API: Unicode String.
icu::Transliterator::filteredTransliterate
virtual void filteredTransliterate(Replaceable &text, UTransPosition &index, UBool incremental) const
Transliterate a substring of text, as specified by index, taking filters into account.
icu::Transliterator::_getAvailableSource
static UnicodeString & _getAvailableSource(int32_t index, UnicodeString &result)
Non-mutexed internal method.