summaryrefslogtreecommitdiffstats
path: root/src/lib/base/Unicode.h
blob: 1391c1e7249b8190671e543a26b9cfc716e44857 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
/*
 * barrier -- mouse and keyboard sharing utility
 * Copyright (C) 2012-2016 Symless Ltd.
 * Copyright (C) 2002 Chris Schoeneman
 * 
 * This package is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * found in the file LICENSE that should have accompanied this file.
 * 
 * This package is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#pragma once

#include "base/String.h"
#include "common/basic_types.h"

//! Unicode conversion helpers
/*!
Collects static functions that translate text between UTF-8 and the
other Unicode encodings (UCS-2, UCS-4, UTF-16, UTF-32), and between
UTF-8 and the encoding of the current locale.  Every member declared
here is static.
*/
class Unicode {
public:
    //! @name accessors
    //@{

    //! Check a string for UTF-8 validity
    /*!
    Returns true iff \p utf8 holds a valid sequence of UTF-8 encoded
    characters.
    */
    static bool isUTF8(const String& utf8);

    //! Re-encode UTF-8 text as UCS-2
    /*!
    Converts \p utf8 to UCS-2.  When \p errors is not NULL, *errors is
    set to true iff some character could not be encoded in UCS-2.
    Decoding errors do not set *errors.
    */
    static String UTF8ToUCS2(const String& utf8, bool* errors = NULL);

    //! Re-encode UTF-8 text as UCS-4
    /*!
    Converts \p utf8 to UCS-4.  When \p errors is not NULL, *errors is
    set to true iff some character could not be encoded in UCS-4.
    Decoding errors do not set *errors.
    */
    static String UTF8ToUCS4(const String& utf8, bool* errors = NULL);

    //! Re-encode UTF-8 text as UTF-16
    /*!
    Converts \p utf8 to UTF-16.  When \p errors is not NULL, *errors is
    set to true iff some character could not be encoded in UTF-16.
    Decoding errors do not set *errors.
    */
    static String UTF8ToUTF16(const String& utf8, bool* errors = NULL);

    //! Re-encode UTF-8 text as UTF-32
    /*!
    Converts \p utf8 to UTF-32.  When \p errors is not NULL, *errors is
    set to true iff some character could not be encoded in UTF-32.
    Decoding errors do not set *errors.
    */
    static String UTF8ToUTF32(const String& utf8, bool* errors = NULL);

    //! Re-encode UTF-8 text in the current locale encoding
    /*!
    Converts \p utf8 to the encoding of the current locale.  When
    \p errors is not NULL, *errors is set to true iff some character
    could not be encoded.  Decoding errors do not set *errors.
    */
    static String UTF8ToText(const String& utf8, bool* errors = NULL);

    //! Re-encode UCS-2 text as UTF-8
    /*!
    Converts \p ucs2 to UTF-8.  When \p errors is not NULL, *errors is
    set to true iff some character could not be decoded.
    */
    static String UCS2ToUTF8(const String& ucs2, bool* errors = NULL);

    //! Re-encode UCS-4 text as UTF-8
    /*!
    Converts \p ucs4 to UTF-8.  When \p errors is not NULL, *errors is
    set to true iff some character could not be decoded.
    */
    static String UCS4ToUTF8(const String& ucs4, bool* errors = NULL);

    //! Re-encode UTF-16 text as UTF-8
    /*!
    Converts \p utf16 to UTF-8.  When \p errors is not NULL, *errors is
    set to true iff some character could not be decoded.
    */
    static String UTF16ToUTF8(const String& utf16, bool* errors = NULL);

    //! Re-encode UTF-32 text as UTF-8
    /*!
    Converts \p utf32 to UTF-8.  When \p errors is not NULL, *errors is
    set to true iff some character could not be decoded.
    */
    static String UTF32ToUTF8(const String& utf32, bool* errors = NULL);

    //! Re-encode current-locale text as UTF-8
    /*!
    Converts \p text from the encoding of the current locale to UTF-8.
    When \p errors is not NULL, *errors is set to true iff some
    character could not be decoded.
    */
    static String textToUTF8(const String& text, bool* errors = NULL);

    //@}

private:
    // Turn UTF-8 into a wchar_t string in whatever encoding is native to
    // the platform.  Ownership of the returned buffer passes to the
    // caller, who must release it with delete[].  The buffer is *not*
    // nul terminated; its length, counted in characters, comes back
    // through size.
    static wchar_t* UTF8ToWideChar(const String& utf8,
                        UInt32& size, bool* errors);

    // Turn a wchar_t string (platform's native encoding) into UTF-8.
    // NOTE(review): the earlier comment on this declaration called the
    // input "nul terminated", yet a size is passed as well -- confirm
    // against the implementation which of the two bounds the input.
    static String wideCharToUTF8(const wchar_t* wide,
                        UInt32 size, bool* errors);

    // Internal conversions to UTF-8, one per source encoding.  Each
    // takes a byte pointer src and a count n.
    // NOTE(review): the unit of n (bytes vs. characters) is not visible
    // in this header -- confirm in the implementation.
    static String doUCS2ToUTF8(const UInt8* src, UInt32 n, bool* errors);
    static String doUCS4ToUTF8(const UInt8* src, UInt32 n, bool* errors);
    static String doUTF16ToUTF8(const UInt8* src, UInt32 n, bool* errors);
    static String doUTF32ToUTF8(const UInt8* src, UInt32 n, bool* errors);

    // Single-character conversion from and to UTF-8.  fromUTF8 takes
    // src and size by reference (presumably so it can consume input as
    // it reads -- TODO confirm); toUTF8 takes the destination string,
    // a character value c and the errors flag.
    static UInt32 fromUTF8(const UInt8*& src, UInt32& size);
    static void toUTF8(String& dst, UInt32 c, bool* errors);

    // NOTE(review): judging by the names these look like the marker
    // value for an invalid character and the substitute character used
    // in its place -- the values are defined outside this header, so
    // verify there.
    static UInt32 s_invalid;
    static UInt32 s_replacement;
};