Subversion
svn_utf.h
Go to the documentation of this file.
1 /**
2  * @copyright
3  * ====================================================================
4  * Licensed to the Apache Software Foundation (ASF) under one
5  * or more contributor license agreements. See the NOTICE file
6  * distributed with this work for additional information
7  * regarding copyright ownership. The ASF licenses this file
8  * to you under the Apache License, Version 2.0 (the
9  * "License"); you may not use this file except in compliance
10  * with the License. You may obtain a copy of the License at
11  *
12  * http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing,
15  * software distributed under the License is distributed on an
16  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17  * KIND, either express or implied. See the License for the
18  * specific language governing permissions and limitations
19  * under the License.
20  * ====================================================================
21  * @endcopyright
22  *
23  * @file svn_utf.h
24  * @brief UTF-8 conversion routines
25  *
26  * Whenever a conversion routine cannot convert to or from UTF-8, the
27  * error returned has code @c APR_EINVAL.
28  */
29 
30 
31 
32 #ifndef SVN_UTF_H
33 #define SVN_UTF_H
34 
35 #include <apr_pools.h>
36 #include <apr_xlate.h> /* for APR_*_CHARSET */
37 
38 #include "svn_types.h"
39 #include "svn_string.h"
40 
41 #ifdef __cplusplus
42 extern "C" {
43 #endif /* __cplusplus */
44 
45 #define SVN_APR_LOCALE_CHARSET APR_LOCALE_CHARSET /* svn-prefixed alias for APR_LOCALE_CHARSET (see apr_xlate.h) */
46 #define SVN_APR_DEFAULT_CHARSET APR_DEFAULT_CHARSET /* svn-prefixed alias for APR_DEFAULT_CHARSET (see apr_xlate.h) */
47 
48 /**
49  * Initialize the UTF-8 encoding/decoding routines.
50  * Allocate cached translation handles in a subpool of @a pool.
51  *
52  * If @a assume_native_utf8 is TRUE, the native character set is
53  * assumed to be UTF-8, i.e. conversion is a no-op. This is useful
54  * in contexts where the native character set is ASCII but UTF-8
55  * should be used regardless (e.g. for mod_dav_svn which runs within
56  * httpd and always uses the "C" locale).
57  *
58  * @note Calling this function is optional, but it improves performance.
59  * If it is called, no other svn function may be in use in other threads
60  * while this function runs or while @a pool is being cleared or
61  * destroyed.
62  *
63  * @since New in 1.8.
64  */
65 void
66 svn_utf_initialize2(svn_boolean_t assume_native_utf8,
67  apr_pool_t *pool);
68 
69 /**
70  * Like svn_utf_initialize2() but without the ability to force the
71  * native encoding to UTF-8.
72  *
73  * @deprecated Provided for backward compatibility with the 1.7 API.
74  */
75 SVN_DEPRECATED
76 void
77 svn_utf_initialize(apr_pool_t *pool);
78 
79 /** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src,
80  * allocated in @a pool. Return an @c APR_EINVAL error if conversion fails.
81  */
82 svn_error_t *
83 svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest,
84  const svn_stringbuf_t *src,
85  apr_pool_t *pool);
86 
87 
88 /** Set @a *dest to a utf8-encoded string from native string @a src, allocated
89  * in @a pool. Return an @c APR_EINVAL error if conversion fails.
90  */
91 svn_error_t *
92 svn_utf_string_to_utf8(const svn_string_t **dest,
93  const svn_string_t *src,
94  apr_pool_t *pool);
95 
96 
97 /** Set @a *dest to a utf8-encoded C string from native C string @a src,
98  * allocated in @a pool. Return an @c APR_EINVAL error if conversion fails.
99  */
100 svn_error_t *
101 svn_utf_cstring_to_utf8(const char **dest,
102  const char *src,
103  apr_pool_t *pool);
104 
105 
106 /** Set @a *dest to a utf8 encoded C string from @a frompage encoded C
107  * string @a src, allocated in @a pool. Return an @c APR_EINVAL error on failure.
108  *
109  * @since New in 1.4.
110  */
111 svn_error_t *
112 svn_utf_cstring_to_utf8_ex2(const char **dest,
113  const char *src,
114  const char *frompage,
115  apr_pool_t *pool);
116 
117 
118 /** Like svn_utf_cstring_to_utf8_ex2() but with an additional parameter,
119  * @a convset_key, which is ignored.
120  *
121  * @deprecated Provided for backward compatibility with the 1.3 API.
122  */
123 SVN_DEPRECATED
124 svn_error_t *
125 svn_utf_cstring_to_utf8_ex(const char **dest,
126  const char *src,
127  const char *frompage,
128  const char *convset_key,
129  apr_pool_t *pool);
130 
131 
132 /** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src,
133  * allocated in @a pool. Return an @c APR_EINVAL error if conversion fails.
134  */
135 svn_error_t *
136 svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest,
137  const svn_stringbuf_t *src,
138  apr_pool_t *pool);
139 
140 
141 /** Set @a *dest to a natively-encoded string from utf8 string @a src,
142  * allocated in @a pool. Return an @c APR_EINVAL error if conversion fails.
143  */
144 svn_error_t *
145 svn_utf_string_from_utf8(const svn_string_t **dest,
146  const svn_string_t *src,
147  apr_pool_t *pool);
148 
149 
150 /** Set @a *dest to a natively-encoded C string from utf8 C string @a src,
151  * allocated in @a pool. Return an @c APR_EINVAL error if conversion fails.
152  */
153 svn_error_t *
154 svn_utf_cstring_from_utf8(const char **dest,
155  const char *src,
156  apr_pool_t *pool);
157 
158 
159 /** Set @a *dest to a @a topage encoded C string from utf8 encoded C string
160  * @a src, allocated in @a pool. Return an @c APR_EINVAL error on failure.
161  *
162  * @since New in 1.4.
163  */
164 svn_error_t *
165 svn_utf_cstring_from_utf8_ex2(const char **dest,
166  const char *src,
167  const char *topage,
168  apr_pool_t *pool);
169 
170 
171 /** Like svn_utf_cstring_from_utf8_ex2() but with an additional parameter,
172  * @a convset_key, which is ignored.
173  *
174  * @deprecated Provided for backward compatibility with the 1.3 API.
175  */
176 SVN_DEPRECATED
177 svn_error_t *
178 svn_utf_cstring_from_utf8_ex(const char **dest,
179  const char *src,
180  const char *topage,
181  const char *convset_key,
182  apr_pool_t *pool);
183 
184 
185 /** Return a fuzzily native-encoded C string from utf8 C string @a src,
186  * allocated in @a pool. A fuzzy recoding leaves all 7-bit ASCII
187  * characters the same, and substitutes "?\\XXX" for others, where XXX
188  * is the unsigned decimal code for that character.
189  *
190  * This function cannot fail; it is guaranteed to return a string.
191  * It first recodes as described above and then attempts to convert
192  * the (new) 7-bit UTF-8 string to native encoding. If that fails, it
193  * returns the raw fuzzily recoded string, which may or may not be
194  * meaningful in the client's locale, but is (presumably) better than
195  * nothing.
196  *
197  * ### Notes:
198  *
199  * Improvement is possible, even imminent. The original problem was
200  * that if you converted a UTF-8 string (say, a log message) into a
201  * locale that couldn't represent all the characters, you'd just get a
202  * static placeholder saying "[unconvertible log message]". Then
203  * Justin Erenkrantz pointed out how on platforms that didn't support
204  * conversion at all, "svn log" would still fail completely when it
205  * encountered unconvertible data.
206  *
207  * Now for both cases, the caller can at least fall back on this
208  * function, which converts the message as best it can, substituting
209  * "?\\XXX" escape codes for the non-ASCII characters.
210  *
211  * Ultimately, some callers may prefer the iconv "//TRANSLIT" option,
212  * so when we can detect that at configure time, things will change.
213  * Also, this should (?) be moved to apr/apu eventually.
214  *
215  * See https://issues.apache.org/jira/browse/SVN-807 for
216  * details.
217  */
218 const char *
219 svn_utf_cstring_from_utf8_fuzzy(const char *src,
220  apr_pool_t *pool);
221 
222 
223 /** Set @a *dest to a natively-encoded C string from utf8 stringbuf @a src,
224  * allocated in @a pool. Return an @c APR_EINVAL error if conversion fails.
225  */
226 svn_error_t *
227 svn_utf_cstring_from_utf8_stringbuf(const char **dest,
228  const svn_stringbuf_t *src,
229  apr_pool_t *pool);
230 
231 
232 /** Set @a *dest to a natively-encoded C string from utf8 string @a src,
233  * allocated in @a pool. Return an @c APR_EINVAL error if conversion fails.
234  */
235 svn_error_t *
236 svn_utf_cstring_from_utf8_string(const char **dest,
237  const svn_string_t *src,
238  apr_pool_t *pool);
239 
240 /** Return the display width of UTF-8-encoded C string @a cstr.
241  * Return -1 if @a cstr is not printable or is not valid UTF-8.
242  *
243  * @since New in 1.8.
244  */
245 int
246 svn_utf_cstring_utf8_width(const char *cstr);
247 
248 #ifdef __cplusplus
249 }
250 #endif /* __cplusplus */
251 
252 #endif /* SVN_UTF_H */
svn_utf_cstring_from_utf8_stringbuf
svn_error_t * svn_utf_cstring_from_utf8_stringbuf(const char **dest, const svn_stringbuf_t *src, apr_pool_t *pool)
Set *dest to a natively-encoded C string from utf8 stringbuf src; allocate *dest in pool.
svn_error_t
Subversion error object.
Definition: svn_types.h:180
svn_utf_initialize
void svn_utf_initialize(apr_pool_t *pool)
Like svn_utf_initialize2() but without the ability to force the native encoding to UTF-8.
svn_utf_cstring_from_utf8_ex
svn_error_t * svn_utf_cstring_from_utf8_ex(const char **dest, const char *src, const char *topage, const char *convset_key, apr_pool_t *pool)
Like svn_utf_cstring_from_utf8_ex2() but with convset_key which is ignored.
svn_utf_cstring_from_utf8_ex2
svn_error_t * svn_utf_cstring_from_utf8_ex2(const char **dest, const char *src, const char *topage, apr_pool_t *pool)
Set *dest to a topage encoded C string from utf8 encoded C string src; allocate *dest in pool.
svn_string.h
Counted-length strings for Subversion, plus some C string goodies.
svn_utf_string_from_utf8
svn_error_t * svn_utf_string_from_utf8(const svn_string_t **dest, const svn_string_t *src, apr_pool_t *pool)
Set *dest to a natively-encoded string from utf8 string src; allocate *dest in pool.
svn_utf_cstring_from_utf8_fuzzy
const char * svn_utf_cstring_from_utf8_fuzzy(const char *src, apr_pool_t *pool)
Return a fuzzily native-encoded C string from utf8 C string src, allocated in pool.
svn_error_t::pool
apr_pool_t * pool
The pool in which this error object is allocated.
Definition: svn_types.h:210
svn_string_t
A simple counted string.
Definition: svn_string.h:96
svn_utf_string_to_utf8
svn_error_t * svn_utf_string_to_utf8(const svn_string_t **dest, const svn_string_t *src, apr_pool_t *pool)
Set *dest to a utf8-encoded string from native string src; allocate *dest in pool.
svn_types.h
Subversion's data types.
svn_utf_cstring_from_utf8_string
svn_error_t * svn_utf_cstring_from_utf8_string(const char **dest, const svn_string_t *src, apr_pool_t *pool)
Set *dest to a natively-encoded C string from utf8 string src; allocate *dest in pool.
svn_utf_cstring_to_utf8_ex2
svn_error_t * svn_utf_cstring_to_utf8_ex2(const char **dest, const char *src, const char *frompage, apr_pool_t *pool)
Set *dest to a utf8 encoded C string from frompage encoded C string src; allocate *dest in pool.
svn_utf_cstring_to_utf8_ex
svn_error_t * svn_utf_cstring_to_utf8_ex(const char **dest, const char *src, const char *frompage, const char *convset_key, apr_pool_t *pool)
Like svn_utf_cstring_to_utf8_ex2() but with convset_key which is ignored.
svn_boolean_t
int svn_boolean_t
YABT: Yet Another Boolean Type.
Definition: svn_types.h:141
SVN_DEPRECATED
#define SVN_DEPRECATED
Macro used to mark deprecated functions.
Definition: svn_types.h:62
svn_utf_stringbuf_to_utf8
svn_error_t * svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest, const svn_stringbuf_t *src, apr_pool_t *pool)
Set *dest to a utf8-encoded stringbuf from native stringbuf src; allocate *dest in pool.
svn_utf_cstring_to_utf8
svn_error_t * svn_utf_cstring_to_utf8(const char **dest, const char *src, apr_pool_t *pool)
Set *dest to a utf8-encoded C string from native C string src; allocate *dest in pool.
svn_utf_cstring_from_utf8
svn_error_t * svn_utf_cstring_from_utf8(const char **dest, const char *src, apr_pool_t *pool)
Set *dest to a natively-encoded C string from utf8 C string src; allocate *dest in pool.
svn_utf_initialize2
void svn_utf_initialize2(svn_boolean_t assume_native_utf8, apr_pool_t *pool)
Initialize the UTF-8 encoding/decoding routines.
svn_utf_cstring_utf8_width
int svn_utf_cstring_utf8_width(const char *cstr)
Return the display width of UTF-8-encoded C string cstr.
svn_stringbuf_t
A buffered string, capable of appending without an allocation and copy for each append.
Definition: svn_string.h:104
svn_utf_stringbuf_from_utf8
svn_error_t * svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest, const svn_stringbuf_t *src, apr_pool_t *pool)
Set *dest to a natively-encoded stringbuf from utf8 stringbuf src; allocate *dest in pool.