Subversion
|
00001 /** 00002 * @copyright 00003 * ==================================================================== 00004 * Licensed to the Apache Software Foundation (ASF) under one 00005 * or more contributor license agreements. See the NOTICE file 00006 * distributed with this work for additional information 00007 * regarding copyright ownership. The ASF licenses this file 00008 * to you under the Apache License, Version 2.0 (the 00009 * "License"); you may not use this file except in compliance 00010 * with the License. You may obtain a copy of the License at 00011 * 00012 * http://www.apache.org/licenses/LICENSE-2.0 00013 * 00014 * Unless required by applicable law or agreed to in writing, 00015 * software distributed under the License is distributed on an 00016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 00017 * KIND, either express or implied. See the License for the 00018 * specific language governing permissions and limitations 00019 * under the License. 00020 * ==================================================================== 00021 * @endcopyright 00022 * 00023 * @file svn_utf.h 00024 * @brief UTF-8 conversion routines 00025 * 00026 * Whenever a conversion routine cannot convert to or from UTF-8, the 00027 * error returned has code @c APR_EINVAL. 00028 */ 00029 00030 00031 00032 #ifndef SVN_UTF_H 00033 #define SVN_UTF_H 00034 00035 #include <apr_pools.h> 00036 #include <apr_xlate.h> /* for APR_*_CHARSET */ 00037 00038 #include "svn_types.h" 00039 #include "svn_string.h" 00040 00041 #ifdef __cplusplus 00042 extern "C" { 00043 #endif /* __cplusplus */ 00044 00045 #define SVN_APR_LOCALE_CHARSET APR_LOCALE_CHARSET 00046 #define SVN_APR_DEFAULT_CHARSET APR_DEFAULT_CHARSET 00047 00048 /** 00049 * Initialize the UTF-8 encoding/decoding routines. 00050 * Allocate cached translation handles in a subpool of @a pool. 00051 * 00052 * @note It is optional to call this function, but if it is used, no other 00053 * svn function may be in use in other threads during the call of this 00054 * function or when @a pool is cleared or destroyed. 00055 * Initializing the UTF-8 routines will improve performance. 00056 * 00057 * @since New in 1.1. 00058 */ 00059 void 00060 svn_utf_initialize(apr_pool_t *pool); 00061 00062 /** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src; 00063 * allocate @a *dest in @a pool. 00064 */ 00065 svn_error_t * 00066 svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest, 00067 const svn_stringbuf_t *src, 00068 apr_pool_t *pool); 00069 00070 00071 /** Set @a *dest to a utf8-encoded string from native string @a src; allocate 00072 * @a *dest in @a pool. 00073 */ 00074 svn_error_t * 00075 svn_utf_string_to_utf8(const svn_string_t **dest, 00076 const svn_string_t *src, 00077 apr_pool_t *pool); 00078 00079 00080 /** Set @a *dest to a utf8-encoded C string from native C string @a src; 00081 * allocate @a *dest in @a pool. 00082 */ 00083 svn_error_t * 00084 svn_utf_cstring_to_utf8(const char **dest, 00085 const char *src, 00086 apr_pool_t *pool); 00087 00088 00089 /** Set @a *dest to a utf8 encoded C string from @a frompage encoded C 00090 * string @a src; allocate @a *dest in @a pool. 00091 * 00092 * @since New in 1.4. 00093 */ 00094 svn_error_t * 00095 svn_utf_cstring_to_utf8_ex2(const char **dest, 00096 const char *src, 00097 const char *frompage, 00098 apr_pool_t *pool); 00099 00100 00101 /** Like svn_utf_cstring_to_utf8_ex2() but with @a convset_key which is 00102 * ignored. 00103 * 00104 * @deprecated Provided for backward compatibility with the 1.3 API. 00105 */ 00106 SVN_DEPRECATED 00107 svn_error_t * 00108 svn_utf_cstring_to_utf8_ex(const char **dest, 00109 const char *src, 00110 const char *frompage, 00111 const char *convset_key, 00112 apr_pool_t *pool); 00113 00114 00115 /** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src; 00116 * allocate @a *dest in @a pool. 00117 */ 00118 svn_error_t * 00119 svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest, 00120 const svn_stringbuf_t *src, 00121 apr_pool_t *pool); 00122 00123 00124 /** Set @a *dest to a natively-encoded string from utf8 string @a src; 00125 * allocate @a *dest in @a pool. 00126 */ 00127 svn_error_t * 00128 svn_utf_string_from_utf8(const svn_string_t **dest, 00129 const svn_string_t *src, 00130 apr_pool_t *pool); 00131 00132 00133 /** Set @a *dest to a natively-encoded C string from utf8 C string @a src; 00134 * allocate @a *dest in @a pool. 00135 */ 00136 svn_error_t * 00137 svn_utf_cstring_from_utf8(const char **dest, 00138 const char *src, 00139 apr_pool_t *pool); 00140 00141 00142 /** Set @a *dest to a @a topage encoded C string from utf8 encoded C string 00143 * @a src; allocate @a *dest in @a pool. 00144 * 00145 * @since New in 1.4. 00146 */ 00147 svn_error_t * 00148 svn_utf_cstring_from_utf8_ex2(const char **dest, 00149 const char *src, 00150 const char *topage, 00151 apr_pool_t *pool); 00152 00153 00154 /** Like svn_utf_cstring_from_utf8_ex2() but with @a convset_key which is 00155 * ignored. 00156 * 00157 * @deprecated Provided for backward compatibility with the 1.3 API. 00158 */ 00159 SVN_DEPRECATED 00160 svn_error_t * 00161 svn_utf_cstring_from_utf8_ex(const char **dest, 00162 const char *src, 00163 const char *topage, 00164 const char *convset_key, 00165 apr_pool_t *pool); 00166 00167 00168 /** Return a fuzzily native-encoded C string from utf8 C string @a src, 00169 * allocated in @a pool. A fuzzy recoding leaves all 7-bit ascii 00170 * characters the same, and substitutes "?\\XXX" for others, where XXX 00171 * is the unsigned decimal code for that character. 00172 * 00173 * This function cannot error; it is guaranteed to return something. 00174 * First it will recode as described above and then attempt to convert 00175 * the (new) 7-bit UTF-8 string to native encoding. If that fails, it 00176 * will return the raw fuzzily recoded string, which may or may not be 00177 * meaningful in the client's locale, but is (presumably) better than 00178 * nothing. 00179 * 00180 * ### Notes: 00181 * 00182 * Improvement is possible, even imminent. The original problem was 00183 * that if you converted a UTF-8 string (say, a log message) into a 00184 * locale that couldn't represent all the characters, you'd just get a 00185 * static placeholder saying "[unconvertible log message]". Then 00186 * Justin Erenkrantz pointed out how on platforms that didn't support 00187 * conversion at all, "svn log" would still fail completely when it 00188 * encountered unconvertible data. 00189 * 00190 * Now for both cases, the caller can at least fall back on this 00191 * function, which converts the message as best it can, substituting 00192 * "?\\XXX" escape codes for the non-ascii characters. 00193 * 00194 * Ultimately, some callers may prefer the iconv "//TRANSLIT" option, 00195 * so when we can detect that at configure time, things will change. 00196 * Also, this should (?) be moved to apr/apu eventually. 00197 * 00198 * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for 00199 * details. 00200 */ 00201 const char * 00202 svn_utf_cstring_from_utf8_fuzzy(const char *src, 00203 apr_pool_t *pool); 00204 00205 00206 /** Set @a *dest to a natively-encoded C string from utf8 stringbuf @a src; 00207 * allocate @a *dest in @a pool. 00208 */ 00209 svn_error_t * 00210 svn_utf_cstring_from_utf8_stringbuf(const char **dest, 00211 const svn_stringbuf_t *src, 00212 apr_pool_t *pool); 00213 00214 00215 /** Set @a *dest to a natively-encoded C string from utf8 string @a src; 00216 * allocate @a *dest in @a pool. 00217 */ 00218 svn_error_t * 00219 svn_utf_cstring_from_utf8_string(const char **dest, 00220 const svn_string_t *src, 00221 apr_pool_t *pool); 00222 00223 #ifdef __cplusplus 00224 } 00225 #endif /* __cplusplus */ 00226 00227 #endif /* SVN_UTF_H */