/* Subversion 1.6.16 */
00001 /** 00002 * @copyright 00003 * ==================================================================== 00004 * Copyright (c) 2000-2004, 2008 CollabNet. All rights reserved. 00005 * 00006 * This software is licensed as described in the file COPYING, which 00007 * you should have received as part of this distribution. The terms 00008 * are also available at http://subversion.tigris.org/license-1.html. 00009 * If newer versions of this license are posted there, you may use a 00010 * newer version instead, at your option. 00011 * 00012 * This software consists of voluntary contributions made by many 00013 * individuals. For exact contribution history, see the revision 00014 * history and logs, available at http://subversion.tigris.org/. 00015 * ==================================================================== 00016 * @endcopyright 00017 * 00018 * @file svn_utf.h 00019 * @brief UTF-8 conversion routines 00020 * Whenever a conversion routine cannot convert to or from UTF-8, the 00021 * error returned has code @c APR_EINVAL. 00022 */ 00023 00024 00025 00026 #ifndef SVN_UTF_H 00027 #define SVN_UTF_H 00028 00029 #include <apr_pools.h> 00030 #include <apr_xlate.h> /* for APR_*_CHARSET */ 00031 00032 #include "svn_types.h" 00033 #include "svn_string.h" 00034 00035 #ifdef __cplusplus 00036 extern "C" { 00037 #endif /* __cplusplus */ 00038 00039 #define SVN_APR_LOCALE_CHARSET APR_LOCALE_CHARSET 00040 #define SVN_APR_DEFAULT_CHARSET APR_DEFAULT_CHARSET 00041 00042 /** 00043 * Initialize the UTF-8 encoding/decoding routines. 00044 * Allocate cached translation handles in a subpool of @a pool. 00045 * 00046 * @note It is optional to call this function, but if it is used, no other 00047 * svn function may be in use in other threads during the call of this 00048 * function or when @a pool is cleared or destroyed. 00049 * Initializing the UTF-8 routines will improve performance. 00050 * 00051 * @since New in 1.1. 
00052 */ 00053 void 00054 svn_utf_initialize(apr_pool_t *pool); 00055 00056 /** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src; 00057 * allocate @a *dest in @a pool. 00058 */ 00059 svn_error_t * 00060 svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest, 00061 const svn_stringbuf_t *src, 00062 apr_pool_t *pool); 00063 00064 00065 /** Set @a *dest to a utf8-encoded string from native string @a src; allocate 00066 * @a *dest in @a pool. 00067 */ 00068 svn_error_t * 00069 svn_utf_string_to_utf8(const svn_string_t **dest, 00070 const svn_string_t *src, 00071 apr_pool_t *pool); 00072 00073 00074 /** Set @a *dest to a utf8-encoded C string from native C string @a src; 00075 * allocate @a *dest in @a pool. 00076 */ 00077 svn_error_t * 00078 svn_utf_cstring_to_utf8(const char **dest, 00079 const char *src, 00080 apr_pool_t *pool); 00081 00082 00083 /** Set @a *dest to a utf8 encoded C string from @a frompage encoded C 00084 * string @a src; allocate @a *dest in @a pool. 00085 * 00086 * @since New in 1.4. 00087 */ 00088 svn_error_t * 00089 svn_utf_cstring_to_utf8_ex2(const char **dest, 00090 const char *src, 00091 const char *frompage, 00092 apr_pool_t *pool); 00093 00094 00095 /** Like svn_utf_cstring_to_utf8_ex2() but with @a convset_key which is 00096 * ignored. 00097 * 00098 * @deprecated Provided for backward compatibility with the 1.3 API. 00099 */ 00100 SVN_DEPRECATED 00101 svn_error_t * 00102 svn_utf_cstring_to_utf8_ex(const char **dest, 00103 const char *src, 00104 const char *frompage, 00105 const char *convset_key, 00106 apr_pool_t *pool); 00107 00108 00109 /** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src; 00110 * allocate @a *dest in @a pool. 00111 */ 00112 svn_error_t * 00113 svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest, 00114 const svn_stringbuf_t *src, 00115 apr_pool_t *pool); 00116 00117 00118 /** Set @a *dest to a natively-encoded string from utf8 string @a src; 00119 * allocate @a *dest in @a pool. 
00120 */ 00121 svn_error_t * 00122 svn_utf_string_from_utf8(const svn_string_t **dest, 00123 const svn_string_t *src, 00124 apr_pool_t *pool); 00125 00126 00127 /** Set @a *dest to a natively-encoded C string from utf8 C string @a src; 00128 * allocate @a *dest in @a pool. 00129 */ 00130 svn_error_t * 00131 svn_utf_cstring_from_utf8(const char **dest, 00132 const char *src, 00133 apr_pool_t *pool); 00134 00135 00136 /** Set @a *dest to a @a topage encoded C string from utf8 encoded C string 00137 * @a src; allocate @a *dest in @a pool. 00138 * 00139 * @since New in 1.4. 00140 */ 00141 svn_error_t * 00142 svn_utf_cstring_from_utf8_ex2(const char **dest, 00143 const char *src, 00144 const char *topage, 00145 apr_pool_t *pool); 00146 00147 00148 /** Like svn_utf_cstring_from_utf8_ex2() but with @a convset_key which is 00149 * ignored. 00150 * 00151 * @deprecated Provided for backward compatibility with the 1.3 API. 00152 */ 00153 SVN_DEPRECATED 00154 svn_error_t * 00155 svn_utf_cstring_from_utf8_ex(const char **dest, 00156 const char *src, 00157 const char *topage, 00158 const char *convset_key, 00159 apr_pool_t *pool); 00160 00161 00162 /** Return a fuzzily native-encoded C string from utf8 C string @a src, 00163 * allocated in @a pool. A fuzzy recoding leaves all 7-bit ascii 00164 * characters the same, and substitutes "?\\XXX" for others, where XXX 00165 * is the unsigned decimal code for that character. 00166 * 00167 * This function cannot error; it is guaranteed to return something. 00168 * First it will recode as described above and then attempt to convert 00169 * the (new) 7-bit UTF-8 string to native encoding. If that fails, it 00170 * will return the raw fuzzily recoded string, which may or may not be 00171 * meaningful in the client's locale, but is (presumably) better than 00172 * nothing. 00173 * 00174 * ### Notes: 00175 * 00176 * Improvement is possible, even imminent. 
The original problem was 00177 * that if you converted a UTF-8 string (say, a log message) into a 00178 * locale that couldn't represent all the characters, you'd just get a 00179 * static placeholder saying "[unconvertible log message]". Then 00180 * Justin Erenkrantz pointed out how on platforms that didn't support 00181 * conversion at all, "svn log" would still fail completely when it 00182 * encountered unconvertible data. 00183 * 00184 * Now for both cases, the caller can at least fall back on this 00185 * function, which converts the message as best it can, substituting 00186 * "?\\XXX" escape codes for the non-ascii characters. 00187 * 00188 * Ultimately, some callers may prefer the iconv "//TRANSLIT" option, 00189 * so when we can detect that at configure time, things will change. 00190 * Also, this should (?) be moved to apr/apu eventually. 00191 * 00192 * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for 00193 * details. 00194 */ 00195 const char * 00196 svn_utf_cstring_from_utf8_fuzzy(const char *src, 00197 apr_pool_t *pool); 00198 00199 00200 /** Set @a *dest to a natively-encoded C string from utf8 stringbuf @a src; 00201 * allocate @a *dest in @a pool. 00202 */ 00203 svn_error_t * 00204 svn_utf_cstring_from_utf8_stringbuf(const char **dest, 00205 const svn_stringbuf_t *src, 00206 apr_pool_t *pool); 00207 00208 00209 /** Set @a *dest to a natively-encoded C string from utf8 string @a src; 00210 * allocate @a *dest in @a pool. 00211 */ 00212 svn_error_t * 00213 svn_utf_cstring_from_utf8_string(const char **dest, 00214 const svn_string_t *src, 00215 apr_pool_t *pool); 00216 00217 #ifdef __cplusplus 00218 } 00219 #endif /* __cplusplus */ 00220 00221 #endif /* SVN_UTF_H */