Subversion
svn_utf.h
Go to the documentation of this file.
00001 /**
00002  * @copyright
00003  * ====================================================================
00004  *    Licensed to the Apache Software Foundation (ASF) under one
00005  *    or more contributor license agreements.  See the NOTICE file
00006  *    distributed with this work for additional information
00007  *    regarding copyright ownership.  The ASF licenses this file
00008  *    to you under the Apache License, Version 2.0 (the
00009  *    "License"); you may not use this file except in compliance
00010  *    with the License.  You may obtain a copy of the License at
00011  *
00012  *      http://www.apache.org/licenses/LICENSE-2.0
00013  *
00014  *    Unless required by applicable law or agreed to in writing,
00015  *    software distributed under the License is distributed on an
00016  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
00017  *    KIND, either express or implied.  See the License for the
00018  *    specific language governing permissions and limitations
00019  *    under the License.
00020  * ====================================================================
00021  * @endcopyright
00022  *
00023  * @file svn_utf.h
00024  * @brief UTF-8 conversion routines
00025  *
00026  * Whenever a conversion routine cannot convert to or from UTF-8, the
00027  * error returned has code @c APR_EINVAL.
00028  */
00029 
00030 
00031 
00032 #ifndef SVN_UTF_H
00033 #define SVN_UTF_H
00034 
00035 #include <apr_pools.h>
00036 #include <apr_xlate.h>  /* for APR_*_CHARSET */
00037 
00038 #include "svn_types.h"
00039 #include "svn_string.h"
00040 
00041 #ifdef __cplusplus
00042 extern "C" {
00043 #endif /* __cplusplus */
00044 
00045 #define SVN_APR_LOCALE_CHARSET APR_LOCALE_CHARSET    /* alias for APR_LOCALE_CHARSET (see <apr_xlate.h>) */
00046 #define SVN_APR_DEFAULT_CHARSET APR_DEFAULT_CHARSET  /* alias for APR_DEFAULT_CHARSET (see <apr_xlate.h>) */
00047 
00048 /**
00049  * Initialize the UTF-8 encoding/decoding routines, allocating the
00050  * cached translation handles in a subpool of @a pool.
00051  *
00052  * @note Calling this function is optional, but doing so improves
00053  * performance.  If it is called, no other svn function may be in use in
00054  * another thread while it runs, nor when @a pool is cleared or
00055  * destroyed.
00056  *
00057  * @since New in 1.1.
00058  */
00059 void
00060 svn_utf_initialize(apr_pool_t *pool);
00061 
00062 /** Convert native stringbuf @a src to a UTF-8 stringbuf stored in @a *dest
00063  * (allocated in @a pool).  Unconvertible input fails with @c APR_EINVAL.
00064  */
00065 svn_error_t *
00066 svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest,
00067                           const svn_stringbuf_t *src,
00068                           apr_pool_t *pool);
00069 
00070 
00071 /** Convert native string @a src to a UTF-8 string stored in @a *dest
00072  * (allocated in @a pool).  Unconvertible input fails with @c APR_EINVAL.
00073  */
00074 svn_error_t *
00075 svn_utf_string_to_utf8(const svn_string_t **dest,
00076                        const svn_string_t *src,
00077                        apr_pool_t *pool);
00078 
00079 
00080 /** Convert native C string @a src to a UTF-8 C string stored in @a *dest
00081  * (allocated in @a pool).  Unconvertible input fails with @c APR_EINVAL.
00082  */
00083 svn_error_t *
00084 svn_utf_cstring_to_utf8(const char **dest,
00085                         const char *src,
00086                         apr_pool_t *pool);
00087 
00088 
00089 /** Convert @a frompage encoded C string @a src to a UTF-8 C string stored in
00090  * @a *dest (allocated in @a pool).  Fails with @c APR_EINVAL if unconvertible.
00091  *
00092  * @since New in 1.4.
00093  */
00094 svn_error_t *
00095 svn_utf_cstring_to_utf8_ex2(const char **dest,
00096                             const char *src,
00097                             const char *frompage,
00098                             apr_pool_t *pool);
00099 
00100 
00101 /** Same as svn_utf_cstring_to_utf8_ex2(), except that it also accepts
00102  * @a convset_key, which is ignored.
00103  *
00104  * @deprecated Provided for backward compatibility with the 1.3 API.
00105  */
00106 SVN_DEPRECATED
00107 svn_error_t *
00108 svn_utf_cstring_to_utf8_ex(const char **dest,
00109                            const char *src,
00110                            const char *frompage,
00111                            const char *convset_key,
00112                            apr_pool_t *pool);
00113 
00114 
00115 /** Convert UTF-8 stringbuf @a src to a native stringbuf stored in @a *dest
00116  * (allocated in @a pool).  Unconvertible input fails with @c APR_EINVAL.
00117  */
00118 svn_error_t *
00119 svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest,
00120                             const svn_stringbuf_t *src,
00121                             apr_pool_t *pool);
00122 
00123 
00124 /** Convert UTF-8 string @a src to a native string stored in @a *dest
00125  * (allocated in @a pool).  Unconvertible input fails with @c APR_EINVAL.
00126  */
00127 svn_error_t *
00128 svn_utf_string_from_utf8(const svn_string_t **dest,
00129                          const svn_string_t *src,
00130                          apr_pool_t *pool);
00131 
00132 
00133 /** Convert UTF-8 C string @a src to a native C string stored in @a *dest
00134  * (allocated in @a pool).  Unconvertible input fails with @c APR_EINVAL.
00135  */
00136 svn_error_t *
00137 svn_utf_cstring_from_utf8(const char **dest,
00138                           const char *src,
00139                           apr_pool_t *pool);
00140 
00141 
00142 /** Convert UTF-8 C string @a src to a @a topage encoded C string stored in
00143  * @a *dest (allocated in @a pool).  Fails with @c APR_EINVAL if unconvertible.
00144  *
00145  * @since New in 1.4.
00146  */
00147 svn_error_t *
00148 svn_utf_cstring_from_utf8_ex2(const char **dest,
00149                               const char *src,
00150                               const char *topage,
00151                               apr_pool_t *pool);
00152 
00153 
00154 /** Same as svn_utf_cstring_from_utf8_ex2(), except that it also accepts
00155  * @a convset_key, which is ignored.
00156  *
00157  * @deprecated Provided for backward compatibility with the 1.3 API.
00158  */
00159 SVN_DEPRECATED
00160 svn_error_t *
00161 svn_utf_cstring_from_utf8_ex(const char **dest,
00162                              const char *src,
00163                              const char *topage,
00164                              const char *convset_key,
00165                              apr_pool_t *pool);
00166 
00167 
00168 /** Return a fuzzily native-encoded C string made from UTF-8 C string
00169  * @a src, allocated in @a pool.  A fuzzy recoding leaves all 7-bit ASCII
00170  * characters unchanged and substitutes "?\\XXX" for the others, where XXX
00171  * is the unsigned decimal code for that character.
00172  *
00173  * This function cannot error; it is guaranteed to return something.
00174  * It first recodes as described above, then attempts to convert the
00175  * (new) 7-bit UTF-8 string to the native encoding.  If that fails, it
00176  * returns the raw fuzzily recoded string, which may or may not be
00177  * meaningful in the client's locale, but is (presumably) better than
00178  * nothing.
00179  *
00180  * ### Notes:
00181  *
00182  * Improvement is possible, even imminent.  The original problem was
00183  * that if you converted a UTF-8 string (say, a log message) into a
00184  * locale that couldn't represent all the characters, you'd just get a
00185  * static placeholder saying "[unconvertible log message]".  Then
00186  * Justin Erenkrantz pointed out that on platforms that didn't support
00187  * conversion at all, "svn log" would still fail completely when it
00188  * encountered unconvertible data.
00189  *
00190  * Now for both cases, the caller can at least fall back on this
00191  * function, which converts the message as best it can, substituting
00192  * "?\\XXX" escape codes for the non-ASCII characters.
00193  *
00194  * Ultimately, some callers may prefer the iconv "//TRANSLIT" option,
00195  * so when we can detect that at configure time, things will change.
00196  * Also, this should (?) be moved to apr/apu eventually.
00197  *
00198  * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for
00199  * details.
00200  */
00201 const char *
00202 svn_utf_cstring_from_utf8_fuzzy(const char *src,
00203                                 apr_pool_t *pool);
00204 
00205 
00206 /** Convert UTF-8 stringbuf @a src to a native C string stored in @a *dest
00207  * (allocated in @a pool).  Unconvertible input fails with @c APR_EINVAL.
00208  */
00209 svn_error_t *
00210 svn_utf_cstring_from_utf8_stringbuf(const char **dest,
00211                                     const svn_stringbuf_t *src,
00212                                     apr_pool_t *pool);
00213 
00214 
00215 /** Convert UTF-8 string @a src to a native C string stored in @a *dest
00216  * (allocated in @a pool).  Unconvertible input fails with @c APR_EINVAL.
00217  */
00218 svn_error_t *
00219 svn_utf_cstring_from_utf8_string(const char **dest,
00220                                  const svn_string_t *src,
00221                                  apr_pool_t *pool);
00222 
00223 #ifdef __cplusplus
00224 }
00225 #endif /* __cplusplus */
00226 
00227 #endif /* SVN_UTF_H */
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines