Subversion 1.6.16

svn_utf.h

Go to the documentation of this file.
00001 /**
00002  * @copyright
00003  * ====================================================================
00004  * Copyright (c) 2000-2004, 2008 CollabNet.  All rights reserved.
00005  *
00006  * This software is licensed as described in the file COPYING, which
00007  * you should have received as part of this distribution.  The terms
00008  * are also available at http://subversion.tigris.org/license-1.html.
00009  * If newer versions of this license are posted there, you may use a
00010  * newer version instead, at your option.
00011  *
00012  * This software consists of voluntary contributions made by many
00013  * individuals.  For exact contribution history, see the revision
00014  * history and logs, available at http://subversion.tigris.org/.
00015  * ====================================================================
00016  * @endcopyright
00017  *
00018  * @file svn_utf.h
00019  * @brief UTF-8 conversion routines.
00020  * @note Whenever a conversion routine cannot convert to or from UTF-8,
00021  * the error returned has code @c APR_EINVAL.
00022  */
00023 
00024 
00025 
00026 #ifndef SVN_UTF_H
00027 #define SVN_UTF_H
00028 
00029 #include <apr_pools.h>
00030 #include <apr_xlate.h>  /* for APR_*_CHARSET */
00031 
00032 #include "svn_types.h"
00033 #include "svn_string.h"
00034 
00035 #ifdef __cplusplus
00036 extern "C" {
00037 #endif /* __cplusplus */
00038 
00039 #define SVN_APR_LOCALE_CHARSET APR_LOCALE_CHARSET
00040 #define SVN_APR_DEFAULT_CHARSET APR_DEFAULT_CHARSET
00041 
00042 /**
00043  * Initialize the UTF-8 encoding/decoding routines.
00044  * Allocate cached translation handles in a subpool of @a pool.
00045  *
00046  * @note Calling this function is optional, but if it is called, no other
00047  * svn function may be in use in other threads during the call or while
00048  * @a pool is being cleared or destroyed.
00049  * Initializing the UTF-8 routines will improve performance.
00050  *
00051  * @since New in 1.1.
00052  */
00053 void
00054 svn_utf_initialize(apr_pool_t *pool);
00055 
00056 /** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src;
00057  * allocate @a *dest in @a pool.
00058  */
00059 svn_error_t *
00060 svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest,
00061                           const svn_stringbuf_t *src,
00062                           apr_pool_t *pool);
00063 
00064 
00065 /** Set @a *dest to a utf8-encoded string from native string @a src; allocate
00066  * @a *dest in @a pool.
00067  */
00068 svn_error_t *
00069 svn_utf_string_to_utf8(const svn_string_t **dest,
00070                        const svn_string_t *src,
00071                        apr_pool_t *pool);
00072 
00073 
00074 /** Set @a *dest to a utf8-encoded C string from native C string @a src;
00075  * allocate @a *dest in @a pool.
00076  */
00077 svn_error_t *
00078 svn_utf_cstring_to_utf8(const char **dest,
00079                         const char *src,
00080                         apr_pool_t *pool);
00081 
00082 
00083 /** Set @a *dest to a utf8-encoded C string from C string @a src, which
00084  * is encoded in @a frompage; allocate @a *dest in @a pool.
00085  *
00086  * @since New in 1.4.
00087  */
00088 svn_error_t *
00089 svn_utf_cstring_to_utf8_ex2(const char **dest,
00090                             const char *src,
00091                             const char *frompage,
00092                             apr_pool_t *pool);
00093 
00094 
00095 /** Like svn_utf_cstring_to_utf8_ex2(), but with an additional
00096  * @a convset_key parameter, which is ignored.
00097  *
00098  * @deprecated Provided for backward compatibility with the 1.3 API.
00099  */
00100 SVN_DEPRECATED
00101 svn_error_t *
00102 svn_utf_cstring_to_utf8_ex(const char **dest,
00103                            const char *src,
00104                            const char *frompage,
00105                            const char *convset_key,
00106                            apr_pool_t *pool);
00107 
00108 
00109 /** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src;
00110  * allocate @a *dest in @a pool.
00111  */
00112 svn_error_t *
00113 svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest,
00114                             const svn_stringbuf_t *src,
00115                             apr_pool_t *pool);
00116 
00117 
00118 /** Set @a *dest to a natively-encoded string from utf8 string @a src;
00119  * allocate @a *dest in @a pool.
00120  */
00121 svn_error_t *
00122 svn_utf_string_from_utf8(const svn_string_t **dest,
00123                          const svn_string_t *src,
00124                          apr_pool_t *pool);
00125 
00126 
00127 /** Set @a *dest to a natively-encoded C string from utf8 C string @a src;
00128  * allocate @a *dest in @a pool.
00129  */
00130 svn_error_t *
00131 svn_utf_cstring_from_utf8(const char **dest,
00132                           const char *src,
00133                           apr_pool_t *pool);
00134 
00135 
00136 /** Set @a *dest to a C string encoded in @a topage from utf8-encoded C
00137  * string @a src; allocate @a *dest in @a pool.
00138  *
00139  * @since New in 1.4.
00140  */
00141 svn_error_t *
00142 svn_utf_cstring_from_utf8_ex2(const char **dest,
00143                               const char *src,
00144                               const char *topage,
00145                               apr_pool_t *pool);
00146 
00147 
00148 /** Like svn_utf_cstring_from_utf8_ex2(), but with an additional
00149  * @a convset_key parameter, which is ignored.
00150  *
00151  * @deprecated Provided for backward compatibility with the 1.3 API.
00152  */
00153 SVN_DEPRECATED
00154 svn_error_t *
00155 svn_utf_cstring_from_utf8_ex(const char **dest,
00156                              const char *src,
00157                              const char *topage,
00158                              const char *convset_key,
00159                              apr_pool_t *pool);
00160 
00161 
00162 /** Return a fuzzily native-encoded C string from utf8 C string @a src,
00163  * allocated in @a pool.  A fuzzy recoding leaves all 7-bit ascii
00164  * characters the same, and substitutes "?\\XXX" for others, where XXX
00165  * is the unsigned decimal code for that character.
00166  *
00167  * This function cannot error; it is guaranteed to return something.
00168  * First it will recode as described above and then attempt to convert
00169  * the (new) 7-bit UTF-8 string to native encoding.  If that fails, it
00170  * will return the raw fuzzily recoded string, which may or may not be
00171  * meaningful in the client's locale, but is (presumably) better than
00172  * nothing.
00173  *
00174  * ### Notes:
00175  *
00176  * Improvement is possible, even imminent.  The original problem was
00177  * that if you converted a UTF-8 string (say, a log message) into a
00178  * locale that couldn't represent all the characters, you'd just get a
00179  * static placeholder saying "[unconvertible log message]".  Then
00180  * Justin Erenkrantz pointed out that on platforms that didn't support
00181  * conversion at all, "svn log" would still fail completely when it
00182  * encountered unconvertible data.
00183  *
00184  * Now for both cases, the caller can at least fall back on this
00185  * function, which converts the message as best it can, substituting
00186  * "?\\XXX" escape codes for the non-ascii characters.
00187  *
00188  * Ultimately, some callers may prefer the iconv "//TRANSLIT" option,
00189  * so when we can detect that at configure time, things will change.
00190  * Also, this should (?) be moved to apr/apu eventually.
00191  *
00192  * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for
00193  * details.
00194  */
00195 const char *
00196 svn_utf_cstring_from_utf8_fuzzy(const char *src,
00197                                 apr_pool_t *pool);
00198 
00199 
00200 /** Set @a *dest to a natively-encoded C string from utf8 stringbuf @a src;
00201  * allocate @a *dest in @a pool.
00202  */
00203 svn_error_t *
00204 svn_utf_cstring_from_utf8_stringbuf(const char **dest,
00205                                     const svn_stringbuf_t *src,
00206                                     apr_pool_t *pool);
00207 
00208 
00209 /** Set @a *dest to a natively-encoded C string from utf8 string @a src;
00210  * allocate @a *dest in @a pool.
00211  */
00212 svn_error_t *
00213 svn_utf_cstring_from_utf8_string(const char **dest,
00214                                  const svn_string_t *src,
00215                                  apr_pool_t *pool);
00216 
00217 #ifdef __cplusplus
00218 }
00219 #endif /* __cplusplus */
00220 
00221 #endif /* SVN_UTF_H */
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines