Subversion 1.6.16

svn_path.h

Go to the documentation of this file.
00001 /**
00002  * @copyright
00003  * ====================================================================
00004  * Copyright (c) 2000-2004 CollabNet.  All rights reserved.
00005  *
00006  * This software is licensed as described in the file COPYING, which
00007  * you should have received as part of this distribution.  The terms
00008  * are also available at http://subversion.tigris.org/license-1.html.
00009  * If newer versions of this license are posted there, you may use a
00010  * newer version instead, at your option.
00011  *
00012  * This software consists of voluntary contributions made by many
00013  * individuals.  For exact contribution history, see the revision
00014  * history and logs, available at http://subversion.tigris.org/.
00015  * ====================================================================
00016  * @endcopyright
00017  *
00018  * @file svn_path.h
00019  * @brief A path manipulation library
00020  *
00021  * All incoming and outgoing paths are non-NULL and in UTF-8, unless
00022  * otherwise documented.
00023  *
00024  * No result path ever ends with a separator, no matter whether the
00025  * path is a file or directory, because we always canonicalize() it.
00026  *
00027  * Nearly all the @c svn_path_xxx functions expect paths passed into
00028  * them to be in canonical form as defined by the Subversion path
00029  * library itself.  The only functions which do *not* have such
00030  * expectations are:
00031  *
00032  *    - @c svn_path_canonicalize()
00033  *    - @c svn_path_is_canonical()
00034  *    - @c svn_path_internal_style()
00035  *    - @c svn_path_uri_encode()
00036  *
00037  * For the most part, we mean what most anyone would mean when talking
00038  * about canonical paths, but to be on the safe side, you must run
00039  * your paths through @c svn_path_canonicalize() before passing them to
00040  * other functions in this API.
00041  */
00042 
00043 #ifndef SVN_PATH_H
00044 #define SVN_PATH_H
00045 
00046 #include <apr.h>
00047 #include <apr_pools.h>
00048 #include <apr_tables.h>
00049 
00050 #include "svn_types.h"
00051 #include "svn_string.h"
00052 
00053 
00054 #ifdef __cplusplus
00055 extern "C" {
00056 #endif /* __cplusplus */
00057 
00058 
00059 
00060 /** Convert @a path from the local style to the canonical internal style. */
00061 const char *
00062 svn_path_internal_style(const char *path, apr_pool_t *pool);
00063 
00064 /** Convert @a path from the canonical internal style to the local style. */
00065 const char *
00066 svn_path_local_style(const char *path, apr_pool_t *pool);
00067 
00068 
00069 /** Join a base path (@a base) with a component (@a component), allocating
00070  * the result in @a pool. @a component need not be a single component: it
00071  * can be any path, absolute or relative to @a base.
00072  *
00073  * If either @a base or @a component is the empty path, then the other
00074  * argument will be copied and returned.  If both are the empty path the
00075  * empty path is returned.
00076  *
00077  * If the @a component is an absolute path, then it is copied and returned.
00078  * Exactly one slash character ('/') is used to join the components,
00079  * accounting for any trailing slash in @a base.
00080  *
00081  * Note that the contents of @a base are not examined, so it is possible to
00082  * use this function for constructing URLs, or for relative URLs or
00083  * repository paths.
00084  *
00085  * This function is NOT appropriate for native (local) file
00086  * paths. Only for "internal" canonicalized paths, since it uses '/'
00087  * for the separator. Further, an absolute path (for @a component) is
00088  * based on a leading '/' character.  Thus, an "absolute URI" for the
00089  * @a component won't be detected. An absolute URI can only be used
00090  * for the base.
00091  */
00092 char *
00093 svn_path_join(const char *base, const char *component, apr_pool_t *pool);
00094 
00095 /** Join multiple components onto a @a base path, allocated in @a pool. The
00096  * components are terminated by a @c NULL.
00097  *
00098  * If any component is the empty string, it will be ignored.
00099  *
00100  * If any component is an absolute path, then it resets the base and
00101  * further components will be appended to it.
00102  *
00103  * This function does not support URLs.
00104  *
00105  * See svn_path_join() for further notes about joining paths.
00106  */
00107 char *
00108 svn_path_join_many(apr_pool_t *pool, const char *base, ...);
00109 
00110 
00111 /** Get the basename of the specified canonicalized @a path.  The
00112  * basename is defined as the last component of the path (ignoring any
00113  * trailing slashes).  If the @a path is root ("/"), then that is
00114  * returned.  Otherwise, the returned value will have no slashes in
00115  * it.
00116  *
00117  * Example: svn_path_basename("/foo/bar") -> "bar"
00118  *
00119  * The returned basename will be allocated in @a pool.
00120  *
00121  * @note If an empty string is passed, then an empty string will be returned.
00122  */
00123 char *
00124 svn_path_basename(const char *path, apr_pool_t *pool);
00125 
00126 /** Get the dirname of the specified canonicalized @a path, defined as
00127  * the path with its basename removed.  If @a path is root ("/"), it is
00128  * returned unchanged.
00129  *
00130  * The returned dirname will be allocated in @a pool.
00131  */
00132 char *
00133 svn_path_dirname(const char *path, apr_pool_t *pool);
00134 
00135 /** Split @a path into a root portion and an extension such that
00136  * the root + the extension = the original path, and where the
00137  * extension contains no period (.) characters.  If not @c NULL, set
00138  * @a *path_root to the root portion.  If not @c NULL, set
00139  * @a *path_ext to the extension (or "" if there is no extension
00140  * found).  Allocate both @a *path_root and @a *path_ext in @a pool.
00141  *
00142  * @since New in 1.5.
00143  */
00144 void
00145 svn_path_splitext(const char **path_root, const char **path_ext,
00146                   const char *path, apr_pool_t *pool);
00147 
00148 /** Return the number of components in the canonicalized @a path.
00149  *
00150  * @since New in 1.1.
00151 */
00152 apr_size_t
00153 svn_path_component_count(const char *path);
00154 
00155 /** Add a @a component (a NUL-terminated C-string) to the
00156  * canonicalized @a path.  @a component is allowed to contain
00157  * directory separators.
00158  *
00159  * If @a path is non-empty, append the appropriate directory separator
00160  * character, and then @a component.  If @a path is empty, simply set it to
00161  * @a component; don't add any separator character.
00162  *
00163  * If the result ends in a separator character, then remove the separator.
00164  */
00165 void
00166 svn_path_add_component(svn_stringbuf_t *path, const char *component);
00167 
00168 /** Remove one component off the end of the canonicalized @a path. */
00169 void
00170 svn_path_remove_component(svn_stringbuf_t *path);
00171 
00172 /** Remove @a n components off the end of the canonicalized @a path.
00173  * Equivalent to calling svn_path_remove_component() @a n times.
00174  *
00175  * @since New in 1.1.
00176  */
00177 void
00178 svn_path_remove_components(svn_stringbuf_t *path, apr_size_t n);
00179 
00180 /** Divide the canonicalized @a path into @a *dirpath and @a
00181  * *base_name, allocated in @a pool.
00182  *
00183  * If @a dirpath or @a base_name is NULL, then don't set that one.
00184  *
00185  * Either @a dirpath or @a base_name may be @a path's own address, but they
00186  * may not both be the same address, or the results are undefined.
00187  *
00188  * If @a path has two or more components, the separator between @a dirpath
00189  * and @a base_name is not included in either of the new names.
00190  *
00191  *   examples:
00192  *             - <pre>"/foo/bar/baz"  ==>  "/foo/bar" and "baz"</pre>
00193  *             - <pre>"/bar"          ==>  "/"  and "bar"</pre>
00194  *             - <pre>"/"             ==>  "/"  and "/"</pre>
00195  *             - <pre>"X:/"           ==>  "X:/" and "X:/"</pre>
00196  *             - <pre>"bar"           ==>  ""   and "bar"</pre>
00197  *             - <pre>""              ==>  ""   and ""</pre>
00198  */
00199 void
00200 svn_path_split(const char *path,
00201                const char **dirpath,
00202                const char **base_name,
00203                apr_pool_t *pool);
00204 
00205 
00206 /** Return non-zero iff @a path is empty ("") or represents the current
00207  * directory -- that is, if prepending it as a component to an existing
00208  * path would result in no meaningful change.
00209  */
00210 int
00211 svn_path_is_empty(const char *path);
00212 
00213 #ifndef SVN_DIRENT_URI_H
00214 /* This declaration has been moved to svn_dirent_uri.h; it remains here only
00215    for compatibility reasons. */
00216 svn_boolean_t
00217 svn_dirent_is_root(const char *dirent, apr_size_t len);
00218 #endif /* SVN_DIRENT_URI_H */
00219 
00220 /** Return a new path (or URL) like @a path, but transformed such that
00221  * some types of path specification redundancies are removed.
00222  *
00223  * This involves collapsing redundant "/./" elements, removing
00224  * multiple adjacent separator characters, removing trailing
00225  * separator characters, and possibly other semantically inoperative
00226  * transformations.
00227  *
00228  * Convert the scheme and hostname of a URL to lowercase (see issue #2475).
00229  *
00230  * The returned path may be statically allocated, equal to @a path, or
00231  * allocated from @a pool.
00232  */
00233 const char *
00234 svn_path_canonicalize(const char *path, apr_pool_t *pool);
00235 
00236 /** Return @c TRUE iff @a path is canonical. Use @a pool for temporary
00237  * allocations.
00238  *
00239  * @since New in 1.5.
00240  */
00241 svn_boolean_t
00242 svn_path_is_canonical(const char *path, apr_pool_t *pool);
00243 
00244 
00245 /** Return an integer greater than, equal to, or less than 0, according
00246  * to whether @a path1 is greater than, equal to, or less than @a path2.
00247  */
00248 int
00249 svn_path_compare_paths(const char *path1, const char *path2);
00250 
00251 
00252 /** Return the longest common path shared by two canonicalized paths,
00253  * @a path1 and @a path2.  If there's no common ancestor, return the
00254  * empty path.
00255  *
00256  * @a path1 and @a path2 may be URLs.  In order for two URLs to have
00257  * a common ancestor, they must (a) have the same protocol (since two URLs
00258  * with the same path but different protocols may point at completely
00259  * different resources), and (b) share a common ancestor in their path
00260  * component, i.e. 'protocol://' is not a sufficient ancestor.
00261  */
00262 char *
00263 svn_path_get_longest_ancestor(const char *path1,
00264                               const char *path2,
00265                               apr_pool_t *pool);
00266 
00267 /** Convert @a relative canonicalized path to an absolute path and
00268  * return the results in @a *pabsolute, allocated in @a pool.
00269  *
00270  * @a relative may be a URL, in which case no attempt is made to convert it,
00271  * and a copy of the URL is returned.
00272  */
00273 svn_error_t *
00274 svn_path_get_absolute(const char **pabsolute,
00275                       const char *relative,
00276                       apr_pool_t *pool);
00277 
00278 /** Return the path part of the canonicalized @a path in @a
00279  * *pdirectory, and the file part in @a *pfile.  If @a path is a
00280  * directory, set @a *pdirectory to @a path, and @a *pfile to the
00281  * empty string.  If @a path does not exist it is treated as if it is
00282  * a file, since directories do not normally vanish.
00283  */
00284 svn_error_t *
00285 svn_path_split_if_file(const char *path,
00286                        const char **pdirectory,
00287                        const char **pfile,
00288                        apr_pool_t *pool);
00289 
00290 /** Find the common prefix of the canonicalized paths in @a targets
00291  * (an array of <tt>const char *</tt>'s), and remove redundant paths if @a
00292  * remove_redundancies is TRUE.  If there are two or more targets, then
00293  *
00294  *   - Set @a *pcommon to the absolute path of the path or URL common to
00295  *     all of the targets.  If the targets have no common prefix, or
00296  *     are a mix of URLs and local paths, set @a *pcommon to the
00297  *     empty string.
00298  *
00299  *   - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets
00300  *     to an array of targets relative to @a *pcommon, and if
00301  *     @a remove_redundancies is TRUE, omit any paths/URLs that are
00302  *     descendants of another path/URL in @a targets.  If @a *pcommon
00303  *     is empty, @a *pcondensed_targets will contain full URLs and/or
00304  *     absolute paths; redundancies can still be removed (from both URLs
00305  *     and paths).  If @a pcondensed_targets is NULL, leave it alone.
00306  *
00307  * Else if there is exactly one target, then
00308  *
00309  *   - Set @a *pcommon to that target, and
00310  *
00311  *   - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets
00312  *     to an array containing zero elements.  Else if
00313  *     @a pcondensed_targets is NULL, leave it alone.
00314  *
00315  * If there are no items in @a targets, set @a *pcommon and (if
00316  * applicable) @a *pcondensed_targets to @c NULL.
00317  *
00318  * @note There is no guarantee that @a *pcommon is within a working
00319  * copy.  */
00320 svn_error_t *
00321 svn_path_condense_targets(const char **pcommon,
00322                           apr_array_header_t **pcondensed_targets,
00323                           const apr_array_header_t *targets,
00324                           svn_boolean_t remove_redundancies,
00325                           apr_pool_t *pool);
00326 
00327 
00328 /** Copy a list of canonicalized @a targets, one at a time, into @a
00329  * pcondensed_targets, omitting any targets that are found earlier in
00330  * the list, or whose ancestor is found earlier in the list.  Ordering
00331  * of targets in the original list is preserved in the condensed list
00332  * of targets.  Use @a pool for any allocations.
00333  *
00334  * How does this differ in functionality from svn_path_condense_targets()?
00335  *
00336  * Here's the short version:
00337  *
00338  * 1.  Disclaimer: if you wish to debate the following, talk to Karl. :-)
00339  *     Order matters for updates because a multi-arg update is not
00340  *     atomic, and CVS users are used to, when doing 'cvs up targetA
00341  *     targetB' seeing targetA get updated, then targetB.  I think the
00342  *     idea is that if you're in a time-sensitive or flaky-network
00343  *     situation, a user can say, "I really *need* to update
00344  *     wc/A/D/G/tau, but I might as well update my whole working copy if
00345  *     I can."  So that user will do 'svn up wc/A/D/G/tau wc', and if
00346  *     something dies in the middles of the 'wc' update, at least the
00347  *     user has 'tau' up-to-date.
00348  *
00349  * 2.  Also, we have this notion of an anchor and a target for updates
00350  *     (the anchor is where the update editor is rooted, the target is
00351  *     the actual thing we want to update).  I needed a function that
00352  *     would NOT screw with my input paths so that I could tell the
00353  *     difference between someone being in A/D and saying 'svn up G' and
00354  *     being in A/D/G and saying 'svn up .' -- believe it or not, these
00355  *     two things don't mean the same thing.  svn_path_condense_targets()
00356  *     plays with absolute paths (which is fine, so does
00357  *     svn_path_remove_redundancies()), but the difference is that it
00358  *     actually tweaks those targets to be relative to the "grandfather
00359  *     path" common to all the targets.  Updates don't require a
00360  *     "grandfather path" at all, and even if it did, the whole
00361  *     conversion to an absolute path drops the crucial difference
00362  *     between saying "i'm in foo, update bar" and "i'm in foo/bar,
00363  *     update '.'"
00364  */
00365 svn_error_t *
00366 svn_path_remove_redundancies(apr_array_header_t **pcondensed_targets,
00367                              const apr_array_header_t *targets,
00368                              apr_pool_t *pool);
00369 
00370 
00371 /** Decompose the canonicalized @a path into an array of <tt>const
00372  * char *</tt> components, allocated in @a pool.  If @a path is
00373  * absolute, the first component will be a lone dir separator (the
00374  * root directory).
00375  */
00376 apr_array_header_t *
00377 svn_path_decompose(const char *path, apr_pool_t *pool);
00378 
00379 /** Join an array of <tt>const char *</tt> components into a '/'
00380  * separated path, allocated in @a pool.  The joined path is absolute if
00381  * the first component is a lone dir separator.
00382  *
00383  * Calling svn_path_compose() on the output of svn_path_decompose()
00384  * will return the exact same path.
00385  *
00386  * @since New in 1.5.
00387  */
00388 const char *
00389 svn_path_compose(const apr_array_header_t *components, apr_pool_t *pool);
00390 
00391 /** Test that @a name is a single path component, that is:
00392  *   - not @c NULL or empty,
00393  *   - not a `/'-separated directory path, and
00394  *   - not `..'.
00395  */
00396 svn_boolean_t
00397 svn_path_is_single_path_component(const char *name);
00398 
00399 
00400 /**
00401  * Test to see if a backpath, i.e. '..', is present in @a path.
00402  * If not, return @c FALSE.
00403  * If so, return @c TRUE.
00404  *
00405  * @since New in 1.1.
00406  */
00407 svn_boolean_t
00408 svn_path_is_backpath_present(const char *path);
00409 
00410 
00411 /**
00412  * Test to see if a dotpath, i.e. '.', is present in @a path.
00413  * If not, return @c FALSE.
00414  * If so, return @c TRUE.
00415  *
00416  * @since New in 1.6.
00417  */
00418 svn_boolean_t
00419 svn_path_is_dotpath_present(const char *path);
00420 
00421 
00422 /** Test if @a path2 is a child of @a path1.
00423  * If not, return @c NULL.
00424  * If so, return a copy of the remainder path, allocated in @a pool.
00425  * (The remainder is the component which, added to @a path1, yields
00426  * @a path2.  The remainder does not begin with a dir separator.)
00427  *
00428  * Both paths must be in canonical form, and must either be absolute,
00429  * or contain no ".." components.
00430  *
00431  * If @a path2 is the same as @a path1, it is not considered a child, so the
00432  * result is @c NULL; an empty string is never returned.
00433  *
00434  * @note In 1.5 this function has been extended to allow a @c NULL @a pool
00435  *       in which case a pointer into @a path2 will be returned to
00436  *       identify the remainder path.
00437  *
00438  * ### @todo the ".." restriction is unfortunate, and would ideally
00439  * be lifted by making the implementation smarter.  But this is not
00440  * trivial: if the path is "../foo", how do you know whether or not
00441  * the current directory is named "foo" in its parent?
00442  */
00443 const char *
00444 svn_path_is_child(const char *path1, const char *path2, apr_pool_t *pool);
00445 
00446 /** Return TRUE if @a path1 is an ancestor of @a path2 or the paths are equal
00447  * and FALSE otherwise.
00448  *
00449  * @since New in 1.3.
00450  */
00451 svn_boolean_t
00452 svn_path_is_ancestor(const char *path1, const char *path2);
00453 
00454 /**
00455  * Check whether @a path is a valid Subversion path.
00456  *
00457  * A valid Subversion pathname is a UTF-8 string without control
00458  * characters.  "Valid" means Subversion can store the pathname in
00459  * a repository.  There may be other, OS-specific, limitations on
00460  * what paths can be represented in a working copy.
00461  *
00462  * ASSUMPTION: @a path is a valid UTF-8 string.  This function does
00463  * not check UTF-8 validity.
00464  *
00465  * Return @c SVN_NO_ERROR if valid and @c SVN_ERR_FS_PATH_SYNTAX if
00466  * invalid.
00467  *
00468  * @note Despite returning an @c SVN_ERR_FS_* error, this function has
00469  * nothing to do with the versioned filesystem's concept of validity.
00470  *
00471  * @since New in 1.2.
00472  */
00473 svn_error_t *
00474 svn_path_check_valid(const char *path, apr_pool_t *pool);
00475 
00476 
00477 /** URI/URL stuff
00478  *
00479  * @defgroup svn_path_uri_stuff URI/URL conversion
00480  * @{
00481  */
00482 
00483 /** Return TRUE iff @a path looks like a valid absolute URL. */
00484 svn_boolean_t
00485 svn_path_is_url(const char *path);
00486 
00487 /** Return @c TRUE iff @a path is URI-safe, @c FALSE otherwise. */
00488 svn_boolean_t
00489 svn_path_is_uri_safe(const char *path);
00490 
00491 /** Return a URI-encoded copy of @a path, allocated in @a pool.  (@a path
00492  * can be an arbitrary UTF-8 string and does not have to be a canonical
00493  * path.) */
00494 const char *
00495 svn_path_uri_encode(const char *path, apr_pool_t *pool);
00496 
00497 /** Return a URI-decoded copy of @a path, allocated in @a pool. */
00498 const char *
00499 svn_path_uri_decode(const char *path, apr_pool_t *pool);
00500 
00501 /** Extend @a url by @a component, URI-encoding that @a component
00502  * before adding it to the @a url; return the new @a url, allocated in
00503  * @a pool.  If @a component is @c NULL, just return a copy of @a url,
00504  * allocated in @a pool.
00505  *
00506  * @a component need not be a single path segment, but if it contains
00507  * multiple segments, they must be separated by '/'.  @a component
00508  * should not begin with '/', however; if it does, the behavior is
00509  * undefined.
00510  *
00511  * @a url must be in canonical format; it may not have a trailing '/'.
00512  *
00513  * @note To add a component that is already URI-encoded, use
00514  *       <tt>svn_path_join(url, component, pool)</tt> instead.
00515  *
00516  * @note gstein suggests this for when @a component begins with '/':
00517  *
00518  *       "replace the path entirely
00519  *        https://example.com:4444/base/path joined with /leading/slash,
00520  *        should return: https://example.com:4444/leading/slash
00521  *        per the RFCs on combining URIs"
00522  *
00523  *       We may implement that someday, which is why leading '/' is
00524  *       merely undefined right now.
00525  *
00526  * @since New in 1.6.
00527  */
00528 const char *
00529 svn_path_url_add_component2(const char *url,
00530                             const char *component,
00531                             apr_pool_t *pool);
00532 
00533 /** Like svn_path_url_add_component2(), but allows path components that
00534  * end with a trailing '/'.
00535  *
00536  * @deprecated Provided for backward compatibility with the 1.5 API.
00537  */
00538 SVN_DEPRECATED
00539 const char *
00540 svn_path_url_add_component(const char *url,
00541                            const char *component,
00542                            apr_pool_t *pool);
00543 
00544 /**
00545  * Convert @a iri (Internationalized URI) to a URI.
00546  * The return value may be the same as @a iri if it was already
00547  * a URI.  Else, allocate the return value in @a pool.
00548  *
00549  * @since New in 1.1.
00550  */
00551 const char *
00552 svn_path_uri_from_iri(const char *iri, apr_pool_t *pool);
00553 
00554 /**
00555  * URI-encode certain characters in @a uri that are not valid in a URI, but
00556  * don't have any special meaning in @a uri at their positions.  If no
00557  * characters need escaping, just return @a uri.
00558  *
00559  * @note Currently, this function escapes <, >, ", space, {, }, |, \, ^, and `.
00560  * This may be extended in the future to do context-dependent escaping.
00561  *
00562  * @since New in 1.1.
00563  */
00564 const char *
00565 svn_path_uri_autoescape(const char *uri, apr_pool_t *pool);
00566 
00567 /** @} */
00568 
00569 /** Charset conversion stuff
00570  *
00571  * @defgroup svn_path_charset_stuff Charset conversion
00572  * @{
00573  */
00574 
00575 /** Convert @a path_utf8 from UTF-8 to the internal encoding used by APR. */
00576 svn_error_t *
00577 svn_path_cstring_from_utf8(const char **path_apr,
00578                            const char *path_utf8,
00579                            apr_pool_t *pool);
00580 
00581 /** Convert @a path_apr from the internal encoding used by APR to UTF-8. */
00582 svn_error_t *
00583 svn_path_cstring_to_utf8(const char **path_utf8,
00584                          const char *path_apr,
00585                          apr_pool_t *pool);
00586 
00587 
00588 /** @} */
00589 
00590 #ifdef __cplusplus
00591 }
00592 #endif /* __cplusplus */
00593 
00594 
00595 #endif /* SVN_PATH_H */
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines