Some path functions

Sun Jul 30 21:02:35 PDT 2006

I have some functions for the path module that I'd like to submit to the 
general public.  I haven't worked on them for about half a year and I 
had submitted them to Walter for inclusion into Phobos a while back, but 
he's got far more pressing issues (like bug fixes and spec. stability) 
to concern himself with.

The list of included functions:
getRoots
isRoot
containsRoot
normPath // normalize a path; similar to the Python version
          // http://docs.python.org/lib/module-os.path.html
isNormPath
normCase // normalize the case
normSep // normalize the separators for the given os
join // variadic join
absPath // includes a private isAbs because the one in std.path doesn't 
seem to be working
expandPath // similar to the Ruby version
            // http://www.rubycentral.com/ref/ref_c_file.html#expand_path

I offer the code free to anyone; use it as you may, it's public domain. 
  I hope it works, but offer no assurances on its quality and am not 
responsible for its use in other code.

It compiles clean under DMD 0.163 on Windows, but I haven't tested it on 
Linux since I wrote it -- I no longer have access to a Linux box.  The 
functions use an internal isAbs function because when I was writing the 
code I found the one in Phobos to not be accurate.  I'm probably wrong 
on that front, so to get the Phobos isAbs functionality, remove the 
internal one (the internal one returns a bool instead of an int, so any 
compares will need to be modified).

Hopefully someone will find it useful, but in the end, I just wanted to 
do my small share to contribute to this fantastic language.

Walter, keep up the good work on this language.  But I have to say, 
honestly, to the community, keep up the good work and motivation.  I've 
never come across a more intelligent and dedicated language community. 
And friendly by the way.  It's always a pleasure reading the newsgroups.

Onward, upward and D-ward!

-Kramer

P.S. - I'm having some trouble attaching the code so I'm just pasting it 
in this message.  Formatting will be screwy I'm sure, but I'm not sure 
how else to post.  I think my file is too large to send, so I'm leaving 
out the unittests in this message and will paste them in another.

######################################################################
######################################################################

/**
  * Author: Joe Zuccarello
  * Date: February 2006
  *
  * By the author's permission, this code is considered in the public
  * domain for modification and redistribution at will.
  */

private import std.string,
                std.path,
                std.file,
                std.regexp,
                std.utf,
                std.stdarg,
                std.format;

version(Windows) private import std.c.windows.windows,
                                std.windows.charset;

// Win32 API entry points used by getRoots to list the logical drives.
// The A variant is called for the ANSI case, the W variant for the
// Unicode case; both take a buffer length followed by the buffer itself.
version(Windows)
{
extern(Windows)
{
     export
     {
         DWORD GetLogicalDriveStringsA(DWORD, LPTSTR);
         DWORD GetLogicalDriveStringsW(DWORD, LPWSTR);
     }
}
}

// Separator strings shared by the functions below:
//   wSep  - the backward slash
//   lSep  - the forward slash
//   rSeps - a regexp character class matching either of the two
private static const char[] wSep = r"\",
                             lSep = "/",
                             rSeps = "[\\\\/]";  // For regexp use

// Empty program entry point.  NOTE(review): presumably present only so that
// this module can be compiled and linked by itself -- confirm, and remove it
// before using the module as part of a larger program.
void main() {}
/**
  * Returns all roots for the system.
  *
  * This will return all the roots that are known by the system.
  *
  * For Windows, it will be the drives that are available.
  * For Linux, it will always return root "/".
  *
  * Note, this function uses the Windows API to retrieve the roots.  If
  * ANSI characters are being used, getRoots will attempt to convert them
  * to UTF-8.  If the conversion throws a UTF exception, that exception
  * will not be caught and will be propagated to the caller code.
  *
  * If the Windows API call fails, an empty list is returned.
  *
  * Authors: Joe Zuccarello
  *
  * Date: February 14, 2006
  *
  * Returns: All roots for the system.  Every entry is a freshly allocated
  *          string that the caller may modify.
  *
  * Throws: (Windows) UtfException on conversion error to UTF-8.
  *
  * Version:
  *
  * License: Public domain.
  *
  * Examples:
  * ---------
  * version(Windows)
  * {
  *     // Assume roots "A:\", "C:\" and "D:\" exist
  *     getRoots() => "A:\", "C:\", "D:\"
  * }
  * version(linux)
  * {
  *     getRoots() => "/"
  * }
  * ---------
  */
char[][] getRoots()
{
     char[][] roots;

     version(Windows)
     {
     DWORD capacity = 50,  // buffer size in characters; grown on demand
           written;        // character count reported by the API
     wchar[] wide;         // receives the list when the W function is used
     char[] narrow;        // receives the list when the A function is used

     // Ask until the buffer is large enough.  When the buffer is too small
     // the API reports the size it needs rather than the size it wrote.
     while (true)
     {
         if (useWfuncs)
         {
             // One spare element: the size passed to the API does not
             // count the terminating NUL.
             wide.length = capacity + 1;
             written = GetLogicalDriveStringsW(capacity, wide.ptr);
         }
         else
         {
             narrow.length = capacity + 1;
             written = GetLogicalDriveStringsA(capacity, narrow.ptr);
         }

         if (written <= capacity)
         {
             break;
         }
         capacity = written;
     }

     // The buffer holds a sequence of NUL-terminated names.  "written"
     // covers every name together with its terminator, but not the extra
     // NUL that closes the list; it is 0 when the call failed.
     uint start = 0;
     for (uint i = 0; i < written; i++)
     {
         bool atNul = useWfuncs ? (wide[i] == 0) : (narrow[i] == 0);

         if (!atNul)
         {
             continue;
         }

         if (i > start)
         {
             if (useWfuncs)
             {
                 roots ~= std.utf.toUTF8(wide[start..i]);
             }
             else
             {
                 // Each name is converted on its own because fromMBSz
                 // stops at the first NUL it meets.
                 roots ~= std.windows.charset.fromMBSz(&narrow[start]).dup;
             }
         }
         start = i + 1;
     }

     return roots;
     }
     else version(linux)
     {
     // Copy the literal so that callers receive memory they may write to
     roots ~= "/".dup;

     return roots;
     }
     else
     {
     pragma(msg, "Unsupported OS");
     static assert(0);
     }
}

/**
  * Tells whether a path is exactly one of the roots of the system.
  *
  * The path is compared against every entry returned by getRoots().  By
  * default the comparison ignores case; pass true for caseSensitive to
  * require an exact match.
  *
  * Authors: Joe Zuccarello
  *
  * Date: February 14, 2006
  *
  * Returns: true if the path equals a system root, false otherwise.
  *
  * Version:
  *
  * License: Public domain.
  *
  * Examples:
  * ---------
  * version(Windows)
  * {
  *     // Assume on Windows, c:\ exists
  *     isRoot(r"c:\") => true
  * }
  * version(linux)
  * {
  *     isRoot("/") => true
  * }
  * ---------
  */
bool isRoot(char[] path, bool caseSensitive = false)
{
     foreach (char[] root; getRoots())
     {
         bool same;

         if (caseSensitive)
         {
             same = (root == path);
         }
         else
         {
             // Compare lower-cased copies of both sides
             same = (std.string.tolower(root) == std.string.tolower(path));
         }

         if (same)
         {
             return true;
         }
     }

     return false;
}

/**
  * Tells whether a path starts with one of the roots of the system.
  *
  * Every entry returned by getRoots() is searched for in the path; the
  * path contains a root when one of them is found at position 0.  By
  * default the search ignores case; pass true for caseSensitive to make
  * it exact.
  *
  * Authors: Joe Zuccarello
  *
  * Date: February 15, 2006
  *
  * Returns: true if the path starts with a system root, false otherwise.
  *
  * Version:
  *
  * License: Public domain.
  *
  * Examples:
  * ---------
  * version(Windows)
  * {
  *     // Assume on Windows, c:\ exists
  *     containsRoot(r"c:\directory\file") => true
  *     containsRoot(r"\directory\file") => false
  * }
  * version(linux)
  * {
  *     containsRoot("/usr/d/src") => true
  *     containsRoot("../d/src") => false
  *     containsRoot("\d/src") => false
  * }
  * ---------
  */
bool containsRoot(char[] path, bool caseSensitive = false)
{
     foreach (char[] root; getRoots())
     {
         int where;

         if (caseSensitive)
         {
             where = std.string.find(path, root);
         }
         else
         {
             where = std.string.ifind(path, root);
         }

         // Only a match at the very beginning counts
         if (where == 0)
         {
             return true;
         }
     }

     return false;
}

/**
  * Test whether a path is normalized.
  *
  * A path is reported as not normalized when it
  *   - ends in a separator (a lone separator and, on Windows, a bare
  *     drive root such as "c:\" are the exceptions),
  *   - contains a "." component or starts with "." and a separator,
  *   - contains two or more separators in a row, or
  *   - contains a ".." component anywhere after the leading run of ".."
  *     components.  The leading run is accepted because there is no
  *     parent directory to collapse it into.
  *
  * On Windows both "\" and "/" are treated as separators; on Linux only
  * "/" is.
  *
  * Note: This function does not handle UNC paths.
  *
  * Authors: Joe Zuccarello
  *
  * Date: February 15, 2006
  *
  * Returns: true/false whether a path is normalized.
  *
  * Version:
  *
  * License: Public domain.
  *
  * Examples:
  * ---------
  * version(Windows)
  * {
  *     isNormPath(r"directory1\..\directory2\file\.") => false
  *     // This one returns true, because there's no parent directory to
  *     // collapse to.
  *     isNormPath(r"..\directory\file") => true
  * }
  * version(linux)
  * {
  *     isNormPath("/dir/../file") => false
  *     isNormPath("/file") => true
  *     isNormPath("../../file") => true
  *     isNormPath("../../dir/../file") => false
  * }
  * ---------
  */
bool isNormPath(char[] path)
{
     char[] sepPat;    // regexp fragment matching one separator
     char[] drivePat;  // regexp fragment matching an optional drive

     version(Windows)
     {
     sepPat = rSeps;
     drivePat = "([a-zA-Z]*:)?";

     // A bare drive root legitimately ends in a separator
     if (std.regexp.find(path, "^[a-zA-Z]*:" ~ rSeps ~ "$") != -1)
     {
         return true;
     }
     }
     else version(linux)
     {
     sepPat = lSep;
     drivePat = "";
     }
     else
     {
     pragma(msg, "Unsupported OS");
     static assert(0);
     }

     // Paths that cannot be reduced any further
     if (path == "." || path == ".." || path == r"\" || path == "/")
     {
         return true;
     }

     // Trailing separator, "." component, leading "./", repeated separators
     if (std.regexp.find(path, sepPat ~ "$") != -1 ||
         std.regexp.find(path, sepPat ~ "\\." ~ "(" ~ sepPat ~ "|$)") != -1 ||
         std.regexp.find(path, "^\\." ~ sepPat) != -1 ||
         std.regexp.find(path, sepPat ~ "{2,}") != -1)
     {
         return false;
     }

     // Drop the leading run of ".." components (and a drive in front of
     // it); any ".." component left after that could still be collapsed.
     char[] tail = std.regexp.sub(path,
         "^" ~ drivePat ~ "\\.\\." ~ "(" ~ sepPat ~ "\\.\\.)*", "");

     if (std.regexp.find(tail, sepPat ~ "\\.\\." ~ "(" ~ sepPat ~ "|$)") != -1)
     {
         return false;
     }

     return true;
}

/**
  * Normalizes a path.
  *
  * This will normalize a path by collapsing redundant separators and
  * parent/current directory references.  It will also remove any trailing
  * separators and normalize separators as appropriate for the OS.
  *
  * A ".." that directly follows the root is dropped.  A ".." at the start
  * of a relative path is kept, because there is nothing to collapse it
  * into.  A path that reduces to nothing becomes ".".
  *
  * Inspired by the Python v2.4.2 implementation.
  *
  * Note: This function does not handle UNC paths.
  *
  * Authors: Joe Zuccarello
  *
  * Date: February 15, 2006
  *
  * Returns: A normalized path, always as a new string.
  *
  * Version:
  *
  * License: Public domain.
  *
  * Examples:
  * ---------
  * normPath("/dir1/../dir2/./file/") => "/dir2/file"
  * normPath("/dir..../file/./") => "/dir..../file"
  * normPath("dir/..") => "."
  * normPath("../dir/../..") => "../.."
  * ---------
  */
char[] normPath(char[] path)
out(result)
{
     assert(isNormPath(result));
}
body
{
     char[][] kept;        // components that survive, in order
     char[] prefix;        // drive and/or leading separator
     bool rooted = false;  // true when the path starts at a root directory

     // Normalize the separators for the os
     path = normSep(path);

     // A lone separator, curdir or pardir reference needs no processing
     if (path == sep || path == curdir || path == pardir)
     {
         return path.dup;
     }

     // Remove the drive from the path
     version(Windows)
     {
     int colon = std.string.find(path, ":");
     if (colon != -1)
     {
         // Copied, so that appending to prefix below cannot write into
         // the caller's string.
         prefix = path[0..(colon + 1)].dup;
         path = path[(colon + 1)..$];
     }
     }

     // Remove repeating separators
     path = std.string.squeeze(path, sep);

     // If there's an initial separator even after a drive, save it off
     if (path.length > 0 && path[0..1] == sep)
     {
         rooted = true;
         prefix ~= sep;
     }

     foreach (char[] comp; std.string.split(path, sep))
     {
         // Blank chunks come from a leading or trailing separator; a
         // current directory reference never changes the location.
         if (comp == "" || comp == curdir)
         {
             continue;
         }

         if (comp == pardir)
         {
             if (kept.length > 0 && kept[$ - 1] != pardir)
             {
                 // Cancels out the directory in front of it
                 kept.length = kept.length - 1;
                 continue;
             }

             if (rooted)
             {
                 // The root has no parent; drop the reference
                 continue;
             }

             // Relative path with nothing left to cancel: the reference
             // has to stay, so fall through and keep it.
         }

         kept ~= comp;
     }

     char[] result = std.string.join(kept, sep);

     if (result == "" && prefix == "")
     {
         result = curdir;
     }
     else
     {
         result = prefix ~ result;
     }

     return result.dup;
}

/**
  * Normalize the case and separators of a path.
  *
  * On Windows the path is lower-cased first.  On other systems the case
  * is left alone.  After that the separators are normalized by normSep,
  * which on Windows converts forward slashes to backward slashes and on
  * Linux leaves the path as it is.
  *
  * Authors: Joe Zuccarello
  *
  * Date: February 15, 2006
  *
  * Returns: A copy of the path with normalized case and separators.
  *
  * Version:
  *
  * License: Public domain.
  *
  * Examples:
  * ---------
  * version(Windows)
  * {
  *     normCase(r"C:\directory1\Subdirectory/FILE") =>
  *         "c:\directory1\subdirectory\file"
  * }
  * version(linux)
  * {
  *     normCase(r"\usr/src\Path.d") => "\usr/src\Path.d"
  * }
  * ---------
  */
char[] normCase(char[] path)
{
     char[] cased = path;

     version(Windows)
     {
     // Windows file names are not case sensitive
     cased = std.string.tolower(cased);
     }

     return normSep(cased).dup;
}

/**
  * Normalizes the separators in a path.
  *
  * On Windows, every forward slash is replaced by a backward slash; a
  * path without forward slashes is handed back as it is, without being
  * copied.  On Linux the path is always handed back as it is.
  *
  * Authors: Joe Zuccarello
  *
  * Date: February 15, 2006
  *
  * Returns: The path with separators normalized for the OS.
  *
  * Version:
  *
  * License: Public domain.
  *
  * Examples:
  * ---------
  * version(Windows)
  * {
  *     normSep(r"c:/directory\file") => "c:\directory\file"
  * }
  * version(linux)
  * {
  *     normSep(r"/dir1\dir2\dir3/file") => "/dir1\dir2\dir3/file"
  * }
  * ---------
  */
char[] normSep(char[] path)
{
     version(Windows)
     {
     // Nothing to convert: give the caller's string straight back
     if (std.regexp.find(path, lSep) == -1)
     {
         return path;
     }

     char[] converted = std.string.replace(path, lSep, wSep);

     return converted.dup;
     }
     else version(linux)
     {
     return path;
     }
     else
     {
     pragma(msg, "Unsupported OS");
     static assert(0);
     }
}

/**
  * Joins an arbitrary number of paths together, using std.path.join as
  * the main joining component.
  *
  * This will take an arbitrary number of paths and join them, from left
  * to right, by passing them to std.path.join to do the actual joining.
  * *** Join rules follow that of std.path.join ***
  *
  * Arguments of type char[], wchar[], dchar[], char, wchar and dchar are
  * accepted.  Empty arguments are skipped.  Arguments of any other type
  * are stepped over and otherwise ignored.
  *
  * Note, this function may attempt to convert non-UTF8 characters to
  * UTF8.  If the UTF8 code throws a UTF exception, that exception will
  * not be caught and will be propagated to the caller code.
  *
  * Authors: Joe Zuccarello
  *
  * Date: February 15, 2006
  *
  * Returns: A path joined of one or more separate paths, as a new string.
  *
  * Throws: UtfException on error from conversion of wchar or dchar
  *         parameters.
  *
  * Version:
  *
  * License: Public domain.
  *
  * Examples:
  * ---------
  * join(r"\dir1", "dir2", "dir3/file") => "\dir1\dir2\dir3/file"
  * ---------
  */
char[] join(...)
{
     char[] joined;

     // Converts a single UTF-16 code unit to a UTF-8 string
     char[] toStringW(wchar c)
     {
         wchar[] result;
         result.length = 1;
         result[0] = c;
         return std.utf.toUTF8(result);
     }

     // Converts a single UTF-32 code point to a UTF-8 string
     char[] toStringD(dchar c)
     {
         dchar[] result;
         result.length = 1;
         result[0] = c;
         return std.utf.toUTF8(result);
     }

     for (int i = 0; i < _arguments.length; i++)
     {
         TypeInfo ti = _arguments[i];
         char[] piece;

         if (ti == typeid(char[]))
         {
             piece = va_arg!(char[])(_argptr);
         }
         else if (ti == typeid(wchar[]))
         {
             piece = std.utf.toUTF8(va_arg!(wchar[])(_argptr));
         }
         else if (ti == typeid(dchar[]))
         {
             piece = std.utf.toUTF8(va_arg!(dchar[])(_argptr));
         }
         else if (ti == typeid(char))
         {
             piece = std.string.toString(va_arg!(char)(_argptr));
         }
         else if (ti == typeid(wchar))
         {
             piece = toStringW(va_arg!(wchar)(_argptr));
         }
         else if (ti == typeid(dchar))
         {
             piece = toStringD(va_arg!(dchar)(_argptr));
         }
         else
         {
             // Not a string or character.  The argument still has to be
             // stepped over, otherwise every following argument would be
             // read from the wrong position.  The size is rounded up to a
             // multiple of int.sizeof.
             // NOTE(review): this assumes the same argument layout that
             // std.stdarg.va_arg uses -- confirm for the target compiler.
             _argptr += (ti.tsize() + int.sizeof - 1) & ~(int.sizeof - 1);
         }

         if (piece != "")
         {
             joined = std.path.join(joined, piece);
         }
     }

     return joined.dup;
}

/**
  * Returns a normalized absolutized path.
  *
  * If the path is empty or not absolute, it will be joined with the
  * current working directory.  If it is an absolute path, nothing will be
  * joined with it.  In either case the result is passed through normPath,
  * so separators are normalized as well.
  *
  * Note: This function does not handle UNC paths.
  *
  * Authors: Joe Zuccarello
  *
  * Date: February 15, 2006
  *
  * Returns: A normalized absolutized path, always as a new string.
  *
  * Version:
  *
  * License: Public domain.
  *
  * Examples:
  * ---------
  * version(Windows)
  * {
  *     // Assume c:\ is the current working directory
  *     absPath(r"file") => "c:\file"
  *     absPath(r"c:\d/src\project") => "c:\d\src\project"
  *     absPath(r".\dir\file\..\dir2\file2") => "c:\dir\dir2\file2"
  * }
  * version(linux)
  * {
  *     // Assume /usr is the current working directory
  *     absPath("d/bin") => "/usr/d/bin"
  *     absPath("/d/lib") => "/d/lib"
  *     absPath("d/src/../file") => "/usr/d/file"
  * }
  * ---------
  */
char[] absPath(char[] path)
out(result)
{
     assert(isNormPath(result));
}
body
{
     // An empty path is never handed to isAbs; it simply resolves to the
     // current working directory.
     if (path.length == 0 || !isAbs(path))
     {
         path = std.path.join(getcwd(), path);
     }

     // Always normalize: isNormPath does not notice separators that are
     // merely of the wrong kind for the OS, so it cannot be used to skip
     // this step.
     return normPath(path);
}

/**
  * Tells whether a path is absolute.
  *
  * On Windows a path is absolute when it starts with a drive
  * specification followed by a separator; both "\" and "/" are accepted
  * as the separator.  On Linux a path is absolute when it starts with
  * "/".  An empty path is never absolute.
  *
  * Returns: true if the path is absolute, false otherwise.
  */
private bool isAbs(char[] path)
{
     // Guard first: the checks below look at the start of the path
     if (path.length == 0)
     {
         return false;
     }

     version(Windows)
     {
     return std.regexp.find(path, "^[a-zA-Z]*:[\\\\/]") != -1;
     }
     else version(linux)
     {
     return path[0] == '/';
     }
     else
     {
     pragma(msg, "Unsupported OS");
     static assert(0);
     }
}

/**
  * Expands a path into a normalized absolutized path, with an optional
  * reference directory to use with relative paths.
  *
  * This will take a path and expand it into a normalized absolutized
  * version of itself.  The rules are:
  *   - An absolute path is normalized and returned; dir is ignored.
  *   - A relative path is appended to dir when dir is absolute.
  *   - A relative path with a relative dir is appended to the current
  *     working directory followed by dir.
  *   - A relative path without a dir is appended to the current working
  *     directory.
  *   - An empty path expands dir alone, by the same rules.
  *   - If both path and dir are empty, an empty string is returned.
  *
  * Note: This function does not handle UNC paths.
  *
  * Authors: Joe Zuccarello
  *
  * Date: February 15, 2006
  *
  * Returns: A normalized absolutized path.
  *
  * Version:
  *
  * License: Public domain.
  *
  * Examples:
  * ---------
  * version(Windows)
  * {
  *     // Assume c:\ is the current working directory
  *     expandPath("file") => "c:\file"
  *     expandPath(r"\dir\file") => "c:\dir\file"
  *     expandPath("file", r"\dir") => "c:\dir\file"
  * }
  * version(linux)
  * {
  *     // Assume /usr is the current working directory
  *     expandPath("file") => "/usr/file"
  *     expandPath("/dir/file") => "/dir/file"
  *     expandPath("file", "/dir") => "/dir/file"
  *     expandPath("file", "dir") => "/usr/dir/file"
  * }
  * ---------
  */
char[] expandPath(char[] path, char[] dir = "")
{
     char[] result;

     if (path != "" && isAbs(path) == true)
     {
         // Path is absolute; ditch the dir and return this after
         // normalizing.
         result = normPath(path);
     }
     else if (path != "" || dir != "")
     {
         // Work out the directory the relative path is anchored to
         char[] base;

         if (dir == "")
         {
             base = getcwd();
         }
         else if (isAbs(dir) == true)
         {
             base = dir;
         }
         else
         {
             // Dir is not absolute, but it is a directory (at least
             // that's what the caller is telling us).
             base = getcwd() ~ sep ~ dir;
         }

         if (path != "")
         {
             result = normPath(base ~ sep ~ path);
         }
         else
         {
             // Path is empty; only the directory is expanded
             result = normPath(base);
         }
     }
     // Otherwise path and dir are both empty and result stays empty

     return result.dup;
}