From a100a31c805b124c293c3e15ae3ede94cf404bb9 Mon Sep 17 00:00:00 2001 From: Adithya Kumar Date: Thu, 5 Dec 2024 20:39:07 +0530 Subject: [PATCH 1/6] Add a routine for path normalization --- .../Internal/FileSystem/Path/Common.hs | 636 +++++++++++++++++- .../Streamly/Internal/FileSystem/PosixPath.hs | 47 ++ streamly.cabal | 1 + test/Streamly/Test/FileSystem/Path.hs | 61 ++ test/streamly-tests.cabal | 6 + 5 files changed, 749 insertions(+), 2 deletions(-) create mode 100644 test/Streamly/Test/FileSystem/Path.hs diff --git a/core/src/Streamly/Internal/FileSystem/Path/Common.hs b/core/src/Streamly/Internal/FileSystem/Path/Common.hs index 1c0767ecb4..aa2eba77b4 100644 --- a/core/src/Streamly/Internal/FileSystem/Path/Common.hs +++ b/core/src/Streamly/Internal/FileSystem/Path/Common.hs @@ -34,6 +34,16 @@ module Streamly.Internal.FileSystem.Path.Common , append , unsafeAppend + -- * Path normalization + , readDriveShareName + , readDriveLetter + , readDriveUNC + , readDriveShare + , splitDrive + , normalizeDrive + , normalizePath + , normalize + -- * Utilities , wordToChar , charToWord @@ -43,8 +53,27 @@ where #include "assert.hs" +{- $setup +>>> :m + +>>> import Data.Functor.Identity (runIdentity) +>>> import System.IO.Unsafe (unsafePerformIO) +>>> import qualified Streamly.Data.Stream as Stream +>>> import qualified Streamly.Unicode.Stream as Unicode +>>> import qualified Streamly.Internal.Data.Array as Array +>>> import qualified Streamly.Internal.FileSystem.Path.Common as Common +>>> import qualified Streamly.Internal.Unicode.Stream as Unicode + +>>> packPosix = unsafePerformIO . Stream.fold Array.create . Unicode.encodeUtf8' . Stream.fromList +>>> unpackPosix = runIdentity . Stream.toList . Unicode.decodeUtf8' . Array.read + +>>> packWindows = unsafePerformIO . Stream.fold Array.create . Unicode.encodeUtf16le' . Stream.fromList +>>> unpackWindows = runIdentity . Stream.toList . Unicode.decodeUtf16le' . Array.read +-} + import Control.Monad.Catch (MonadThrow(..)) -import Data.Char (ord, isAlpha) +import Data.Char (ord, isAlpha, isAsciiLower, isAsciiUpper, toUpper) +import Data.Function ((&)) import Data.Functor.Identity (Identity(..)) #ifdef DEBUG import Data.Maybe (fromJust) @@ -54,7 +83,7 @@ import GHC.Base (unsafeChr) import Language.Haskell.TH (Q, Exp) import Language.Haskell.TH.Quote (QuasiQuoter (..)) import Streamly.Internal.Data.Array (Array(..)) -import Streamly.Internal.Data.MutByteArray (Unbox) +import Streamly.Internal.Data.MutByteArray (Unbox(..)) import Streamly.Internal.Data.Path (PathException(..)) import Streamly.Internal.Data.Stream (Stream) import System.IO.Unsafe (unsafePerformIO) @@ -367,3 +396,606 @@ append :: (Unbox a, Integral a) => OS -> (Array a -> String) -> Array a -> Array a -> Array a append os toStr a b = withAppendCheck os toStr b (doAppend os a b) + +-------------------------------------------------------------------------------- +-- Path normalization +-------------------------------------------------------------------------------- + +{-# INLINE ordIntegral #-} +ordIntegral :: Integral a => Char -> a +ordIntegral = fromIntegral . ord + +{-# INLINE isSeparatorIntegral #-} +isSeparatorIntegral :: Integral a => OS -> a -> Bool +isSeparatorIntegral os = + isSeparator os . unsafeChr . fromIntegral + +{-# INLINE primarySeparatorIntegral #-} +primarySeparatorIntegral :: Integral a => OS -> a +primarySeparatorIntegral = ordIntegral . primarySeparator + +{-# INLINE isLetter #-} +isLetter :: Char -> Bool +isLetter l = isAsciiLower l || isAsciiUpper l + +{-# INLINE countUntilSeperator #-} +countUntilSeperator :: (Monad m, Unbox a, Integral a) => OS -> Array a -> m Int +countUntilSeperator os arr = + Stream.fold + (Fold.takeEndBy_ (not . isSeparatorIntegral os) Fold.length) + (Array.read arr) + +-- | +-- >>> readDriveLetter = fmap (\(a, b) -> (unpackWindows a, unpackWindows b)) . Common.readDriveLetter . packWindows +-- +-- >>> readDriveLetter "c:" +-- Just ("c:","") +-- +-- >>> readDriveLetter "F:\\" +-- Just ("F:\\","") +-- +-- >>> readDriveLetter "F:\\\\\\" +-- Just ("F:\\\\\\","") +-- +-- >>> readDriveLetter "F:\\\\Desktop" +-- Just ("F:\\\\","Desktop") +-- +-- >>> readDriveLetter "F:\\/./Desktop" +-- Just ("F:\\/","./Desktop") +-- +-- >>> readDriveLetter "\\Desktop" +-- Nothing +-- +readDriveLetter :: (Unbox a, Integral a) => Array a -> Maybe (Array a, Array a) +readDriveLetter arr + | arrLen >= 2, isLetter x, c == ':' = Just $ + if arrLen >= 3 && isSeparator Windows (unsafeIndexChar 2 arr) + then runIdentity $ do + let afterDrive = Array.getSliceUnsafe 2 (arrLen - 2) arr + i <- countUntilSeperator Windows afterDrive + pure + ( Array.getSliceUnsafe 0 (2 + i) arr + , Array.getSliceUnsafe (2 + i) (arrLen - i - 2) arr + ) + else ( Array.getSliceUnsafe 0 2 arr + , Array.getSliceUnsafe 2 (arrLen - 2) arr + ) + | otherwise = Nothing + where + arrLen = Array.length arr + x = unsafeIndexChar 0 arr + c = unsafeIndexChar 1 arr + +-- | +-- >>> readDriveShareName = (\(a, b) -> (unpackWindows a, unpackWindows b)) . Common.readDriveShareName . packWindows +-- +-- >>> readDriveShareName "" +-- ("","") +-- +-- >>> readDriveShareName "Desktop/Folder" +-- ("Desktop/","Folder") +-- +-- >>> readDriveShareName "Desktop//\\\\\\Folder" +-- ("Desktop/","/\\\\\\Folder") +-- +-- >>> readDriveShareName "Desktop" +-- ("Desktop","") +-- +readDriveShareName :: (Unbox a, Integral a) => Array a -> (Array a, Array a) +readDriveShareName arr = runIdentity $ do + i <- Stream.fold (Fold.takeEndBy isSep Fold.length) (Array.read arr) + pure (Array.getSliceUnsafe 0 i arr, Array.getSliceUnsafe i (arrLen - i) arr) + where + isSep = isSeparatorIntegral Windows + arrLen = Array.length arr + +-- | +-- >>> readDriveShare = fmap (\(a, b) -> (unpackWindows a, unpackWindows b)) . Common.readDriveShare . packWindows +-- +-- >>> readDriveShare "" +-- Nothing +-- +-- >>> readDriveShare "Desktop" +-- Nothing +-- +-- >>> readDriveShare "\\/" +-- Just ("\\/","") +-- +-- >>> readDriveShare "\\\\localhost\\Desktop" +-- Just ("\\\\localhost\\","Desktop") +-- +-- >>> readDriveShare "\\\\localhost" +-- Just ("\\\\localhost","") +-- +-- >>> readDriveShare "\\\\localhost/" +-- Just ("\\\\localhost/","") +-- +readDriveShare :: (Unbox a, Integral a) => Array a -> Maybe (Array a, Array a) +readDriveShare arr + | arrLen >= 2 && isSep s1 && isSep s2 = + let (a, b) = + readDriveShareName (Array.getSliceUnsafe 2 (arrLen - 2) arr) + in Just (Array.getSliceUnsafe 0 (2 + Array.length a) arr, b) + | otherwise = Nothing + where + isSep = isSeparator Windows + arrLen = Array.length arr + s1 = unsafeIndexChar 0 arr + s2 = unsafeIndexChar 1 arr + +-- | +-- >>> readDriveUNC = fmap (\(a, b) -> (unpackWindows a, unpackWindows b)) . Common.readDriveUNC . packWindows +-- +-- >>> readDriveUNC "" +-- Nothing +-- +-- >>> readDriveUNC "Desktop" +-- Nothing +-- +-- >>> readDriveUNC "\\/?\\c:" +-- Just ("\\/?\\c:","") +-- +-- >>> readDriveUNC "\\/?\\F:/" +-- Just ("\\/?\\F:/","") +-- +-- >>> readDriveUNC "\\/?\\F:/\\\\Desktop" +-- Just ("\\/?\\F:/\\\\","Desktop") +-- +-- >>> readDriveUNC "\\/?\\uNc/" +-- Just ("\\/?\\uNc/","") +-- +-- >>> readDriveUNC "\\/?\\uNc/\\Desktop" +-- Just ("\\/?\\uNc/\\","Desktop") +-- +-- >>> readDriveUNC "\\/?\\uNc/Desktop\\Folder" +-- Just ("\\/?\\uNc/Desktop\\","Folder") +-- +readDriveUNC + :: (Unbox a, Integral a) => Array a -> Maybe (Array a, Array a) +readDriveUNC arr + | arrLen >= 4, q == '?', all isSep [s1,s2,s3] = + if arrLen >= 8 && map toUpper [u, n, c] == "UNC" && isSep s4 + then + let (a, b) = + readDriveShareName (Array.getSliceUnsafe 8 (arrLen - 8) arr) + in Just (Array.getSliceUnsafe 0 (8 + Array.length a) arr, b) + else + case readDriveLetter (Array.getSliceUnsafe 4 (arrLen - 4) arr) of + Nothing -> Nothing + Just (a, b) -> + Just (Array.getSliceUnsafe 0 (4 + Array.length a) arr, b) + | otherwise = Nothing + where + isSep = isSeparator Windows + arrLen = Array.length arr + + s1 = unsafeIndexChar 0 arr + s2 = unsafeIndexChar 1 arr + q = unsafeIndexChar 2 arr + s3 = unsafeIndexChar 3 arr + u = unsafeIndexChar 4 arr + n = unsafeIndexChar 5 arr + c = unsafeIndexChar 6 arr + s4 = unsafeIndexChar 7 arr + +-- | +-- >>> :{ +-- splitDrive Common.Posix = (\(a, b) -> (unpackPosix a, unpackPosix b)) . Common.splitDrive Common.Posix . packPosix +-- splitDrive Common.Windows = (\(a, b) -> (unpackWindows a, unpackWindows b)) . Common.splitDrive Common.Windows . packWindows +-- :} +-- +-- >>> splitDrive Common.Posix "" +-- ("","") +-- +-- >>> splitDrive Common.Posix "/" +-- ("/","") +-- +-- >>> splitDrive Common.Posix "./" +-- ("","./") +-- +-- >>> splitDrive Common.Posix "/home/usr" +-- ("/","home/usr") +-- +-- >>> splitDrive Common.Posix "/////home/usr" +-- ("/","home/usr") +-- +-- >>> splitDrive Common.Posix "/test" +-- ("/","test") +-- +-- >>> splitDrive Common.Posix "//test" +-- ("/","test") +-- +-- >>> splitDrive Common.Posix "test/file" +-- ("","test/file") +-- +-- >>> splitDrive Common.Posix "file" +-- ("","file") +-- +-- >>> splitDrive Common.Windows "F:\\/./Desktop" +-- ("F:\\/","./Desktop") +-- +-- >>> splitDrive Common.Windows "\\\\localhost\\Desktop" +-- ("\\\\localhost\\","Desktop") +-- +-- >>> splitDrive Common.Windows "\\/?\\uNc/Desktop\\Folder" +-- ("\\/?\\uNc/Desktop\\","Folder") +-- +-- >>> splitDrive Common.Windows "\\local/device" +-- ("\\","local/device") +-- +-- >>> splitDrive Common.Windows "\\." +-- ("\\",".") +-- +-- >>> splitDrive Common.Windows "file" +-- ("","file") +-- +-- >>> splitDrive Common.Windows "c:/file" +-- ("c:/","file") +-- +-- >>> splitDrive Common.Windows "c:\\file" +-- ("c:\\","file") +-- +-- >>> splitDrive Common.Windows "\\\\shared\\test" +-- ("\\\\shared\\","test") +-- +-- >>> splitDrive Common.Windows "\\\\shared" +-- ("\\\\shared","") +-- +-- >>> splitDrive Common.Windows "\\\\?\\UNC\\shared\\file" +-- ("\\\\?\\UNC\\shared\\","file") +-- +-- >>> splitDrive Common.Windows "\\\\?\\UNCshared\\file" +-- ("\\\\?\\","UNCshared\\file") +-- +-- >>> splitDrive Common.Windows "\\\\?\\d:\\file" +-- ("\\\\?\\d:\\","file") +-- +-- >>> splitDrive Common.Windows "/d" +-- ("\\","d") +-- +splitDrive :: (Unbox a, Integral a) => OS -> Array a -> (Array a, Array a) +splitDrive Windows arr | Just res <- readDriveLetter arr = res +splitDrive Windows arr | Just res <- readDriveUNC arr = res +splitDrive Windows arr | Just res <- readDriveShare arr = res +splitDrive os arr = runIdentity $ do + i <- countUntilSeperator os arr + pure + $ if i > 0 + then ( Array.fromListN 1 [primarySeparatorIntegral os] + , Array.getSliceUnsafe i (arrLen - i) arr + ) + else (Array.empty, Array.getSliceUnsafe i (arrLen - i) arr) + where + arrLen = Array.length arr + +-- XXX Should we normalize uNc to UNC? +-- XXX What about uNcshared vs UNCshared? +-- | +-- >>> :{ +-- normalizeDrive Common.Posix = unpackPosix . Common.normalizeDrive Common.Posix . packPosix +-- normalizeDrive Common.Windows = unpackWindows . Common.normalizeDrive Common.Windows . packWindows +-- :} +-- +-- >>> normalizeDrive Common.Posix "" +-- "" +-- +-- >>> normalizeDrive Common.Posix "/" +-- "/" +-- +-- >>> normalizeDrive Common.Windows "" +-- "" +-- +-- >>> normalizeDrive Common.Windows "F:\\/" +-- "F:\\" +-- +-- >>> normalizeDrive Common.Windows "\\\\localhost/" +-- "\\\\localhost\\" +-- +-- >>> normalizeDrive Common.Windows "\\/?\\uNc/Desktop\\" +-- "\\\\?\\UNC\\Desktop\\" +-- +-- >>> normalizeDrive Common.Windows "\\" +-- "\\" +-- +normalizeDrive :: (Unbox a, Integral a) => OS -> Array a -> Array a +normalizeDrive _ arr | Array.null arr = Array.empty +normalizeDrive Posix _ = + Array.fromListN 1 [primarySeparatorIntegral Posix] +normalizeDrive Windows arr + | Just (drv, _) <- readDriveLetter arrSRep = + let drvLen = Array.length drv + in + if drvLen == 0 + then error "normalizeDrive: impossible" + else + let x = ordIntegral $ toUpper $ unsafeIndexChar 0 drv + in if drvLen == 2 + then Array.fromListN 2 [x, ordIntegral ':'] + else Array.fromListN 3 [x, ordIntegral ':', primarySeparatorIntegral Windows] + | otherwise = arrSRep + where + canonicalizeSeperator x = + if isSeparatorIntegral Windows x + then primarySeparatorIntegral Windows + else x + arrSRep = + unsafePerformIO + $ Stream.fold Array.create + $ fmap canonicalizeSeperator + $ Array.read arr + +-- | +-- >>> :{ +-- normalizePath Common.Posix = unpackPosix . Common.normalizePath Common.Posix . packPosix +-- normalizePath Common.Windows = unpackWindows . Common.normalizePath Common.Windows . packWindows +-- :} +-- +-- >>> normalizePath Common.Posix "" +-- "" +-- +-- >>> normalizePath Common.Posix "/" +-- "" +-- +-- >>> normalizePath Common.Posix "/." +-- "" +-- +-- >>> normalizePath Common.Posix "/home/usr/" +-- "home/usr/" +-- +-- >>> normalizePath Common.Posix "/////home/usr/." +-- "home/usr/" +-- +-- >>> normalizePath Common.Windows "./Desktop/" +-- "Desktop\\" +-- +-- >>> normalizePath Common.Windows "\\Desktop\\Folder/." +-- "Desktop\\Folder\\" +-- +-- >>> normalizePath Common.Windows "\\Desktop\\Folder/" +-- "Desktop\\Folder\\" +-- +-- >>> normalizePath Common.Windows "\\Desktop\\File" +-- "Desktop\\File" +-- +-- >>> normalizePath Common.Windows "." +-- "" +-- +-- >>> normalizePath Common.Windows "" +-- "" +-- +{-# INLINE normalizePath #-} +normalizePath :: forall a. (Unbox a, Integral a) => OS -> Array a -> Array a +normalizePath os arr = + Array.unsafeFreeze $ unsafePerformIO $ do + let workSliceStream = MutArray.read workSliceMut + mid <- + Stream.indexOnSuffix (isSeparatorIntegral os) workSliceStream + & Stream.filter (not . shouldFilterOut) + & Stream.mapM (\(i, len) -> getSliceWithSepSuffix os i len) + & Stream.fold (Fold.foldlM' (combine os) initBufferM) + case os of + Posix -> pure mid + Windows -> + let midLen = MutArray.length mid in + pure $ case midLen of + ml | ml >= 2 -> + let lastElem = Array.getIndexUnsafe (arrLen - 1) arr + lastButOne = Array.getIndexUnsafe (arrLen - 2) arr + in if (isSeparatorIntegral Windows lastButOne + && lastElem == dotElem) + || isSeparatorIntegral Windows lastElem + then mid + else MutArray.unsafeGetSlice 0 (midLen - 1) mid + ml | ml >= 1 -> + let lastElem = Array.getIndexUnsafe (arrLen - 1) arr + in if isSeparatorIntegral Windows lastElem + then mid + else MutArray.unsafeGetSlice 0 (midLen - 1) mid + _ -> mid + + where + + (dotElem :: a) = ordIntegral '.' + arrLen = Array.length arr + + workSlice = arr + workSliceMut = Array.unsafeThaw workSlice + workSliceElemLen = Array.length workSlice + + shouldFilterOut (off, len) = + len == 0 || + (len == 1 && Array.getIndexUnsafe off workSlice == dotElem) + + getSliceWithSepSuffix Posix i len + | i + len == workSliceElemLen = + pure $ MutArray.unsafeGetSlice i len workSliceMut + getSliceWithSepSuffix Posix i len = + pure $ MutArray.unsafeGetSlice i (len + 1) workSliceMut + getSliceWithSepSuffix Windows i len = + pure $ MutArray.unsafeGetSlice i len workSliceMut + + combine Posix b a = MutArray.unsafeSplice b a + combine Windows b a = do + b1 <- MutArray.unsafeSplice b a + MutArray.unsafeSnoc b1 (primarySeparatorIntegral Windows) + + initBufferM = MutArray.emptyOf (arrLen + 1) + + +-- | +-- >>> :{ +-- normalize Common.Posix = unpackPosix . Common.normalize Common.Posix . packPosix +-- normalize Common.Windows = unpackWindows . Common.normalize Common.Windows . packWindows +-- :} +-- +-- >>> normalize Common.Posix "" +-- "" +-- +-- >>> normalize Common.Posix "/" +-- "/" +-- +-- >>> normalize Common.Posix "/path/to///file" +-- "/path/to/file" +-- +-- >>> normalize Common.Posix "/path/to///folder/" +-- "/path/to/folder/" +-- +-- >>> normalize Common.Posix "/path/to/././folder/." +-- "/path/to/folder/" +-- +-- >>> normalize Common.Posix "/path/to/./../folder/." +-- "/path/to/../folder/" +-- +-- >>> normalize Common.Posix "/file/\\test////" +-- "/file/\\test/" +-- +-- >>> normalize Common.Posix "/file/./test" +-- "/file/test" +-- +-- >>> normalize Common.Posix "/test/file/../bob/fred/" +-- "/test/file/../bob/fred/" +-- +-- >>> normalize Common.Posix "../bob/fred/" +-- "../bob/fred/" +-- +-- >>> normalize Common.Posix "/a/../c" +-- "/a/../c" +-- +-- >>> normalize Common.Posix "./bob/fred/" +-- "bob/fred/" +-- +-- >>> normalize Common.Posix "." +-- "." +-- +-- >>> normalize Common.Posix "./" +-- "./" +-- +-- >>> normalize Common.Posix "./." +-- "./" +-- +-- >>> normalize Common.Posix "/./" +-- "/" +-- +-- >>> normalize Common.Posix "/" +-- "/" +-- +-- >>> normalize Common.Posix "bob/fred/." +-- "bob/fred/" +-- +-- >>> normalize Common.Posix "//home" +-- "/home" +-- +-- >>> normalize Common.Windows "." +-- "." +-- +-- >>> normalize Common.Windows "\\\\?\\c:\\" +-- "\\\\?\\c:\\" +-- +-- >>> normalize Common.Windows "c:\\file/bob\\" +-- "C:\\file\\bob\\" +-- +-- >>> normalize Common.Windows "c:\\file/bob\\" +-- "C:\\file\\bob\\" +-- +-- >>> normalize Common.Windows "c:\\" +-- "C:\\" +-- +-- >>> normalize Common.Windows "c:\\\\\\\\" +-- "C:\\" +-- +-- >>> normalize Common.Windows "C:.\\" +-- "C:" +-- +-- >>> normalize Common.Windows "\\\\server\\test" +-- "\\\\server\\test" +-- +-- >>> normalize Common.Windows "//server/test" +-- "\\\\server\\test" +-- +-- >>> normalize Common.Windows "c:/file" +-- "C:\\file" +-- +-- >>> normalize Common.Windows "\\file" +-- "\\file" +-- +-- >>> normalize Common.Windows "/file" +-- "\\file" +-- +-- >>> normalize Common.Windows "/./" +-- "\\" +-- +-- >>> normalize Common.Windows "/file/\\test////" +-- "\\file\\test\\" +-- +-- >>> normalize Common.Windows "/file/./test" +-- "\\file\\test" +-- +-- >>> normalize Common.Windows "/test/file/../bob/fred/" +-- "\\test\\file\\..\\bob\\fred\\" +-- +-- >>> normalize Common.Windows "../bob/fred/" +-- "..\\bob\\fred\\" +-- +-- >>> normalize Common.Windows "/a/../c" +-- "\\a\\..\\c" +-- +-- >>> normalize Common.Windows "./bob/fred/" +-- "bob\\fred\\" +-- +-- >>> normalize Common.Windows "./" +-- ".\\" +-- +-- >>> normalize Common.Windows "./." +-- ".\\" +-- +-- >>> normalize Common.Windows "/./" +-- "\\" +-- +-- >>> normalize Common.Windows "/" +-- "\\" +-- +-- >>> normalize Common.Windows "bob/fred/." +-- "bob\\fred\\" +-- +-- >>> normalize Common.Windows "//home" +-- "\\\\home" +-- +{-# INLINE normalize #-} +normalize :: forall a. (Unbox a, Integral a) => OS -> Array a -> Array a +normalize os arr = + let (a, b) = splitDrive os arr + drv = normalizeDrive os a + pth = normalizePath os b + drvLen = Array.length drv + pthLen = Array.length pth + arrLen = Array.length arr + in if drvLen == 0 && pthLen == 0 && arrLen > 0 + then + if arrLen >= 2 + then + let x = unsafeIndexChar 0 arr + y = unsafeIndexChar 1 arr + in + if x == '.' && isSeparator os y + then Array.fromListN 2 + [ordIntegral '.', primarySeparatorIntegral os] + else Array.fromListN 1 [ordIntegral '.'] + else Array.fromListN 1 [ordIntegral '.'] + else if drvLen == 0 + then pth + else if pthLen == 0 + then drv + else Array.unsafeFreeze $ unsafePerformIO $ do + let x = unsafeIndexChar (drvLen - 1) drv + if isSeparator os x + then do + marr <- MutArray.emptyOf (drvLen + pthLen) + marr1 <- MutArray.unsafeSplice marr (Array.unsafeThaw drv) + MutArray.unsafeSplice marr1 (Array.unsafeThaw pth) + else do + marr <- MutArray.emptyOf (drvLen + pthLen + 1) + marr1 <- MutArray.unsafeSplice marr (Array.unsafeThaw drv) + marr2 <- + MutArray.unsafeSnoc + marr1 (ordIntegral (primarySeparator os)) + MutArray.unsafeSplice marr2 (Array.unsafeThaw pth) diff --git a/core/src/Streamly/Internal/FileSystem/PosixPath.hs b/core/src/Streamly/Internal/FileSystem/PosixPath.hs index 3c7ba4392f..513bf29f2a 100644 --- a/core/src/Streamly/Internal/FileSystem/PosixPath.hs +++ b/core/src/Streamly/Internal/FileSystem/PosixPath.hs @@ -41,6 +41,7 @@ module Streamly.Internal.FileSystem.OS_PATH -- * Conversions , IsPath (..) , adapt + , normalize -- * Construction , fromChunk @@ -360,3 +361,49 @@ append (OS_PATH a) (OS_PATH b) = OS_PATH $ Common.append Common.OS_NAME (Common.toString Unicode.UNICODE_DECODER) a b + +-- | Normalize the path. +-- +-- The behaviour is similar to FilePath.normalise. +-- +-- >>> Path.toString $ Path.normalize $ [path|/file/\test////|] +-- "/file/\\test/" +-- +-- >>> Path.toString $ Path.normalize $ [path|/file/./test|] +-- "/file/test" +-- +-- >>> Path.toString $ Path.normalize $ [path|/test/file/../bob/fred/|] +-- "/test/file/../bob/fred/" +-- +-- >>> Path.toString $ Path.normalize $ [path|../bob/fred/|] +-- "../bob/fred/" +-- +-- >>> Path.toString $ Path.normalize $ [path|/a/../c|] +-- "/a/../c" +-- +-- >>> Path.toString $ Path.normalize $ [path|./bob/fred/|] +-- "bob/fred/" +-- +-- >>> Path.toString $ Path.normalize $ [path|.|] +-- "." +-- +-- >>> Path.toString $ Path.normalize $ [path|./|] +-- "./" +-- +-- >>> Path.toString $ Path.normalize $ [path|./.|] +-- "./" +-- +-- >>> Path.toString $ Path.normalize $ [path|/./|] +-- "/" +-- +-- >>> Path.toString $ Path.normalize $ [path|/|] +-- "/" +-- +-- >>> Path.toString $ Path.normalize $ [path|bob/fred/.|] +-- "bob/fred/" +-- +-- >>> Path.toString $ Path.normalize $ [path|//home|] +-- "/home" +-- +normalize :: OS_PATH -> OS_PATH +normalize (OS_PATH a) = OS_PATH $ Common.normalize Common.OS_NAME a diff --git a/streamly.cabal b/streamly.cabal index 6c7295e28b..ca092d096b 100644 --- a/streamly.cabal +++ b/streamly.cabal @@ -127,6 +127,7 @@ extra-source-files: test/Streamly/Test/FileSystem/Event/Windows.hs test/Streamly/Test/FileSystem/Event/Linux.hs test/Streamly/Test/FileSystem/Handle.hs + test/Streamly/Test/FileSystem/Path.hs test/Streamly/Test/Network/Socket.hs test/Streamly/Test/Network/Inet/TCP.hs test/Streamly/Test/Prelude.hs diff --git a/test/Streamly/Test/FileSystem/Path.hs b/test/Streamly/Test/FileSystem/Path.hs new file mode 100644 index 0000000000..e2f7873740 --- /dev/null +++ b/test/Streamly/Test/FileSystem/Path.hs @@ -0,0 +1,61 @@ +-- | +-- Module : Streamly.Test.FileSystem.Path +-- Copyright : (c) 2021 Composewell Technologies +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com +-- Stability : experimental +-- Portability : GHC +-- + +module Streamly.Test.FileSystem.Path (main) where + +import qualified System.FilePath as FilePath +import qualified Streamly.Internal.FileSystem.Path as Path + +import Test.Hspec as H + +moduleName :: String +moduleName = "FileSystem.Path" + +testNormalize :: String -> Spec +testNormalize inp = + it ("normalize: " ++ show inp) $ do + p <- Path.fromString inp + let expected = FilePath.normalise inp + got = Path.toString (Path.normalize p) + got `shouldBe` expected + +main :: IO () +main = + hspec $ + H.parallel $ + describe moduleName $ do + describe "normalize" $ do + -- Primarily for Windows + testNormalize "C:\\" + testNormalize "C:" + testNormalize "\\\\?\\c:\\" + testNormalize "c:\\file/bob\\" + testNormalize "c:\\" + testNormalize "c:\\\\\\\\" + testNormalize "C:.\\" + testNormalize "\\\\server\\test" + testNormalize "//server/test" + testNormalize "c:/file" + testNormalize "/file" + testNormalize "\\" + -- Primarily for Posix + testNormalize "/./" + testNormalize "/file/\\test////" + testNormalize "/file/./test" + testNormalize "/test/file/../bob/fred/" + testNormalize "../bob/fred/" + testNormalize "/a/../c" + testNormalize "./bob/fred/" + testNormalize "." + testNormalize "./" + testNormalize "./." + testNormalize "/./" + testNormalize "/" + testNormalize "bob/fred/." + testNormalize "//home" diff --git a/test/streamly-tests.cabal b/test/streamly-tests.cabal index bef27c55f4..4564d2a87b 100644 --- a/test/streamly-tests.cabal +++ b/test/streamly-tests.cabal @@ -445,6 +445,12 @@ test-suite FileSystem.Handle if flag(use-streamly-core) buildable: False +test-suite FileSystem.Path + import: test-options + type: exitcode-stdio-1.0 + main-is: Streamly/Test/FileSystem/Path.hs + ghc-options: -main-is Streamly.Test.FileSystem.Path.main + test-suite Network.Inet.TCP import: lib-options type: exitcode-stdio-1.0 From 82d07edb656bdc69195104be11ada30491471978 Mon Sep 17 00:00:00 2001 From: Adithya Kumar Date: Thu, 19 Dec 2024 22:36:34 +0530 Subject: [PATCH 2/6] Add a normalized path equality check operation --- .../Internal/FileSystem/Path/Common.hs | 475 +++++++----------- .../Streamly/Internal/FileSystem/PosixPath.hs | 61 ++- core/streamly-core.cabal | 4 +- 3 files changed, 202 insertions(+), 338 deletions(-) diff --git a/core/src/Streamly/Internal/FileSystem/Path/Common.hs b/core/src/Streamly/Internal/FileSystem/Path/Common.hs index aa2eba77b4..1b580ece1a 100644 --- a/core/src/Streamly/Internal/FileSystem/Path/Common.hs +++ b/core/src/Streamly/Internal/FileSystem/Path/Common.hs @@ -35,14 +35,19 @@ module Streamly.Internal.FileSystem.Path.Common , unsafeAppend -- * Path normalization + + -- Most of these helpers are exposed because we want to test them using + -- docspec. + , readDriveShareName , readDriveLetter , readDriveUNC , readDriveShare - , splitDrive + , spanDrive , normalizeDrive - , normalizePath - , normalize + , splitPath + , isNotFileLocation + , normalizedEq -- * Utilities , wordToChar @@ -72,6 +77,7 @@ where -} import Control.Monad.Catch (MonadThrow(..)) +import Control.Monad.IO.Class (MonadIO(..)) import Data.Char (ord, isAlpha, isAsciiLower, isAsciiUpper, toUpper) import Data.Function ((&)) import Data.Functor.Identity (Identity(..)) @@ -398,7 +404,7 @@ append os toStr a b = withAppendCheck os toStr b (doAppend os a b) -------------------------------------------------------------------------------- --- Path normalization +-- Path equality helpers -------------------------------------------------------------------------------- {-# INLINE ordIntegral #-} @@ -425,6 +431,10 @@ countUntilSeperator os arr = (Fold.takeEndBy_ (not . isSeparatorIntegral os) Fold.length) (Array.read arr) +-------------------------------------------------------------------------------- +-- Path equality windows specific +-------------------------------------------------------------------------------- + -- | -- >>> readDriveLetter = fmap (\(a, b) -> (unpackWindows a, unpackWindows b)) . Common.readDriveLetter . packWindows -- @@ -579,89 +589,59 @@ readDriveUNC arr s4 = unsafeIndexChar 7 arr -- | --- >>> :{ --- splitDrive Common.Posix = (\(a, b) -> (unpackPosix a, unpackPosix b)) . Common.splitDrive Common.Posix . packPosix --- splitDrive Common.Windows = (\(a, b) -> (unpackWindows a, unpackWindows b)) . Common.splitDrive Common.Windows . packWindows --- :} --- --- >>> splitDrive Common.Posix "" --- ("","") --- --- >>> splitDrive Common.Posix "/" --- ("/","") +-- >>> spanDrive = (\(a, b) -> (unpackWindows a, unpackWindows b)) . Common.spanDrive . packWindows -- --- >>> splitDrive Common.Posix "./" --- ("","./") --- --- >>> splitDrive Common.Posix "/home/usr" --- ("/","home/usr") --- --- >>> splitDrive Common.Posix "/////home/usr" --- ("/","home/usr") --- --- >>> splitDrive Common.Posix "/test" --- ("/","test") --- --- >>> splitDrive Common.Posix "//test" --- ("/","test") --- --- >>> splitDrive Common.Posix "test/file" --- ("","test/file") --- --- >>> splitDrive Common.Posix "file" --- ("","file") --- --- >>> splitDrive Common.Windows "F:\\/./Desktop" +-- >>> spanDrive "F:\\/./Desktop" -- ("F:\\/","./Desktop") -- --- >>> splitDrive Common.Windows "\\\\localhost\\Desktop" +-- >>> spanDrive "\\\\localhost\\Desktop" -- ("\\\\localhost\\","Desktop") -- --- >>> splitDrive Common.Windows "\\/?\\uNc/Desktop\\Folder" +-- >>> spanDrive "\\/?\\uNc/Desktop\\Folder" -- ("\\/?\\uNc/Desktop\\","Folder") -- --- >>> splitDrive Common.Windows "\\local/device" +-- >>> spanDrive "\\local/device" -- ("\\","local/device") -- --- >>> splitDrive Common.Windows "\\." +-- >>> spanDrive "\\." -- ("\\",".") -- --- >>> splitDrive Common.Windows "file" +-- >>> spanDrive "file" -- ("","file") -- --- >>> splitDrive Common.Windows "c:/file" +-- >>> spanDrive "c:/file" -- ("c:/","file") -- --- >>> splitDrive Common.Windows "c:\\file" +-- >>> spanDrive "c:\\file" -- ("c:\\","file") -- --- >>> splitDrive Common.Windows "\\\\shared\\test" +-- >>> spanDrive "\\\\shared\\test" -- ("\\\\shared\\","test") -- --- >>> splitDrive Common.Windows "\\\\shared" +-- >>> spanDrive "\\\\shared" -- ("\\\\shared","") -- --- >>> splitDrive Common.Windows "\\\\?\\UNC\\shared\\file" +-- >>> spanDrive "\\\\?\\UNC\\shared\\file" -- ("\\\\?\\UNC\\shared\\","file") -- --- >>> splitDrive Common.Windows "\\\\?\\UNCshared\\file" +-- >>> spanDrive "\\\\?\\UNCshared\\file" -- ("\\\\?\\","UNCshared\\file") -- --- >>> splitDrive Common.Windows "\\\\?\\d:\\file" +-- >>> spanDrive "\\\\?\\d:\\file" -- ("\\\\?\\d:\\","file") -- --- >>> splitDrive Common.Windows "/d" +-- >>> spanDrive "/d" -- ("\\","d") -- -splitDrive :: (Unbox a, Integral a) => OS -> Array a -> (Array a, Array a) -splitDrive Windows arr | Just res <- readDriveLetter arr = res -splitDrive Windows arr | Just res <- readDriveUNC arr = res -splitDrive Windows arr | Just res <- readDriveShare arr = res -splitDrive os arr = runIdentity $ do - i <- countUntilSeperator os arr +spanDrive :: (Unbox a, Integral a) => Array a -> (Array a, Array a) +spanDrive arr | Just res <- readDriveLetter arr = res +spanDrive arr | Just res <- readDriveUNC arr = res +spanDrive arr | Just res <- readDriveShare arr = res +spanDrive arr = runIdentity $ do + i <- countUntilSeperator Windows arr pure $ if i > 0 - then ( Array.fromListN 1 [primarySeparatorIntegral os] + then ( Array.fromListN 1 [primarySeparatorIntegral Windows] , Array.getSliceUnsafe i (arrLen - i) arr ) else (Array.empty, Array.getSliceUnsafe i (arrLen - i) arr) @@ -671,37 +651,26 @@ splitDrive os arr = runIdentity $ do -- XXX Should we normalize uNc to UNC? -- XXX What about uNcshared vs UNCshared? -- | --- >>> :{ --- normalizeDrive Common.Posix = unpackPosix . Common.normalizeDrive Common.Posix . packPosix --- normalizeDrive Common.Windows = unpackWindows . Common.normalizeDrive Common.Windows . packWindows --- :} +-- >>> normalizeDrive = unpackWindows . Common.normalizeDrive . packWindows -- --- >>> normalizeDrive Common.Posix "" +-- >>> normalizeDrive "" -- "" -- --- >>> normalizeDrive Common.Posix "/" --- "/" --- --- >>> normalizeDrive Common.Windows "" --- "" --- --- >>> normalizeDrive Common.Windows "F:\\/" +-- >>> normalizeDrive "F:\\/" -- "F:\\" -- --- >>> normalizeDrive Common.Windows "\\\\localhost/" +-- >>> normalizeDrive "\\\\localhost/" -- "\\\\localhost\\" -- --- >>> normalizeDrive Common.Windows "\\/?\\uNc/Desktop\\" +-- >>> normalizeDrive "\\/?\\uNc/Desktop\\" -- "\\\\?\\UNC\\Desktop\\" -- --- >>> normalizeDrive Common.Windows "\\" +-- >>> normalizeDrive "\\" -- "\\" -- -normalizeDrive :: (Unbox a, Integral a) => OS -> Array a -> Array a -normalizeDrive _ arr | Array.null arr = Array.empty -normalizeDrive Posix _ = - Array.fromListN 1 [primarySeparatorIntegral Posix] -normalizeDrive Windows arr +normalizeDrive :: (Unbox a, Integral a) => Array a -> Array a +normalizeDrive arr | Array.null arr = Array.empty +normalizeDrive arr | Just (drv, _) <- readDriveLetter arrSRep = let drvLen = Array.length drv in @@ -724,278 +693,178 @@ normalizeDrive Windows arr $ fmap canonicalizeSeperator $ Array.read arr +-------------------------------------------------------------------------------- +-- Path equality posix specific +-------------------------------------------------------------------------------- + +-- Posix specific function. +isAbsoluteLocation :: (Integral a, Unbox a) => Array a -> Bool +isAbsoluteLocation arr = arrLen > 0 && firstChar == primarySeparator Posix + where + arrLen = Array.length arr + firstChar = unsafeIndexChar 0 arr + +-------------------------------------------------------------------------------- +-- Path equality common operations +-------------------------------------------------------------------------------- + -- | -- >>> :{ --- normalizePath Common.Posix = unpackPosix . Common.normalizePath Common.Posix . packPosix --- normalizePath Common.Windows = unpackWindows . Common.normalizePath Common.Windows . packWindows +-- splitPath Common.Posix = Stream.toList . fmap unpackPosix . Common.splitPath Common.Posix . packPosix +-- splitPath Common.Windows = Stream.toList . fmap unpackWindows . Common.splitPath Common.Windows . packWindows -- :} -- --- >>> normalizePath Common.Posix "" --- "" --- --- >>> normalizePath Common.Posix "/" --- "" --- --- >>> normalizePath Common.Posix "/." --- "" --- --- >>> normalizePath Common.Posix "/home/usr/" --- "home/usr/" +-- >>> splitPath Common.Posix "home//user/./..////\\directory/." +-- ["home","user","..","\\directory"] -- --- >>> normalizePath Common.Posix "/////home/usr/." --- "home/usr/" +-- >>> splitPath Common.Windows "home//user/./..////\\directory/." +-- ["home","user","..","directory"] -- --- >>> normalizePath Common.Windows "./Desktop/" --- "Desktop\\" --- --- >>> normalizePath Common.Windows "\\Desktop\\Folder/." --- "Desktop\\Folder\\" --- --- >>> normalizePath Common.Windows "\\Desktop\\Folder/" --- "Desktop\\Folder\\" --- --- >>> normalizePath Common.Windows "\\Desktop\\File" --- "Desktop\\File" --- --- >>> normalizePath Common.Windows "." --- "" --- --- >>> normalizePath Common.Windows "" --- "" --- -{-# INLINE normalizePath #-} -normalizePath :: forall a. (Unbox a, Integral a) => OS -> Array a -> Array a -normalizePath os arr = - Array.unsafeFreeze $ unsafePerformIO $ do - let workSliceStream = MutArray.read workSliceMut - mid <- - Stream.indexOnSuffix (isSeparatorIntegral os) workSliceStream - & Stream.filter (not . shouldFilterOut) - & Stream.mapM (\(i, len) -> getSliceWithSepSuffix os i len) - & Stream.fold (Fold.foldlM' (combine os) initBufferM) - case os of - Posix -> pure mid - Windows -> - let midLen = MutArray.length mid in - pure $ case midLen of - ml | ml >= 2 -> - let lastElem = Array.getIndexUnsafe (arrLen - 1) arr - lastButOne = Array.getIndexUnsafe (arrLen - 2) arr - in if (isSeparatorIntegral Windows lastButOne - && lastElem == dotElem) - || isSeparatorIntegral Windows lastElem - then mid - else MutArray.unsafeGetSlice 0 (midLen - 1) mid - ml | ml >= 1 -> - let lastElem = Array.getIndexUnsafe (arrLen - 1) arr - in if isSeparatorIntegral Windows lastElem - then mid - else MutArray.unsafeGetSlice 0 (midLen - 1) mid - _ -> mid +{-# INLINE splitPath #-} +splitPath + :: forall a m. (Unbox a, Integral a, MonadIO m) + => OS -> Array a -> Stream m (Array a) +splitPath os arr = + Stream.indexOnSuffix (isSeparatorIntegral os) (Array.read arr) + & Stream.filter (not . shouldFilterOut) + & fmap (\(i, len) -> Array.getSliceUnsafe i len arr) where - (dotElem :: a) = ordIntegral '.' - arrLen = Array.length arr - - workSlice = arr - workSliceMut = Array.unsafeThaw workSlice - workSliceElemLen = Array.length workSlice - shouldFilterOut (off, len) = len == 0 || - (len == 1 && Array.getIndexUnsafe off workSlice == dotElem) - - getSliceWithSepSuffix Posix i len - | i + len == workSliceElemLen = - pure $ MutArray.unsafeGetSlice i len workSliceMut - getSliceWithSepSuffix Posix i len = - pure $ MutArray.unsafeGetSlice i (len + 1) workSliceMut - getSliceWithSepSuffix Windows i len = - pure $ MutArray.unsafeGetSlice i len workSliceMut - - combine Posix b a = MutArray.unsafeSplice b a - combine Windows b a = do - b1 <- MutArray.unsafeSplice b a - MutArray.unsafeSnoc b1 (primarySeparatorIntegral Windows) - - initBufferM = MutArray.emptyOf (arrLen + 1) - + (len == 1 && unsafeIndexChar off arr == '.') -- | -- >>> :{ --- normalize Common.Posix = unpackPosix . Common.normalize Common.Posix . packPosix --- normalize Common.Windows = unpackWindows . Common.normalize Common.Windows . packWindows +-- isNotFileLocation Common.Posix = Common.isNotFileLocation Common.Posix . packPosix +-- isNotFileLocation Common.Windows = Common.isNotFileLocation Common.Windows . packWindows -- :} -- --- >>> normalize Common.Posix "" --- "" --- --- >>> normalize Common.Posix "/" --- "/" --- --- >>> normalize Common.Posix "/path/to///file" --- "/path/to/file" --- --- >>> normalize Common.Posix "/path/to///folder/" --- "/path/to/folder/" +-- >>> isNotFileLocation Common.Posix "" +-- False -- --- >>> normalize Common.Posix "/path/to/././folder/." --- "/path/to/folder/" +-- >>> isNotFileLocation Common.Posix "/" +-- True -- --- >>> normalize Common.Posix "/path/to/./../folder/." --- "/path/to/../folder/" +-- >>> isNotFileLocation Common.Posix "/." +-- True -- --- >>> normalize Common.Posix "/file/\\test////" --- "/file/\\test/" +-- >>> isNotFileLocation Common.Posix "./." +-- True -- --- >>> normalize Common.Posix "/file/./test" --- "/file/test" +-- >>> isNotFileLocation Common.Posix "home/user" +-- False -- --- >>> normalize Common.Posix "/test/file/../bob/fred/" --- "/test/file/../bob/fred/" +-- >>> isNotFileLocation Common.Windows "\\" +-- True -- --- >>> normalize Common.Posix "../bob/fred/" --- "../bob/fred/" +-- >>> isNotFileLocation Common.Windows "\\." +-- True -- --- >>> normalize Common.Posix "/a/../c" --- "/a/../c" +-- >>> isNotFileLocation Common.Windows "" +-- False -- --- >>> normalize Common.Posix "./bob/fred/" --- "bob/fred/" +-- >>> isNotFileLocation Common.Windows "home\\user" +-- False -- --- >>> normalize Common.Posix "." --- "." +-- >>> isNotFileLocation Common.Windows "/home/user/" +-- True -- --- >>> normalize Common.Posix "./" --- "./" --- --- >>> normalize Common.Posix "./." --- "./" --- --- >>> normalize Common.Posix "/./" --- "/" --- --- >>> normalize Common.Posix "/" --- "/" --- --- >>> normalize Common.Posix "bob/fred/." --- "bob/fred/" --- --- >>> normalize Common.Posix "//home" --- "/home" --- --- >>> normalize Common.Windows "." --- "." --- --- >>> normalize Common.Windows "\\\\?\\c:\\" --- "\\\\?\\c:\\" +isNotFileLocation :: (Integral a, Unbox a) => OS -> Array a -> Bool +isNotFileLocation os arr = + (arrLen > 0 && (isSeparator os lastChar || lastChar == winDriveSep)) + || (arrLen > 1 && isSeparator os sndlastChar && lastChar == '.') + + where + winDriveSep = ':' + arrLen = Array.length arr + lastChar = unsafeIndexChar (arrLen - 1) arr + sndlastChar = unsafeIndexChar (arrLen - 2) arr + +-- | +-- >>> :{ +-- normalizedEq Common.Posix a b = Common.normalizedEq Common.Posix (packPosix a) (packPosix b) +-- normalizedEq Common.Windows a b = Common.normalizedEq Common.Windows (packWindows a) (packWindows b) +-- :} -- --- >>> normalize Common.Windows "c:\\file/bob\\" --- "C:\\file\\bob\\" +-- >>> normalizedEq Common.Posix "/file/\\test////" "/file/\\test/" +-- True -- --- >>> normalize Common.Windows "c:\\file/bob\\" --- "C:\\file\\bob\\" +-- >>> normalizedEq Common.Posix "/file/./test" "/file/test" +-- True -- --- >>> normalize Common.Windows "c:\\" --- "C:\\" +-- >>> normalizedEq Common.Posix "/test/file/../bob/fred/" "/test/file/../bob/fred/" +-- True -- --- >>> normalize Common.Windows "c:\\\\\\\\" --- "C:\\" +-- >>> normalizedEq Common.Posix "../bob/fred/" "../bob/fred/" +-- True -- --- >>> normalize Common.Windows "C:.\\" --- "C:" +-- >>> normalizedEq Common.Posix "/a/../c" "/a/../c" +-- True -- --- >>> normalize Common.Windows "\\\\server\\test" --- "\\\\server\\test" +-- >>> normalizedEq Common.Posix "./bob/fred/" "bob/fred/" +-- True -- --- >>> normalize Common.Windows "//server/test" --- "\\\\server\\test" +-- >>> normalizedEq Common.Posix "./" "./" +-- True -- --- >>> normalize Common.Windows "c:/file" --- "C:\\file" +-- >>> normalizedEq Common.Posix "./." "./" +-- True -- --- >>> normalize Common.Windows "\\file" --- "\\file" +-- >>> normalizedEq Common.Posix "/./" "/" +-- True -- --- >>> normalize Common.Windows "/file" --- "\\file" +-- >>> normalizedEq Common.Posix "/" "/" +-- True -- --- >>> normalize Common.Windows "/./" --- "\\" +-- >>> normalizedEq Common.Posix "bob/fred/." "bob/fred/" +-- True -- --- >>> normalize Common.Windows "/file/\\test////" --- "\\file\\test\\" +-- >>> normalizedEq Common.Posix "//home" "/home" +-- True -- --- >>> normalize Common.Windows "/file/./test" --- "\\file\\test" +-- >>> normalizedEq Common.Windows "c:\\file/bob\\" "C:\\file\\bob\\" +-- True -- --- >>> normalize Common.Windows "/test/file/../bob/fred/" --- "\\test\\file\\..\\bob\\fred\\" +-- >>> normalizedEq Common.Windows "c:\\" "C:\\" +-- True -- --- >>> normalize Common.Windows "../bob/fred/" --- "..\\bob\\fred\\" +-- >>> normalizedEq Common.Windows "C:.\\" "C:" +-- True -- --- >>> normalize Common.Windows "/a/../c" --- "\\a\\..\\c" +-- >>> normalizedEq Common.Windows "\\\\server\\test" "\\\\server\\test" +-- True -- --- >>> normalize Common.Windows "./bob/fred/" --- "bob\\fred\\" +-- >>> normalizedEq Common.Windows "//server/test" "\\\\server\\test" +-- True -- --- >>> normalize Common.Windows "./" --- ".\\" +-- >>> normalizedEq Common.Windows "c:/file" "C:\\file" +-- True -- --- >>> normalize Common.Windows "./." --- ".\\" +-- >>> normalizedEq Common.Windows "/file" "\\file" +-- True -- --- >>> normalize Common.Windows "/./" --- "\\" +-- >>> normalizedEq Common.Windows "\\" "\\" +-- True -- --- >>> normalize Common.Windows "/" --- "\\" +-- >>> normalizedEq Common.Windows "/./" "\\" +-- True -- --- >>> normalize Common.Windows "bob/fred/." --- "bob\\fred\\" --- --- >>> normalize Common.Windows "//home" --- "\\\\home" --- -{-# INLINE normalize #-} -normalize :: forall a. (Unbox a, Integral a) => OS -> Array a -> Array a -normalize os arr = - let (a, b) = splitDrive os arr - drv = normalizeDrive os a - pth = normalizePath os b - drvLen = Array.length drv - pthLen = Array.length pth - arrLen = Array.length arr - in if drvLen == 0 && pthLen == 0 && arrLen > 0 - then - if arrLen >= 2 - then - let x = unsafeIndexChar 0 arr - y = unsafeIndexChar 1 arr - in - if x == '.' && isSeparator os y - then Array.fromListN 2 - [ordIntegral '.', primarySeparatorIntegral os] - else Array.fromListN 1 [ordIntegral '.'] - else Array.fromListN 1 [ordIntegral '.'] - else if drvLen == 0 - then pth - else if pthLen == 0 - then drv - else Array.unsafeFreeze $ unsafePerformIO $ do - let x = unsafeIndexChar (drvLen - 1) drv - if isSeparator os x - then do - marr <- MutArray.emptyOf (drvLen + pthLen) - marr1 <- MutArray.unsafeSplice marr (Array.unsafeThaw drv) - MutArray.unsafeSplice marr1 (Array.unsafeThaw pth) - else do - marr <- MutArray.emptyOf (drvLen + pthLen + 1) - marr1 <- MutArray.unsafeSplice marr (Array.unsafeThaw drv) - marr2 <- - MutArray.unsafeSnoc - marr1 (ordIntegral (primarySeparator os)) - MutArray.unsafeSplice marr2 (Array.unsafeThaw pth) +normalizedEq :: (Integral a, Unbox a) => OS -> Array a -> Array a -> Bool +normalizedEq Posix a b = unsafePerformIO $ do + let absA = isAbsoluteLocation a + absB = isAbsoluteLocation b + notFA = isNotFileLocation Posix a + notFB = isNotFileLocation Posix b + if absA == absB && notFA == notFB + then Stream.eqBy Array.byteEq (splitPath Posix a) (splitPath Posix b) + else pure False +normalizedEq Windows a b = unsafePerformIO $ do + let (da, pa) = spanDrive a + (db, pb) = spanDrive b + nFA = isNotFileLocation Windows a + nFB = isNotFileLocation Windows b + if nFA == nFB && Array.byteEq (normalizeDrive da) (normalizeDrive db) + then Stream.eqBy Array.byteEq (splitPath Windows pa) (splitPath Windows pb) + else pure False diff --git a/core/src/Streamly/Internal/FileSystem/PosixPath.hs b/core/src/Streamly/Internal/FileSystem/PosixPath.hs index 513bf29f2a..25314e5b1f 100644 --- a/core/src/Streamly/Internal/FileSystem/PosixPath.hs +++ b/core/src/Streamly/Internal/FileSystem/PosixPath.hs @@ -41,7 +41,7 @@ module Streamly.Internal.FileSystem.OS_PATH -- * Conversions , IsPath (..) , adapt - , normalize + , normalizedEq -- * Construction , fromChunk @@ -362,48 +362,43 @@ append (OS_PATH a) (OS_PATH b) = $ Common.append Common.OS_NAME (Common.toString Unicode.UNICODE_DECODER) a b --- | Normalize the path. +-- | Compare 2 paths in their normalized form -- --- The behaviour is similar to FilePath.normalise. +-- >>> Path.normalizedEq [path|/file/\\test////|] [path|/file/\\test/|] +-- True -- --- >>> Path.toString $ Path.normalize $ [path|/file/\test////|] --- "/file/\\test/" +-- >>> Path.normalizedEq [path|/file/./test|] [path|/file/test|] +-- True -- --- >>> Path.toString $ Path.normalize $ [path|/file/./test|] --- "/file/test" +-- >>> Path.normalizedEq [path|/test/file/../bob/fred/|] [path|/test/file/../bob/fred/|] +-- True -- --- >>> Path.toString $ Path.normalize $ [path|/test/file/../bob/fred/|] --- "/test/file/../bob/fred/" +-- >>> Path.normalizedEq [path|../bob/fred/|] [path|../bob/fred/|] +-- True -- --- >>> Path.toString $ Path.normalize $ [path|../bob/fred/|] --- "../bob/fred/" +-- >>> Path.normalizedEq [path|/a/../c|] [path|/a/../c|] +-- True -- --- >>> Path.toString $ Path.normalize $ [path|/a/../c|] --- "/a/../c" +-- >>> Path.normalizedEq [path|./bob/fred/|] [path|bob/fred/|] +-- True -- --- >>> Path.toString $ Path.normalize $ [path|./bob/fred/|] --- "bob/fred/" +-- >>> Path.normalizedEq [path|./|] [path|./|] +-- True -- --- >>> Path.toString $ Path.normalize $ [path|.|] --- "." +-- >>> Path.normalizedEq [path|./.|] [path|./|] +-- True -- --- >>> Path.toString $ Path.normalize $ [path|./|] --- "./" +-- >>> Path.normalizedEq [path|/./|] [path|/|] +-- True -- --- >>> Path.toString $ Path.normalize $ [path|./.|] --- "./" +-- >>> Path.normalizedEq [path|/|] [path|/|] +-- True -- --- >>> Path.toString $ Path.normalize $ [path|/./|] --- "/" +-- >>> Path.normalizedEq [path|bob/fred/.|] [path|bob/fred/|] +-- True -- --- >>> Path.toString $ Path.normalize $ [path|/|] --- "/" +-- >>> Path.normalizedEq [path|//home|] [path|/home|] +-- True -- --- >>> Path.toString $ Path.normalize $ [path|bob/fred/.|] --- "bob/fred/" --- --- >>> Path.toString $ Path.normalize $ [path|//home|] --- "/home" --- -normalize :: OS_PATH -> OS_PATH -normalize (OS_PATH a) = OS_PATH $ Common.normalize Common.OS_NAME a +normalizedEq :: OS_PATH -> OS_PATH -> Bool +normalizedEq (OS_PATH a) (OS_PATH b) = Common.normalizedEq Common.OS_NAME a b diff --git a/core/streamly-core.cabal b/core/streamly-core.cabal index b8df8a60a2..b9fe00aca9 100644 --- a/core/streamly-core.cabal +++ b/core/streamly-core.cabal @@ -457,8 +457,8 @@ library -- Only those modules should be here which are fully re-exported via some -- other module. - other-modules: - Streamly.Internal.Data.Fold.Step + + , Streamly.Internal.Data.Fold.Step , Streamly.Internal.Data.Fold.Type , Streamly.Internal.Data.Fold.Combinators , Streamly.Internal.Data.Fold.Container From 7230d9caceaed24b667b863ff2b8c8ba40ca577d Mon Sep 17 00:00:00 2001 From: Adithya Kumar Date: Thu, 19 Dec 2024 22:38:46 +0530 Subject: [PATCH 3/6] fixup! Add a normalized path equality check operation --- streamly.cabal | 1 - test/Streamly/Test/FileSystem/Path.hs | 61 --------------------------- test/streamly-tests.cabal | 6 --- 3 files changed, 68 deletions(-) delete mode 100644 test/Streamly/Test/FileSystem/Path.hs diff --git a/streamly.cabal b/streamly.cabal index ca092d096b..6c7295e28b 100644 --- a/streamly.cabal +++ b/streamly.cabal @@ -127,7 +127,6 @@ extra-source-files: test/Streamly/Test/FileSystem/Event/Windows.hs test/Streamly/Test/FileSystem/Event/Linux.hs test/Streamly/Test/FileSystem/Handle.hs - test/Streamly/Test/FileSystem/Path.hs test/Streamly/Test/Network/Socket.hs test/Streamly/Test/Network/Inet/TCP.hs test/Streamly/Test/Prelude.hs diff --git a/test/Streamly/Test/FileSystem/Path.hs b/test/Streamly/Test/FileSystem/Path.hs deleted file mode 100644 index e2f7873740..0000000000 --- a/test/Streamly/Test/FileSystem/Path.hs +++ /dev/null @@ -1,61 +0,0 @@ --- | --- Module : Streamly.Test.FileSystem.Path --- Copyright : (c) 2021 Composewell Technologies --- License : BSD-3-Clause --- Maintainer : streamly@composewell.com --- Stability : experimental --- Portability : GHC --- - -module Streamly.Test.FileSystem.Path (main) where - -import qualified System.FilePath as FilePath -import qualified Streamly.Internal.FileSystem.Path as Path - -import Test.Hspec as H - -moduleName :: String -moduleName = "FileSystem.Path" - -testNormalize :: String -> Spec -testNormalize inp = - it ("normalize: " ++ show inp) $ do - p <- Path.fromString inp - let expected = FilePath.normalise inp - got = Path.toString (Path.normalize p) - got `shouldBe` expected - -main :: IO () -main = - hspec $ - H.parallel $ - describe moduleName $ do - describe "normalize" $ do - -- Primarily for Windows - testNormalize "C:\\" - testNormalize "C:" - testNormalize "\\\\?\\c:\\" - testNormalize "c:\\file/bob\\" - testNormalize "c:\\" - testNormalize "c:\\\\\\\\" - testNormalize "C:.\\" - testNormalize "\\\\server\\test" - testNormalize "//server/test" - testNormalize "c:/file" - testNormalize "/file" - testNormalize "\\" - -- Primarily for Posix - testNormalize "/./" - testNormalize "/file/\\test////" - testNormalize "/file/./test" - testNormalize "/test/file/../bob/fred/" - testNormalize "../bob/fred/" - testNormalize "/a/../c" - testNormalize "./bob/fred/" - testNormalize "." - testNormalize "./" - testNormalize "./." - testNormalize "/./" - testNormalize "/" - testNormalize "bob/fred/." - testNormalize "//home" diff --git a/test/streamly-tests.cabal b/test/streamly-tests.cabal index 4564d2a87b..bef27c55f4 100644 --- a/test/streamly-tests.cabal +++ b/test/streamly-tests.cabal @@ -445,12 +445,6 @@ test-suite FileSystem.Handle if flag(use-streamly-core) buildable: False -test-suite FileSystem.Path - import: test-options - type: exitcode-stdio-1.0 - main-is: Streamly/Test/FileSystem/Path.hs - ghc-options: -main-is Streamly.Test.FileSystem.Path.main - test-suite Network.Inet.TCP import: lib-options type: exitcode-stdio-1.0 From 1ff37a3b9fdcec425c76f2f6bb08c68105ecfa41 Mon Sep 17 00:00:00 2001 From: Adithya Kumar Date: Thu, 19 Dec 2024 22:39:20 +0530 Subject: [PATCH 4/6] fixup! fixup! Add a normalized path equality check operation --- core/streamly-core.cabal | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/streamly-core.cabal b/core/streamly-core.cabal index b9fe00aca9..b8df8a60a2 100644 --- a/core/streamly-core.cabal +++ b/core/streamly-core.cabal @@ -457,8 +457,8 @@ library -- Only those modules should be here which are fully re-exported via some -- other module. - - , Streamly.Internal.Data.Fold.Step + other-modules: + Streamly.Internal.Data.Fold.Step , Streamly.Internal.Data.Fold.Type , Streamly.Internal.Data.Fold.Combinators , Streamly.Internal.Data.Fold.Container From 72c96bdff7bcf086ed475709efc0939766d0bce4 Mon Sep 17 00:00:00 2001 From: Adithya Kumar Date: Thu, 19 Dec 2024 22:43:34 +0530 Subject: [PATCH 5/6] fixup! fixup! fixup! Add a normalized path equality check operation --- core/src/Streamly/Internal/FileSystem/Path/Common.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/Streamly/Internal/FileSystem/Path/Common.hs b/core/src/Streamly/Internal/FileSystem/Path/Common.hs index 1b580ece1a..a421515715 100644 --- a/core/src/Streamly/Internal/FileSystem/Path/Common.hs +++ b/core/src/Streamly/Internal/FileSystem/Path/Common.hs @@ -773,11 +773,11 @@ splitPath os arr = -- isNotFileLocation :: (Integral a, Unbox a) => OS -> Array a -> Bool isNotFileLocation os arr = - (arrLen > 0 && (isSeparator os lastChar || lastChar == winDriveSep)) + (arrLen == 0) + || (arrLen > 0 && (isSeparator os lastChar)) || (arrLen > 1 && isSeparator os sndlastChar && lastChar == '.') where - winDriveSep = ':' arrLen = Array.length arr lastChar = unsafeIndexChar (arrLen - 1) arr sndlastChar = unsafeIndexChar (arrLen - 2) arr @@ -863,8 +863,8 @@ normalizedEq Posix a b = unsafePerformIO $ do normalizedEq Windows a b = unsafePerformIO $ do let (da, pa) = spanDrive a (db, pb) = spanDrive b - nFA = isNotFileLocation Windows a - nFB = isNotFileLocation Windows b + nFA = isNotFileLocation Windows pa + nFB = isNotFileLocation Windows pb if nFA == nFB && Array.byteEq (normalizeDrive da) (normalizeDrive db) then Stream.eqBy Array.byteEq (splitPath Windows pa) (splitPath Windows pb) else pure False From 005316336c188da4e253f3341b02d6b18c168a3a Mon Sep 17 00:00:00 2001 From: Adithya Kumar Date: Thu, 19 Dec 2024 22:46:53 +0530 Subject: [PATCH 6/6] fixup! fixup! fixup! fixup! Add a normalized path equality check operation --- core/src/Streamly/Internal/FileSystem/Path/Common.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/Streamly/Internal/FileSystem/Path/Common.hs b/core/src/Streamly/Internal/FileSystem/Path/Common.hs index a421515715..113600b866 100644 --- a/core/src/Streamly/Internal/FileSystem/Path/Common.hs +++ b/core/src/Streamly/Internal/FileSystem/Path/Common.hs @@ -742,7 +742,7 @@ splitPath os arr = -- :} -- -- >>> isNotFileLocation Common.Posix "" --- False +-- True -- -- >>> isNotFileLocation Common.Posix "/" -- True @@ -763,7 +763,7 @@ splitPath os arr = -- True -- -- >>> isNotFileLocation Common.Windows "" --- False +-- True -- -- >>> isNotFileLocation Common.Windows "home\\user" -- False