Skip to content

Commit

Permalink
Test the correctness of functions used in the DirIO benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
adithyaov committed Feb 17, 2025
1 parent 7115878 commit cfd2120
Showing 1 changed file with 72 additions and 17 deletions.
89 changes: 72 additions & 17 deletions benchmark/Streamly/Benchmark/FileSystem/DirIO.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
-- Portability : GHC

{-# LANGUAGE CPP #-}
{-# LANGUAGE QuasiQuotes #-}

module Main (main) where

Expand All @@ -16,14 +17,18 @@ module Main (main) where

import Data.Maybe (fromJust)
import Data.Word (Word8)
import GHC.IO.Encoding (setLocaleEncoding, utf8)
import Streamly.Data.Array (Array)
import Streamly.Data.Stream (Stream)
import Streamly.Data.Unfold (Unfold)
import Streamly.FileSystem.Path (Path)
import System.Process (callCommand)
import Streamly.Unicode.String (str)
import System.Process (callCommand, readCreateProcess, shell)
import Streamly.Benchmark.Common (o_1_space_prefix)
import Streamly.Benchmark.Common.Handle (scratchDir)

import qualified Streamly.Unicode.Stream as Unicode
import qualified Streamly.Internal.Unicode.Stream as Unicode (lines)
import qualified Streamly.Data.Stream.Prelude as Stream
import qualified Streamly.Data.Fold as Fold
import qualified Streamly.Data.Array as Array
Expand All @@ -48,7 +53,7 @@ import Prelude hiding (last, length)
import Test.Tasty.Bench hiding (env)

--------------------------------------------------------------------------------
-- Functions
-- Helpers
--------------------------------------------------------------------------------

_concatIterateWith :: (StreamK.StreamK IO (Either Path Path)
Expand Down Expand Up @@ -82,9 +87,9 @@ _streamDirByteChunked
:: Either [Path] b -> Stream IO (Either [Path] (Array Word8))
_streamDirByteChunked = either Dir.readEitherByteChunks (const Stream.nil)

_streamDirByteChunkedMaybe
streamDirByteChunkedMaybe
:: Either [Path] b -> Maybe (Stream IO (Either [Path] (Array Word8)))
_streamDirByteChunkedMaybe =
streamDirByteChunkedMaybe =
either (Just . Dir.readEitherByteChunks) (const Nothing)

_streamDirChunked :: Either [Path] b -> Stream IO (Either [Path] [Path])
Expand All @@ -93,31 +98,32 @@ _streamDirChunked = either Dir.readEitherChunks (const Stream.nil)
streamDirChunkedMaybe :: Either [Path] b -> Maybe (Stream IO (Either [Path] [Path]))
streamDirChunkedMaybe = either (Just . Dir.readEitherChunks) (const Nothing)

--------------------------------------------------------------------------------
-- Functions
--------------------------------------------------------------------------------

#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__)
-- Fastest implementation, only works for posix as of now.
listDirByteChunked :: FilePath -> IO ()
listDirByteChunked :: FilePath -> Stream IO (Array Word8)
listDirByteChunked inp = do
Stream.fold Fold.drain
$ Stream.catRights
$ Stream.concatIterateDfs streamDirChunkedMaybe
Stream.catRights
$ Stream.concatIterateDfs streamDirByteChunkedMaybe
$ Stream.fromPure (Left [fromJust $ Path.fromString inp])
#endif

-- Faster than the listDir implementation below
listDirChunked :: FilePath -> IO ()
listDirChunked :: [Char] -> Stream IO Word8
listDirChunked inp = do
Stream.fold Fold.drain
$ Stream.unfoldEachEndBy 10 Array.reader
Stream.unfoldEachEndBy 10 Array.reader
$ fmap Path.toChunk
$ Stream.unfoldEach Unfold.fromList
$ fmap (either id id)
$ Stream.concatIterateDfs streamDirChunkedMaybe
$ Stream.fromPure (Left [fromJust $ Path.fromString inp])

listDir :: FilePath -> IO ()
listDir :: [Char] -> Stream IO Word8
listDir inp = do
Stream.fold Fold.drain
$ Stream.unfoldEachEndBy 10 Array.reader
Stream.unfoldEachEndBy 10 Array.reader
$ fmap (Path.toChunk . either id id)
$ Stream.unfoldIterateDfs unfoldDir
$ Stream.fromPure (Left (fromJust $ Path.fromString inp))
Expand All @@ -138,16 +144,65 @@ createDirStucture = do
callCommand cmd
return dirRoot

testListDirCorrectness :: FilePath -> IO ()
testListDirCorrectness dirStructure = do
-- XXX We should just use streamly-process instead
findRes <- readCreateProcess ((shell [str|find #{dirStructure}|])) ""
let strmBase =
StreamK.toStream
$ StreamK.sortBy compare
$ StreamK.fromStream
$ Unicode.lines Fold.toList $ Stream.fromList findRes
let strm1 =
StreamK.toStream
$ StreamK.sortBy compare
$ StreamK.fromStream
$ Unicode.lines Fold.toList
$ Unicode.decodeUtf8 $ listDirChunked dirStructure
strm2 =
StreamK.toStream
$ StreamK.sortBy compare
$ StreamK.fromStream
$ Unicode.lines Fold.toList
$ Unicode.decodeUtf8 $ listDir dirStructure
strm3 =
StreamK.toStream
$ StreamK.sortBy compare
$ StreamK.fromStream
$ Unicode.lines Fold.toList
$ Unicode.decodeUtf8Chunks $ listDirByteChunked dirStructure
isCorrect1 <- Stream.eqBy (==) strm1 strmBase
if isCorrect1
then putStrLn "The listDirChunked output is correct."
else error "Output test failed"
isCorrect2 <- Stream.eqBy (==) strm2 strmBase
if isCorrect2
then putStrLn "The listDir output is correct."
else error "Output test failed"
-- NOTE: The behaviour of listDirByteChunked is slightly different. It does
-- not emit the first element.
isCorrect3 <- Stream.eqBy (==) (Stream.cons dirStructure strm3) strmBase
if isCorrect3
then putStrLn "The listDirByteChunked output is correct."
else error "Output test failed"

-- | List the current directory recursively
main :: IO ()
main = do
setLocaleEncoding utf8

bigDirStructure <- createDirStucture
testListDirCorrectness "benchmark-tmp/dir-structure"

defaultMain
[ bgroup (o_1_space_prefix moduleName)
[ bench "listDirChunked" $ nfIO $ listDirChunked bigDirStructure
[ bench "listDirChunked" $ nfIO $
Stream.fold Fold.drain $ listDirChunked bigDirStructure
#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__)
, bench "listDirByteChunked" $ nfIO $ listDirByteChunked bigDirStructure
, bench "listDirByteChunked" $ nfIO $
Stream.fold Fold.drain $ listDirByteChunked bigDirStructure
#endif
, bench "listDir" $ nfIO $ listDir bigDirStructure
, bench "listDir" $ nfIO $
Stream.fold Fold.drain $ listDir bigDirStructure
]
]

0 comments on commit cfd2120

Please sign in to comment.