From a9b2dcf7c09b6039f5fab33ad8e1bde3cd6251ba Mon Sep 17 00:00:00 2001 From: John Ky Date: Sun, 30 Jun 2019 21:13:58 +1000 Subject: [PATCH] Use hw-json-standard-cursor-dependency --- .../HaskellWorks/Data/Json/Standard/Cursor.hs | 3 - .../Data/Json/Standard/Cursor/Fast.hs | 44 --- .../Data/Json/Standard/Cursor/Generic.hs | 50 ---- .../Data/Json/Standard/Cursor/Index.hs | 24 -- .../Json/Standard/Cursor/Internal/Blank.hs | 58 ---- .../Standard/Cursor/Internal/BlankedJson.hs | 24 -- .../Json/Standard/Cursor/Internal/IbBp.hs | 24 -- .../Standard/Cursor/Internal/MakeIndex.hs | 134 --------- .../Standard/Cursor/Internal/StateMachine.hs | 109 ------- .../Cursor/Internal/ToBalancedParens64.hs | 28 -- .../Cursor/Internal/ToInterestBits64.hs | 31 -- .../Json/Standard/Cursor/Internal/Word8.hs | 72 ----- .../Data/Json/Standard/Cursor/Load/Cursor.hs | 44 --- .../Data/Json/Standard/Cursor/Load/Raw.hs | 20 -- .../Data/Json/Standard/Cursor/SemiIndex.hs | 269 ------------------ .../Data/Json/Standard/Cursor/Slow.hs | 43 --- .../Data/Json/Standard/Cursor/Specific.hs | 11 - .../Data/Json/Standard/Cursor/Type.hs | 71 ----- hw-json.cabal | 94 ++---- 19 files changed, 31 insertions(+), 1122 deletions(-) delete mode 100644 hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor.hs delete mode 100644 hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Fast.hs delete mode 100644 hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Generic.hs delete mode 100644 hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Index.hs delete mode 100644 hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/Blank.hs delete mode 100644 hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/BlankedJson.hs delete mode 100644 hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/IbBp.hs delete mode 100644 hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/MakeIndex.hs delete mode 100644 hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/StateMachine.hs delete mode 100644 hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/ToBalancedParens64.hs delete mode 100644 hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/ToInterestBits64.hs delete mode 100644 hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/Word8.hs delete mode 100644 hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Load/Cursor.hs delete mode 100644 hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Load/Raw.hs delete mode 100644 hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/SemiIndex.hs delete mode 100644 hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Slow.hs delete mode 100644 hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Specific.hs delete mode 100644 hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Type.hs diff --git a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor.hs b/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor.hs deleted file mode 100644 index d4c174d..0000000 --- a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor.hs +++ /dev/null @@ -1,3 +0,0 @@ -module HaskellWorks.Data.Json.Standard.Cursor - ( - ) where diff --git a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Fast.hs b/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Fast.hs deleted file mode 100644 index f5d34f8..0000000 --- a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Fast.hs +++ /dev/null @@ -1,44 +0,0 @@ -{-# LANGUAGE TypeFamilies #-} - -module HaskellWorks.Data.Json.Standard.Cursor.Fast - ( Cursor - , fromByteString - , fromForeignRegion - , fromString - ) where - -import Data.Word -import Foreign.ForeignPtr -import HaskellWorks.Data.Json.Standard.Cursor.Generic -import HaskellWorks.Data.Json.Standard.Cursor.Specific -import HaskellWorks.Data.RankSelect.CsPoppy - -import qualified Data.ByteString as BS -import qualified Data.ByteString.Char8 as BSC -import qualified Data.ByteString.Internal as BSI -import qualified Data.Vector.Storable as DVS -import qualified HaskellWorks.Data.BalancedParens.RangeMin as RM -import qualified HaskellWorks.Data.FromForeignRegion as F -import qualified HaskellWorks.Data.Json.Standard.Cursor.Internal.IbBp as J - -data Fast - -instance SpecificCursor Fast where - type CursorOf Fast = Cursor - -type Cursor = GenericCursor BS.ByteString CsPoppy (RM.RangeMin (DVS.Vector Word64)) - -fromByteString :: BS.ByteString -> Cursor -fromByteString bs = GenericCursor - { cursorText = bs - , interests = makeCsPoppy ib - , balancedParens = RM.mkRangeMin bp - , cursorRank = 1 - } - where J.IbBp ib bp = J.toIbBp bs - -fromForeignRegion :: F.ForeignRegion -> Cursor -fromForeignRegion (fptr, offset, size) = fromByteString (BSI.fromForeignPtr (castForeignPtr fptr) offset size) - -fromString :: String -> Cursor -fromString = fromByteString . BSC.pack diff --git a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Generic.hs b/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Generic.hs deleted file mode 100644 index f231129..0000000 --- a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Generic.hs +++ /dev/null @@ -1,50 +0,0 @@ -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE InstanceSigs #-} -{-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeFamilies #-} - -module HaskellWorks.Data.Json.Standard.Cursor.Generic - ( GenericCursor(..) - , jsonCursorPos - ) where - -import HaskellWorks.Data.Positioning -import HaskellWorks.Data.RankSelect.Base.Rank0 -import HaskellWorks.Data.RankSelect.Base.Rank1 -import HaskellWorks.Data.RankSelect.Base.Select1 -import HaskellWorks.Data.TreeCursor -import Prelude hiding (drop) - -import qualified HaskellWorks.Data.BalancedParens as BP - -data GenericCursor t v w = GenericCursor - { cursorText :: !t - , interests :: !v - , balancedParens :: !w - , cursorRank :: !Count - } - deriving (Eq, Show) - -instance (BP.BalancedParens u, Rank1 u, Rank0 u) => TreeCursor (GenericCursor t v u) where - firstChild :: GenericCursor t v u -> Maybe (GenericCursor t v u) - firstChild k = let mq = BP.firstChild (balancedParens k) (cursorRank k) in (\q -> k { cursorRank = q }) <$> mq - - nextSibling :: GenericCursor t v u -> Maybe (GenericCursor t v u) - nextSibling k = (\q -> k { cursorRank = q }) <$> BP.nextSibling (balancedParens k) (cursorRank k) - - parent :: GenericCursor t v u -> Maybe (GenericCursor t v u) - parent k = let mq = BP.parent (balancedParens k) (cursorRank k) in (\q -> k { cursorRank = q }) <$> mq - - depth :: GenericCursor t v u -> Maybe Count - depth k = BP.depth (balancedParens k) (cursorRank k) - - subtreeSize :: GenericCursor t v u -> Maybe Count - subtreeSize k = BP.subtreeSize (balancedParens k) (cursorRank k) - -jsonCursorPos :: (Rank1 w, Select1 v) => GenericCursor s v w -> Position -jsonCursorPos k = toPosition (select1 ik (rank1 bpk (cursorRank k)) - 1) - where ik = interests k - bpk = balancedParens k diff --git a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Index.hs b/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Index.hs deleted file mode 100644 index 3b6c779..0000000 --- a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Index.hs +++ /dev/null @@ -1,24 +0,0 @@ -{-# LANGUAGE BangPatterns #-} - -module HaskellWorks.Data.Json.Standard.Cursor.Index - ( indexJson - ) where - -import Data.Word -import HaskellWorks.Data.BalancedParens.Simple -import HaskellWorks.Data.Json.Standard.Cursor.Generic - -import qualified Data.ByteString as BS -import qualified Data.Vector.Storable as DVS -import qualified HaskellWorks.Data.ByteString as BS -import qualified HaskellWorks.Data.Json.Standard.Cursor.Slow as SLOW - -indexJson :: String -> IO () -indexJson filename = do - bs <- BS.mmap filename - -- We use the SLOW reference implementation because we are writing to a file and will never query. - let GenericCursor _ !ib (SimpleBalancedParens !bp) _ = SLOW.fromByteString bs - let wib = DVS.unsafeCast ib :: DVS.Vector Word8 - let wbp = DVS.unsafeCast bp :: DVS.Vector Word8 - BS.writeFile (filename ++ ".ib.idx") (BS.toByteString wib) - BS.writeFile (filename ++ ".bp.idx") (BS.toByteString wbp) diff --git a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/Blank.hs b/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/Blank.hs deleted file mode 100644 index f27709b..0000000 --- a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/Blank.hs +++ /dev/null @@ -1,58 +0,0 @@ -{-# LANGUAGE BangPatterns #-} - -module HaskellWorks.Data.Json.Standard.Cursor.Internal.Blank - ( blankJson - ) where - -import Data.ByteString (ByteString) -import Data.Word -import Data.Word8 -import HaskellWorks.Data.Json.Standard.Cursor.Internal.Word8 -import Prelude as P - -import qualified Data.ByteString as BS - -data BlankState - = Escaped - | InJson - | InString - | InNumber - | InIdent - -blankJson :: [BS.ByteString] -> [BS.ByteString] -blankJson = blankJson' InJson - -blankJson' :: BlankState -> [BS.ByteString] -> [BS.ByteString] -blankJson' lastState as = case as of - (bs:bss) -> - let (!cs, Just (!nextState, _)) = BS.unfoldrN (BS.length bs) blankByteString (lastState, bs) in - cs:blankJson' nextState bss - [] -> [] - where - blankByteString :: (BlankState, ByteString) -> Maybe (Word8, (BlankState, ByteString)) - blankByteString (InJson, bs) = case BS.uncons bs of - Just (!c, !cs) | isLeadingDigit c -> Just (_1 , (InNumber , cs)) - Just (!c, !cs) | c == _quotedbl -> Just (_parenleft , (InString , cs)) - Just (!c, !cs) | isAlphabetic c -> Just (c , (InIdent , cs)) - Just (!c, !cs) -> Just (c , (InJson , cs)) - Nothing -> Nothing - blankByteString (InString, bs) = case BS.uncons bs of - Just (!c, !cs) | c == _backslash -> Just (_space , (Escaped , cs)) - Just (!c, !cs) | c == _quotedbl -> Just (_parenright , (InJson , cs)) - Just (_ , !cs) -> Just (_space , (InString , cs)) - Nothing -> Nothing - blankByteString (Escaped, bs) = case BS.uncons bs of - Just (_, !cs) -> Just (_space, (InString, cs)) - Nothing -> Nothing - blankByteString (InNumber, bs) = case BS.uncons bs of - Just (!c, !cs) | isTrailingDigit c -> Just (_0 , (InNumber , cs)) - Just (!c, !cs) | c == _quotedbl -> Just (_parenleft , (InString , cs)) - Just (!c, !cs) | isAlphabetic c -> Just (c , (InIdent , cs)) - Just (!c, !cs) -> Just (c , (InJson , cs)) - Nothing -> Nothing - blankByteString (InIdent, bs) = case BS.uncons bs of - Just (!c, !cs) | isAlphabetic c -> Just (_underscore , (InIdent , cs)) - Just (!c, !cs) | isLeadingDigit c -> Just (_1 , (InNumber , cs)) - Just (!c, !cs) | c == _quotedbl -> Just (_parenleft , (InString , cs)) - Just (!c, !cs) -> Just (c , (InJson , cs)) - Nothing -> Nothing diff --git a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/BlankedJson.hs b/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/BlankedJson.hs deleted file mode 100644 index 0cb04b6..0000000 --- a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/BlankedJson.hs +++ /dev/null @@ -1,24 +0,0 @@ - -module HaskellWorks.Data.Json.Standard.Cursor.Internal.BlankedJson - ( BlankedJson(..) - , ToBlankedJson(..) - , toBlankedJsonTyped - ) where - -import HaskellWorks.Data.ByteString -import HaskellWorks.Data.Json.Standard.Cursor.Internal.Blank - -import qualified Data.ByteString as BS - -newtype BlankedJson = BlankedJson - { unBlankedJson :: [BS.ByteString] - } deriving (Eq, Show) - -class ToBlankedJson a where - toBlankedJson :: a -> [BS.ByteString] - -instance ToBlankedJson BS.ByteString where - toBlankedJson bs = blankJson (chunkedBy 4096 bs) - -toBlankedJsonTyped :: ToBlankedJson a => a -> BlankedJson -toBlankedJsonTyped = BlankedJson . toBlankedJson diff --git a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/IbBp.hs b/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/IbBp.hs deleted file mode 100644 index 5dfd8b6..0000000 --- a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/IbBp.hs +++ /dev/null @@ -1,24 +0,0 @@ -module HaskellWorks.Data.Json.Standard.Cursor.Internal.IbBp where - -import Data.Word - -import qualified Data.ByteString as BS -import qualified Data.Vector.Storable as DVS -import qualified HaskellWorks.Data.Json.Standard.Cursor.Internal.BlankedJson as J -import qualified HaskellWorks.Data.Json.Standard.Cursor.Internal.ToBalancedParens64 as J -import qualified HaskellWorks.Data.Json.Standard.Cursor.Internal.ToInterestBits64 as J - -data IbBp = IbBp - { ib :: DVS.Vector Word64 - , bp :: DVS.Vector Word64 - } - -class ToIbBp a where - toIbBp :: a -> IbBp - -instance ToIbBp BS.ByteString where - toIbBp bs = IbBp - { ib = J.toInterestBits64 blankedJson - , bp = J.toBalancedParens64 blankedJson - } - where blankedJson = J.toBlankedJsonTyped bs diff --git a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/MakeIndex.hs b/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/MakeIndex.hs deleted file mode 100644 index 50cc429..0000000 --- a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/MakeIndex.hs +++ /dev/null @@ -1,134 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE RankNTypes #-} - -module HaskellWorks.Data.Json.Standard.Cursor.Internal.MakeIndex - ( blankedJsonToInterestBits - , byteStringToBits - , blankedJsonToBalancedParens - , compressWordAsBit - , interestingWord8s - ) where - -import Control.Monad -import Data.Array.Unboxed ((!)) -import Data.ByteString (ByteString) -import Data.Int -import Data.Word -import Data.Word8 -import HaskellWorks.Data.Bits.BitWise -import Prelude as P - -import qualified Data.Array.Unboxed as A -import qualified Data.Bits as BITS -import qualified Data.ByteString as BS - -interestingWord8s :: A.UArray Word8 Word8 -interestingWord8s = A.array (0, 255) [ - (w, if w == _bracketleft || w == _braceleft || w == _parenleft || w == _t || w == _f || w == _n || w == _1 - then 1 - else 0) - | w <- [0 .. 255]] - -blankedJsonToInterestBits :: [BS.ByteString] -> [BS.ByteString] -blankedJsonToInterestBits = blankedJsonToInterestBits' "" - -padRight :: Word8 -> Int -> BS.ByteString -> BS.ByteString -padRight w n bs = if BS.length bs >= n then bs else fst (BS.unfoldrN n gen bs) - where gen :: ByteString -> Maybe (Word8, ByteString) - gen cs = case BS.uncons cs of - Just (c, ds) -> Just (c, ds) - Nothing -> Just (w, BS.empty) - -blankedJsonToInterestBits' :: BS.ByteString -> [BS.ByteString] -> [BS.ByteString] -blankedJsonToInterestBits' rs as = case as of - (bs:bss) -> - let cs = if BS.length rs /= 0 then BS.concat [rs, bs] else bs in - let lencs = BS.length cs in - let q = lencs + 7 `quot` 8 in - let (ds, es) = BS.splitAt (q * 8) cs in - let (fs, _) = BS.unfoldrN q gen ds in - fs:blankedJsonToInterestBits' es bss - [] -> [] - where gen :: ByteString -> Maybe (Word8, ByteString) - gen ds = if BS.length ds == 0 - then Nothing - else Just ( BS.foldr (\b m -> (interestingWord8s ! b) .|. (m .<. 1)) 0 (padRight 0 8 (BS.take 8 ds)) - , BS.drop 8 ds - ) - -repartitionMod8 :: BS.ByteString -> BS.ByteString -> (BS.ByteString, BS.ByteString) -repartitionMod8 aBS bBS = (BS.take cLen abBS, BS.drop cLen abBS) - where abBS = BS.concat [aBS, bBS] - abLen = BS.length abBS - cLen = (abLen `div` 8) * 8 - -compressWordAsBit :: [BS.ByteString] -> [BS.ByteString] -compressWordAsBit = compressWordAsBit' BS.empty - -compressWordAsBit' :: BS.ByteString -> [BS.ByteString] -> [BS.ByteString] -compressWordAsBit' aBS as = case as of - (bBS:bBSs) -> - let (cBS, dBS) = repartitionMod8 aBS bBS in - let (cs, _) = BS.unfoldrN (BS.length cBS + 7 `div` 8) gen cBS in - cs:compressWordAsBit' dBS bBSs - [] -> do - let (cs, _) = BS.unfoldrN (BS.length aBS + 7 `div` 8) gen aBS - [cs] - where gen :: ByteString -> Maybe (Word8, ByteString) - gen xs = if BS.length xs == 0 - then Nothing - else Just ( BS.foldr (\b m -> ((b .&. 1) .|. (m .<. 1))) 0 (padRight 0 8 (BS.take 8 xs)) - , BS.drop 8 xs - ) - -blankedJsonToBalancedParens :: [BS.ByteString] -> [BS.ByteString] -blankedJsonToBalancedParens as = case as of - (bs:bss) -> - let (cs, _) = BS.unfoldrN (BS.length bs * 2) gen (Nothing, bs) in - cs:blankedJsonToBalancedParens bss - [] -> [] - where gen :: (Maybe Bool, ByteString) -> Maybe (Word8, (Maybe Bool, ByteString)) - gen (Just True , bs) = Just (0xFF, (Nothing, bs)) - gen (Just False , bs) = Just (0x00, (Nothing, bs)) - gen (Nothing , bs) = case BS.uncons bs of - Just (c, cs) -> case balancedParensOf c of - MiniN -> gen (Nothing , cs) - MiniT -> Just (0xFF, (Nothing , cs)) - MiniF -> Just (0x00, (Nothing , cs)) - MiniTF -> Just (0xFF, (Just False , cs)) - Nothing -> Nothing - -data MiniBP = MiniN | MiniT | MiniF | MiniTF - -balancedParensOf :: Word8 -> MiniBP -balancedParensOf c = case c of - d | d == _braceleft -> MiniT - d | d == _braceright -> MiniF - d | d == _bracketleft -> MiniT - d | d == _bracketright -> MiniF - d | d == _parenleft -> MiniT - d | d == _parenright -> MiniF - d | d == _t -> MiniTF - d | d == _f -> MiniTF - d | d == _1 -> MiniTF - d | d == _n -> MiniTF - _ -> MiniN - -yieldBitsOfWord8 :: Word8 -> [Bool] -> [Bool] -yieldBitsOfWord8 w = - (((w .&. BITS.bit 0) /= 0):) . - (((w .&. BITS.bit 1) /= 0):) . - (((w .&. BITS.bit 2) /= 0):) . - (((w .&. BITS.bit 3) /= 0):) . - (((w .&. BITS.bit 4) /= 0):) . - (((w .&. BITS.bit 5) /= 0):) . - (((w .&. BITS.bit 6) /= 0):) . - (((w .&. BITS.bit 7) /= 0):) - -yieldBitsofWord8s :: [Word8] -> [Bool] -> [Bool] -yieldBitsofWord8s = P.foldr ((>>) . yieldBitsOfWord8) id - -byteStringToBits :: [BS.ByteString] -> [Bool] -> [Bool] -byteStringToBits as = case as of - (bs:bss) -> yieldBitsofWord8s (BS.unpack bs) . byteStringToBits bss - [] -> id diff --git a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/StateMachine.hs b/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/StateMachine.hs deleted file mode 100644 index 9d70991..0000000 --- a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/StateMachine.hs +++ /dev/null @@ -1,109 +0,0 @@ -{-# LANGUAGE BinaryLiterals #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE OverloadedStrings #-} - -module HaskellWorks.Data.Json.Standard.Cursor.Internal.StateMachine - ( lookupPhiTable - , lookupTransitionTable - , phiTable - , phiTableSimd - , transitionTable - , transitionTableSimd - , IntState(..) - , State(..) - ) where - -import Data.Word -import HaskellWorks.Data.Bits.BitWise - -import qualified Data.Vector as DV -import qualified Data.Vector.Storable as DVS -import qualified HaskellWorks.Data.Json.Standard.Cursor.Internal.Word8 as W8 - -{-# ANN module ("HLint: ignore Redundant guard" :: String) #-} - -newtype IntState = IntState Int deriving (Eq, Ord, Show, Num) - -data State = InJson | InString | InEscape | InValue deriving (Eq, Enum, Bounded, Show) - -phiTable :: DV.Vector (DVS.Vector Word8) -phiTable = DV.constructN 5 gos - where gos :: DV.Vector (DVS.Vector Word8) -> DVS.Vector Word8 - gos v = DVS.constructN 256 go - where vi = DV.length v - go :: DVS.Vector Word8 -> Word8 - go u = fromIntegral (snd (stateMachine (fromIntegral ui) (toEnum vi))) - where ui = DVS.length u -{-# NOINLINE phiTable #-} - -phiTable2 :: DVS.Vector Word8 -phiTable2 = DVS.constructN (4 * fromIntegral iLen) go - where iLen = 256 :: Int - go :: DVS.Vector Word8 -> Word8 - go u = fromIntegral (snd (stateMachine (fromIntegral ui) (toEnum (fromIntegral uj)))) - where (uj, ui) = fromIntegral (DVS.length u) `divMod` iLen -{-# NOINLINE phiTable2 #-} - -lookupPhiTable :: IntState -> Word8 -> Word8 -lookupPhiTable (IntState s) w = DVS.unsafeIndex phiTable2 (s * 256 + fromIntegral w) -{-# INLINE lookupPhiTable #-} - -phiTableSimd :: DVS.Vector Word32 -phiTableSimd = DVS.constructN 256 go - where go :: DVS.Vector Word32 -> Word32 - go v = (snd (stateMachine vi InJson ) .<. 0) .|. - (snd (stateMachine vi InString) .<. 8) .|. - (snd (stateMachine vi InEscape) .<. 16) .|. - (snd (stateMachine vi InValue ) .<. 24) - where vi = fromIntegral (DVS.length v) -{-# NOINLINE phiTableSimd #-} - -transitionTable :: DV.Vector (DVS.Vector Word8) -transitionTable = DV.constructN 5 gos - where gos :: DV.Vector (DVS.Vector Word8) -> DVS.Vector Word8 - gos v = DVS.constructN 256 go - where vi = DV.length v - go :: DVS.Vector Word8 -> Word8 - go u = fromIntegral (fromEnum (fst (stateMachine ui (toEnum vi)))) - where ui = fromIntegral (DVS.length u) -{-# NOINLINE transitionTable #-} - -transitionTable2 :: DVS.Vector Word8 -transitionTable2 = DVS.constructN (4 * fromIntegral iLen) go - where iLen = 256 :: Int - go :: DVS.Vector Word8 -> Word8 - go u = fromIntegral (fromEnum (fst (stateMachine (fromIntegral ui) (toEnum (fromIntegral uj))))) - where (uj, ui) = fromIntegral (DVS.length u) `divMod` iLen -{-# NOINLINE transitionTable2 #-} - -lookupTransitionTable :: IntState -> Word8 -> IntState -lookupTransitionTable (IntState s) w = fromIntegral (DVS.unsafeIndex transitionTable2 (s * 256 + fromIntegral w)) -{-# INLINE lookupTransitionTable #-} - -transitionTableSimd :: DVS.Vector Word64 -transitionTableSimd = DVS.constructN 256 go - where go :: DVS.Vector Word64 -> Word64 - go v = fromIntegral (fromEnum (fst (stateMachine vi InJson ))) .|. - fromIntegral (fromEnum (fst (stateMachine vi InString))) .|. - fromIntegral (fromEnum (fst (stateMachine vi InEscape))) .|. - fromIntegral (fromEnum (fst (stateMachine vi InValue ))) - where vi = fromIntegral (DVS.length v) -{-# NOINLINE transitionTableSimd #-} - -stateMachine :: Word8 -> State -> (State, Word32) -stateMachine c InJson | W8.isOpen c = (InJson , 0b110) -stateMachine c InJson | W8.isClose c = (InJson , 0b001) -stateMachine c InJson | W8.isDelim c = (InJson , 0b000) -stateMachine c InJson | W8.isValueChar c = (InValue , 0b111) -stateMachine c InJson | W8.isDoubleQuote c = (InString, 0b111) -stateMachine _ InJson | otherwise = (InJson , 0b000) -stateMachine c InString | W8.isDoubleQuote c = (InJson , 0b000) -stateMachine c InString | W8.isBackSlash c = (InEscape, 0b000) -stateMachine _ InString | otherwise = (InString, 0b000) -stateMachine _ InEscape | otherwise = (InString, 0b000) -stateMachine c InValue | W8.isOpen c = (InJson , 0b110) -stateMachine c InValue | W8.isClose c = (InJson , 0b001) -stateMachine c InValue | W8.isDelim c = (InJson , 0b000) -stateMachine c InValue | W8.isValueChar c = (InValue , 0b000) -stateMachine _ InValue | otherwise = (InJson , 0b000) -{-# INLINE stateMachine #-} diff --git a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/ToBalancedParens64.hs b/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/ToBalancedParens64.hs deleted file mode 100644 index 25544af..0000000 --- a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/ToBalancedParens64.hs +++ /dev/null @@ -1,28 +0,0 @@ -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE InstanceSigs #-} -{-# LANGUAGE MultiParamTypeClasses #-} - -module HaskellWorks.Data.Json.Standard.Cursor.Internal.ToBalancedParens64 - ( ToBalancedParens64(..) - ) where - -import Control.Applicative -import Data.Word -import HaskellWorks.Data.Json.Standard.Cursor.Internal.MakeIndex - -import qualified Data.ByteString.Lazy as LBS -import qualified Data.Vector.Storable as DVS -import qualified HaskellWorks.Data.Json.Standard.Cursor.Internal.BlankedJson as J - -genBitWordsForever :: LBS.ByteString -> Maybe (Word8, LBS.ByteString) -genBitWordsForever bs = LBS.uncons bs <|> Just (0, bs) -{-# INLINE genBitWordsForever #-} - -class ToBalancedParens64 a where - toBalancedParens64 :: a -> DVS.Vector Word64 - -instance ToBalancedParens64 J.BlankedJson where - toBalancedParens64 (J.BlankedJson bj) = DVS.unsafeCast (DVS.unfoldrN newLen genBitWordsForever bpBS) - where bpBS = LBS.fromChunks (compressWordAsBit (blankedJsonToBalancedParens bj)) - newLen = fromIntegral ((LBS.length bpBS + 7) `div` 8 * 8) diff --git a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/ToInterestBits64.hs b/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/ToInterestBits64.hs deleted file mode 100644 index 4865bad..0000000 --- a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/ToInterestBits64.hs +++ /dev/null @@ -1,31 +0,0 @@ -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE InstanceSigs #-} -{-# LANGUAGE MultiParamTypeClasses #-} - -module HaskellWorks.Data.Json.Standard.Cursor.Internal.ToInterestBits64 - ( ToInterestBits64(..) - ) where - -import Control.Applicative -import Data.ByteString.Internal -import Data.Word -import HaskellWorks.Data.Json.Standard.Cursor.Internal.MakeIndex - -import qualified Data.ByteString as BS -import qualified Data.Vector.Storable as DVS -import qualified HaskellWorks.Data.Json.Standard.Cursor.Internal.BlankedJson as J - -class ToInterestBits64 a where - toInterestBits64 :: a -> DVS.Vector Word64 - -instance ToInterestBits64 J.BlankedJson where - toInterestBits64 bj = DVS.unsafeCast (DVS.unfoldrN newLen genInterestForever interestBS) - where interestBS = blankedJsonBssToInterestBitsBs (J.unBlankedJson bj) - newLen = (BS.length interestBS + 7) `div` 8 * 8 - -blankedJsonBssToInterestBitsBs :: [ByteString] -> ByteString -blankedJsonBssToInterestBitsBs bss = BS.concat $ blankedJsonToInterestBits bss - -genInterestForever :: ByteString -> Maybe (Word8, ByteString) -genInterestForever bs = BS.uncons bs <|> Just (0, bs) diff --git a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/Word8.hs b/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/Word8.hs deleted file mode 100644 index 6f956d6..0000000 --- a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Internal/Word8.hs +++ /dev/null @@ -1,72 +0,0 @@ -module HaskellWorks.Data.Json.Standard.Cursor.Internal.Word8 where - -import Data.Word -import Data.Word8 hiding (isDigit) - -import qualified Data.Char as C - -isLeadingDigit :: Word8 -> Bool -isLeadingDigit w = w == _hyphen || (w >= _0 && w <= _9) - -isTrailingDigit :: Word8 -> Bool -isTrailingDigit w = w == _plus || w == _hyphen || (w >= _0 && w <= _9) || w == _period || w == _E || w == _e - -isAlphabetic :: Word8 -> Bool -isAlphabetic w = (w >= _A && w <= _Z) || (w >= _a && w <= _z) - -isDigit :: Word8 -> Bool -isDigit w = w >= _0 && w <= _9 - -wIsJsonNumberDigit :: Word8 -> Bool -wIsJsonNumberDigit w = (w >= _0 && w <= _9) || w == _hyphen - -doubleQuote :: Word8 -doubleQuote = fromIntegral (C.ord '"') - -backSlash :: Word8 -backSlash = fromIntegral (C.ord '\\') - -openBrace :: Word8 -openBrace = fromIntegral (C.ord '{') - -closeBrace :: Word8 -closeBrace = fromIntegral (C.ord '}') - -openBracket :: Word8 -openBracket = fromIntegral (C.ord '[') - -closeBracket :: Word8 -closeBracket = fromIntegral (C.ord ']') - -comma :: Word8 -comma = fromIntegral (C.ord ',') - -colon :: Word8 -colon = fromIntegral (C.ord ':') - -isPeriod :: Word8 -> Bool -isPeriod w = w == 46 - -isMinus :: Word8 -> Bool -isMinus w = w == 45 - -isPlus :: Word8 -> Bool -isPlus w = w == 43 - -isValueChar :: Word8 -> Bool -isValueChar c = isAlphabetic c || isDigit c || isPeriod c || isMinus c || isPlus c - -isOpen :: Word8 -> Bool -isOpen c = c == openBracket || c == openBrace - -isClose :: Word8 -> Bool -isClose c = c == closeBracket || c == closeBrace - -isDelim :: Word8 -> Bool -isDelim c = c == comma || c == colon - -isDoubleQuote :: Word8 -> Bool -isDoubleQuote c = c == doubleQuote - -isBackSlash :: Word8 -> Bool -isBackSlash c = c == backSlash diff --git a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Load/Cursor.hs b/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Load/Cursor.hs deleted file mode 100644 index c83823f..0000000 --- a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Load/Cursor.hs +++ /dev/null @@ -1,44 +0,0 @@ -{-# LANGUAGE BangPatterns #-} - -module HaskellWorks.Data.Json.Standard.Cursor.Load.Cursor - ( loadCursor - , loadCursorWithIndex - , loadCursorWithCsPoppyIndex - , loadCursorWithCsPoppyIndex2 - ) where - -import Data.Word -import HaskellWorks.Data.BalancedParens.Simple -import HaskellWorks.Data.Json.Standard.Cursor.Generic -import HaskellWorks.Data.Json.Standard.Cursor.Load.Raw -import HaskellWorks.Data.RankSelect.CsPoppy - -import qualified Data.ByteString.Internal as BSI -import qualified Data.Vector.Storable as DVS -import qualified HaskellWorks.Data.ByteString as BS -import qualified HaskellWorks.Data.Json.Standard.Cursor.Fast as FAST - -loadCursor :: String -> IO FAST.Cursor -loadCursor path = do - bs <- BS.mmap path - let !cursor = FAST.fromByteString bs - return cursor - -loadCursorWithIndex :: String -> IO (GenericCursor BSI.ByteString (DVS.Vector Word64) (SimpleBalancedParens (DVS.Vector Word64))) -loadCursorWithIndex filename = do - (jsonBS, jsonIb, jsonBp) <- loadRawWithIndex filename - let cursor = GenericCursor jsonBS jsonIb (SimpleBalancedParens jsonBp) 1 - return cursor - -loadCursorWithCsPoppyIndex :: String -> IO (GenericCursor BSI.ByteString CsPoppy (SimpleBalancedParens (DVS.Vector Word64))) -loadCursorWithCsPoppyIndex filename = do - (jsonBS, jsonIb, jsonBp) <- loadRawWithIndex filename - let cursor = GenericCursor jsonBS (makeCsPoppy jsonIb) (SimpleBalancedParens jsonBp) 1 - return cursor - -loadCursorWithCsPoppyIndex2 :: String -> IO (GenericCursor BSI.ByteString CsPoppy (SimpleBalancedParens CsPoppy)) -loadCursorWithCsPoppyIndex2 filename = do - (jsonBS, jsonIb, jsonBp) <- loadRawWithIndex filename - let cursor = GenericCursor jsonBS (makeCsPoppy jsonIb) (SimpleBalancedParens (makeCsPoppy jsonBp)) 1 - :: GenericCursor BSI.ByteString CsPoppy (SimpleBalancedParens CsPoppy) - return cursor diff --git a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Load/Raw.hs b/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Load/Raw.hs deleted file mode 100644 index a226fde..0000000 --- a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Load/Raw.hs +++ /dev/null @@ -1,20 +0,0 @@ -module HaskellWorks.Data.Json.Standard.Cursor.Load.Raw - ( loadRawWithIndex - ) where - -import Data.Word -import HaskellWorks.Data.FromForeignRegion -import System.IO.MMap - -import qualified Data.ByteString as BS -import qualified Data.Vector.Storable as DVS - -loadRawWithIndex :: String -> IO (BS.ByteString, DVS.Vector Word64, DVS.Vector Word64) -loadRawWithIndex filename = do - jsonFr <- mmapFileForeignPtr filename ReadOnly Nothing - jsonIbFr <- mmapFileForeignPtr (filename ++ ".ib.idx") ReadOnly Nothing - jsonBpFr <- mmapFileForeignPtr (filename ++ ".bp.idx") ReadOnly Nothing - let jsonBS = fromForeignRegion jsonFr :: BS.ByteString - let jsonIb = fromForeignRegion jsonIbFr :: DVS.Vector Word64 - let jsonBp = fromForeignRegion jsonBpFr :: DVS.Vector Word64 - return (jsonBS, jsonIb, jsonBp) diff --git a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/SemiIndex.hs b/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/SemiIndex.hs deleted file mode 100644 index 1055ff1..0000000 --- a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/SemiIndex.hs +++ /dev/null @@ -1,269 +0,0 @@ -{-# LANGUAGE BinaryLiterals #-} -{-# LANGUAGE DeriveFunctor #-} -{-# LANGUAGE DeriveTraversable #-} -{-# LANGUAGE InstanceSigs #-} -{-# LANGUAGE MultiWayIf #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE ScopedTypeVariables #-} - -module HaskellWorks.Data.Json.Standard.Cursor.SemiIndex - ( semiIndexBuilder - , SemiIndex(..) - , PreSiChunk(..) - , SiChunk(..) - , buildSemiIndex - , State(..) - , buildFromByteString2 - , buildFromByteString3 - , toIbBpBuilders - ) where - -import Control.Monad.ST -import Data.Bits.Pdep -import Data.Bits.Pext -import Data.Word -import Foreign.Storable (Storable (..)) -import HaskellWorks.Data.Bits.BitWise -import HaskellWorks.Data.Bits.PopCount.PopCount1 -import HaskellWorks.Data.Json.Standard.Cursor.Internal.StateMachine (IntState (..), State (..)) -import HaskellWorks.Data.Vector.AsVector64 - -import qualified Data.ByteString as BS -import qualified Data.ByteString.Builder as B -import qualified Data.ByteString.Lazy as LBS -import qualified Data.ByteString.Unsafe as BSU -import qualified Data.Vector.Storable as DVS -import qualified Data.Vector.Storable.Mutable as DVSM -import qualified HaskellWorks.Data.Bits.Writer.Storable as W -import qualified HaskellWorks.Data.ByteString as BS -import qualified HaskellWorks.Data.Json.Standard.Cursor.Internal.Blank as J -import qualified HaskellWorks.Data.Json.Standard.Cursor.Internal.BlankedJson as J -import qualified HaskellWorks.Data.Json.Standard.Cursor.Internal.MakeIndex as J -import qualified HaskellWorks.Data.Json.Standard.Cursor.Internal.StateMachine as SM -import qualified HaskellWorks.Data.Json.Standard.Cursor.Internal.ToBalancedParens64 as J -import qualified HaskellWorks.Data.Json.Standard.Cursor.Internal.Word8 as W8 - -{-# ANN module ("HLint: ignore Reduce duplication" :: String) #-} -{-# ANN module ("HLint: ignore Redundant do" :: String) #-} - -data PreSiChunk v = PreSiChunk - { preSiChunkIb :: !v -- interest bits - , preSiChunkBpOp :: !v -- balanced parens interest bits - , preSiChunkBpCl :: !v -- balanced parens open close - } deriving (Functor, Traversable, Foldable) - -data SiChunk v = SiChunk - { siChunkIb :: !v -- interest bits - , siChunkBp :: !v -- balanced parens open close - } deriving (Functor, Traversable, Foldable) - -data SemiIndex v = SemiIndex - { semiIndexIb :: !v - , semiIndexBp :: !v - } deriving (Functor, Traversable, Foldable) - -semiIndexBuilder :: LBS.ByteString -> SemiIndex B.Builder -semiIndexBuilder lbs = SemiIndex (B.lazyByteString ibs) (B.byteString (BS.toByteString bps)) - where blankedJson = J.blankJson (LBS.toChunks lbs) - ibs = LBS.fromChunks (J.blankedJsonToInterestBits blankedJson) - bps = J.toBalancedParens64 (J.BlankedJson blankedJson) -{-# INLINE semiIndexBuilder #-} - -buildSemiIndex :: BS.ByteString -> SemiIndex (DVS.Vector Word64) -buildSemiIndex bs = DVS.createT $ do - let len = (BS.length bs + 7) `div` 8 - mib <- W.newWriter len - mbp <- W.newWriter (len * 2) - buildFromByteString mib mbp bs 0 InJson -{-# INLINE buildSemiIndex #-} - -buildFromByteString :: W.Writer s -> W.Writer s -> BS.ByteString -> Int -> State -> ST s (SemiIndex (DVS.MVector s Word64)) -buildFromByteString ib bp bs i = go - where go state = if i < BS.length bs - then do - let c = BSU.unsafeIndex bs i - case state of - InJson -> if - | c == W8.openBracket || c == W8.openBrace -> do - W.unsafeWriteBit ib 1 - W.unsafeWriteBit bp 1 - buildFromByteString ib bp bs (i + 1) InJson - | c == W8.closeBracket || c == W8.closeBrace -> do - W.unsafeWriteBit bp 0 - W.unsafeWriteBit ib 0 - buildFromByteString ib bp bs (i + 1) InJson - | c == W8.comma || c == W8.colon -> do - W.unsafeWriteBit ib 0 - buildFromByteString ib bp bs (i + 1) InJson - | W8.isAlphabetic c || W8.isDigit c || W8.isPeriod c || W8.isMinus c || W8.isPlus c -> do - W.unsafeWriteBit ib 1 - W.unsafeWriteBit bp 1 - W.unsafeWriteBit bp 0 - buildFromByteString ib bp bs (i + 1) InValue - | c == W8.doubleQuote -> do - W.unsafeWriteBit ib 1 - W.unsafeWriteBit bp 1 - W.unsafeWriteBit bp 0 - buildFromByteString ib bp bs (i + 1) InString - | otherwise -> do - W.unsafeWriteBit ib 0 - buildFromByteString ib bp bs (i + 1) InJson - InString -> do - W.unsafeWriteBit ib 0 - let newContext = if - | c == W8.doubleQuote -> InJson - | c == W8.backSlash -> InEscape - | otherwise -> InString - buildFromByteString ib bp bs (i + 1) newContext - InEscape -> do - W.unsafeWriteBit ib 0 - buildFromByteString ib bp bs (i + 1) InString - InValue -> if - | W8.isAlphabetic c || W8.isDigit c || W8.isPeriod c || W8.isMinus c || W8.isPlus c -> do - W.unsafeWriteBit ib 0 - buildFromByteString ib bp bs (i + 1) InValue - | otherwise -> go InJson - else do - ibv <- W.written ib - bpv <- W.written bp - return (SemiIndex ibv bpv) -{-# INLINE buildFromByteString #-} - -constructSI :: forall a s. Storable a => Int -> (Int -> s -> (s, a)) -> s -> (s, DVS.Vector a) -constructSI n f state = DVS.createT $ do - mv <- DVSM.unsafeNew n - state' <- go 0 state mv - return (state', mv) - where go :: Int -> s -> DVSM.MVector t a -> ST t s - go i s mv = if i < DVSM.length mv - then do - let (s', a) = f i s - DVSM.unsafeWrite mv i a - go (i + 1) s' mv - else return s -{-# INLINE constructSI #-} - -buildFromByteString2 :: [BS.ByteString] -> [DVS.Vector Word64] -buildFromByteString2 = go (IntState (fromEnum InJson)) - where go :: IntState -> [BS.ByteString] -> [DVS.Vector Word64] - go s (bs:bss) = v:go s' bss - where (s', v) = constructSI (BS.length bs `div` 16) f s -- TODO adjust length - f :: Int -> IntState -> (IntState, Word64) - f i s'' = let j = i * 16 in transition16 s'' - (BSU.unsafeIndex bs j ) - (BSU.unsafeIndex bs (j + 1)) - (BSU.unsafeIndex bs (j + 2)) - (BSU.unsafeIndex bs (j + 3)) - (BSU.unsafeIndex bs (j + 4)) - (BSU.unsafeIndex bs (j + 5)) - (BSU.unsafeIndex bs (j + 6)) - (BSU.unsafeIndex bs (j + 7)) - (BSU.unsafeIndex bs (j + 8)) - (BSU.unsafeIndex bs (j + 9)) - (BSU.unsafeIndex bs (j + 10)) - (BSU.unsafeIndex bs (j + 11)) - (BSU.unsafeIndex bs (j + 12)) - (BSU.unsafeIndex bs (j + 13)) - (BSU.unsafeIndex bs (j + 14)) - (BSU.unsafeIndex bs (j + 15)) - go _ [] = [] -{-# INLINE buildFromByteString2 #-} - -transition16 :: IntState - -> Word8 -> Word8 -> Word8 -> Word8 -> Word8 -> Word8 -> Word8 -> Word8 - -> Word8 -> Word8 -> Word8 -> Word8 -> Word8 -> Word8 -> Word8 -> Word8 - -> (IntState, Word64) -transition16 s a b c d e f g h i j k l m n o p = (sp, w) - where (sa, wa) = transition1 s a - (sb, wb) = transition1 sa b - (sc, wc) = transition1 sb c - (sd, wd) = transition1 sc d - (se, we) = transition1 sd e - (sf, wf) = transition1 se f - (sg, wg) = transition1 sf g - (sh, wh) = transition1 sg h - (si, wi) = transition1 sh i - (sj, wj) = transition1 si j - (sk, wk) = transition1 sj k - (sl, wl) = transition1 sk l - (sm, wm) = transition1 sl m - (sn, wn) = transition1 sm n - (so, wo) = transition1 sn o - (sp, wp) = transition1 so p - w = (fromIntegral wa ) .|. - (fromIntegral wb .<. 4) .|. - (fromIntegral wc .<. 8) .|. - (fromIntegral wd .<. 12) .|. - (fromIntegral we .<. 16) .|. - (fromIntegral wf .<. 20) .|. - (fromIntegral wg .<. 24) .|. - (fromIntegral wh .<. 28) .|. - (fromIntegral wi .<. 32) .|. - (fromIntegral wj .<. 36) .|. - (fromIntegral wk .<. 40) .|. - (fromIntegral wl .<. 44) .|. - (fromIntegral wm .<. 48) .|. - (fromIntegral wn .<. 52) .|. - (fromIntegral wo .<. 56) .|. - (fromIntegral wp .<. 60) -{-# INLINE transition16 #-} - -transition1 :: IntState -> Word8 -> (IntState, Word64) -transition1 s a = (s', w) - where s' = SM.lookupTransitionTable s (fromIntegral a) - w = fromIntegral $ SM.lookupPhiTable s (fromIntegral a) -{-# INLINE transition1 #-} - -buildFromByteString3 :: [BS.ByteString] -> [PreSiChunk (DVS.Vector Word64)] -buildFromByteString3 bss = makePreSiChunk <$> buildFromByteString2 bss - where makePreSiChunk :: DVS.Vector Word64 -> PreSiChunk (DVS.Vector Word64) - makePreSiChunk v = PreSiChunk (makeIb v) (makeBpOp v) (makeBpCl v) - makeIb :: DVS.Vector Word64 -> DVS.Vector Word64 - makeIb v = asVector64 $ BS.toByteString $ DVS.constructN (DVS.length v) go - where go :: DVS.Vector Word16 -> Word16 - go u = let ui = DVS.length u in fromIntegral (pext (DVS.unsafeIndex v ui) 0x4444444444444444) - makeBpOp :: DVS.Vector Word64 -> DVS.Vector Word64 - makeBpOp v = asVector64 $ BS.toByteString $ DVS.constructN (DVS.length v) go - where go :: DVS.Vector Word16 -> Word16 - go u = let ui = DVS.length u in fromIntegral (pext (DVS.unsafeIndex v ui) 0x2222222222222222) - makeBpCl v = asVector64 $ BS.toByteString $ DVS.constructN (DVS.length v) go - where go :: DVS.Vector Word16 -> Word16 - go u = let ui = DVS.length u in fromIntegral (pext (DVS.unsafeIndex v ui) 0x1111111111111111) - -toIbBpBuilders :: [PreSiChunk (DVS.Vector Word64)] -> [SiChunk (DVS.Vector Word64)] -toIbBpBuilders = go 0 0 - where go :: Word64 -> Word64 -> [PreSiChunk (DVS.Vector Word64)] -> [SiChunk (DVS.Vector Word64)] - go b n (PreSiChunk ib bpOp bpCl:cs) = SiChunk ib bp: go b' n' cs - where ((b', n'), bp) = mkBp b n (DVS.unsafeCast bpOp) (DVS.unsafeCast bpCl) - go b _ [] = [SiChunk DVS.empty (DVS.singleton b)] - mkBp :: Word64 -> Word64 -> DVS.Vector Word32 -> DVS.Vector Word32 -> ((Word64, Word64), DVS.Vector Word64) - mkBp b n bpOp bpCl = DVS.createT $ do - mv <- DVSM.unsafeNew (DVS.length bpOp) - mkBpGo b n bpOp bpCl mv 0 0 - mkBpGo :: Word64 -> Word64 -> DVS.Vector Word32 -> DVS.Vector Word32 -> DVS.MVector s Word64 -> Int -> Int -> ST s ((Word64, Word64), DVS.MVector s Word64) - mkBpGo b n bpOp bpCl mv vi mvi = if vi < DVS.length bpOp - then do - let op = DVS.unsafeIndex bpOp vi - let cl = DVS.unsafeIndex bpCl vi - let (slo, mlo) = compress op cl -- slo: source lo, mlo: mask lo - let wb = pext slo mlo - let wn = popCount1 mlo - let tb = (wb .<. n) .|. b - let tn = n + wn - if tn < 64 - then do - mkBpGo tb tn bpOp bpCl mv (vi + 1) mvi - else do - DVSM.unsafeWrite mv mvi tb - let ub = wb .>. (64 - n) - let un = tn - 64 - mkBpGo ub un bpOp bpCl mv (vi + 1) (mvi + 1) - else return ((b, n), DVSM.take mvi mv) - compress :: Word32 -> Word32 -> (Word64, Word64) - compress op cl = (sw, mw) - where iop = pdep (fromIntegral op :: Word64) 0x5555555555555555 -- interleaved open - icl = pdep (fromIntegral cl :: Word64) 0xaaaaaaaaaaaaaaaa -- interleaved close - ioc = iop .|. icl -- interleaved open/close - sw = pext iop ioc - pc = popCount1 ioc - mw = (1 .<. pc) - 1 diff --git a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Slow.hs b/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Slow.hs deleted file mode 100644 index 86c6b3c..0000000 --- a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Slow.hs +++ /dev/null @@ -1,43 +0,0 @@ -{-# LANGUAGE TypeFamilies #-} - -module HaskellWorks.Data.Json.Standard.Cursor.Slow - ( Cursor - , fromByteString - , fromForeignRegion - , fromString - ) where - -import Data.Word -import Foreign.ForeignPtr -import HaskellWorks.Data.Json.Standard.Cursor.Generic -import HaskellWorks.Data.Json.Standard.Cursor.Specific - -import qualified Data.ByteString as BS -import qualified Data.ByteString.Char8 as BSC -import qualified Data.ByteString.Internal as BSI -import qualified Data.Vector.Storable as DVS -import qualified HaskellWorks.Data.BalancedParens as BP -import qualified HaskellWorks.Data.FromForeignRegion as F -import qualified HaskellWorks.Data.Json.Standard.Cursor.Internal.IbBp as J - -data Slow - -instance SpecificCursor Slow where - type CursorOf Slow = Cursor - -type Cursor = GenericCursor BS.ByteString (DVS.Vector Word64) (BP.SimpleBalancedParens (DVS.Vector Word64)) - -fromByteString :: BS.ByteString -> Cursor -fromByteString bs = GenericCursor - { cursorText = bs - , interests = ib - , balancedParens = BP.SimpleBalancedParens bp - , cursorRank = 1 - } - where J.IbBp ib bp = J.toIbBp bs - -fromForeignRegion :: F.ForeignRegion -> Cursor -fromForeignRegion (fptr, offset, size) = fromByteString (BSI.fromForeignPtr (castForeignPtr fptr) offset size) - -fromString :: String -> Cursor -fromString = fromByteString . BSC.pack diff --git a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Specific.hs b/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Specific.hs deleted file mode 100644 index b696dbd..0000000 --- a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Specific.hs +++ /dev/null @@ -1,11 +0,0 @@ -{-# LANGUAGE TypeFamilies #-} - -module HaskellWorks.Data.Json.Standard.Cursor.Specific - ( SpecificCursor(..) - , jsonCursorPos - ) where - -import HaskellWorks.Data.Json.Standard.Cursor.Generic - -class SpecificCursor w where - type CursorOf w diff --git a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Type.hs b/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Type.hs deleted file mode 100644 index 16a2caa..0000000 --- a/hw-json-standard-cursor/HaskellWorks/Data/Json/Standard/Cursor/Type.hs +++ /dev/null @@ -1,71 +0,0 @@ -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE InstanceSigs #-} -{-# LANGUAGE MultiParamTypeClasses #-} - -module HaskellWorks.Data.Json.Standard.Cursor.Type - ( JsonType(..) - , JsonTypeAt(..) - ) where - -import Data.Char -import Data.String -import Data.Word8 -import HaskellWorks.Data.Bits.BitWise -import HaskellWorks.Data.Drop -import HaskellWorks.Data.Json.Standard.Cursor.Generic -import HaskellWorks.Data.Json.Standard.Cursor.Internal.Word8 -import HaskellWorks.Data.Positioning -import HaskellWorks.Data.RankSelect.Base.Rank0 -import HaskellWorks.Data.RankSelect.Base.Rank1 -import HaskellWorks.Data.RankSelect.Base.Select1 -import Prelude hiding (drop) - -import qualified Data.ByteString as BS -import qualified HaskellWorks.Data.BalancedParens as BP - -{-# ANN module ("HLint: Reduce duplication" :: String) #-} - -data JsonType - = JsonTypeArray - | JsonTypeBool - | JsonTypeNull - | JsonTypeNumber - | JsonTypeObject - | JsonTypeString - deriving (Eq, Show) - -class JsonTypeAt a where - jsonTypeAtPosition :: Position -> a -> Maybe JsonType - jsonTypeAt :: a -> Maybe JsonType - -instance (BP.BalancedParens w, Rank0 w, Rank1 w, Select1 v, TestBit w) => JsonTypeAt (GenericCursor String v w) where - jsonTypeAtPosition p k = case drop (toCount p) (cursorText k) of - c:_ | fromIntegral (ord c) == _bracketleft -> Just JsonTypeArray - c:_ | fromIntegral (ord c) == _f -> Just JsonTypeBool - c:_ | fromIntegral (ord c) == _t -> Just JsonTypeBool - c:_ | fromIntegral (ord c) == _n -> Just JsonTypeNull - c:_ | wIsJsonNumberDigit (fromIntegral (ord c)) -> Just JsonTypeNumber - c:_ | fromIntegral (ord c) == _braceleft -> Just JsonTypeObject - c:_ | fromIntegral (ord c) == _quotedbl -> Just JsonTypeString - _ -> Nothing - - jsonTypeAt k = jsonTypeAtPosition p k - where p = lastPositionOf (select1 ik (rank1 bpk (cursorRank k))) - ik = interests k - bpk = balancedParens k - -instance (BP.BalancedParens w, Rank0 w, Rank1 w, Select1 v, TestBit w) => JsonTypeAt (GenericCursor BS.ByteString v w) where - jsonTypeAtPosition p k = case BS.uncons (drop (toCount p) (cursorText k)) of - Just (c, _) | c == _bracketleft -> Just JsonTypeArray - Just (c, _) | c == _f -> Just JsonTypeBool - Just (c, _) | c == _t -> Just JsonTypeBool - Just (c, _) | c == _n -> Just JsonTypeNull - Just (c, _) | wIsJsonNumberDigit c -> Just JsonTypeNumber - Just (c, _) | c == _braceleft -> Just JsonTypeObject - Just (c, _) | c == _quotedbl -> Just JsonTypeString - _ -> Nothing - - jsonTypeAt k = jsonTypeAtPosition p k - where p = lastPositionOf (select1 ik (rank1 bpk (cursorRank k))) - ik = interests k - bpk = balancedParens k diff --git a/hw-json.cabal b/hw-json.cabal index cee0e8e..51912b1 100644 --- a/hw-json.cabal +++ b/hw-json.cabal @@ -37,36 +37,37 @@ flag sse42 manual: False default: False -common base { build-depends: base >= 4 && < 5 } +common base { build-depends: base >= 4 && < 5 } -common ansi-wl-pprint { build-depends: ansi-wl-pprint >= 0.6.8.2 && < 0.7 } -common array { build-depends: array >= 0.5 && < 0.6 } -common attoparsec { build-depends: attoparsec >= 0.13 && < 0.14 } -common bits-extra { build-depends: bits-extra >= 0.0.1.3 && < 0.1 } -common bytestring { build-depends: bytestring >= 0.10.6 && < 0.11 } -common criterion { build-depends: criterion >= 1.4 && < 1.6 } -common directory { build-depends: directory >= 1.3 && < 1.4 } -common dlist { build-depends: dlist >= 0.8 && < 0.9 } -common generic-lens { build-depends: generic-lens >= 1.1.0.0 && < 1.2 } -common hedgehog { build-depends: hedgehog >= 0.6 && < 1.1 } -common hspec { build-depends: hspec >= 2.4 && < 3 } -common hw-balancedparens { build-depends: hw-balancedparens >= 0.3.0.0 && < 0.4 } -common hw-bits { build-depends: hw-bits >= 0.7.0.5 && < 0.8 } -common hw-hspec-hedgehog { build-depends: hw-hspec-hedgehog >= 0.1.0.4 && < 0.2 } -common hw-json-simd { build-depends: hw-json-simd >= 0.1.0.2 && < 0.2 } -common hw-mquery { build-depends: hw-mquery >= 0.2.0.0 && < 0.3 } -common hw-parser { build-depends: hw-parser >= 0.1 && < 0.2 } -common hw-prim { build-depends: hw-prim >= 0.6.2.28 && < 0.7 } -common hw-rankselect { build-depends: hw-rankselect >= 0.13 && < 0.14 } -common hw-rankselect-base { build-depends: hw-rankselect-base >= 0.3.2.1 && < 0.4 } -common hw-simd { build-depends: hw-simd >= 0.1.1.2 && < 0.2 } -common lens { build-depends: lens >= 4 && < 5 } -common mmap { build-depends: mmap >= 0.5 && < 0.6 } -common optparse-applicative { build-depends: optparse-applicative >= 0.14 && < 0.15 } -common text { build-depends: text >= 1.2 && < 1.3 } -common transformers { build-depends: transformers >= 0.4 && < 0.6 } -common vector { build-depends: vector >= 0.12 && < 0.13 } -common word8 { build-depends: word8 >= 0.1 && < 0.2 } +common ansi-wl-pprint { build-depends: ansi-wl-pprint >= 0.6.8.2 && < 0.7 } +common array { build-depends: array >= 0.5 && < 0.6 } +common attoparsec { build-depends: attoparsec >= 0.13 && < 0.14 } +common bits-extra { build-depends: bits-extra >= 0.0.1.3 && < 0.1 } +common bytestring { build-depends: bytestring >= 0.10.6 && < 0.11 } +common criterion { build-depends: criterion >= 1.4 && < 1.6 } +common directory { build-depends: directory >= 1.3 && < 1.4 } +common dlist { build-depends: dlist >= 0.8 && < 0.9 } +common generic-lens { build-depends: generic-lens >= 1.1.0.0 && < 1.2 } +common hedgehog { build-depends: hedgehog >= 0.6 && < 1.1 } +common hspec { build-depends: hspec >= 2.4 && < 3 } +common hw-balancedparens { build-depends: hw-balancedparens >= 0.3.0.0 && < 0.4 } +common hw-bits { build-depends: hw-bits >= 0.7.0.5 && < 0.8 } +common hw-hspec-hedgehog { build-depends: hw-hspec-hedgehog >= 0.1.0.4 && < 0.2 } +common hw-json-simd { build-depends: hw-json-simd >= 0.1.0.2 && < 0.2 } +common hw-json-standard-cursor { build-depends: hw-json-standard-cursor >= 0.1.0.0 && < 0.2 } +common hw-mquery { build-depends: hw-mquery >= 0.2.0.0 && < 0.3 } +common hw-parser { build-depends: hw-parser >= 0.1 && < 0.2 } +common hw-prim { build-depends: hw-prim >= 0.6.2.28 && < 0.7 } +common hw-rankselect { build-depends: hw-rankselect >= 0.13 && < 0.14 } +common hw-rankselect-base { build-depends: hw-rankselect-base >= 0.3.2.1 && < 0.4 } +common hw-simd { build-depends: hw-simd >= 0.1.1.2 && < 0.2 } +common lens { build-depends: lens >= 4 && < 5 } +common mmap { build-depends: mmap >= 0.5 && < 0.6 } +common optparse-applicative { build-depends: optparse-applicative >= 0.14 && < 0.15 } +common text { build-depends: text >= 1.2 && < 1.3 } +common transformers { build-depends: transformers >= 0.4 && < 0.6 } +common vector { build-depends: vector >= 0.12 && < 0.13 } +common word8 { build-depends: word8 >= 0.1 && < 0.2 } common semigroups { if impl(ghc < 8 ) { build-depends: semigroups >= 0.16 && < 0.19 } } @@ -97,39 +98,6 @@ library hw-json-simple-cursor HaskellWorks.Data.Json.Simple.Cursor.Internal.ToIbBp HaskellWorks.Data.Json.Simple.Cursor.Internal.Word8 -library hw-json-standard-cursor - import: base, config - , array - , bits-extra - , bytestring - , hw-balancedparens - , hw-bits - , hw-prim - , hw-rankselect - , hw-rankselect-base - , mmap - , vector - , word8 - hs-source-dirs: hw-json-standard-cursor - exposed-modules: HaskellWorks.Data.Json.Standard.Cursor - HaskellWorks.Data.Json.Standard.Cursor.Fast - HaskellWorks.Data.Json.Standard.Cursor.Generic - HaskellWorks.Data.Json.Standard.Cursor.Index - HaskellWorks.Data.Json.Standard.Cursor.Internal.Blank - HaskellWorks.Data.Json.Standard.Cursor.Internal.BlankedJson - HaskellWorks.Data.Json.Standard.Cursor.Internal.IbBp - HaskellWorks.Data.Json.Standard.Cursor.Internal.MakeIndex - HaskellWorks.Data.Json.Standard.Cursor.Internal.StateMachine - HaskellWorks.Data.Json.Standard.Cursor.Internal.ToBalancedParens64 - HaskellWorks.Data.Json.Standard.Cursor.Internal.ToInterestBits64 - HaskellWorks.Data.Json.Standard.Cursor.Internal.Word8 - HaskellWorks.Data.Json.Standard.Cursor.Load.Cursor - HaskellWorks.Data.Json.Standard.Cursor.Load.Raw - HaskellWorks.Data.Json.Standard.Cursor.SemiIndex - HaskellWorks.Data.Json.Standard.Cursor.Slow - HaskellWorks.Data.Json.Standard.Cursor.Specific - HaskellWorks.Data.Json.Standard.Cursor.Type - library import: base, config , ansi-wl-pprint @@ -139,6 +107,7 @@ library , dlist , hw-balancedparens , hw-bits + , hw-json-standard-cursor , hw-mquery , hw-parser , hw-prim @@ -154,7 +123,6 @@ library other-modules: Paths_hw_json autogen-modules: Paths_hw_json build-depends: hw-json-simple-cursor - , hw-json-standard-cursor exposed-modules: HaskellWorks.Data.Json.DecodeError HaskellWorks.Data.Json.FromValue