From 408fd075833d55a8fc4541be536744a29c0d6c1e Mon Sep 17 00:00:00 2001 From: Sergey Kurgak Date: Wed, 5 Jun 2024 13:10:55 +0400 Subject: [PATCH 1/2] Change parseSharedStrings function --- src/Codec/Xlsx/Parser/Stream.hs | 9 ++-- test/StreamTests.hs | 92 +++++++++++++++++++++++++++++++++ xlsx.cabal | 1 + 3 files changed, 98 insertions(+), 4 deletions(-) diff --git a/src/Codec/Xlsx/Parser/Stream.hs b/src/Codec/Xlsx/Parser/Stream.hs index 58e89fe..0e9a25a 100644 --- a/src/Codec/Xlsx/Parser/Stream.hs +++ b/src/Codec/Xlsx/Parser/Stream.hs @@ -41,6 +41,7 @@ module Codec.Xlsx.Parser.Stream , WorkbookInfo(..) , SheetInfo(..) , wiSheets + , getOrParseSharedStringss , getWorkbookInfo , CellRow , readSheet @@ -256,10 +257,10 @@ parseSharedStrings ) => HexpatEvent -> m (Maybe Text) parseSharedStrings = \case - StartElement "t" _ -> Nothing <$ (ss_string .= mempty) - EndElement "t" -> Just . LT.toStrict . TB.toLazyText <$> gets _ss_string - CharacterData txt -> Nothing <$ (ss_string <>= TB.fromText txt) - _ -> pure Nothing + StartElement "si" _ -> Nothing <$ (ss_string .= mempty) + EndElement "si" -> Just . LT.toStrict . TB.toLazyText <$> gets _ss_string + CharacterData txt -> Nothing <$ (ss_string <>= TB.fromText txt) + _ -> pure Nothing -- | Run a series of actions on an Xlsx file runXlsxM :: MonadIO m => FilePath -> XlsxM a -> m a diff --git a/test/StreamTests.hs b/test/StreamTests.hs index aad1268..9ce01be 100644 --- a/test/StreamTests.hs +++ b/test/StreamTests.hs @@ -21,6 +21,7 @@ tests = testGroup #else import Control.Exception +import Codec.Archive.Zip as Zip import Codec.Xlsx import Codec.Xlsx.Parser.Stream import Conduit ((.|)) @@ -31,10 +32,12 @@ import Data.Set.Lens import qualified Data.ByteString.Lazy as LB import qualified Data.ByteString as BS import Data.Map (Map) +import qualified Data.Conduit.Combinators as C import qualified Data.Map as M import qualified Data.IntMap.Strict as IM import Data.Text (Text) import qualified Data.Text as Text +import qualified Data.Vector as V import Diff import Test.Tasty (TestTree, testGroup) import Test.Tasty.HUnit (testCase) @@ -66,6 +69,11 @@ tests = , testProperty "Set of input texts is as value set length" sharedStringInputTextsIsSameAsValueSetLength ], + testGroup "Reader/shared strings" + [ testCase "Can parse RichText values" richCellTextIsParsed + ], + + testGroup "Reader/Writer" [ testCase "Write as stream, see if memory based implementation can read it" $ readWrite simpleWorkbook , testCase "Write as stream, see if memory based implementation can read it" $ readWrite simpleWorkbookRow @@ -234,4 +242,88 @@ untypedCellsAreParsedAsFloats = do ] expected @==? (_ri_cell_row . _si_row <$> items) + +richCellTextIsParsed :: IO () +richCellTextIsParsed = do + BS.writeFile "testinput.xlsx" (toBs richWorkbook) + runXlsxM "testinput.xlsx" $ do + sharedStrings <- getOrParseSharedStringss + let result = Set.fromList $ V.toList sharedStrings + liftIO $ expected @==? result + + where + expected :: Set.Set Text + expected = Set.fromList + [ textA1 + , firstClauseB1 <> secondClauseB1 + , firstClauseB2 <> secondClauseB2 + ] + + textA1 = "Text at A1" + firstClauseB1 = "First clause at B1;" + firstClauseB2 = "First clause at B2;" + secondClauseB1 = "Second clause at B1" + secondClauseB2 = "Second clause at B2" + + richWorkbook :: Xlsx + richWorkbook = def & atSheet "Sheet1" ?~ toWs + [ ((RowIndex 1, ColumnIndex 1), cellValue ?~ CellText textA1 $ def) + , ((RowIndex 2, ColumnIndex 1), cellValue ?~ cellRich firstClauseB1 secondClauseB1 $ def) + , ((RowIndex 2, ColumnIndex 2), cellValue ?~ cellRich firstClauseB2 secondClauseB2 $ def) + ] + +cellRich :: Text -> Text -> CellValue +cellRich firstClause secondClause = CellRich + [ RichTextRun + { _richTextRunProperties = Just RunProperties + { _runPropertiesBold = Nothing + , _runPropertiesCharset = Just 1 + , _runPropertiesColor = Just Color + { _colorAutomatic = Nothing + , _colorARGB = Nothing + , _colorTheme = Just 1 + , _colorTint = Nothing + } + , _runPropertiesCondense = Nothing + , _runPropertiesExtend = Nothing + , _runPropertiesFontFamily = Just FontFamilySwiss + , _runPropertiesItalic = Nothing + , _runPropertiesOutline = Nothing + , _runPropertiesFont = Just "Aptos Narrow" + , _runPropertiesScheme = Nothing + , _runPropertiesShadow = Nothing + , _runPropertiesStrikeThrough = Nothing + , _runPropertiesSize = Just 11.0 + , _runPropertiesUnderline = Nothing + , _runPropertiesVertAlign = Nothing + } + , _richTextRunText = firstClause + } + , RichTextRun + { _richTextRunProperties = Just RunProperties + { _runPropertiesBold = Just True + , _runPropertiesCharset = Just 1 + , _runPropertiesColor = Just Color + { _colorAutomatic = Nothing + , _colorARGB = Just "FFFF0000" + , _colorTheme = Nothing + , _colorTint = Nothing + } + , _runPropertiesCondense = Nothing + , _runPropertiesExtend = Nothing + , _runPropertiesFontFamily = Just FontFamilySwiss + , _runPropertiesItalic = Nothing + , _runPropertiesOutline = Nothing + , _runPropertiesFont = Just "Arial" + , _runPropertiesScheme = Nothing + , _runPropertiesShadow = Nothing + , _runPropertiesStrikeThrough = Nothing + , _runPropertiesSize = Just 8.0 + , _runPropertiesUnderline = Nothing + , _runPropertiesVertAlign = Nothing + } + , _richTextRunText = secondClause + } + ] + #endif diff --git a/xlsx.cabal b/xlsx.cabal index b256d2f..90b9c90 100644 --- a/xlsx.cabal +++ b/xlsx.cabal @@ -176,6 +176,7 @@ test-suite data-test , conduit , filepath , deepseq + , zip if flag(microlens) Build-depends: microlens >= 0.4 && < 0.5 , microlens-mtl From c489d9d1d6099d770eaabb382c1dea803bac2d1d Mon Sep 17 00:00:00 2001 From: Sergey Kurgak Date: Thu, 6 Jun 2024 09:09:45 +0400 Subject: [PATCH 2/2] Add TODOs about CellRich parsing --- src/Codec/Xlsx/Parser/Stream.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Codec/Xlsx/Parser/Stream.hs b/src/Codec/Xlsx/Parser/Stream.hs index 0e9a25a..a6a0df5 100644 --- a/src/Codec/Xlsx/Parser/Stream.hs +++ b/src/Codec/Xlsx/Parser/Stream.hs @@ -182,6 +182,8 @@ makeLenses 'MkSheetState -- | State for parsing shared strings data SharedStringsState = MkSharedStringsState { _ss_string :: TB.Builder -- ^ String we are parsing + -- TODO: At the moment SharedStrings can be used only to create CellText values. + -- We should add support for CellRich values. , _ss_list :: DL.DList Text -- ^ list of shared strings } deriving stock (Generic, Show) makeLenses 'MkSharedStringsState @@ -257,6 +259,7 @@ parseSharedStrings ) => HexpatEvent -> m (Maybe Text) parseSharedStrings = \case + -- TODO: Add parsing of text styles to further create CellRich values. StartElement "si" _ -> Nothing <$ (ss_string .= mempty) EndElement "si" -> Just . LT.toStrict . TB.toLazyText <$> gets _ss_string CharacterData txt -> Nothing <$ (ss_string <>= TB.fromText txt)