Skip to content

Commit

Permalink
feat: Implement encoding in rules file for -f *.rules
Browse files Browse the repository at this point in the history
We use `text-icu` as [recommended by `text`](https://hackage.haskell.org/package/text-2.1.2/docs/Data-Text-Encoding.html):

> To gain access to a much larger family of encodings, use the `text-icu` package.
  • Loading branch information
jokesper committed Jan 21, 2025
1 parent 0635fb9 commit 5966c8a
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 6 deletions.
12 changes: 11 additions & 1 deletion hledger-lib/Hledger/Read/RulesReader.hs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ where
--- ** imports
import Prelude hiding (Applicative(..))
import Control.Applicative (Applicative(..))
import qualified Control.Exception as C
import Control.Monad (unless, when, void)
import Control.Monad.Except (ExceptT(..), liftEither, throwError)
import qualified Control.Monad.Fail as Fail
Expand All @@ -65,6 +66,8 @@ import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.IO as T
import Data.Text.ICU.Error (ICUError)
import qualified Data.Text.ICU.Convert as UCNV
import Data.Time ( Day, TimeZone, UTCTime, LocalTime, ZonedTime(ZonedTime),
defaultTimeLocale, getCurrentTimeZone, localDay, parseTimeM, utcToLocalTime, localTimeToUTC, zonedTimeToUTC)
import Safe (atMay, headMay, lastMay, readMay)
Expand Down Expand Up @@ -132,14 +135,20 @@ parse iopts f _ = do
Nothing -> return [maybe err (dbg4 "inferred source") $ dataFileFor f] -- shouldn't fail, f has .rules extension
where err = error' $ "could not infer a data file for " <> f
return $ dbg4 "data file" $ headMay fs
mconverter <- do
case T.unpack <$> getDirective "encoding" rules of
Just enc -> Just <$> do
let ioconverter = UCNV.open (dbg4 "encoding" enc) (Just False)
ExceptT $ (Right . dbg4 "converter" <$> ioconverter) `C.catch` (\(_::ICUError) -> return $ Left "could not open ICU converter")
Nothing -> return Nothing
case mdatafile of
Nothing -> return nulljournal -- data file specified by source rule was not found
Just dat -> do
exists <- liftIO $ doesFileExist dat
if not (dat=="-" || exists)
then return nulljournal -- data file inferred from rules file name was not found
else do
t <- liftIO $ readFileOrStdinPortably dat
t <- liftIO $ readFileOrStdinPortably' mconverter dat
readJournalFromCsv (Just $ Left rules) dat t Nothing
-- apply any command line account aliases. Can fail with a bad replacement pattern.
>>= liftEither . journalApplyAliases (aliasesFromOpts iopts)
Expand Down Expand Up @@ -500,6 +509,7 @@ directivep = (do
directives :: [Text]
directives =
["source"
,"encoding"
,"date-format"
,"decimal-mark"
,"separator"
Expand Down
26 changes: 21 additions & 5 deletions hledger-lib/Hledger/Utils/IO.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ terminals, pager output, ANSI colour/styles, etc.
-}

{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE ScopedTypeVariables #-}

Expand Down Expand Up @@ -32,9 +33,11 @@ module Hledger.Utils.IO (
expandGlob,
sortByModTime,
readFileOrStdinPortably,
readFileOrStdinPortably',
readFileStrictly,
readFilePortably,
readHandlePortably,
readHandlePortably',
-- hereFileRelative,

-- * Command line parsing
Expand Down Expand Up @@ -107,6 +110,7 @@ where
import Control.Concurrent (forkIO)
import Control.Exception (catch, evaluate, throwIO)
import Control.Monad (when, forM, guard, void)
import qualified Data.ByteString as B
import Data.Char (toLower)
import Data.Colour.RGBSpace (RGB(RGB))
import Data.Colour.RGBSpace.HSL (lightness)
Expand All @@ -118,6 +122,7 @@ import Data.Maybe (isJust, catMaybes)
import Data.Ord (comparing, Down (Down))
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Text.ICU.Convert as UCNV
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TB
import Data.Time.Clock (getCurrentTime)
Expand Down Expand Up @@ -280,19 +285,30 @@ readFilePortably f = openFile f ReadMode >>= readHandlePortably

-- | Like readFilePortably, but read from standard input if the path is "-".
-- No ICU converter is supplied here, so the text is read through
-- readHandlePortably / readHandlePortably' Nothing.
readFileOrStdinPortably :: String -> IO T.Text
readFileOrStdinPortably f = openFileOrStdin f ReadMode >>= readHandlePortably
readFileOrStdinPortably = readFileOrStdinPortably' Nothing

-- | Like readFileOrStdinPortably, but take an optional converter.
-- The converter (from Data.Text.ICU.Convert) is passed through unchanged to
-- readHandlePortably', which decides how the handle's contents are decoded.
-- The path "-" selects standard input; any other path is opened as a file.
readFileOrStdinPortably' :: Maybe UCNV.Converter -> String -> IO T.Text
readFileOrStdinPortably' c f = openFileOrStdin f >>= readHandlePortably' c
where
-- Choose the handle to read from: stdin for "-", otherwise the named file.
openFileOrStdin :: String -> IOMode -> IO Handle
openFileOrStdin "-" _ = return stdin
openFileOrStdin f' m = openFile f' m
-- Variant without an IOMode argument: files are always opened in ReadMode.
openFileOrStdin :: String -> IO Handle
openFileOrStdin "-" = return stdin
openFileOrStdin f' = openFile f' ReadMode

-- | Read all of a handle's contents as Text without an ICU converter.
-- Equivalent to readHandlePortably' Nothing.
readHandlePortably :: Handle -> IO T.Text
readHandlePortably h = do
readHandlePortably = readHandlePortably' Nothing

-- | Read all of a handle's contents as Text, optionally decoding the bytes
-- with the given ICU converter.
--
-- With Nothing, the handle is switched to universalNewlineMode and read as
-- text using the handle's own encoding; if that encoding shows as "UTF-8" it
-- is replaced by utf8_bom first (presumably so a leading byte order mark is
-- tolerated - confirm against the GHC.IO.Encoding documentation).
--
-- With Just a converter, the raw bytes are read from the handle, decoded with
-- UCNV.toUnicode, and every "\r\n" in the decoded Text is replaced by "\n".
readHandlePortably' :: Maybe UCNV.Converter -> Handle -> IO T.Text
readHandlePortably' Nothing h = do
hSetNewlineMode h universalNewlineMode
menc <- hGetEncoding h
when (fmap show menc == Just "UTF-8") $ -- XXX no Eq instance, rely on Show
hSetEncoding h utf8_bom
T.hGetContents h
readHandlePortably' (Just c) h = do
-- The handle's newline mode is not set on this path: the contents are read
-- as a ByteString and decoded here, so CRLF pairs are rewritten to LF on the
-- resulting Text instead.
-- NOTE(review): no explicit byte order mark handling happens on this path;
-- confirm whether the converter takes care of it.
T.replace "\r\n" "\n". UCNV.toUnicode c <$> B.hGetContents h

-- | Like embedFile, but takes a path relative to the package directory.
embedFileRelative :: FilePath -> Q Exp
Expand Down
1 change: 1 addition & 0 deletions hledger-lib/hledger-lib.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ library
, template-haskell
, terminal-size >=0.3.3
, text >=1.2.4.1
, text-icu >=0.8.0.5
, time >=1.5
, timeit
, transformers >=0.2
Expand Down
1 change: 1 addition & 0 deletions hledger-lib/package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ dependencies:
- template-haskell
- terminal-size >=0.3.3
- text >=1.2.4.1
- text-icu >=0.8.0.5
- time >=1.5
- timeit
- transformers >=0.2
Expand Down

0 comments on commit 5966c8a

Please sign in to comment.