From b4384b848bec9d241bd0cdc97071fecd3b2ddb85 Mon Sep 17 00:00:00 2001 From: Vladislav Zavialov Date: Wed, 1 Jan 2025 15:21:06 +0300 Subject: [PATCH] [#311] Fix a "resource exhausted" crash Problem: xrefcheck was crashing on repositories containing a large number of Markdown files, reporting the following internal error: openBinaryFile: resource exhausted (Too many open files) Solution: use strict file IO (BS.readFile instead of BSL.readFile) to ensure that file handles are released in a timely fashion. --- CHANGES.md | 4 ++++ src/Xrefcheck/Scanners/Markdown.hs | 10 ++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index ce10154e..3c6fd7ee 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -7,6 +7,10 @@ Unreleased ========== +* [#311](https://github.com/serokell/xrefcheck/issues/311) + + Fix a "resource exhausted" crash on repositories containing a large number + of Markdown files. + 0.3.0 ========== diff --git a/src/Xrefcheck/Scanners/Markdown.hs b/src/Xrefcheck/Scanners/Markdown.hs index a6872315..6264e42c 100644 --- a/src/Xrefcheck/Scanners/Markdown.hs +++ b/src/Xrefcheck/Scanners/Markdown.hs @@ -23,11 +23,10 @@ import CMarkGFM import Control.Lens (_Just, makeLenses, makeLensesFor, use, (.=)) import Control.Monad.Trans.Writer.CPS (Writer, runWriter, tell) import Data.Aeson (FromJSON (..), genericParseJSON) -import Data.ByteString.Lazy qualified as BSL +import Data.ByteString qualified as BS import Data.DList qualified as DList import Data.Reflection (Given) import Data.Text qualified as T -import Data.Text.Lazy qualified as LT import Fmt (Buildable (..), nameF) import Text.HTML.TagSoup import Text.Interpolation.Nyan @@ -409,18 +408,17 @@ textToMode ("ignore" : [x]) | otherwise = InvalidMode x textToMode _ = NotAnAnnotation -parseFileInfo :: MarkdownConfig -> String -> LT.Text -> (FileInfo, [ScanError 'Parse]) +parseFileInfo :: MarkdownConfig -> String -> T.Text -> (FileInfo, [ScanError 'Parse]) parseFileInfo config 
pathForPrinting input = runWriter $ flip runReaderT (ExtractorCtx config pathForPrinting) $ nodeExtractInfo - $ commonmarkToNode [optFootnotes] [extAutolink] - $ toStrict input + $ commonmarkToNode [optFootnotes] [extAutolink] input markdownScanner :: Given PrintUnixPaths => MarkdownConfig -> ScanAction markdownScanner config root relativePath = parseFileInfo config pathForPrinting . decodeUtf8 - <$> BSL.readFile rootedPath + <$> BS.readFile rootedPath where rootedPath = filePathFromRoot root relativePath pathForPrinting = mkPathForPrinting rootedPath