From bc73837f65e410d4c18bdc00c22449d4dd9da29f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Belellou?= <1219617+kevin-belellou@users.noreply.github.com> Date: Thu, 14 Nov 2024 17:47:08 +0100 Subject: [PATCH] Fixes #19 - Multiple column information per line in acq.txt files (#23) --- .../chemstation/ColumnInformationMapper.java | 92 +++++++++++++----- .../ColumnInformationMapperTests.java | 40 ++++---- src/test/resources/V179_2.D/acq.txt | Bin 0 -> 16500 bytes 3 files changed, 87 insertions(+), 45 deletions(-) create mode 100644 src/test/resources/V179_2.D/acq.txt diff --git a/src/main/java/fr/ifpen/allotropeconverters/gc/chemstation/ColumnInformationMapper.java b/src/main/java/fr/ifpen/allotropeconverters/gc/chemstation/ColumnInformationMapper.java index d84b90a..7df4587 100644 --- a/src/main/java/fr/ifpen/allotropeconverters/gc/chemstation/ColumnInformationMapper.java +++ b/src/main/java/fr/ifpen/allotropeconverters/gc/chemstation/ColumnInformationMapper.java @@ -5,75 +5,108 @@ import fr.ifpen.allotropeconverters.gc.schema.ChromatographyColumnLength; import fr.ifpen.allotropeconverters.gc.schema.ColumnInnerDiameter; -import java.io.*; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; import java.util.InputMismatchException; +import java.util.LinkedHashMap; import java.util.Locale; +import java.util.Map; +import java.util.NoSuchElementException; import java.util.Scanner; +import java.util.regex.MatchResult; import java.util.regex.Pattern; import static java.nio.charset.StandardCharsets.UTF_16; public final class ColumnInformationMapper { - private static final Pattern COLUMN_NAME_PATTERN = Pattern.compile(".*:"); + + private static final String COLON_REGEX = "\\s*:\\s*"; + private static final String SEPARATOR_REGEX = "\\s*"; + private static final String TEXT_REGEX = "(\\S+)"; + private static final String NUMBER_REGEX = "([\\d.]+)"; + private static final String COLUMN_SEPARATOR_REGEX = "(?>\\s+|\\n+)"; + + private static final Map COLUMN_NAMES_MAP = new LinkedHashMap<>(); + + static { + COLUMN_NAMES_MAP.put("Model#", true); + COLUMN_NAMES_MAP.put("Manufacturer", true); + COLUMN_NAMES_MAP.put("Diameter", false); + COLUMN_NAMES_MAP.put("Length", false); + COLUMN_NAMES_MAP.put("Film thickness", false); + } + + private static final Pattern COLUMN_PATTERN; + + static { + StringBuilder pattern = new StringBuilder(); + + COLUMN_NAMES_MAP.forEach((columnName, isOnlyText) -> { + pattern.append(columnName).append(COLON_REGEX); + + if (!isOnlyText) { + pattern.append(NUMBER_REGEX).append(SEPARATOR_REGEX); + } + + pattern.append(TEXT_REGEX).append(COLUMN_SEPARATOR_REGEX); + }); + + COLUMN_PATTERN = Pattern.compile(pattern.toString(), Pattern.MULTILINE); + } public ChromatographyColumnDocument readColumnDocumentFromFile(String folderPath) throws IOException { ChromatographyColumnDocument columnDocument = new ChromatographyColumnDocument(); - File file = new File(folderPath,"acq.txt"); + File file = new File(folderPath, "acq.txt"); try (FileInputStream fileInputStream = new FileInputStream(file); - InputStreamReader inputStreamReader = new InputStreamReader(fileInputStream, UTF_16); - Scanner acquisitionScanner = new Scanner(inputStreamReader)){ + InputStreamReader inputStreamReader = new InputStreamReader(fileInputStream, UTF_16); + Scanner acquisitionScanner = new Scanner(inputStreamReader)) { acquisitionScanner.useLocale(Locale.US); //Agilent files are US formatted. skipToColumnInformation(acquisitionScanner); - acquisitionScanner.skip(COLUMN_NAME_PATTERN); - columnDocument.setChromatographyColumnPartNumber(acquisitionScanner.next()); //Model - acquisitionScanner.nextLine(); + MatchResult columnInformation = acquisitionScanner.findAll(COLUMN_PATTERN).findFirst().orElseThrow(() -> new NoSuchElementException("Incorrect column information")); + + int groupIndex = 1; - acquisitionScanner.skip(COLUMN_NAME_PATTERN); - columnDocument.setProductManufacturer(acquisitionScanner.next()); //Manufacturer - acquisitionScanner.nextLine(); + columnDocument.setChromatographyColumnPartNumber(columnInformation.group(groupIndex++)); //Model + columnDocument.setProductManufacturer(columnInformation.group(groupIndex++)); //Manufacturer ColumnInnerDiameter columnInnerDiameter = new ColumnInnerDiameter(); - acquisitionScanner.skip(COLUMN_NAME_PATTERN); - double value = acquisitionScanner.nextDouble(); - String unit = acquisitionScanner.next(); + double value = Double.parseDouble(columnInformation.group(groupIndex++)); + String unit = columnInformation.group(groupIndex++); - if(unit.equals("µm")){ //Allotrope format forces mm. + if (unit.equals("µm")) { //Allotrope format forces mm. unit = "mm"; value = value / 1000; } - columnInnerDiameter.setValue(value); columnInnerDiameter.setUnit(unit); columnDocument.setColumnInnerDiameter(columnInnerDiameter); - acquisitionScanner.nextLine(); ChromatographyColumnLength chromatographyColumnLength = new ChromatographyColumnLength(); - acquisitionScanner.skip(COLUMN_NAME_PATTERN); - chromatographyColumnLength.setValue(acquisitionScanner.nextDouble()); - chromatographyColumnLength.setUnit(acquisitionScanner.next()); + chromatographyColumnLength.setValue(Double.parseDouble(columnInformation.group(groupIndex++))); + chromatographyColumnLength.setUnit(columnInformation.group(groupIndex++)); columnDocument.setChromatographyColumnLength(chromatographyColumnLength); - acquisitionScanner.nextLine(); ChromatographyColumnFilmThickness columnFilmThickness = new ChromatographyColumnFilmThickness(); - acquisitionScanner.skip(COLUMN_NAME_PATTERN); - columnFilmThickness.setValue(acquisitionScanner.nextDouble()); - columnFilmThickness.setUnit(acquisitionScanner.next()); + columnFilmThickness.setValue(Double.parseDouble(columnInformation.group(groupIndex++))); + columnFilmThickness.setUnit(columnInformation.group(groupIndex++)); columnDocument.setChromatographyColumnFilmThickness(columnFilmThickness); columnDocument.setChromatographyColumnSerialNumber("N/A"); return columnDocument; - } catch (InputMismatchException e){ + } catch (InputMismatchException e) { return new ChromatographyColumnDocument(); } } - private void skipToColumnInformation(Scanner acquisitionScanner){ + private void skipToColumnInformation(Scanner acquisitionScanner) { /* Looking for pattern ===================================================================== Column(s) @@ -81,6 +114,7 @@ private void skipToColumnInformation(Scanner acquisitionScanner){ Column Description : HP-PONA */ + boolean columnSectionFound = false; String line; while ((line = acquisitionScanner.nextLine()) != null) { if (line.contains("======")) { @@ -88,10 +122,16 @@ private void skipToColumnInformation(Scanner acquisitionScanner){ if (line.contains("Column(s)")) { acquisitionScanner.nextLine();// === line acquisitionScanner.nextLine();// empty line + columnSectionFound = true; break; } } } + + if (!columnSectionFound) { + throw new NoSuchElementException("No column information found"); + } + acquisitionScanner.nextLine(); //Column Description - Not in model acquisitionScanner.nextLine(); //Inventory # - Not in model } diff --git a/src/test/java/fr/ifpen/allotropeconverters/gc/chemstation/ColumnInformationMapperTests.java b/src/test/java/fr/ifpen/allotropeconverters/gc/chemstation/ColumnInformationMapperTests.java index e40f614..169eac7 100644 --- a/src/test/java/fr/ifpen/allotropeconverters/gc/chemstation/ColumnInformationMapperTests.java +++ b/src/test/java/fr/ifpen/allotropeconverters/gc/chemstation/ColumnInformationMapperTests.java @@ -1,34 +1,36 @@ package fr.ifpen.allotropeconverters.gc.chemstation; -import fr.ifpen.allotropeconverters.gc.chemstation.ColumnInformationMapper; +import java.io.IOException; + import fr.ifpen.allotropeconverters.gc.schema.ChromatographyColumnDocument; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.io.File; -import java.net.URI; -import java.nio.file.Paths; - class ColumnInformationMapperTests { @Test - void MapperCI() throws Exception { - URI uri; - uri = new File("src/test/resources/V179.D").toURI(); + void mapperCI() throws IOException { + readAndAssertColumnInformation("src/test/resources/V179.D"); + } - ColumnInformationMapper columnInformationMapper = new ColumnInformationMapper(); + @Test + void mapperCI_withMultipleColumnInformationPerLine() throws IOException { + readAndAssertColumnInformation("src/test/resources/V179_2.D"); + } - ChromatographyColumnDocument chromatographyColumnDocument = columnInformationMapper.readColumnDocumentFromFile( - Paths.get(uri).toString()); + private static void readAndAssertColumnInformation(String folderPath) throws IOException { + ColumnInformationMapper columnInformationMapper = new ColumnInformationMapper(); - Assertions.assertEquals("19091S-001", chromatographyColumnDocument.getChromatographyColumnPartNumber()); - Assertions.assertEquals("Agilent",chromatographyColumnDocument.getProductManufacturer()); - Assertions.assertEquals(0.2,chromatographyColumnDocument.getColumnInnerDiameter().getValue()); - Assertions.assertEquals("mm",chromatographyColumnDocument.getColumnInnerDiameter().getUnit()); - Assertions.assertEquals(50,chromatographyColumnDocument.getChromatographyColumnLength().getValue()); - Assertions.assertEquals("m",chromatographyColumnDocument.getChromatographyColumnLength().getUnit()); - Assertions.assertEquals(0.50,chromatographyColumnDocument.getChromatographyColumnFilmThickness().getValue()); - Assertions.assertEquals("µm",chromatographyColumnDocument.getChromatographyColumnFilmThickness().getUnit()); + ChromatographyColumnDocument chromatographyColumnDocument = columnInformationMapper.readColumnDocumentFromFile( + folderPath); + Assertions.assertEquals("19091S-001", chromatographyColumnDocument.getChromatographyColumnPartNumber()); + Assertions.assertEquals("Agilent", chromatographyColumnDocument.getProductManufacturer()); + Assertions.assertEquals(0.2, chromatographyColumnDocument.getColumnInnerDiameter().getValue()); + Assertions.assertEquals("mm", chromatographyColumnDocument.getColumnInnerDiameter().getUnit()); + Assertions.assertEquals(50, chromatographyColumnDocument.getChromatographyColumnLength().getValue()); + Assertions.assertEquals("m", chromatographyColumnDocument.getChromatographyColumnLength().getUnit()); + Assertions.assertEquals(0.50, chromatographyColumnDocument.getChromatographyColumnFilmThickness().getValue()); + Assertions.assertEquals("µm", chromatographyColumnDocument.getChromatographyColumnFilmThickness().getUnit()); } } diff --git a/src/test/resources/V179_2.D/acq.txt b/src/test/resources/V179_2.D/acq.txt new file mode 100644 index 0000000000000000000000000000000000000000..d45558c012c26d60870888cb962d97c20a614a9b GIT binary patch literal 16500 zcmeI3%Wm8@6ov_8SD?E-fPgHT6t(M1?m!lf?Zg3`*vPoa#v8}Exv^*58M|rvRNZu! zXKVX^JYq;`q_M2g*eMzaGtzKK{yc~0#zX3#zt`NJJJBa|58Z(~bg%W4xqJGo>U-Dy z>_#dZ>U-C1yLo>dsidz~l&!jV`gP=bzLyjAG0=azZc(4m_pV!V3;HanjPC_^Q=ePH z2<3CWY)-8fRl2DEc;hU`!fM_22i1+oBZNjgr_E_Cf&%o^83`M3dp}=;_FF6&*PdCx$AaR!{d6{WI`mWMo}k zTRHYMudxS0IS}@tyWSbXvTRP~{>)`$Lo^)9#$>W0-OlHtG-vWGcEvMnXryRcvow$< zn_XkJu6ir{%+Fz$@``1j_kF|7pAv~R#OHqIP9-Z{Q3~Rs^dnMhNVAy5;_O|O$3p+m zgD&YY9-{B#JOptTJY14pPbJ1^n*|SVm2?1Ue=2=D)aRv_RP>xkV_H2#x1;5J zqWSvW_gQ7hrjg2JAIU^HG$bqt{sJnh5zW31Me~8IP7_I5Q_Yd+c66d zFPe*e{UY+?|DGsjj{35s%u-L!e>9q}vqp=Ae!8;hQBvd}|IamsLoY>5q@xaPL6$j1 z>iJ&Hw?p%aJ})FYBOl`u^dnoMz9{4*6zfbc%zdij$t0}S}!bdc}h?z=7cQg{`S)AS+aXK0UVilEDe#bn2d!E`H zFDv5X8t3eDAII#=4~_J6BL9lyyq13K`RBjA>C(B7%ev{zMZRK>|| zA{fiS^`%SXs4VW=^sB8E@o%WOWKWb$OH*i1yg;t5&J#vWu&he^*kr~1D4Yj=t}N0@ zH7>-*NX4d0ngT`WH7tD$jpTP$}32V97p1q<%mMP>F;IKdA|ch^h$PTXG-TQlrDIsGKsLV5ea=uv zJl3CkhrcV&M=t|IT=<@?I#pPSF zvBc@!UnxEkj}HwQW+KDK`I(1{+UcB~->R}{-9}02>>$l|q+K^9>E!du(fCRprS9c1 z_;c~yq%LF~(%#PV%vqOtY4#nDyD#LQq{AWa5cbbbWz*7(-+2(K>k^ON7WR_tX&MEi zsqA2|lfn9@yEUfu%=!Tsj<%O*M+Wxc8bDn~A#q z+mwgZ{4C?lXb&xy7WWu8BmvW}DXZvCS7M-6X%#uAS)ixbobReaFV1L}wk1X9Y}z>G ztCA+$ueFtESP#chpe@ue``$d78P~ERo#-AXQyxl^v0;(>Xdd|Fui9tgsw9Cjcml!$ zkI?t)ymeVsB8F3a$B3;QiI04{ZH;M|pQ0n;IJ+OBv?ke#n3W)Ih^K?VAWH$4Y~74C zP-^=zD{k})!p=bdf5C7Gi@5vM}QWFk%7@eCfO zd^-!J9nV0)LNukOEziJ#Z{+#{-_krI@{NaK7agtVv~Bx~b7Fy}AChDtJLAFOB%6p?Jry1khLpbDY{v+ zkTIGRjTbF&`%#UzyolH#ftAIEB^b{!`rU$R}UX#qN#@;_v=-w7Gi<&3Z+>~JJmaR zof7>@&&OyX*2a#eduf5THh1D$JCXYKb1Yx(A1du#F0!|SJj+=;@z4g_+AsokCqdzp z(rRnKw8VCEc<+X2A)dQU>(C3cFq?gWvrvh3IgQ>`M&7tohe{6K^1qgG4?FAa=cBG= zSjmSs_rh-Zc~_Bj{bxR7vZ0gFHh*LXSXtuj!inmT4_Og+N4ZYKH}L9vPX8%#GM?J0 zl0T$vDStz6b`mv70~o>brH|morH|m|rH|m&rH|nDrH|mwg^wU;W!?SDdY3e{Gv|FK zgB#8;T$6P(d;Vq7$1J4dv{ScLe%3tn_#QgeJiOUKru(Dp+c>AJ^`4J327Y&wcfLUn z`%;IM?Ltr1zUs_Cs1fe0$oj&0qdPmQcII#>IWNE23p)GmmM3QimOYHBi?+6NKA-2~ z{!(tes^+G8;pBjwDYjAL2Z>T`?*deN`<@=Cci{KXiJ0(#u~%zt@hxquFnNE2cg5(* tP7v7p{MJj@8ziR}&&QS5tktI=4BbvH*x18cvB(Q=Jws{z?zp`L|1SxT5hVZs literal 0 HcmV?d00001