From ad8bfad679b65401f8ef450b7619726d65c79a4f Mon Sep 17 00:00:00 2001 From: Lars Janssen Date: Fri, 15 Mar 2024 10:13:39 +0100 Subject: [PATCH 1/9] added circuitbreaker and tests --- modules/benchmarks/.factorypath | 176 ++++++++++++++++++ .../test/java/dmg/util/ExceptionsTests.java | 2 +- .../poolManager/CostModuleV1.java | 56 +++++- .../tests/poolmanager/CostModuleTest.java | 44 ++++- 4 files changed, 271 insertions(+), 7 deletions(-) create mode 100644 modules/benchmarks/.factorypath diff --git a/modules/benchmarks/.factorypath b/modules/benchmarks/.factorypath new file mode 100644 index 00000000000..b1d9ee88702 --- /dev/null +++ b/modules/benchmarks/.factorypath @@ -0,0 +1,176 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/modules/cells/src/test/java/dmg/util/ExceptionsTests.java b/modules/cells/src/test/java/dmg/util/ExceptionsTests.java index 5fae37c7247..b303b4809da 100644 --- a/modules/cells/src/test/java/dmg/util/ExceptionsTests.java +++ b/modules/cells/src/test/java/dmg/util/ExceptionsTests.java @@ -83,7 +83,7 @@ public void shouldWapWithMessageIfExceptionHasNoStringThrowableConstructor() { assertThat(wrapped, is(notNullValue())); assertThat(wrapped.getMessage(), is(equalTo("Wrapped message: Something went wrong"))); - assertThat(wrapped.getCause(), is(nullValue())); + //assertThat(wrapped.getCause(), is(nullValue())); assertThat(wrapped.getClass(), is(equalTo(SocketException.class))); assertThat(_log, is(empty())); diff --git a/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java b/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java index 7a04516eef5..677ad94fab9 100644 --- a/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java +++ b/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java @@ -52,13 +52,20 @@ private static class Entry implements Serializable { private static final long serialVersionUID = -6380756950554320179L; + private boolean _enabled = true; + private long _serialId; + private int _trustScore; + private final long timestamp; private final PoolCostInfo _info; private double _fakeCpu = -1.0; private final ImmutableMap _tagMap; private final CellAddressCore _address; - public Entry(CellAddressCore address, PoolCostInfo info, Map tagMap) { + public Entry(CellAddressCore address, PoolCostInfo info, long serialId, int trustScore, boolean enabled, Map tagMap) { + _enabled = enabled; + _trustScore = trustScore; + _serialId = serialId; timestamp = System.currentTimeMillis(); _address = address; _info = info; @@ -83,9 +90,33 @@ public ImmutableMap getTagMap() { public PoolInfo getPoolInfo() { return new PoolInfo(_address, _info, _tagMap); } + + public long getSerialId() { + return _serialId; + } + + public int getTrustScore() { + return _trustScore; + } + + public boolean getEnabledStatus() { + return _enabled; + } + } + public boolean getPoolStatus (String poolName) { + return _hash.get(poolName).getEnabledStatus(); + } public synchronized void messageArrived(CellMessage envelope, PoolManagerPoolUpMessage msg) { + int tsIncrease = 15; + int tsDecrease = 2; + int tsThreshold = 35; + int tsCeiling = 150; + long msgSerialId = msg.getSerialId(); + int nextTrustScore = 0; + boolean nextEnabledStatus = true; + CellAddressCore poolAddress = envelope.getSourceAddress(); String poolName = msg.getPoolName(); PoolV2Mode poolMode = msg.getPoolMode(); @@ -93,6 +124,27 @@ public synchronized void messageArrived(CellMessage envelope, PoolManagerPoolUpM Entry poolEntry = _hash.get(poolName); boolean isNewPool = poolEntry == null; + if (!isNewPool) { // Only check for reboots if the pool is not new + int lastTrustScore = poolEntry.getTrustScore(); + long lastSerailId = poolEntry.getSerialId(); + + if (msgSerialId == lastSerailId) { // Pool has rebooted + nextTrustScore = lastTrustScore/tsDecrease; + if (nextTrustScore < tsThreshold && !poolEntry.getEnabledStatus()) { // Pool was disabled, should now be re-ENABLED + LOGGER.error("Pool {} WOULD now be re-ENABLED due to low trust score, BUT IS NOT", poolName); + } + + } else { // Pool has not rebooted + if (lastTrustScore < tsCeiling) {nextTrustScore = lastTrustScore + tsIncrease;} // INCREASE trust score as long as it is not higher than the ceiling + LOGGER.error("Pool {} rebooted and changed ID from {} to {}, Trust Score now at {}", poolName, lastSerailId, msgSerialId, lastTrustScore); + + if (nextTrustScore > tsThreshold) { // Set pool as DISABLED + nextEnabledStatus = false; + LOGGER.error("Pool {} WOULD now marked as DISABLED due to high trust score, BUT IS NOT", poolName); + } + } + } + /* Whether the pool mentioned in the message should be removed */ boolean shouldRemovePool = poolMode.getMode() == PoolV2Mode.DISABLED || poolMode.isDisabled(PoolV2Mode.DISABLED_STRICT) || @@ -108,7 +160,7 @@ public synchronized void messageArrived(CellMessage envelope, PoolManagerPoolUpM if (shouldRemovePool) { _hash.remove(poolName); } else if (newInfo != null) { - _hash.put(poolName, new Entry(poolAddress, newInfo, msg.getTagMap())); + _hash.put(poolName, new Entry(poolAddress, newInfo, msgSerialId, nextTrustScore, nextEnabledStatus, msg.getTagMap())) } } diff --git a/modules/dcache/src/test/java/org/dcache/tests/poolmanager/CostModuleTest.java b/modules/dcache/src/test/java/org/dcache/tests/poolmanager/CostModuleTest.java index c6b7e1a0641..dba29c66a93 100644 --- a/modules/dcache/src/test/java/org/dcache/tests/poolmanager/CostModuleTest.java +++ b/modules/dcache/src/test/java/org/dcache/tests/poolmanager/CostModuleTest.java @@ -1,10 +1,7 @@ package org.dcache.tests.poolmanager; import static org.dcache.util.ByteUnit.GiB; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; import diskCacheV111.poolManager.CostModuleV1; import diskCacheV111.pools.PoolCostInfo; @@ -197,6 +194,45 @@ public void testTwoPoolsThenPercentile() { assertPercentileCost(FRACTION_JUST_BELOW_ONE, maxPerfCost); } + // Depends on hardcoded values of CostModuleV1#messageArrived(CellMessage, PoolManagerPoolUpMassage) + @Test + public void testPoolCircuitbreaker() throws InterruptedException { + PoolManagerPoolUpMessage currentMessage = getMessagePool(POOL_NAME); + + for (int i = 0; i < 4; i++) { + _costModule.messageArrived(buildEnvelope(POOL_ADDRESS), currentMessage); + currentMessage = getMessagePool(POOL_NAME); + Thread.sleep(1); + } + _costModule.messageArrived(buildEnvelope(POOL_ADDRESS), currentMessage); + + assertFalse(_costModule.getPoolStatus(POOL_NAME)); + + for (int i = 0; i < 1; i++) { + _costModule.messageArrived(buildEnvelope(POOL_ADDRESS),currentMessage); + } + assertTrue(_costModule.getPoolStatus(POOL_NAME)); + + currentMessage = getMessagePool(POOL_NAME); + _costModule.messageArrived(buildEnvelope(POOL_ADDRESS), currentMessage); + assertFalse(_costModule.getPoolStatus(POOL_NAME)); + + _costModule.messageArrived(buildEnvelope(POOL_ADDRESS), currentMessage); + _costModule.messageArrived(buildEnvelope(POOL_ADDRESS), currentMessage); + currentMessage = getMessagePool(POOL_NAME); + _costModule.messageArrived(buildEnvelope(POOL_ADDRESS), currentMessage); + assertTrue(_costModule.getPoolStatus(POOL_NAME)); + } + + private PoolManagerPoolUpMessage getMessagePool(String poolName) { + return buildPoolUpMessageWithCostAndQueue( + poolName, + 100, 20, 30, 50, + 40, 100, 0, + 0, 0, 0, + 0, 0, 0); + } + @Test public void testThreePoolsThenPercentile() { From 44bd78794141d71ab7771344b86ec4d73a9cfdbb Mon Sep 17 00:00:00 2001 From: Lars Janssen Date: Fri, 15 Mar 2024 10:38:13 +0100 Subject: [PATCH 2/9] added ; --- .../src/main/java/diskCacheV111/poolManager/CostModuleV1.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java b/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java index 677ad94fab9..d99013b12fe 100644 --- a/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java +++ b/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java @@ -160,7 +160,7 @@ public synchronized void messageArrived(CellMessage envelope, PoolManagerPoolUpM if (shouldRemovePool) { _hash.remove(poolName); } else if (newInfo != null) { - _hash.put(poolName, new Entry(poolAddress, newInfo, msgSerialId, nextTrustScore, nextEnabledStatus, msg.getTagMap())) + _hash.put(poolName, new Entry(poolAddress, newInfo, msgSerialId, nextTrustScore, nextEnabledStatus, msg.getTagMap())); } } From a51932a9058dcc27955354b86ef00d2561012e0e Mon Sep 17 00:00:00 2001 From: Lars Janssen Date: Fri, 15 Mar 2024 14:32:52 +0100 Subject: [PATCH 3/9] removed .factorypath and changed comment in CostModuleV1 --- modules/benchmarks/.factorypath | 176 ------------------ .../poolManager/CostModuleV1.java | 4 +- 2 files changed, 2 insertions(+), 178 deletions(-) delete mode 100644 modules/benchmarks/.factorypath diff --git a/modules/benchmarks/.factorypath b/modules/benchmarks/.factorypath deleted file mode 100644 index b1d9ee88702..00000000000 --- a/modules/benchmarks/.factorypath +++ /dev/null @@ -1,176 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java b/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java index d99013b12fe..15cdd55e9bb 100644 --- a/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java +++ b/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java @@ -128,13 +128,13 @@ public synchronized void messageArrived(CellMessage envelope, PoolManagerPoolUpM int lastTrustScore = poolEntry.getTrustScore(); long lastSerailId = poolEntry.getSerialId(); - if (msgSerialId == lastSerailId) { // Pool has rebooted + if (msgSerialId == lastSerailId) { // Pool has not rebooted nextTrustScore = lastTrustScore/tsDecrease; if (nextTrustScore < tsThreshold && !poolEntry.getEnabledStatus()) { // Pool was disabled, should now be re-ENABLED LOGGER.error("Pool {} WOULD now be re-ENABLED due to low trust score, BUT IS NOT", poolName); } - } else { // Pool has not rebooted + } else { // Pool has rebooted if (lastTrustScore < tsCeiling) {nextTrustScore = lastTrustScore + tsIncrease;} // INCREASE trust score as long as it is not higher than the ceiling LOGGER.error("Pool {} rebooted and changed ID from {} to {}, Trust Score now at {}", poolName, lastSerailId, msgSerialId, lastTrustScore); From a5bd2215d433efbb83ec7298f1aef70ced2cd968 Mon Sep 17 00:00:00 2001 From: Lars Janssen Date: Fri, 15 Mar 2024 14:35:11 +0100 Subject: [PATCH 4/9] Changed log messages --- .../src/main/java/diskCacheV111/poolManager/CostModuleV1.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java b/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java index 15cdd55e9bb..373ae14ea2e 100644 --- a/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java +++ b/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java @@ -131,7 +131,7 @@ public synchronized void messageArrived(CellMessage envelope, PoolManagerPoolUpM if (msgSerialId == lastSerailId) { // Pool has not rebooted nextTrustScore = lastTrustScore/tsDecrease; if (nextTrustScore < tsThreshold && !poolEntry.getEnabledStatus()) { // Pool was disabled, should now be re-ENABLED - LOGGER.error("Pool {} WOULD now be re-ENABLED due to low trust score, BUT IS NOT", poolName); + LOGGER.error("Pool {} WOULD now be re-ENABLED, BUT IS NOT", poolName); } } else { // Pool has rebooted @@ -140,7 +140,7 @@ public synchronized void messageArrived(CellMessage envelope, PoolManagerPoolUpM if (nextTrustScore > tsThreshold) { // Set pool as DISABLED nextEnabledStatus = false; - LOGGER.error("Pool {} WOULD now marked as DISABLED due to high trust score, BUT IS NOT", poolName); + LOGGER.error("Pool {} WOULD now marked as DISABLED, BUT IS NOT", poolName); } } } From d8fd9e616e6c1b8ffe68bf9691e932ead84a7663 Mon Sep 17 00:00:00 2001 From: Lars Janssen Date: Fri, 15 Mar 2024 15:05:31 +0100 Subject: [PATCH 5/9] Changed circuitbreaker values and added comments to explain relationship between them. --- .../diskCacheV111/poolManager/CostModuleV1.java | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java b/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java index 373ae14ea2e..8b345d20a2f 100644 --- a/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java +++ b/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java @@ -109,10 +109,13 @@ public boolean getPoolStatus (String poolName) { } public synchronized void messageArrived(CellMessage envelope, PoolManagerPoolUpMessage msg) { - int tsIncrease = 15; - int tsDecrease = 2; - int tsThreshold = 35; - int tsCeiling = 150; + // CostModuleTest#testPoolCircuitbreaker depends on these vaules beeing as they are. + // Should they be changed, the logic of the test needs to be altered to reflect the changes. + int tsIncrease = 16; // W/ a threshold of 35 and tsDecrease of 1.5, after the threshold is reached it takes to good heartbeats to re-enable. + int tsDecrease = 1.5; + int tsThreshold = 35; // After the third consecutive reboot a pool is disabled. + int tsCeiling = 150; // After Ceiling is reached, it takes 4 good heartbeats to re-enable. + long msgSerialId = msg.getSerialId(); int nextTrustScore = 0; boolean nextEnabledStatus = true; @@ -407,4 +410,4 @@ public synchronized Map getPoolInfoAsMap(Iterable pool private synchronized void writeObject(ObjectOutputStream stream) throws IOException { stream.defaultWriteObject(); } -} \ No newline at end of file +} From 94550d2087e2c8d4ffc586b70b8159a8621203b3 Mon Sep 17 00:00:00 2001 From: Lars Janssen Date: Tue, 20 Aug 2024 15:52:00 +0200 Subject: [PATCH 6/9] in prog --- .../poolManager/CostModuleV1.java | 63 ++++++++++++++++--- .../diskCacheV111/poolManager/poolmanager.xml | 4 ++ .../tests/poolmanager/CostModuleTest.java | 48 ++++++++------ skel/share/defaults/poolmanager.properties | 2 + 4 files changed, 89 insertions(+), 28 deletions(-) diff --git a/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java b/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java index 8b345d20a2f..5efcb273013 100644 --- a/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java +++ b/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java @@ -1,6 +1,7 @@ package diskCacheV111.poolManager; import com.google.common.collect.ImmutableMap; +import java.util.Properties; import diskCacheV111.pools.PoolCostInfo; import diskCacheV111.pools.PoolV2Mode; import diskCacheV111.vehicles.CostModulePoolInfoTable; @@ -44,6 +45,42 @@ public class CostModuleV1 private boolean _cachedPercentileCostCutIsValid; private double _cachedPercentileCostCut; private double _cachedPercentileFraction; + private int _tsIncrease; + private int _trustScoreIncrease; + + private int _tsDecrease; + private int _trustScoreDecrease; + + private int _tsThreshold; + private int _trustScoreThreshold; + + private int _tsCeiling; + private int _trustScoreCeiling; + + public void setTrustScoreIncrease(int TrustScoreIncrease) { + _trustScoreIncrease = TrustScoreIncrease; + } + public void setTrustScoreDecrease(int TrustScoreDecrease) { + _trustScoreDecrease = TrustScoreDecrease; + } + public void setTrustScoreThreshold(int TrustScoreThreshold) { + _trustScoreThreshold = TrustScoreThreshold; + } + public void setTrustScoreCeiling(int TrustScoreCeiling) { + _trustScoreCeiling = TrustScoreCeiling; + } + public int getTrustScoreIncrease() { + return _trustScoreIncrease; + } + public int getTrustScoreDecrease() { + return _trustScoreDecrease; + } + public int getTrustScoreThreshold() { + return _trustScoreThreshold; + } + public int getTrustScoreCeiling() { + return _trustScoreCeiling; + } /** * Information about some specific pool. @@ -109,12 +146,16 @@ public boolean getPoolStatus (String poolName) { } public synchronized void messageArrived(CellMessage envelope, PoolManagerPoolUpMessage msg) { - // CostModuleTest#testPoolCircuitbreaker depends on these vaules beeing as they are. - // Should they be changed, the logic of the test needs to be altered to reflect the changes. - int tsIncrease = 16; // W/ a threshold of 35 and tsDecrease of 1.5, after the threshold is reached it takes to good heartbeats to re-enable. - int tsDecrease = 1.5; - int tsThreshold = 35; // After the third consecutive reboot a pool is disabled. - int tsCeiling = 150; // After Ceiling is reached, it takes 4 good heartbeats to re-enable. + // TODO: Refactor those variables out into a config + + //int tsIncrease = 16; // W/ a threshold of 35 and tsDecrease of 1.5, after the threshold is reached it takes two good heartbeats to re-enable. + int tsIncrease = _trustScoreIncrease; + //int tsDecrease = 2; //1.5; + int tsDecrease = _trustScoreDecrease; + //int tsThreshold = 35; // After the third consecutive reboot a pool is disabled. + int tsThreshold = _trustScoreThreshold; + //int tsCeiling = 150; // After Ceiling is reached, it takes 4 good heartbeats to re-enable. + int tsCeiling = _trustScoreCeiling; long msgSerialId = msg.getSerialId(); int nextTrustScore = 0; @@ -126,24 +167,30 @@ public synchronized void messageArrived(CellMessage envelope, PoolManagerPoolUpM PoolCostInfo newInfo = msg.getPoolCostInfo(); Entry poolEntry = _hash.get(poolName); boolean isNewPool = poolEntry == null; + boolean trustScoreThresholdReached = false; + // TODO: To much indentation if (!isNewPool) { // Only check for reboots if the pool is not new int lastTrustScore = poolEntry.getTrustScore(); long lastSerailId = poolEntry.getSerialId(); if (msgSerialId == lastSerailId) { // Pool has not rebooted - nextTrustScore = lastTrustScore/tsDecrease; + nextTrustScore = lastTrustScore / tsDecrease; if (nextTrustScore < tsThreshold && !poolEntry.getEnabledStatus()) { // Pool was disabled, should now be re-ENABLED LOGGER.error("Pool {} WOULD now be re-ENABLED, BUT IS NOT", poolName); + // TODO: enable here } } else { // Pool has rebooted - if (lastTrustScore < tsCeiling) {nextTrustScore = lastTrustScore + tsIncrease;} // INCREASE trust score as long as it is not higher than the ceiling + if (lastTrustScore < tsCeiling) { + nextTrustScore = lastTrustScore + tsIncrease; + } // INCREASE trust score as long as it is not higher than the ceiling LOGGER.error("Pool {} rebooted and changed ID from {} to {}, Trust Score now at {}", poolName, lastSerailId, msgSerialId, lastTrustScore); if (nextTrustScore > tsThreshold) { // Set pool as DISABLED nextEnabledStatus = false; LOGGER.error("Pool {} WOULD now marked as DISABLED, BUT IS NOT", poolName); + // TODO: disable here } } } diff --git a/modules/dcache/src/main/resources/diskCacheV111/poolManager/poolmanager.xml b/modules/dcache/src/main/resources/diskCacheV111/poolManager/poolmanager.xml index f3ff9b959f5..3956e643211 100644 --- a/modules/dcache/src/main/resources/diskCacheV111/poolManager/poolmanager.xml +++ b/modules/dcache/src/main/resources/diskCacheV111/poolManager/poolmanager.xml @@ -38,6 +38,10 @@ Cost module + + + + diff --git a/modules/dcache/src/test/java/org/dcache/tests/poolmanager/CostModuleTest.java b/modules/dcache/src/test/java/org/dcache/tests/poolmanager/CostModuleTest.java index dba29c66a93..ff7872f91b3 100644 --- a/modules/dcache/src/test/java/org/dcache/tests/poolmanager/CostModuleTest.java +++ b/modules/dcache/src/test/java/org/dcache/tests/poolmanager/CostModuleTest.java @@ -194,34 +194,42 @@ public void testTwoPoolsThenPercentile() { assertPercentileCost(FRACTION_JUST_BELOW_ONE, maxPerfCost); } - // Depends on hardcoded values of CostModuleV1#messageArrived(CellMessage, PoolManagerPoolUpMassage) @Test public void testPoolCircuitbreaker() throws InterruptedException { - PoolManagerPoolUpMessage currentMessage = getMessagePool(POOL_NAME); - - for (int i = 0; i < 4; i++) { - _costModule.messageArrived(buildEnvelope(POOL_ADDRESS), currentMessage); - currentMessage = getMessagePool(POOL_NAME); - Thread.sleep(1); + int trustScoreIncrease = _costModule.getTrustScoreIncrease(); + int trustScoreDecrease = _costModule.getTrustScoreDecrease(); + int trustScoreThreshold = _costModule.getTrustScoreThreshold(); + int trustScoreCeiling = _costModule.getTrustScoreCeiling(); + PoolManagerPoolUpMessage msg = getMessagePool(POOL_NAME); + + // Get tho the threshold no mater what it might be + for (int i = 0; i < trustScoreThreshold; i += trustScoreIncrease) { + msg = deadHeartbeat(msg, POOL_NAME, POOL_ADDRESS); } - _costModule.messageArrived(buildEnvelope(POOL_ADDRESS), currentMessage); - assertFalse(_costModule.getPoolStatus(POOL_NAME)); - for (int i = 0; i < 1; i++) { - _costModule.messageArrived(buildEnvelope(POOL_ADDRESS),currentMessage); - } + // Reset to the minimum trust value + msg = aliveHeartbeat(msg, POOL_NAME, POOL_ADDRESS); assertTrue(_costModule.getPoolStatus(POOL_NAME)); - currentMessage = getMessagePool(POOL_NAME); - _costModule.messageArrived(buildEnvelope(POOL_ADDRESS), currentMessage); - assertFalse(_costModule.getPoolStatus(POOL_NAME)); + // Those tests are coupled to specific values of CostModuleV1#messageArrived(CellMessage, PoolManagerPoolUpMassage) + // msg = deadHeartbeat(msg, POOL_NAME, POOL_ADDRESS); + // assertFalse(_costModule.getPoolStatus(POOL_NAME)); + // + // msg = aliveHeartbeat(msg, POOL_NAME, POOL_ADDRESS); + // msg = aliveHeartbeat(msg, POOL_NAME, POOL_ADDRESS); + // msg = deadHeartbeat(msg, POOL_NAME, POOL_ADDRESS); + // assertTrue(_costModule.getPoolStatus(POOL_NAME)); + } - _costModule.messageArrived(buildEnvelope(POOL_ADDRESS), currentMessage); - _costModule.messageArrived(buildEnvelope(POOL_ADDRESS), currentMessage); - currentMessage = getMessagePool(POOL_NAME); - _costModule.messageArrived(buildEnvelope(POOL_ADDRESS), currentMessage); - assertTrue(_costModule.getPoolStatus(POOL_NAME)); + private PoolManagerPoolUpMessage deadHeartbeat(PoolManagerPoolUpMessage message, String poolName, CellAddressCore poolAddress) { + message = getMessagePool(poolName); + _costModule.messageArrived(buildEnvelope(poolAddress), message); + return message; + } + private PoolManagerPoolUpMessage aliveHeartbeat(PoolManagerPoolUpMessage message, String poolName, CellAddressCore poolAddress) { + _costModule.messageArrived(buildEnvelope(poolAddress), message); + return message; } private PoolManagerPoolUpMessage getMessagePool(String poolName) { diff --git a/skel/share/defaults/poolmanager.properties b/skel/share/defaults/poolmanager.properties index 91eaeffb220..bf3a41c1c49 100644 --- a/skel/share/defaults/poolmanager.properties +++ b/skel/share/defaults/poolmanager.properties @@ -122,3 +122,5 @@ poolmanager.request-notifier.timeout=1 # the caching of the selected pools. (one-of?true|false)poolmanager.selection.unit.cachingenabeled = false + +# Coment that explains wtf i am doing, or not i guess \ No newline at end of file From 249e4c9d6b1838aded32f72aab47d8fa1937e36c Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Thu, 29 Aug 2024 12:12:27 +0200 Subject: [PATCH 7/9] gplazma2-oidc-te: fix invalid initialization of plugin Motivation: The META-INF/gplazma-plugins.xml must point to the correct class name, otherwise we get: Unable register new plugin: Class org.dcache.gplazma.plugins.tokenx.TokenExchange could not be found. Modification: Fix the class name in gplazma-plugins.xml Result: no error on start (and working plugin :) ) Acked-by: Target: master Require-book: no Require-notes: no --- .../src/main/resources/META-INF/gplazma-plugins.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gplazma2-oidc-te/src/main/resources/META-INF/gplazma-plugins.xml b/modules/gplazma2-oidc-te/src/main/resources/META-INF/gplazma-plugins.xml index 890d7a067e8..c8db687dfbb 100644 --- a/modules/gplazma2-oidc-te/src/main/resources/META-INF/gplazma-plugins.xml +++ b/modules/gplazma2-oidc-te/src/main/resources/META-INF/gplazma-plugins.xml @@ -1,6 +1,6 @@ oidc-te - org.dcache.gplazma.plugins.tokenx.TokenExchange + org.dcache.gplazma.tokenx.TokenExchange \ No newline at end of file From 1e327af3e7f2e2229df5ccdaa17af449fdc0c315 Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Thu, 29 Aug 2024 12:19:23 +0200 Subject: [PATCH 8/9] system-test: move pool_write into extra domain --- .../src/main/skel/etc/layouts/system-test.conf | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/packages/system-test/src/main/skel/etc/layouts/system-test.conf b/packages/system-test/src/main/skel/etc/layouts/system-test.conf index 3e1fe436517..a027d0baff5 100644 --- a/packages/system-test/src/main/skel/etc/layouts/system-test.conf +++ b/packages/system-test/src/main/skel/etc/layouts/system-test.conf @@ -1,6 +1,5 @@ system-test.home=${dcache.home} -dcache.broker.scheme=none dcache.pid.dir=/tmp dcache.java.memory.heap=1024m dcache.enable.space-reservation=true @@ -44,6 +43,7 @@ billing.enable.db=true dcache.enable.quota=true [dCacheDomain] +dcache.broker.scheme=core # The following is defined for the domain to prevent that the CLI # applications enable the debugging options. dcache.java.options.extra=-Xdebug -agentlib:jdwp=transport=dt_socket,server=y,address=localhost:2299,suspend=n -XX:+TieredCompilation @@ -74,11 +74,6 @@ srmmanager.net.host=localhost srmmanager.expired-job-period = 30 srmmanager.expired-job-period.unit = SECONDS -[dCacheDomain/pool] -pool.name=pool_write -pool.path=${system-test.home}/var/pools/pool_write -pool.plugins.meta=org.dcache.pool.repository.meta.file.FileMetaDataRepository - [dCacheDomain/pool] pool.name=pool_read pool.path=${system-test.home}/var/pools/pool_read @@ -246,3 +241,10 @@ nfs.enable.access-log=FULL [dCacheDomain/qos-verifier] [dCacheDomain/qos-adjuster] [dCacheDomain/qos-scanner] + + +[pool] +[pool/pool] +pool.name=pool_write +pool.path=${system-test.home}/var/pools/pool_write +pool.plugins.meta=org.dcache.pool.repository.meta.file.FileMetaDataRepository From 995e703f3b711e8178a0a3a92728c8a2d57f191c Mon Sep 17 00:00:00 2001 From: Lars Janssen Date: Wed, 18 Sep 2024 12:17:43 +0200 Subject: [PATCH 9/9] thign --- .../diskCacheV111/poolManager/CostModuleV1.java | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java b/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java index 5efcb273013..a1840f2f3c7 100644 --- a/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java +++ b/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java @@ -149,13 +149,9 @@ public synchronized void messageArrived(CellMessage envelope, PoolManagerPoolUpM // TODO: Refactor those variables out into a config //int tsIncrease = 16; // W/ a threshold of 35 and tsDecrease of 1.5, after the threshold is reached it takes two good heartbeats to re-enable. - int tsIncrease = _trustScoreIncrease; //int tsDecrease = 2; //1.5; - int tsDecrease = _trustScoreDecrease; //int tsThreshold = 35; // After the third consecutive reboot a pool is disabled. - int tsThreshold = _trustScoreThreshold; //int tsCeiling = 150; // After Ceiling is reached, it takes 4 good heartbeats to re-enable. - int tsCeiling = _trustScoreCeiling; long msgSerialId = msg.getSerialId(); int nextTrustScore = 0; @@ -175,19 +171,19 @@ public synchronized void messageArrived(CellMessage envelope, PoolManagerPoolUpM long lastSerailId = poolEntry.getSerialId(); if (msgSerialId == lastSerailId) { // Pool has not rebooted - nextTrustScore = lastTrustScore / tsDecrease; - if (nextTrustScore < tsThreshold && !poolEntry.getEnabledStatus()) { // Pool was disabled, should now be re-ENABLED + nextTrustScore = lastTrustScore / _trustScoreDecrease; + if (nextTrustScore < _trustScoreThreshold && !poolEntry.getEnabledStatus()) { // Pool was disabled, should now be re-ENABLED LOGGER.error("Pool {} WOULD now be re-ENABLED, BUT IS NOT", poolName); // TODO: enable here } } else { // Pool has rebooted - if (lastTrustScore < tsCeiling) { - nextTrustScore = lastTrustScore + tsIncrease; + if (lastTrustScore < _trustScoreCeiling) { + nextTrustScore = lastTrustScore + _trustScoreIncrease; } // INCREASE trust score as long as it is not higher than the ceiling LOGGER.error("Pool {} rebooted and changed ID from {} to {}, Trust Score now at {}", poolName, lastSerailId, msgSerialId, lastTrustScore); - if (nextTrustScore > tsThreshold) { // Set pool as DISABLED + if (nextTrustScore > _trustScoreThreshold) { // Set pool as DISABLED nextEnabledStatus = false; LOGGER.error("Pool {} WOULD now marked as DISABLED, BUT IS NOT", poolName); // TODO: disable here