Skip to content

Commit

Permalink
HCD-75: Fix and improve EstimatedRowCount metric (#1604)
Browse files Browse the repository at this point in the history
This patch introduces two changes:
- it adds a reading group to guard against sweeping the memtable which
the metric
is going to potentially iterate through (preventing crashes).
- changes the metric calculation by using an estimate (used already by
SAI query planner) instead of iterating
through the whole memtable (which is quite a heavy operation).
  • Loading branch information
szymon-miezal authored Feb 26, 2025
1 parent b6930b5 commit 5ed91d7
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 52 deletions.
16 changes: 0 additions & 16 deletions src/java/org/apache/cassandra/db/memtable/TrieMemtable.java
Original file line number Diff line number Diff line change
Expand Up @@ -300,22 +300,6 @@ public int getShardCount()
return shards.length;
}

public long rowCount(final ColumnFilter columnFilter, final DataRange dataRange)
{
int total = 0;
for (MemtableUnfilteredPartitionIterator iter = makePartitionIterator(columnFilter, dataRange); iter.hasNext(); )
{
for (UnfilteredRowIterator it = iter.next(); it.hasNext(); )
{
Unfiltered uRow = it.next();
if (uRow.isRow())
total++;
}
}

return total;
}

@Override
public long getEstimatedAverageRowSize()
{
Expand Down
29 changes: 20 additions & 9 deletions src/java/org/apache/cassandra/metrics/TableMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
import org.apache.cassandra.utils.Hex;
import org.apache.cassandra.utils.MovingAverage;
import org.apache.cassandra.utils.Pair;
import org.apache.cassandra.utils.concurrent.OpOrder;
import org.apache.cassandra.utils.concurrent.Refs;

import static org.apache.cassandra.io.sstable.format.SSTableReader.selectOnlyBigTableReaders;
Expand Down Expand Up @@ -667,28 +668,38 @@ public Long loadValue()
() -> combineHistograms(cfs.getSSTables(SSTableSet.CANONICAL),
SSTableReader::getEstimatedCellPerPartitionCount), null);

estimatedRowCount = createTableGauge("EstimatedRowCount", "EstimatedRowCount", new Gauge<Long>()
estimatedRowCount = createTableGauge("EstimatedRowCount", "EstimatedRowCount", new CachedGauge<>(1, TimeUnit.SECONDS)
{
public Long getValue()
public Long loadValue()
{
long memtableRows = 0;
for (Memtable memtable : cfs.getTracker().getView().getAllMemtables())
OpOrder.Group readGroup = null;
try
{
if (memtable instanceof TrieMemtable)
for (Memtable memtable : cfs.getTracker().getView().getAllMemtables())
{
ColumnFilter.Builder builder = ColumnFilter.allRegularColumnsBuilder(cfs.metadata(), true);
memtableRows += ((TrieMemtable) memtable).rowCount(builder.build(), DataRange.allData(cfs.getPartitioner()));
if (readGroup == null)
{
readGroup = memtable.readOrdering().start();
}
memtableRows += Memtable.estimateRowCount(memtable);
}
}
finally
{
if (readGroup != null)
readGroup.close();
}

long sstableRows = 0;
try(ColumnFamilyStore.RefViewFragment refViewFragment = cfs.selectAndReference(View.selectFunction(SSTableSet.CANONICAL)))
{
long total = 0;
for (SSTableReader reader: refViewFragment.sstables)
{
total += reader.getTotalRows();
sstableRows += reader.getTotalRows();
}
return total + memtableRows;
}
return sstableRows + memtableRows;
}
}, null);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,10 @@
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.cql3.CQLTester;
import org.apache.cassandra.db.ColumnFamilyStore;
import org.apache.cassandra.db.DataRange;
import org.apache.cassandra.db.Keyspace;
import org.apache.cassandra.db.filter.ColumnFilter;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.cassandra.utils.memory.SlabAllocator;
import org.github.jamm.MemoryMeter;

import static org.assertj.core.api.Assertions.assertThat;

// Note: This test can be run in idea with the allocation type configured in the test yaml and memtable using the
// value memtableClass is initialized with.
public abstract class MemtableSizeTestBase extends CQLTester
Expand Down Expand Up @@ -213,27 +208,6 @@ public void testSize() throws Throwable
}
}

@Test
public void testRowCountInTrieMemtable() throws Throwable
{
buildAndFillTable("TrieMemtable");

String writeStatement = "INSERT INTO " + table + "(userid,picid,commentid)VALUES(?,?,?)";

Memtable memtable = cfs.getTracker().getView().getCurrentMemtable();
System.out.println("Writing " + partitions + " partitions of " + rowsPerPartition + " rows");
for (long i = 0; i < partitions; ++i)
{
for (long j = 0; j < rowsPerPartition; ++j)
execute(writeStatement, i, j, i + j);
}

assertThat(memtable).isExactlyInstanceOf(TrieMemtable.class);
ColumnFilter.Builder builder = ColumnFilter.allRegularColumnsBuilder(cfs.metadata(), true);
long rowCount = ((TrieMemtable)cfs.getTracker().getView().getCurrentMemtable()).rowCount(builder.build(), DataRange.allData(cfs.getPartitioner()));
Assert.assertEquals(rowCount, partitions*rowsPerPartition);
}

@Test
public void testRowSize() throws Throwable
{
Expand All @@ -253,4 +227,4 @@ public void testRowSize() throws Throwable
double expectedRowSize = (double) memtable.getLiveDataSize() / (partitions * rowsPerPartition);
Assert.assertEquals(expectedRowSize, rowSize, (partitions * rowsPerPartition) * 0.05); // 5% accuracy
}
}
}

0 comments on commit 5ed91d7

Please sign in to comment.