Skip to content

Commit

Permalink
Add method keySet(prefix) and metadata.copy
Browse files (browse the repository at this point in the history)
Signed-off-by: Michael Dinzinger <[email protected]>
  • Loading branch information
michaeldinzinger committed Nov 12, 2023
1 parent 374e0bb commit 4c762b4
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 37 deletions.
18 changes: 18 additions & 0 deletions core/src/main/java/com/digitalpebble/stormcrawler/Metadata.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang.StringUtils;

/** Wrapper around Map &lt;String,String[]&gt; * */
Expand Down Expand Up @@ -208,6 +209,13 @@ public Set<String> keySet() {
return md.keySet();
}

/**
 * Collects every metadata key that begins with the supplied prefix.
 *
 * @param prefix leading string a key must start with to be selected
 * @return a newly built set holding the matching keys; empty when nothing matches
 */
public Set<String> keySet(String prefix) {
    Set<String> matches = new java.util.HashSet<>();
    for (String candidate : md.keySet()) {
        if (candidate.startsWith(prefix)) {
            matches.add(candidate);
        }
    }
    return matches;
}

/** Returns the first non empty value found for the keys or null if none found. */
public static String getFirstValue(Metadata md, String... keys) {
for (String key : keys) {
Expand All @@ -218,6 +226,16 @@ public static String getFirstValue(Metadata md, String... keys) {
return null;
}

/**
 * Transfers the values stored under one key of this metadata into another metadata object.
 *
 * <p>The values are read with {@code getValues(key)} and handed to the target's {@code
 * setValues(key, ...)} under the same key.
 *
 * @param targetMetadata the metadata object receiving the values
 * @param key the key whose values are transferred
 */
public void copy(Metadata targetMetadata, String key) {
    final String[] sourceValues = getValues(key);
    targetMetadata.setValues(key, sourceValues);
}

/** Returns the underlying Map * */
public Map<String, String[]> asMap() {
return md;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -619,8 +619,7 @@ public void run() {

// get any metrics from the protocol metadata
// expect Longs
response.getMetadata().keySet().stream()
.filter(s -> s.startsWith("metrics."))
response.getMetadata().keySet("metrics.").stream()
.forEach(
s ->
averagedMetrics
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -425,8 +425,7 @@ public void execute(Tuple input) {
final int byteLength = response.getContent().length;

// get any metrics from the protocol metadata
response.getMetadata().keySet().stream()
.filter(s -> s.startsWith("metrics."))
response.getMetadata().keySet("metrics.").stream()
.forEach(
s ->
averagedMetrics
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,24 +163,14 @@ public Metadata filter(Metadata metadata) {
private Metadata _filter(Metadata metadata, Set<String> filter) {
Metadata filtered_md = new Metadata();

Set<String> filterKeys = new HashSet<>();
for (String key : filter) {
if (key.endsWith("*")) {
String prefix = key.substring(0, key.length() - 1);
for (String mdKey : metadata.keySet()) {
if (mdKey.startsWith(prefix)) {
filterKeys.add(mdKey);
}
for (String k : metadata.keySet(prefix)) {
metadata.copy(filtered_md, k);
}
} else {
filterKeys.add(key);
}

for (String filterKey : filterKeys) {
String[] values = metadata.getValues(filterKey);
if (values != null) {
filtered_md.setValues(filterKey, values);
}
metadata.copy(filtered_md, key);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package com.digitalpebble.stormcrawler;

import org.junit.Assert;
import org.junit.Test;

public class MetadataTest {

@Test
public void testCopyWithPrefix() {
Metadata metadata = new Metadata();
metadata.addValue("fetch.statusCode", "500");
metadata.addValue("fetch.error.count", "2");
metadata.addValue("fetch.exception", "java.lang.Exception");
metadata.addValue("fetchInterval", "200");
metadata.addValue("isFeed", "true");
metadata.addValue("depth", "1");

Metadata copy = new Metadata();
for (String key : metadata.keySet("fetch.")) {
metadata.copy(copy, key);
}

Assert.assertEquals(3, copy.size());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -239,10 +239,7 @@ public void testNonSitemapParsing() throws IOException {
}

private void assertNewsAttributes(Metadata metadata) {
long numAttributes =
metadata.keySet().stream()
.filter(key -> key.startsWith(Extension.NEWS.name() + "."))
.count();
long numAttributes = metadata.keySet(Extension.NEWS.name() + ".").size();
Assert.assertEquals(7, numAttributes);
Assert.assertEquals(
"The Example Times", metadata.getFirstValue(Extension.NEWS.name() + "." + "name"));
Expand All @@ -265,10 +262,7 @@ private void assertNewsAttributes(Metadata metadata) {
}

private void assertImageAttributes(Metadata metadata) {
long numAttributes =
metadata.keySet().stream()
.filter(key -> key.startsWith(Extension.IMAGE.name() + "."))
.count();
long numAttributes = metadata.keySet(Extension.IMAGE.name() + ".").size();
Assert.assertEquals(5, numAttributes);
Assert.assertEquals(
"This is the caption.",
Expand All @@ -288,10 +282,7 @@ private void assertImageAttributes(Metadata metadata) {
}

private void assertLinksAttributes(Metadata metadata) {
long numAttributes =
metadata.keySet().stream()
.filter(key -> key.startsWith(Extension.LINKS.name() + "."))
.count();
long numAttributes = metadata.keySet(Extension.LINKS.name() + ".").size();
Assert.assertEquals(3, numAttributes);
Assert.assertEquals(
"alternate", metadata.getFirstValue(Extension.LINKS.name() + "." + "params.rel"));
Expand All @@ -303,10 +294,7 @@ private void assertLinksAttributes(Metadata metadata) {
}

private void assertVideoAttributes(Metadata metadata) {
long numAttributes =
metadata.keySet().stream()
.filter(key -> key.startsWith(Extension.VIDEO.name() + "."))
.count();
long numAttributes = metadata.keySet(Extension.VIDEO.name() + ".").size();
Assert.assertEquals(20, numAttributes);
Assert.assertEquals(
"http://www.example.com/thumbs/123.jpg",
Expand Down Expand Up @@ -362,10 +350,7 @@ private void assertVideoAttributes(Metadata metadata) {
}

private void assertMobileAttributes(Metadata metadata) {
long numAttributes =
metadata.keySet().stream()
.filter(key -> key.startsWith(Extension.MOBILE.name() + "."))
.count();
long numAttributes = metadata.keySet(Extension.MOBILE.name() + ".").size();
Assert.assertEquals(0, numAttributes);
}
}

0 comments on commit 4c762b4

Please sign in to comment.