From 4b9da25314244a934ee46683499123581566b5c1 Mon Sep 17 00:00:00 2001 From: Laurent KLOCK Date: Wed, 20 Nov 2024 18:33:14 +0100 Subject: [PATCH] Add URL search filter on CountURLs and ListURLs with optional case sensitivity Signed-off-by: Laurent Klock --- .../urlfrontier/URLFrontierGrpc.java | 342 ++--- .../urlfrontier/Urlfrontier.java | 1099 +++++++++++++++-- API/urlfrontier.proto | 12 +- .../urlfrontier/client/CountURLs.java | 19 + service/pom.xml | 8 +- .../service/AbstractFrontierService.java | 51 +- .../service/MemoryFrontierServiceTest.java | 90 ++ .../service/RocksDBServiceTest.java | 70 ++ 8 files changed, 1420 insertions(+), 271 deletions(-) diff --git a/API/src/main/java/crawlercommons/urlfrontier/URLFrontierGrpc.java b/API/src/main/java/crawlercommons/urlfrontier/URLFrontierGrpc.java index da5b880..ae5a273 100644 --- a/API/src/main/java/crawlercommons/urlfrontier/URLFrontierGrpc.java +++ b/API/src/main/java/crawlercommons/urlfrontier/URLFrontierGrpc.java @@ -4,14 +4,14 @@ /** */ @javax.annotation.Generated( - value = "by gRPC proto compiler (version 1.66.0)", + value = "by gRPC proto compiler (version 1.50.2)", comments = "Source: urlfrontier.proto") @io.grpc.stub.annotations.GrpcGenerated public final class URLFrontierGrpc { private URLFrontierGrpc() {} - public static final java.lang.String SERVICE_NAME = "urlfrontier.URLFrontier"; + public static final String SERVICE_NAME = "urlfrontier.URLFrontier"; // Static method descriptors that strictly reflect the proto. private static volatile io.grpc.MethodDescriptor< @@ -938,7 +938,7 @@ public URLFrontierFutureStub newStub( } /** */ - public interface AsyncService { + public abstract static class URLFrontierImplBase implements io.grpc.BindableService { /** * @@ -947,7 +947,7 @@ public interface AsyncService { * * Return the list of nodes forming the cluster the current node belongs to * * */ - default void listNodes( + public void listNodes( crawlercommons.urlfrontier.Urlfrontier.Empty request, io.grpc.stub.StreamObserver responseObserver) { @@ -962,7 +962,7 @@ default void listNodes( * * Return the list of crawls handled by the frontier(s) * * */ - default void listCrawls( + public void listCrawls( crawlercommons.urlfrontier.Urlfrontier.Local request, io.grpc.stub.StreamObserver responseObserver) { @@ -977,7 +977,7 @@ default void listCrawls( * * Delete an entire crawl, returns the number of URLs removed this way * * */ - default void deleteCrawl( + public void deleteCrawl( crawlercommons.urlfrontier.Urlfrontier.DeleteCrawlMessage request, io.grpc.stub.StreamObserver responseObserver) { @@ -993,7 +993,7 @@ default void deleteCrawl( * by default the service will return up to 100 results from offset 0 and exclude inactive queues.* * */ - default void listQueues( + public void listQueues( crawlercommons.urlfrontier.Urlfrontier.Pagination request, io.grpc.stub.StreamObserver responseObserver) { @@ -1008,7 +1008,7 @@ default void listQueues( * * Stream URLs due for fetching from M queues with up to N items per queue * * */ - default void getURLs( + public void getURLs( crawlercommons.urlfrontier.Urlfrontier.GetParams request, io.grpc.stub.StreamObserver responseObserver) { @@ -1023,7 +1023,7 @@ default void getURLs( * * Push URL items to the server; they get created (if they don't already exist) in case of DiscoveredURLItems or updated if KnownURLItems * * */ - default io.grpc.stub.StreamObserver putURLs( + public io.grpc.stub.StreamObserver putURLs( io.grpc.stub.StreamObserver responseObserver) { return io.grpc.stub.ServerCalls.asyncUnimplementedStreamingCall( @@ -1037,7 +1037,7 @@ default io.grpc.stub.StreamObserver */ - default void getStats( + public void getStats( crawlercommons.urlfrontier.Urlfrontier.QueueWithinCrawlParams request, io.grpc.stub.StreamObserver responseObserver) { @@ -1052,7 +1052,7 @@ default void getStats( * * Delete the queue based on the key in parameter, returns the number of URLs removed this way * * */ - default void deleteQueue( + public void deleteQueue( crawlercommons.urlfrontier.Urlfrontier.QueueWithinCrawlParams request, io.grpc.stub.StreamObserver responseObserver) { @@ -1069,7 +1069,7 @@ default void deleteQueue( * indicated in argument is reached. This is useful for cases where a server returns a Retry-After for instance. * */ - default void blockQueueUntil( + public void blockQueueUntil( crawlercommons.urlfrontier.Urlfrontier.BlockQueueParams request, io.grpc.stub.StreamObserver responseObserver) { @@ -1084,7 +1084,7 @@ default void blockQueueUntil( * * De/activate the crawl. GetURLs will not return anything until SetActive is set to true. PutURLs will still take incoming data. * * */ - default void setActive( + public void setActive( crawlercommons.urlfrontier.Urlfrontier.Active request, io.grpc.stub.StreamObserver responseObserver) { @@ -1099,7 +1099,7 @@ default void setActive( * * Returns true if the crawl is active, false if it has been deactivated with SetActive(Boolean) * * */ - default void getActive( + public void getActive( crawlercommons.urlfrontier.Urlfrontier.Local request, io.grpc.stub.StreamObserver responseObserver) { @@ -1117,7 +1117,7 @@ default void getActive( * Usually informed by the delay setting of robots.txt. * */ - default void setDelay( + public void setDelay( crawlercommons.urlfrontier.Urlfrontier.QueueDelayParams request, io.grpc.stub.StreamObserver responseObserver) { @@ -1132,7 +1132,7 @@ default void setDelay( * * Overrides the log level for a given package * * */ - default void setLogLevel( + public void setLogLevel( crawlercommons.urlfrontier.Urlfrontier.LogLevelParams request, io.grpc.stub.StreamObserver responseObserver) { @@ -1147,7 +1147,7 @@ default void setLogLevel( * * Sets crawl limit for domain * * */ - default void setCrawlLimit( + public void setCrawlLimit( crawlercommons.urlfrontier.Urlfrontier.CrawlLimitParams request, io.grpc.stub.StreamObserver responseObserver) { @@ -1164,7 +1164,7 @@ default void setCrawlLimit( * Used to check current status of an URL within the frontier * */ - default void getURLStatus( + public void getURLStatus( crawlercommons.urlfrontier.Urlfrontier.URLStatusRequest request, io.grpc.stub.StreamObserver responseObserver) { @@ -1181,7 +1181,7 @@ default void getURLStatus( * Used to check current status of all URLs within the frontier * */ - default void listURLs( + public void listURLs( crawlercommons.urlfrontier.Urlfrontier.ListUrlParams request, io.grpc.stub.StreamObserver responseObserver) { @@ -1189,27 +1189,151 @@ default void listURLs( getListURLsMethod(), responseObserver); } - /** */ - default void countURLs( + /** + * + * + *
+         * * Count URLs currently in the frontier *
+         * 
+ */ + public void countURLs( crawlercommons.urlfrontier.Urlfrontier.CountUrlParams request, io.grpc.stub.StreamObserver responseObserver) { io.grpc.stub.ServerCalls.asyncUnimplementedUnaryCall( getCountURLsMethod(), responseObserver); } - } - - /** Base class for the server implementation of the service URLFrontier. */ - public abstract static class URLFrontierImplBase - implements io.grpc.BindableService, AsyncService { @java.lang.Override public final io.grpc.ServerServiceDefinition bindService() { - return URLFrontierGrpc.bindService(this); + return io.grpc.ServerServiceDefinition.builder(getServiceDescriptor()) + .addMethod( + getListNodesMethod(), + io.grpc.stub.ServerCalls.asyncUnaryCall( + new MethodHandlers< + crawlercommons.urlfrontier.Urlfrontier.Empty, + crawlercommons.urlfrontier.Urlfrontier.StringList>( + this, METHODID_LIST_NODES))) + .addMethod( + getListCrawlsMethod(), + io.grpc.stub.ServerCalls.asyncUnaryCall( + new MethodHandlers< + crawlercommons.urlfrontier.Urlfrontier.Local, + crawlercommons.urlfrontier.Urlfrontier.StringList>( + this, METHODID_LIST_CRAWLS))) + .addMethod( + getDeleteCrawlMethod(), + io.grpc.stub.ServerCalls.asyncUnaryCall( + new MethodHandlers< + crawlercommons.urlfrontier.Urlfrontier + .DeleteCrawlMessage, + crawlercommons.urlfrontier.Urlfrontier.Long>( + this, METHODID_DELETE_CRAWL))) + .addMethod( + getListQueuesMethod(), + io.grpc.stub.ServerCalls.asyncUnaryCall( + new MethodHandlers< + crawlercommons.urlfrontier.Urlfrontier.Pagination, + crawlercommons.urlfrontier.Urlfrontier.QueueList>( + this, METHODID_LIST_QUEUES))) + .addMethod( + getGetURLsMethod(), + io.grpc.stub.ServerCalls.asyncServerStreamingCall( + new MethodHandlers< + crawlercommons.urlfrontier.Urlfrontier.GetParams, + crawlercommons.urlfrontier.Urlfrontier.URLInfo>( + this, METHODID_GET_URLS))) + .addMethod( + getPutURLsMethod(), + io.grpc.stub.ServerCalls.asyncBidiStreamingCall( + new MethodHandlers< + crawlercommons.urlfrontier.Urlfrontier.URLItem, + crawlercommons.urlfrontier.Urlfrontier.AckMessage>( + this, METHODID_PUT_URLS))) + .addMethod( + getGetStatsMethod(), + io.grpc.stub.ServerCalls.asyncUnaryCall( + new MethodHandlers< + crawlercommons.urlfrontier.Urlfrontier + .QueueWithinCrawlParams, + crawlercommons.urlfrontier.Urlfrontier.Stats>( + this, METHODID_GET_STATS))) + .addMethod( + getDeleteQueueMethod(), + io.grpc.stub.ServerCalls.asyncUnaryCall( + new MethodHandlers< + crawlercommons.urlfrontier.Urlfrontier + .QueueWithinCrawlParams, + crawlercommons.urlfrontier.Urlfrontier.Long>( + this, METHODID_DELETE_QUEUE))) + .addMethod( + getBlockQueueUntilMethod(), + io.grpc.stub.ServerCalls.asyncUnaryCall( + new MethodHandlers< + crawlercommons.urlfrontier.Urlfrontier.BlockQueueParams, + crawlercommons.urlfrontier.Urlfrontier.Empty>( + this, METHODID_BLOCK_QUEUE_UNTIL))) + .addMethod( + getSetActiveMethod(), + io.grpc.stub.ServerCalls.asyncUnaryCall( + new MethodHandlers< + crawlercommons.urlfrontier.Urlfrontier.Active, + crawlercommons.urlfrontier.Urlfrontier.Empty>( + this, METHODID_SET_ACTIVE))) + .addMethod( + getGetActiveMethod(), + io.grpc.stub.ServerCalls.asyncUnaryCall( + new MethodHandlers< + crawlercommons.urlfrontier.Urlfrontier.Local, + crawlercommons.urlfrontier.Urlfrontier.Boolean>( + this, METHODID_GET_ACTIVE))) + .addMethod( + getSetDelayMethod(), + io.grpc.stub.ServerCalls.asyncUnaryCall( + new MethodHandlers< + crawlercommons.urlfrontier.Urlfrontier.QueueDelayParams, + crawlercommons.urlfrontier.Urlfrontier.Empty>( + this, METHODID_SET_DELAY))) + .addMethod( + getSetLogLevelMethod(), + io.grpc.stub.ServerCalls.asyncUnaryCall( + new MethodHandlers< + crawlercommons.urlfrontier.Urlfrontier.LogLevelParams, + crawlercommons.urlfrontier.Urlfrontier.Empty>( + this, METHODID_SET_LOG_LEVEL))) + .addMethod( + getSetCrawlLimitMethod(), + io.grpc.stub.ServerCalls.asyncUnaryCall( + new MethodHandlers< + crawlercommons.urlfrontier.Urlfrontier.CrawlLimitParams, + crawlercommons.urlfrontier.Urlfrontier.Empty>( + this, METHODID_SET_CRAWL_LIMIT))) + .addMethod( + getGetURLStatusMethod(), + io.grpc.stub.ServerCalls.asyncUnaryCall( + new MethodHandlers< + crawlercommons.urlfrontier.Urlfrontier.URLStatusRequest, + crawlercommons.urlfrontier.Urlfrontier.URLItem>( + this, METHODID_GET_URLSTATUS))) + .addMethod( + getListURLsMethod(), + io.grpc.stub.ServerCalls.asyncServerStreamingCall( + new MethodHandlers< + crawlercommons.urlfrontier.Urlfrontier.ListUrlParams, + crawlercommons.urlfrontier.Urlfrontier.URLItem>( + this, METHODID_LIST_URLS))) + .addMethod( + getCountURLsMethod(), + io.grpc.stub.ServerCalls.asyncUnaryCall( + new MethodHandlers< + crawlercommons.urlfrontier.Urlfrontier.CountUrlParams, + crawlercommons.urlfrontier.Urlfrontier.Long>( + this, METHODID_COUNT_URLS))) + .build(); } } - /** A stub to allow clients to do asynchronous rpc calls to service URLFrontier. */ + /** */ public static final class URLFrontierStub extends io.grpc.stub.AbstractAsyncStub { private URLFrontierStub(io.grpc.Channel channel, io.grpc.CallOptions callOptions) { @@ -1500,7 +1624,13 @@ public void listURLs( responseObserver); } - /** */ + /** + * + * + *
+         * * Count URLs currently in the frontier *
+         * 
+ */ public void countURLs( crawlercommons.urlfrontier.Urlfrontier.CountUrlParams request, io.grpc.stub.StreamObserver @@ -1512,7 +1642,7 @@ public void countURLs( } } - /** A stub to allow clients to do synchronous rpc calls to service URLFrontier. */ + /** */ public static final class URLFrontierBlockingStub extends io.grpc.stub.AbstractBlockingStub { private URLFrontierBlockingStub(io.grpc.Channel channel, io.grpc.CallOptions callOptions) { @@ -1730,7 +1860,13 @@ public java.util.Iterator listUR getChannel(), getListURLsMethod(), getCallOptions(), request); } - /** */ + /** + * + * + *
+         * * Count URLs currently in the frontier *
+         * 
+ */ public crawlercommons.urlfrontier.Urlfrontier.Long countURLs( crawlercommons.urlfrontier.Urlfrontier.CountUrlParams request) { return io.grpc.stub.ClientCalls.blockingUnaryCall( @@ -1738,7 +1874,7 @@ public crawlercommons.urlfrontier.Urlfrontier.Long countURLs( } } - /** A stub to allow clients to do ListenableFuture-style rpc calls to service URLFrontier. */ + /** */ public static final class URLFrontierFutureStub extends io.grpc.stub.AbstractFutureStub { private URLFrontierFutureStub(io.grpc.Channel channel, io.grpc.CallOptions callOptions) { @@ -1941,7 +2077,13 @@ protected URLFrontierFutureStub build( getChannel().newCall(getGetURLStatusMethod(), getCallOptions()), request); } - /** */ + /** + * + * + *
+         * * Count URLs currently in the frontier *
+         * 
+ */ public com.google.common.util.concurrent.ListenableFuture< crawlercommons.urlfrontier.Urlfrontier.Long> countURLs(crawlercommons.urlfrontier.Urlfrontier.CountUrlParams request) { @@ -1973,10 +2115,10 @@ private static final class MethodHandlers io.grpc.stub.ServerCalls.ServerStreamingMethod, io.grpc.stub.ServerCalls.ClientStreamingMethod, io.grpc.stub.ServerCalls.BidiStreamingMethod { - private final AsyncService serviceImpl; + private final URLFrontierImplBase serviceImpl; private final int methodId; - MethodHandlers(AsyncService serviceImpl, int methodId) { + MethodHandlers(URLFrontierImplBase serviceImpl, int methodId) { this.serviceImpl = serviceImpl; this.methodId = methodId; } @@ -2120,132 +2262,6 @@ public io.grpc.stub.StreamObserver invoke( } } - public static final io.grpc.ServerServiceDefinition bindService(AsyncService service) { - return io.grpc.ServerServiceDefinition.builder(getServiceDescriptor()) - .addMethod( - getListNodesMethod(), - io.grpc.stub.ServerCalls.asyncUnaryCall( - new MethodHandlers< - crawlercommons.urlfrontier.Urlfrontier.Empty, - crawlercommons.urlfrontier.Urlfrontier.StringList>( - service, METHODID_LIST_NODES))) - .addMethod( - getListCrawlsMethod(), - io.grpc.stub.ServerCalls.asyncUnaryCall( - new MethodHandlers< - crawlercommons.urlfrontier.Urlfrontier.Local, - crawlercommons.urlfrontier.Urlfrontier.StringList>( - service, METHODID_LIST_CRAWLS))) - .addMethod( - getDeleteCrawlMethod(), - io.grpc.stub.ServerCalls.asyncUnaryCall( - new MethodHandlers< - crawlercommons.urlfrontier.Urlfrontier.DeleteCrawlMessage, - crawlercommons.urlfrontier.Urlfrontier.Long>( - service, METHODID_DELETE_CRAWL))) - .addMethod( - getListQueuesMethod(), - io.grpc.stub.ServerCalls.asyncUnaryCall( - new MethodHandlers< - crawlercommons.urlfrontier.Urlfrontier.Pagination, - crawlercommons.urlfrontier.Urlfrontier.QueueList>( - service, METHODID_LIST_QUEUES))) - .addMethod( - getGetURLsMethod(), - io.grpc.stub.ServerCalls.asyncServerStreamingCall( - new MethodHandlers< - crawlercommons.urlfrontier.Urlfrontier.GetParams, - crawlercommons.urlfrontier.Urlfrontier.URLInfo>( - service, METHODID_GET_URLS))) - .addMethod( - getPutURLsMethod(), - io.grpc.stub.ServerCalls.asyncBidiStreamingCall( - new MethodHandlers< - crawlercommons.urlfrontier.Urlfrontier.URLItem, - crawlercommons.urlfrontier.Urlfrontier.AckMessage>( - service, METHODID_PUT_URLS))) - .addMethod( - getGetStatsMethod(), - io.grpc.stub.ServerCalls.asyncUnaryCall( - new MethodHandlers< - crawlercommons.urlfrontier.Urlfrontier - .QueueWithinCrawlParams, - crawlercommons.urlfrontier.Urlfrontier.Stats>( - service, METHODID_GET_STATS))) - .addMethod( - getDeleteQueueMethod(), - io.grpc.stub.ServerCalls.asyncUnaryCall( - new MethodHandlers< - crawlercommons.urlfrontier.Urlfrontier - .QueueWithinCrawlParams, - crawlercommons.urlfrontier.Urlfrontier.Long>( - service, METHODID_DELETE_QUEUE))) - .addMethod( - getBlockQueueUntilMethod(), - io.grpc.stub.ServerCalls.asyncUnaryCall( - new MethodHandlers< - crawlercommons.urlfrontier.Urlfrontier.BlockQueueParams, - crawlercommons.urlfrontier.Urlfrontier.Empty>( - service, METHODID_BLOCK_QUEUE_UNTIL))) - .addMethod( - getSetActiveMethod(), - io.grpc.stub.ServerCalls.asyncUnaryCall( - new MethodHandlers< - crawlercommons.urlfrontier.Urlfrontier.Active, - crawlercommons.urlfrontier.Urlfrontier.Empty>( - service, METHODID_SET_ACTIVE))) - .addMethod( - getGetActiveMethod(), - io.grpc.stub.ServerCalls.asyncUnaryCall( - new MethodHandlers< - crawlercommons.urlfrontier.Urlfrontier.Local, - crawlercommons.urlfrontier.Urlfrontier.Boolean>( - service, METHODID_GET_ACTIVE))) - .addMethod( - getSetDelayMethod(), - io.grpc.stub.ServerCalls.asyncUnaryCall( - new MethodHandlers< - crawlercommons.urlfrontier.Urlfrontier.QueueDelayParams, - crawlercommons.urlfrontier.Urlfrontier.Empty>( - service, METHODID_SET_DELAY))) - .addMethod( - getSetLogLevelMethod(), - io.grpc.stub.ServerCalls.asyncUnaryCall( - new MethodHandlers< - crawlercommons.urlfrontier.Urlfrontier.LogLevelParams, - crawlercommons.urlfrontier.Urlfrontier.Empty>( - service, METHODID_SET_LOG_LEVEL))) - .addMethod( - getSetCrawlLimitMethod(), - io.grpc.stub.ServerCalls.asyncUnaryCall( - new MethodHandlers< - crawlercommons.urlfrontier.Urlfrontier.CrawlLimitParams, - crawlercommons.urlfrontier.Urlfrontier.Empty>( - service, METHODID_SET_CRAWL_LIMIT))) - .addMethod( - getGetURLStatusMethod(), - io.grpc.stub.ServerCalls.asyncUnaryCall( - new MethodHandlers< - crawlercommons.urlfrontier.Urlfrontier.URLStatusRequest, - crawlercommons.urlfrontier.Urlfrontier.URLItem>( - service, METHODID_GET_URLSTATUS))) - .addMethod( - getListURLsMethod(), - io.grpc.stub.ServerCalls.asyncServerStreamingCall( - new MethodHandlers< - crawlercommons.urlfrontier.Urlfrontier.ListUrlParams, - crawlercommons.urlfrontier.Urlfrontier.URLItem>( - service, METHODID_LIST_URLS))) - .addMethod( - getCountURLsMethod(), - io.grpc.stub.ServerCalls.asyncUnaryCall( - new MethodHandlers< - crawlercommons.urlfrontier.Urlfrontier.CountUrlParams, - crawlercommons.urlfrontier.Urlfrontier.Long>( - service, METHODID_COUNT_URLS))) - .build(); - } - private abstract static class URLFrontierBaseDescriptorSupplier implements io.grpc.protobuf.ProtoFileDescriptorSupplier, io.grpc.protobuf.ProtoServiceDescriptorSupplier { @@ -2270,9 +2286,9 @@ private static final class URLFrontierFileDescriptorSupplier private static final class URLFrontierMethodDescriptorSupplier extends URLFrontierBaseDescriptorSupplier implements io.grpc.protobuf.ProtoMethodDescriptorSupplier { - private final java.lang.String methodName; + private final String methodName; - URLFrontierMethodDescriptorSupplier(java.lang.String methodName) { + URLFrontierMethodDescriptorSupplier(String methodName) { this.methodName = methodName; } diff --git a/API/src/main/java/crawlercommons/urlfrontier/Urlfrontier.java b/API/src/main/java/crawlercommons/urlfrontier/Urlfrontier.java index 5a9522a..29858ee 100644 --- a/API/src/main/java/crawlercommons/urlfrontier/Urlfrontier.java +++ b/API/src/main/java/crawlercommons/urlfrontier/Urlfrontier.java @@ -21082,6 +21082,71 @@ public interface ListUrlParamsOrBuilder * @return The local. */ boolean getLocal(); + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 6; + * + * @return Whether the filter field is set. + */ + boolean hasFilter(); + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 6; + * + * @return The filter. + */ + java.lang.String getFilter(); + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 6; + * + * @return The bytes for filter. + */ + com.google.protobuf.ByteString getFilterBytes(); + + /** + * + * + *
+         * Case sensitivity for search filter (default is false)
+         * 
+ * + * optional bool caseSensitive = 7; + * + * @return Whether the caseSensitive field is set. + */ + boolean hasCaseSensitive(); + + /** + * + * + *
+         * Case sensitivity for search filter (default is false)
+         * 
+ * + * optional bool caseSensitive = 7; + * + * @return The caseSensitive. + */ + boolean getCaseSensitive(); } /** Protobuf type {@code urlfrontier.ListUrlParams} */ @@ -21099,6 +21164,7 @@ private ListUrlParams(com.google.protobuf.GeneratedMessageV3.Builder builder) private ListUrlParams() { key_ = ""; crawlID_ = ""; + filter_ = ""; } @java.lang.Override @@ -21122,6 +21188,7 @@ public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { crawlercommons.urlfrontier.Urlfrontier.ListUrlParams.Builder.class); } + private int bitField0_; public static final int START_FIELD_NUMBER = 1; private int start_ = 0; @@ -21285,6 +21352,110 @@ public boolean getLocal() { return local_; } + public static final int FILTER_FIELD_NUMBER = 6; + + @SuppressWarnings("serial") + private volatile java.lang.Object filter_ = ""; + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 6; + * + * @return Whether the filter field is set. + */ + @java.lang.Override + public boolean hasFilter() { + return ((bitField0_ & 0x00000001) != 0); + } + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 6; + * + * @return The filter. + */ + @java.lang.Override + public java.lang.String getFilter() { + java.lang.Object ref = filter_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + filter_ = s; + return s; + } + } + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 6; + * + * @return The bytes for filter. + */ + @java.lang.Override + public com.google.protobuf.ByteString getFilterBytes() { + java.lang.Object ref = filter_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref); + filter_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + public static final int CASESENSITIVE_FIELD_NUMBER = 7; + private boolean caseSensitive_ = false; + + /** + * + * + *
+         * Case sensitivity for search filter (default is false)
+         * 
+ * + * optional bool caseSensitive = 7; + * + * @return Whether the caseSensitive field is set. + */ + @java.lang.Override + public boolean hasCaseSensitive() { + return ((bitField0_ & 0x00000002) != 0); + } + + /** + * + * + *
+         * Case sensitivity for search filter (default is false)
+         * 
+ * + * optional bool caseSensitive = 7; + * + * @return The caseSensitive. + */ + @java.lang.Override + public boolean getCaseSensitive() { + return caseSensitive_; + } + private byte memoizedIsInitialized = -1; @java.lang.Override @@ -21315,6 +21486,12 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) if (local_ != false) { output.writeBool(5, local_); } + if (((bitField0_ & 0x00000001) != 0)) { + com.google.protobuf.GeneratedMessageV3.writeString(output, 6, filter_); + } + if (((bitField0_ & 0x00000002) != 0)) { + output.writeBool(7, caseSensitive_); + } getUnknownFields().writeTo(output); } @@ -21339,6 +21516,12 @@ public int getSerializedSize() { if (local_ != false) { size += com.google.protobuf.CodedOutputStream.computeBoolSize(5, local_); } + if (((bitField0_ & 0x00000001) != 0)) { + size += com.google.protobuf.GeneratedMessageV3.computeStringSize(6, filter_); + } + if (((bitField0_ & 0x00000002) != 0)) { + size += com.google.protobuf.CodedOutputStream.computeBoolSize(7, caseSensitive_); + } size += getUnknownFields().getSerializedSize(); memoizedSize = size; return size; @@ -21360,6 +21543,14 @@ public boolean equals(final java.lang.Object obj) { if (!getKey().equals(other.getKey())) return false; if (!getCrawlID().equals(other.getCrawlID())) return false; if (getLocal() != other.getLocal()) return false; + if (hasFilter() != other.hasFilter()) return false; + if (hasFilter()) { + if (!getFilter().equals(other.getFilter())) return false; + } + if (hasCaseSensitive() != other.hasCaseSensitive()) return false; + if (hasCaseSensitive()) { + if (getCaseSensitive() != other.getCaseSensitive()) return false; + } if (!getUnknownFields().equals(other.getUnknownFields())) return false; return true; } @@ -21381,6 +21572,14 @@ public int hashCode() { hash = (53 * hash) + getCrawlID().hashCode(); hash = (37 * hash) + LOCAL_FIELD_NUMBER; hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(getLocal()); + if (hasFilter()) { + hash = (37 * hash) + FILTER_FIELD_NUMBER; + hash = (53 * hash) + getFilter().hashCode(); + } + if (hasCaseSensitive()) { + hash = (37 * hash) + CASESENSITIVE_FIELD_NUMBER; + hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(getCaseSensitive()); + } hash = (29 * hash) + getUnknownFields().hashCode(); memoizedHashCode = hash; return hash; @@ -21526,6 +21725,8 @@ public Builder clear() { key_ = ""; crawlID_ = ""; local_ = false; + filter_ = ""; + caseSensitive_ = false; return this; } @@ -21579,6 +21780,16 @@ private void buildPartial0( if (((from_bitField0_ & 0x00000010) != 0)) { result.local_ = local_; } + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000020) != 0)) { + result.filter_ = filter_; + to_bitField0_ |= 0x00000001; + } + if (((from_bitField0_ & 0x00000040) != 0)) { + result.caseSensitive_ = caseSensitive_; + to_bitField0_ |= 0x00000002; + } + result.bitField0_ |= to_bitField0_; } @java.lang.Override @@ -21649,6 +21860,14 @@ public Builder mergeFrom(crawlercommons.urlfrontier.Urlfrontier.ListUrlParams ot if (other.getLocal() != false) { setLocal(other.getLocal()); } + if (other.hasFilter()) { + filter_ = other.filter_; + bitField0_ |= 0x00000020; + onChanged(); + } + if (other.hasCaseSensitive()) { + setCaseSensitive(other.getCaseSensitive()); + } this.mergeUnknownFields(other.getUnknownFields()); onChanged(); return this; @@ -21705,6 +21924,18 @@ public Builder mergeFrom( bitField0_ |= 0x00000010; break; } // case 40 + case 50: + { + filter_ = input.readStringRequireUtf8(); + bitField0_ |= 0x00000020; + break; + } // case 50 + case 56: + { + caseSensitive_ = input.readBool(); + bitField0_ |= 0x00000040; + break; + } // case 56 default: { if (!super.parseUnknownField(input, extensionRegistry, tag)) { @@ -22114,6 +22345,204 @@ public Builder clearLocal() { return this; } + private java.lang.Object filter_ = ""; + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 6; + * + * @return Whether the filter field is set. + */ + public boolean hasFilter() { + return ((bitField0_ & 0x00000020) != 0); + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 6; + * + * @return The filter. + */ + public java.lang.String getFilter() { + java.lang.Object ref = filter_; + if (!(ref instanceof java.lang.String)) { + com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + filter_ = s; + return s; + } else { + return (java.lang.String) ref; + } + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 6; + * + * @return The bytes for filter. + */ + public com.google.protobuf.ByteString getFilterBytes() { + java.lang.Object ref = filter_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref); + filter_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 6; + * + * @param value The filter to set. + * @return This builder for chaining. + */ + public Builder setFilter(java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + filter_ = value; + bitField0_ |= 0x00000020; + onChanged(); + return this; + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 6; + * + * @return This builder for chaining. + */ + public Builder clearFilter() { + filter_ = getDefaultInstance().getFilter(); + bitField0_ = (bitField0_ & ~0x00000020); + onChanged(); + return this; + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 6; + * + * @param value The bytes for filter to set. + * @return This builder for chaining. + */ + public Builder setFilterBytes(com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + checkByteStringIsUtf8(value); + filter_ = value; + bitField0_ |= 0x00000020; + onChanged(); + return this; + } + + private boolean caseSensitive_; + + /** + * + * + *
+             * Case sensitivity for search filter (default is false)
+             * 
+ * + * optional bool caseSensitive = 7; + * + * @return Whether the caseSensitive field is set. + */ + @java.lang.Override + public boolean hasCaseSensitive() { + return ((bitField0_ & 0x00000040) != 0); + } + + /** + * + * + *
+             * Case sensitivity for search filter (default is false)
+             * 
+ * + * optional bool caseSensitive = 7; + * + * @return The caseSensitive. + */ + @java.lang.Override + public boolean getCaseSensitive() { + return caseSensitive_; + } + + /** + * + * + *
+             * Case sensitivity for search filter (default is false)
+             * 
+ * + * optional bool caseSensitive = 7; + * + * @param value The caseSensitive to set. + * @return This builder for chaining. + */ + public Builder setCaseSensitive(boolean value) { + + caseSensitive_ = value; + bitField0_ |= 0x00000040; + onChanged(); + return this; + } + + /** + * + * + *
+             * Case sensitivity for search filter (default is false)
+             * 
+ * + * optional bool caseSensitive = 7; + * + * @return This builder for chaining. + */ + public Builder clearCaseSensitive() { + bitField0_ = (bitField0_ & ~0x00000040); + caseSensitive_ = false; + onChanged(); + return this; + } + @java.lang.Override public final Builder setUnknownFields( final com.google.protobuf.UnknownFieldSet unknownFields) { @@ -22239,10 +22668,88 @@ public interface CountUrlParamsOrBuilder * * *
-         * only for the current local instance
+         * Search filter on url (can be empty, default is empty)
          * 
* - * bool local = 3; + * optional string filter = 3; + * + * @return Whether the filter field is set. + */ + boolean hasFilter(); + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 3; + * + * @return The filter. + */ + java.lang.String getFilter(); + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 3; + * + * @return The bytes for filter. + */ + com.google.protobuf.ByteString getFilterBytes(); + + /** + * + * + *
+         * Case sensitivity for search filter (default is false)
+         * 
+ * + * optional bool caseSensitive = 4; + * + * @return Whether the caseSensitive field is set. + */ + boolean hasCaseSensitive(); + + /** + * + * + *
+         * Case sensitivity for search filter (default is false)
+         * 
+ * + * optional bool caseSensitive = 4; + * + * @return The caseSensitive. + */ + boolean getCaseSensitive(); + + /** + * + * + *
+         * only for the current local instance (default is false)
+         * 
+ * + * optional bool local = 5; + * + * @return Whether the local field is set. + */ + boolean hasLocal(); + + /** + * + * + *
+         * only for the current local instance (default is false)
+         * 
+ * + * optional bool local = 5; * * @return The local. */ @@ -22264,6 +22771,7 @@ private CountUrlParams(com.google.protobuf.GeneratedMessageV3.Builder builder private CountUrlParams() { key_ = ""; crawlID_ = ""; + filter_ = ""; } @java.lang.Override @@ -22287,6 +22795,7 @@ public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { crawlercommons.urlfrontier.Urlfrontier.CountUrlParams.Builder.class); } + private int bitField0_; public static final int KEY_FIELD_NUMBER = 1; @SuppressWarnings("serial") @@ -22340,70 +22849,190 @@ public com.google.protobuf.ByteString getKeyBytes() { } } - public static final int CRAWLID_FIELD_NUMBER = 2; - - @SuppressWarnings("serial") - private volatile java.lang.Object crawlID_ = ""; - + public static final int CRAWLID_FIELD_NUMBER = 2; + + @SuppressWarnings("serial") + private volatile java.lang.Object crawlID_ = ""; + + /** + * + * + *
+         * crawl ID
+         * 
+ * + * string crawlID = 2; + * + * @return The crawlID. + */ + @java.lang.Override + public java.lang.String getCrawlID() { + java.lang.Object ref = crawlID_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + crawlID_ = s; + return s; + } + } + + /** + * + * + *
+         * crawl ID
+         * 
+ * + * string crawlID = 2; + * + * @return The bytes for crawlID. + */ + @java.lang.Override + public com.google.protobuf.ByteString getCrawlIDBytes() { + java.lang.Object ref = crawlID_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref); + crawlID_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + public static final int FILTER_FIELD_NUMBER = 3; + + @SuppressWarnings("serial") + private volatile java.lang.Object filter_ = ""; + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 3; + * + * @return Whether the filter field is set. + */ + @java.lang.Override + public boolean hasFilter() { + return ((bitField0_ & 0x00000001) != 0); + } + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 3; + * + * @return The filter. + */ + @java.lang.Override + public java.lang.String getFilter() { + java.lang.Object ref = filter_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + filter_ = s; + return s; + } + } + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 3; + * + * @return The bytes for filter. + */ + @java.lang.Override + public com.google.protobuf.ByteString getFilterBytes() { + java.lang.Object ref = filter_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref); + filter_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + public static final int CASESENSITIVE_FIELD_NUMBER = 4; + private boolean caseSensitive_ = false; + + /** + * + * + *
+         * Case sensitivity for search filter (default is false)
+         * 
+ * + * optional bool caseSensitive = 4; + * + * @return Whether the caseSensitive field is set. + */ + @java.lang.Override + public boolean hasCaseSensitive() { + return ((bitField0_ & 0x00000002) != 0); + } + /** * * *
-         * crawl ID
+         * Case sensitivity for search filter (default is false)
          * 
* - * string crawlID = 2; + * optional bool caseSensitive = 4; * - * @return The crawlID. + * @return The caseSensitive. */ @java.lang.Override - public java.lang.String getCrawlID() { - java.lang.Object ref = crawlID_; - if (ref instanceof java.lang.String) { - return (java.lang.String) ref; - } else { - com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; - java.lang.String s = bs.toStringUtf8(); - crawlID_ = s; - return s; - } + public boolean getCaseSensitive() { + return caseSensitive_; } + public static final int LOCAL_FIELD_NUMBER = 5; + private boolean local_ = false; + /** * * *
-         * crawl ID
+         * only for the current local instance (default is false)
          * 
* - * string crawlID = 2; + * optional bool local = 5; * - * @return The bytes for crawlID. + * @return Whether the local field is set. */ @java.lang.Override - public com.google.protobuf.ByteString getCrawlIDBytes() { - java.lang.Object ref = crawlID_; - if (ref instanceof java.lang.String) { - com.google.protobuf.ByteString b = - com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref); - crawlID_ = b; - return b; - } else { - return (com.google.protobuf.ByteString) ref; - } + public boolean hasLocal() { + return ((bitField0_ & 0x00000004) != 0); } - public static final int LOCAL_FIELD_NUMBER = 3; - private boolean local_ = false; - /** * * *
-         * only for the current local instance
+         * only for the current local instance (default is false)
          * 
* - * bool local = 3; + * optional bool local = 5; * * @return The local. */ @@ -22433,8 +23062,14 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) if (!com.google.protobuf.GeneratedMessageV3.isStringEmpty(crawlID_)) { com.google.protobuf.GeneratedMessageV3.writeString(output, 2, crawlID_); } - if (local_ != false) { - output.writeBool(3, local_); + if (((bitField0_ & 0x00000001) != 0)) { + com.google.protobuf.GeneratedMessageV3.writeString(output, 3, filter_); + } + if (((bitField0_ & 0x00000002) != 0)) { + output.writeBool(4, caseSensitive_); + } + if (((bitField0_ & 0x00000004) != 0)) { + output.writeBool(5, local_); } getUnknownFields().writeTo(output); } @@ -22451,8 +23086,14 @@ public int getSerializedSize() { if (!com.google.protobuf.GeneratedMessageV3.isStringEmpty(crawlID_)) { size += com.google.protobuf.GeneratedMessageV3.computeStringSize(2, crawlID_); } - if (local_ != false) { - size += com.google.protobuf.CodedOutputStream.computeBoolSize(3, local_); + if (((bitField0_ & 0x00000001) != 0)) { + size += com.google.protobuf.GeneratedMessageV3.computeStringSize(3, filter_); + } + if (((bitField0_ & 0x00000002) != 0)) { + size += com.google.protobuf.CodedOutputStream.computeBoolSize(4, caseSensitive_); + } + if (((bitField0_ & 0x00000004) != 0)) { + size += com.google.protobuf.CodedOutputStream.computeBoolSize(5, local_); } size += getUnknownFields().getSerializedSize(); memoizedSize = size; @@ -22472,7 +23113,18 @@ public boolean equals(final java.lang.Object obj) { if (!getKey().equals(other.getKey())) return false; if (!getCrawlID().equals(other.getCrawlID())) return false; - if (getLocal() != other.getLocal()) return false; + if (hasFilter() != other.hasFilter()) return false; + if (hasFilter()) { + if (!getFilter().equals(other.getFilter())) return false; + } + if (hasCaseSensitive() != other.hasCaseSensitive()) return false; + if (hasCaseSensitive()) { + if (getCaseSensitive() != other.getCaseSensitive()) return false; + } + if (hasLocal() != other.hasLocal()) return false; + if (hasLocal()) { + if (getLocal() != other.getLocal()) return false; + } if (!getUnknownFields().equals(other.getUnknownFields())) return false; return true; } @@ -22488,8 +23140,18 @@ public int hashCode() { hash = (53 * hash) + getKey().hashCode(); hash = (37 * hash) + CRAWLID_FIELD_NUMBER; hash = (53 * hash) + getCrawlID().hashCode(); - hash = (37 * hash) + LOCAL_FIELD_NUMBER; - hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(getLocal()); + if (hasFilter()) { + hash = (37 * hash) + FILTER_FIELD_NUMBER; + hash = (53 * hash) + getFilter().hashCode(); + } + if (hasCaseSensitive()) { + hash = (37 * hash) + CASESENSITIVE_FIELD_NUMBER; + hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(getCaseSensitive()); + } + if (hasLocal()) { + hash = (37 * hash) + LOCAL_FIELD_NUMBER; + hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(getLocal()); + } hash = (29 * hash) + getUnknownFields().hashCode(); memoizedHashCode = hash; return hash; @@ -22633,6 +23295,8 @@ public Builder clear() { bitField0_ = 0; key_ = ""; crawlID_ = ""; + filter_ = ""; + caseSensitive_ = false; local_ = false; return this; } @@ -22678,9 +23342,20 @@ private void buildPartial0( if (((from_bitField0_ & 0x00000002) != 0)) { result.crawlID_ = crawlID_; } + int to_bitField0_ = 0; if (((from_bitField0_ & 0x00000004) != 0)) { + result.filter_ = filter_; + to_bitField0_ |= 0x00000001; + } + if (((from_bitField0_ & 0x00000008) != 0)) { + result.caseSensitive_ = caseSensitive_; + to_bitField0_ |= 0x00000002; + } + if (((from_bitField0_ & 0x00000010) != 0)) { result.local_ = local_; + to_bitField0_ |= 0x00000004; } + result.bitField0_ |= to_bitField0_; } @java.lang.Override @@ -22742,7 +23417,15 @@ public Builder mergeFrom(crawlercommons.urlfrontier.Urlfrontier.CountUrlParams o bitField0_ |= 0x00000002; onChanged(); } - if (other.getLocal() != false) { + if (other.hasFilter()) { + filter_ = other.filter_; + bitField0_ |= 0x00000004; + onChanged(); + } + if (other.hasCaseSensitive()) { + setCaseSensitive(other.getCaseSensitive()); + } + if (other.hasLocal()) { setLocal(other.getLocal()); } this.mergeUnknownFields(other.getUnknownFields()); @@ -22783,12 +23466,24 @@ public Builder mergeFrom( bitField0_ |= 0x00000002; break; } // case 18 - case 24: + case 26: { - local_ = input.readBool(); + filter_ = input.readStringRequireUtf8(); bitField0_ |= 0x00000004; break; - } // case 24 + } // case 26 + case 32: + { + caseSensitive_ = input.readBool(); + bitField0_ |= 0x00000008; + break; + } // case 32 + case 40: + { + local_ = input.readBool(); + bitField0_ |= 0x00000010; + break; + } // case 40 default: { if (!super.parseUnknownField(input, extensionRegistry, tag)) { @@ -23030,16 +23725,230 @@ public Builder setCrawlIDBytes(com.google.protobuf.ByteString value) { return this; } + private java.lang.Object filter_ = ""; + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 3; + * + * @return Whether the filter field is set. + */ + public boolean hasFilter() { + return ((bitField0_ & 0x00000004) != 0); + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 3; + * + * @return The filter. + */ + public java.lang.String getFilter() { + java.lang.Object ref = filter_; + if (!(ref instanceof java.lang.String)) { + com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + filter_ = s; + return s; + } else { + return (java.lang.String) ref; + } + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 3; + * + * @return The bytes for filter. + */ + public com.google.protobuf.ByteString getFilterBytes() { + java.lang.Object ref = filter_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref); + filter_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 3; + * + * @param value The filter to set. + * @return This builder for chaining. + */ + public Builder setFilter(java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + filter_ = value; + bitField0_ |= 0x00000004; + onChanged(); + return this; + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 3; + * + * @return This builder for chaining. + */ + public Builder clearFilter() { + filter_ = getDefaultInstance().getFilter(); + bitField0_ = (bitField0_ & ~0x00000004); + onChanged(); + return this; + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 3; + * + * @param value The bytes for filter to set. + * @return This builder for chaining. + */ + public Builder setFilterBytes(com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + checkByteStringIsUtf8(value); + filter_ = value; + bitField0_ |= 0x00000004; + onChanged(); + return this; + } + + private boolean caseSensitive_; + + /** + * + * + *
+             * Case sensitivity for search filter (default is false)
+             * 
+ * + * optional bool caseSensitive = 4; + * + * @return Whether the caseSensitive field is set. + */ + @java.lang.Override + public boolean hasCaseSensitive() { + return ((bitField0_ & 0x00000008) != 0); + } + + /** + * + * + *
+             * Case sensitivity for search filter (default is false)
+             * 
+ * + * optional bool caseSensitive = 4; + * + * @return The caseSensitive. + */ + @java.lang.Override + public boolean getCaseSensitive() { + return caseSensitive_; + } + + /** + * + * + *
+             * Case sensitivity for search filter (default is false)
+             * 
+ * + * optional bool caseSensitive = 4; + * + * @param value The caseSensitive to set. + * @return This builder for chaining. + */ + public Builder setCaseSensitive(boolean value) { + + caseSensitive_ = value; + bitField0_ |= 0x00000008; + onChanged(); + return this; + } + + /** + * + * + *
+             * Case sensitivity for search filter (default is false)
+             * 
+ * + * optional bool caseSensitive = 4; + * + * @return This builder for chaining. + */ + public Builder clearCaseSensitive() { + bitField0_ = (bitField0_ & ~0x00000008); + caseSensitive_ = false; + onChanged(); + return this; + } + private boolean local_; /** * * *
-             * only for the current local instance
+             * only for the current local instance (default is false)
              * 
* - * bool local = 3; + * optional bool local = 5; + * + * @return Whether the local field is set. + */ + @java.lang.Override + public boolean hasLocal() { + return ((bitField0_ & 0x00000010) != 0); + } + + /** + * + * + *
+             * only for the current local instance (default is false)
+             * 
+ * + * optional bool local = 5; * * @return The local. */ @@ -23052,10 +23961,10 @@ public boolean getLocal() { * * *
-             * only for the current local instance
+             * only for the current local instance (default is false)
              * 
* - * bool local = 3; + * optional bool local = 5; * * @param value The local to set. * @return This builder for chaining. @@ -23063,7 +23972,7 @@ public boolean getLocal() { public Builder setLocal(boolean value) { local_ = value; - bitField0_ |= 0x00000004; + bitField0_ |= 0x00000010; onChanged(); return this; } @@ -23072,15 +23981,15 @@ public Builder setLocal(boolean value) { * * *
-             * only for the current local instance
+             * only for the current local instance (default is false)
              * 
* - * bool local = 3; + * optional bool local = 5; * * @return This builder for chaining. */ public Builder clearLocal() { - bitField0_ = (bitField0_ & ~0x00000004); + bitField0_ = (bitField0_ & ~0x00000010); local_ = false; onChanged(); return this; @@ -23314,40 +24223,44 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() { + "\005ERROR\020\004\"?\n\020CrawlLimitParams\022\013\n\003key\030\001 \001(" + "\t\022\r\n\005limit\030\002 \001(\r\022\017\n\007crawlID\030\003 \001(\t\"=\n\020URL" + "StatusRequest\022\013\n\003url\030\001 \001(\t\022\013\n\003key\030\002 \001(\t\022" - + "\017\n\007crawlID\030\003 \001(\t\"Y\n\rListUrlParams\022\r\n\005sta" - + "rt\030\001 \001(\r\022\014\n\004size\030\002 \001(\r\022\013\n\003key\030\003 \001(\t\022\017\n\007c" - + "rawlID\030\004 \001(\t\022\r\n\005local\030\005 \001(\010\"=\n\016CountUrlP" - + "arams\022\013\n\003key\030\001 \001(\t\022\017\n\007crawlID\030\002 \001(\t\022\r\n\005l" - + "ocal\030\003 \001(\0102\343\010\n\013URLFrontier\022:\n\tListNodes\022" - + "\022.urlfrontier.Empty\032\027.urlfrontier.String" - + "List\"\000\022;\n\nListCrawls\022\022.urlfrontier.Local" - + "\032\027.urlfrontier.StringList\"\000\022C\n\013DeleteCra" - + "wl\022\037.urlfrontier.DeleteCrawlMessage\032\021.ur" - + "lfrontier.Long\"\000\022?\n\nListQueues\022\027.urlfron" - + "tier.Pagination\032\026.urlfrontier.QueueList\"" - + "\000\022;\n\007GetURLs\022\026.urlfrontier.GetParams\032\024.u" - + "rlfrontier.URLInfo\"\0000\001\022>\n\007PutURLs\022\024.urlf" - + "rontier.URLItem\032\027.urlfrontier.AckMessage" - + "\"\000(\0010\001\022E\n\010GetStats\022#.urlfrontier.QueueWi" - + "thinCrawlParams\032\022.urlfrontier.Stats\"\000\022G\n" - + "\013DeleteQueue\022#.urlfrontier.QueueWithinCr" - + "awlParams\032\021.urlfrontier.Long\"\000\022F\n\017BlockQ" - + "ueueUntil\022\035.urlfrontier.BlockQueueParams" - + "\032\022.urlfrontier.Empty\"\000\0226\n\tSetActive\022\023.ur" - + "lfrontier.Active\032\022.urlfrontier.Empty\"\000\0227" - + "\n\tGetActive\022\022.urlfrontier.Local\032\024.urlfro" - + "ntier.Boolean\"\000\022?\n\010SetDelay\022\035.urlfrontie" - + "r.QueueDelayParams\032\022.urlfrontier.Empty\"\000" - + "\022@\n\013SetLogLevel\022\033.urlfrontier.LogLevelPa" - + "rams\032\022.urlfrontier.Empty\"\000\022D\n\rSetCrawlLi" - + "mit\022\035.urlfrontier.CrawlLimitParams\032\022.url" - + "frontier.Empty\"\000\022E\n\014GetURLStatus\022\035.urlfr" - + "ontier.URLStatusRequest\032\024.urlfrontier.UR" - + "LItem\"\000\022@\n\010ListURLs\022\032.urlfrontier.ListUr" - + "lParams\032\024.urlfrontier.URLItem\"\0000\001\022=\n\tCou" - + "ntURLs\022\033.urlfrontier.CountUrlParams\032\021.ur" - + "lfrontier.Long\"\000B\034\n\032crawlercommons.urlfr" - + "ontierb\006proto3" + + "\017\n\007crawlID\030\003 \001(\t\"\247\001\n\rListUrlParams\022\r\n\005st" + + "art\030\001 \001(\r\022\014\n\004size\030\002 \001(\r\022\013\n\003key\030\003 \001(\t\022\017\n\007" + + "crawlID\030\004 \001(\t\022\r\n\005local\030\005 \001(\010\022\023\n\006filter\030\006" + + " \001(\tH\000\210\001\001\022\032\n\rcaseSensitive\030\007 \001(\010H\001\210\001\001B\t\n" + + "\007_filterB\020\n\016_caseSensitive\"\232\001\n\016CountUrlP" + + "arams\022\013\n\003key\030\001 \001(\t\022\017\n\007crawlID\030\002 \001(\t\022\023\n\006f" + + "ilter\030\003 \001(\tH\000\210\001\001\022\032\n\rcaseSensitive\030\004 \001(\010H" + + "\001\210\001\001\022\022\n\005local\030\005 \001(\010H\002\210\001\001B\t\n\007_filterB\020\n\016_" + + "caseSensitiveB\010\n\006_local2\343\010\n\013URLFrontier\022" + + ":\n\tListNodes\022\022.urlfrontier.Empty\032\027.urlfr" + + "ontier.StringList\"\000\022;\n\nListCrawls\022\022.urlf" + + "rontier.Local\032\027.urlfrontier.StringList\"\000" + + "\022C\n\013DeleteCrawl\022\037.urlfrontier.DeleteCraw" + + "lMessage\032\021.urlfrontier.Long\"\000\022?\n\nListQue" + + "ues\022\027.urlfrontier.Pagination\032\026.urlfronti" + + "er.QueueList\"\000\022;\n\007GetURLs\022\026.urlfrontier." + + "GetParams\032\024.urlfrontier.URLInfo\"\0000\001\022>\n\007P" + + "utURLs\022\024.urlfrontier.URLItem\032\027.urlfronti" + + "er.AckMessage\"\000(\0010\001\022E\n\010GetStats\022#.urlfro" + + "ntier.QueueWithinCrawlParams\032\022.urlfronti" + + "er.Stats\"\000\022G\n\013DeleteQueue\022#.urlfrontier." + + "QueueWithinCrawlParams\032\021.urlfrontier.Lon" + + "g\"\000\022F\n\017BlockQueueUntil\022\035.urlfrontier.Blo" + + "ckQueueParams\032\022.urlfrontier.Empty\"\000\0226\n\tS" + + "etActive\022\023.urlfrontier.Active\032\022.urlfront" + + "ier.Empty\"\000\0227\n\tGetActive\022\022.urlfrontier.L" + + "ocal\032\024.urlfrontier.Boolean\"\000\022?\n\010SetDelay" + + "\022\035.urlfrontier.QueueDelayParams\032\022.urlfro" + + "ntier.Empty\"\000\022@\n\013SetLogLevel\022\033.urlfronti" + + "er.LogLevelParams\032\022.urlfrontier.Empty\"\000\022" + + "D\n\rSetCrawlLimit\022\035.urlfrontier.CrawlLimi" + + "tParams\032\022.urlfrontier.Empty\"\000\022E\n\014GetURLS" + + "tatus\022\035.urlfrontier.URLStatusRequest\032\024.u" + + "rlfrontier.URLItem\"\000\022@\n\010ListURLs\022\032.urlfr" + + "ontier.ListUrlParams\032\024.urlfrontier.URLIt" + + "em\"\0000\001\022=\n\tCountURLs\022\033.urlfrontier.CountU" + + "rlParams\032\021.urlfrontier.Long\"\000B\034\n\032crawler" + + "commons.urlfrontierb\006proto3" }; descriptor = com.google.protobuf.Descriptors.FileDescriptor.internalBuildGeneratedFileFrom( @@ -23550,7 +24463,7 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() { new com.google.protobuf.GeneratedMessageV3.FieldAccessorTable( internal_static_urlfrontier_ListUrlParams_descriptor, new java.lang.String[] { - "Start", "Size", "Key", "CrawlID", "Local", + "Start", "Size", "Key", "CrawlID", "Local", "Filter", "CaseSensitive", }); internal_static_urlfrontier_CountUrlParams_descriptor = getDescriptor().getMessageTypes().get(24); @@ -23558,7 +24471,7 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() { new com.google.protobuf.GeneratedMessageV3.FieldAccessorTable( internal_static_urlfrontier_CountUrlParams_descriptor, new java.lang.String[] { - "Key", "CrawlID", "Local", + "Key", "CrawlID", "Filter", "CaseSensitive", "Local", }); } diff --git a/API/urlfrontier.proto b/API/urlfrontier.proto index eac9fc5..f6680c5 100644 --- a/API/urlfrontier.proto +++ b/API/urlfrontier.proto @@ -326,6 +326,10 @@ message ListUrlParams { string crawlID = 4; // only for the current local instance bool local = 5; + // Search filter on url (can be empty, default is empty) + optional string filter = 6; + // Case sensitivity for search filter (default is false) + optional bool caseSensitive = 7; } message CountUrlParams { @@ -333,6 +337,10 @@ message CountUrlParams { string key = 1; // crawl ID string crawlID = 2; - // only for the current local instance - bool local = 3; + // Search filter on url (can be empty, default is empty) + optional string filter = 3; + // Case sensitivity for search filter (default is false) + optional bool caseSensitive = 4; + // only for the current local instance (default is false) + optional bool local = 5; } \ No newline at end of file diff --git a/client/src/main/java/crawlercommons/urlfrontier/client/CountURLs.java b/client/src/main/java/crawlercommons/urlfrontier/client/CountURLs.java index 4240831..60f3aa3 100644 --- a/client/src/main/java/crawlercommons/urlfrontier/client/CountURLs.java +++ b/client/src/main/java/crawlercommons/urlfrontier/client/CountURLs.java @@ -37,6 +37,20 @@ public class CountURLs implements Runnable { "restricts the scope to this frontier instance instead of aggregating over the cluster") private Boolean local; + @Option( + names = {"-f", "--filter"}, + defaultValue = "", + paramLabel = "STRING", + description = "String filter applied to URLs") + private String filter; + + @Option( + names = {"-s", "--case-sensitive"}, + defaultValue = "false", + paramLabel = "BOOLEAN", + description = "Search filter is case sensitive") + private Boolean caseSensitive; + @Override public void run() { ManagedChannel channel = @@ -55,6 +69,11 @@ public void run() { builder.setCrawlID(crawl); builder.setLocal(local); + builder.setFilter(filter); + builder.setCaseSensitive(caseSensitive); + + builder.setFilter(filter); + Long s = blockingFrontier.countURLs(builder.build()); System.out.println(s.getValue() + " URLs in frontier"); diff --git a/service/pom.xml b/service/pom.xml index cabe5d7..1485c73 100644 --- a/service/pom.xml +++ b/service/pom.xml @@ -20,6 +20,7 @@ 1.5.8 5.13.0 2.16.1 + 3.17.0 @@ -118,7 +119,12 @@ ${mockito.version} test - + + + org.apache.commons + commons-lang3 + ${commons.lang.version} + diff --git a/service/src/main/java/crawlercommons/urlfrontier/service/AbstractFrontierService.java b/service/src/main/java/crawlercommons/urlfrontier/service/AbstractFrontierService.java index aacde01..4a2c088 100644 --- a/service/src/main/java/crawlercommons/urlfrontier/service/AbstractFrontierService.java +++ b/service/src/main/java/crawlercommons/urlfrontier/service/AbstractFrontierService.java @@ -49,6 +49,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; import org.slf4j.LoggerFactory; public abstract class AbstractFrontierService @@ -905,6 +906,9 @@ public void listURLs( long start = request.getStart(); String key = request.getKey(); + String filter = request.getFilter(); + boolean caseSensitive = request.getCaseSensitive(); + final String normalisedCrawlID = CrawlID.normaliseCrawlID(request.getCrawlID()); // 100 by default @@ -919,7 +923,7 @@ public void listURLs( normalisedCrawlID, key); - long totalCount = -1; + long totalCount = 0; long sentCount = 0; synchronized (getQueues()) { @@ -942,14 +946,23 @@ public void listURLs( CloseableIterator urliter = urlIterator(e); while (urliter.hasNext()) { - totalCount++; - if (totalCount < start) { - urliter.next(); - } else if (sentCount < maxURLs) { - responseObserver.onNext(urliter.next()); - sentCount++; - } else { - break; + URLItem cur = urliter.next(); + + if (StringUtils.isEmpty(filter) + || (caseSensitive && cur.getKnown().getInfo().getUrl().contains(filter)) + || (!caseSensitive + && StringUtils.containsIgnoreCase( + cur.getKnown().getInfo().getUrl(), filter))) { + + if (totalCount < start) { + totalCount++; + } + if (sentCount < maxURLs) { + sentCount++; + responseObserver.onNext(cur); + } else { + break; + } } } @@ -1001,10 +1014,17 @@ public void countURLs( StreamObserver responseObserver) { String key = request.getKey(); + String filter = request.getFilter(); + boolean caseSensitive = request.getCaseSensitive(); final String normalisedCrawlID = CrawlID.normaliseCrawlID(request.getCrawlID()); - LOG.info("Received request to count URLs [crawlId {}, key {}]", normalisedCrawlID, key); + LOG.info( + "Received request to count URLs [crawlId={}, key={}, filter={}, caseSensitive={}]", + normalisedCrawlID, + key, + filter, + caseSensitive); long totalCount = 0; @@ -1028,8 +1048,15 @@ public void countURLs( CloseableIterator urliter = urlIterator(e); while (urliter.hasNext()) { - urliter.next(); - totalCount++; + URLItem cur = urliter.next(); + + if (StringUtils.isBlank(filter) + || (caseSensitive && cur.getKnown().getInfo().getUrl().contains(filter)) + || (!caseSensitive + && StringUtils.containsIgnoreCase( + cur.getKnown().getInfo().getUrl(), filter))) { + totalCount++; + } } try { diff --git a/service/src/test/java/crawlercommons/urlfrontier/service/MemoryFrontierServiceTest.java b/service/src/test/java/crawlercommons/urlfrontier/service/MemoryFrontierServiceTest.java index d03000a..43ca6ff 100644 --- a/service/src/test/java/crawlercommons/urlfrontier/service/MemoryFrontierServiceTest.java +++ b/service/src/test/java/crawlercommons/urlfrontier/service/MemoryFrontierServiceTest.java @@ -358,6 +358,96 @@ void testMemoryIteratorSingleQueue() { assertEquals(3, nbUrls); } + @Test + @Order(9) + void testListAllURLsCaseInsensitive() { + + ListUrlParams params = + ListUrlParams.newBuilder() + .setCrawlID("crawl_id") + .setStart(0) + .setSize(100) + .setFilter("COMPLETED") + .setCaseSensitive(false) + .build(); + + final AtomicInteger fetched = new AtomicInteger(0); + final AtomicInteger count = new AtomicInteger(0); + + StreamObserver statusObserver = + new StreamObserver<>() { + + @Override + public void onNext(URLItem value) { + // receives confirmation that the value has been received + logURLItem(value); + + if (value.hasKnown()) { + fetched.incrementAndGet(); + } + count.incrementAndGet(); + } + + @Override + public void onError(Throwable t) { + t.printStackTrace(); + } + + @Override + public void onCompleted() { + LOG.info("completed testListAllURLsCaseInsensitive"); + } + }; + + memoryFrontierService.listURLs(params, statusObserver); + assertEquals(1, count.get()); + } + + @Test + @Order(10) + void testListAllURLsCaseSensitive() { + + ListUrlParams params = + ListUrlParams.newBuilder() + .setCrawlID("crawl_id") + .setStart(0) + .setSize(100) + .setFilter("COMPLETED") + .setCaseSensitive(true) + .build(); + + final AtomicInteger fetched = new AtomicInteger(0); + final AtomicInteger count = new AtomicInteger(0); + + StreamObserver statusObserver = + new StreamObserver<>() { + + @Override + public void onNext(URLItem value) { + // receives confirmation that the value has been received + logURLItem(value); + + if (value.hasKnown()) { + fetched.incrementAndGet(); + } + count.incrementAndGet(); + } + + @Override + public void onError(Throwable t) { + t.printStackTrace(); + } + + @Override + public void onCompleted() { + LOG.info("completed testListAllURLsCaseSensitive"); + } + }; + + memoryFrontierService.listURLs(params, statusObserver); + assertEquals(0, count.get()); + } + @Test @Order(99) void testNoRescheduleCompleted() { diff --git a/service/src/test/java/crawlercommons/urlfrontier/service/RocksDBServiceTest.java b/service/src/test/java/crawlercommons/urlfrontier/service/RocksDBServiceTest.java index 0c13f37..69ed53c 100644 --- a/service/src/test/java/crawlercommons/urlfrontier/service/RocksDBServiceTest.java +++ b/service/src/test/java/crawlercommons/urlfrontier/service/RocksDBServiceTest.java @@ -423,6 +423,76 @@ public void onCompleted() { rocksDBService.countURLs(builder.build(), responseObserver); } + @Test + @Order(10) + void testCountURLsCaseSensitive() { + + Urlfrontier.CountUrlParams.Builder builder = Urlfrontier.CountUrlParams.newBuilder(); + + builder.setKey("queue_mysite"); + builder.setCrawlID("crawl_id"); + builder.setFilter("COMPLETED"); + builder.setCaseSensitive(true); + + StreamObserver responseObserver = + new StreamObserver<>() { + + @Override + public void onNext(Urlfrontier.Long value) { + // receives confirmation that the value has been received + assertEquals(0, value.getValue()); + } + + @Override + public void onError(Throwable t) { + t.printStackTrace(); + fail(); + } + + @Override + public void onCompleted() { + LOG.info("completed testNoRescheduleCompleted 1/2"); + } + }; + + rocksDBService.countURLs(builder.build(), responseObserver); + } + + @Test + @Order(9) + void testCountURsLCaseInsensitive() { + + Urlfrontier.CountUrlParams.Builder builder = Urlfrontier.CountUrlParams.newBuilder(); + + builder.setKey("queue_mysite"); + builder.setCrawlID("crawl_id"); + builder.setFilter("COMPLETED"); + builder.setCaseSensitive(false); + + StreamObserver responseObserver = + new StreamObserver<>() { + + @Override + public void onNext(Urlfrontier.Long value) { + // receives confirmation that the value has been received + assertEquals(1, value.getValue()); + } + + @Override + public void onError(Throwable t) { + t.printStackTrace(); + fail(); + } + + @Override + public void onCompleted() { + LOG.info("completed testNoRescheduleCompleted 1/2"); + } + }; + + rocksDBService.countURLs(builder.build(), responseObserver); + } + @Test @Order(99) void testNoRescheduleCompleted() {