Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve failover mechanism for primary and secondary replica connections #243

Merged
merged 20 commits into from
Jan 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Grakn.java
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ interface Transaction extends AutoCloseable {

enum Type {
READ(0),
READ_REPLICA(2),
WRITE(1);

private final int id;
Expand Down
75 changes: 38 additions & 37 deletions GraknClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,21 +36,21 @@
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

import static grakn.client.common.exception.ErrorMessage.Client.CLUSTER_NOT_AVAILABLE;
import static grakn.client.common.exception.ErrorMessage.Client.CLUSTER_UNABLE_TO_CONNECT;
import static grakn.client.common.exception.ErrorMessage.Client.ILLEGAL_ARGUMENT;
import static grakn.common.collection.Collections.pair;

public class GraknClient {
public static final String DEFAULT_ADDRESS = "localhost:1729";

public static GraknClient.Core core() {
public static Core core() {
return core(DEFAULT_ADDRESS);
}

public static GraknClient.Core core(String address) {
return new GraknClient.Core(address);
public static Core core(String address) {
return new Core(address);
}

public static GraknClient.Cluster cluster() {
Expand All @@ -72,7 +72,7 @@ private Core(String address) {

@Override
public RPCSession.Core session(String database, Grakn.Session.Type type) {
return session(database, type, new GraknOptions());
return session(database, type, GraknOptions.core());
}

@Override
Expand Down Expand Up @@ -106,23 +106,20 @@ public Channel channel() {

public static class Cluster implements Grakn.Client {
private static final Logger LOG = LoggerFactory.getLogger(Cluster.class);
private final Map<Address.Cluster.Server, Core> coreClientMap;
private final Core[] coreClientArray;
private final AtomicInteger selectedCoreClientIndex;
private GraknClusterGrpc.GraknClusterBlockingStub clusterDiscoveryRPC;
private final Map<Address.Cluster.Server, Core> coreClients;
private final Map<Address.Cluster.Server, GraknClusterGrpc.GraknClusterBlockingStub> graknClusterRPCs;
private final RPCDatabaseManager.Cluster databases;
private boolean isOpen;

private Cluster(String address) {
Pair<GraknClusterGrpc.GraknClusterBlockingStub, Set<Address.Cluster.Server>> discovery = discoverCluster(address);
clusterDiscoveryRPC = discovery.first();
coreClientMap = discovery.second().stream()
coreClients = discoverCluster(address).stream()
.map(addr -> pair(addr, new Core(addr.client())))
.collect(Collectors.toMap(Pair::first, Pair::second));
coreClientArray = coreClientMap.values().toArray(new Core[] {});
selectedCoreClientIndex = new AtomicInteger();
graknClusterRPCs = coreClients.entrySet().stream()
.map(client -> pair(client.getKey(), GraknClusterGrpc.newBlockingStub(client.getValue().channel())))
.collect(Collectors.toMap(Pair::first, Pair::second));
databases = new RPCDatabaseManager.Cluster(
coreClientMap.entrySet().stream()
coreClients.entrySet().stream()
.map(client -> pair(client.getKey(), client.getValue().databases()))
.collect(Collectors.toMap(Pair::first, Pair::second))
);
Expand All @@ -131,18 +128,13 @@ private Cluster(String address) {

@Override
public RPCSession.Cluster session(String database, Grakn.Session.Type type) {
return session(database, type, new GraknOptions());
return session(database, type, GraknOptions.cluster());
}

@Override
public RPCSession.Cluster session(String database, Grakn.Session.Type type, GraknOptions options) {
return new RPCSession.Cluster(this, database, type, options, clusterDiscoveryRPC);
}

public GraknClusterGrpc.GraknClusterBlockingStub selectNextClusterDiscoveryRPC() {
Core selected = selectNextCoreClient();
clusterDiscoveryRPC = GraknClusterGrpc.newBlockingStub(selected.channel());
return clusterDiscoveryRPC;
if (!options.isCluster()) throw new GraknClientException(ILLEGAL_ARGUMENT, options);
return new RPCSession.Cluster(this, database, type, options.asCluster());
}

@Override
Expand All @@ -157,33 +149,42 @@ public boolean isOpen() {

@Override
public void close() {
coreClientMap.values().forEach(GraknClient.Core::close);
coreClients.values().forEach(GraknClient.Core::close);
isOpen = false;
}

public Map<Address.Cluster.Server, Core> coreClients() {
return coreClientMap;
public Set<Address.Cluster.Server> servers() {
return coreClients.keySet();
}

public Core coreClient(Address.Cluster.Server address) {
return coreClients.get(address);
}

public GraknClusterGrpc.GraknClusterBlockingStub graknClusterRPC(Address.Cluster.Server address) {
return graknClusterRPCs.get(address);
}

private Pair<GraknClusterGrpc.GraknClusterBlockingStub, Set<Address.Cluster.Server>> discoverCluster(String... addresses) {
private Set<Address.Cluster.Server> discoverCluster(String... addresses) {
for (String address: addresses) {
try (GraknClient.Core client = new Core(address)) {
LOG.info("Performing server discovery to {}...", address);
GraknClusterGrpc.GraknClusterBlockingStub clusterDiscoveryRPC = GraknClusterGrpc.newBlockingStub(client.channel());
try (Core client = new Core(address)) {
LOG.debug("Performing server discovery to {}...", address);
GraknClusterGrpc.GraknClusterBlockingStub graknClusterRPC = GraknClusterGrpc.newBlockingStub(client.channel());
ClusterProto.Cluster.Discover.Res res =
clusterDiscoveryRPC.clusterDiscover(ClusterProto.Cluster.Discover.Req.newBuilder().build());
graknClusterRPC.clusterDiscover(ClusterProto.Cluster.Discover.Req.newBuilder().build());
Set<Address.Cluster.Server> servers = res.getServersList().stream().map(Address.Cluster.Server::parse).collect(Collectors.toSet());
LOG.info("Discovered {}", servers);
return pair(clusterDiscoveryRPC, servers);
LOG.debug("Discovered {}", servers);
return servers;
} catch (StatusRuntimeException e) {
LOG.error("Server discovery to {} failed.", address);
}
}
throw new GraknClientException(CLUSTER_NOT_AVAILABLE.message((Object) addresses)); // remove ambiguity by casting to Object
throw clusterNotAvailableException();
}

private Core selectNextCoreClient() {
return coreClientArray[selectedCoreClientIndex.getAndIncrement() % coreClientMap.size()];
private GraknClientException clusterNotAvailableException() {
String addresses = servers().stream().map(Address.Cluster.Server::toString).collect(Collectors.joining(","));
return new GraknClientException(CLUSTER_UNABLE_TO_CONNECT, addresses); // remove ambiguity by casting to Object
}
}
}
43 changes: 42 additions & 1 deletion GraknOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
import java.util.Optional;

import static grakn.client.common.exception.ErrorMessage.Client.NEGATIVE_BATCH_SIZE;
import static grakn.client.common.exception.ErrorMessage.Internal.ILLEGAL_CAST;

public class GraknOptions {

private Boolean infer = null;
private Boolean explain = null;
private Integer batchSize = null;
Expand Down Expand Up @@ -60,4 +60,45 @@ public GraknOptions batchSize(int batchSize) {
this.batchSize = batchSize;
return this;
}

public static GraknOptions core() {
return new GraknOptions();
}

public static GraknOptions.Cluster cluster() {
return new Cluster();
}

public boolean isCluster() {
return false;
}

public Cluster asCluster() {
throw new GraknClientException(ILLEGAL_CAST, Cluster.class);
}

public static class Cluster extends GraknOptions {
private Boolean allowSecondaryReplica = null;

Cluster() {}

public Optional<Boolean> allowSecondaryReplica() {
return Optional.ofNullable(allowSecondaryReplica);
}

public Cluster allowSecondaryReplica(boolean primaryReplica) {
this.allowSecondaryReplica = primaryReplica;
return this;
}

@Override
public boolean isCluster() {
return true;
}

@Override
public Cluster asCluster() {
return this;
}
}
}
5 changes: 5 additions & 0 deletions GraknProtoBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ public static OptionsProto.Options options(GraknOptions options) {
options.infer().ifPresent(builder::setInfer);
options.explain().ifPresent(builder::setExplain);
options.batchSize().ifPresent(builder::setBatchSize);

if (options.isCluster()) {
options.asCluster().allowSecondaryReplica().ifPresent(builder::setAllowSecondaryReplica);
}

return builder.build();
}
}
33 changes: 22 additions & 11 deletions common/exception/ErrorMessage.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,15 @@ public static class Client extends ErrorMessage {
public static final Client MISSING_RESPONSE =
new Client(4, "The required field 'res' of type '%s' was not set.");
public static final Client UNKNOWN_REQUEST_ID =
new Client(5, "Received a response with unknown request id '%s'.");
new Client(5, "Received a response with unknown request id '%s'.") ;
public static final Client ILLEGAL_ARGUMENT = new Client(6, "Illegal argument passed into the method: '%s'.");

public static final Client CLUSTER_LEADER_NOT_YET_ELECTED =
new Client(7, "No leader has been elected for latest known term '%s'.");

public static final Client CLUSTER_NOT_AVAILABLE =
new Client(8, "Attempted connecting to these servers, but none are available: '%s'.");
public static final Client UNABLE_TO_CONNECT =
new Client(9, "Unable to connect to Grakn Core Server.");
public static final Client UNABLE_TO_CONNECT = new Client(7, "Unable to connect to Grakn Core Server.");
public static final Client CLUSTER_NO_PRIMARY_REPLICA_YET =
new Client(8, "No replica has been marked as the primary replica for latest known term '%s'.");
public static final Client CLUSTER_UNABLE_TO_CONNECT =
new Client(9, "Unable to connect to Grakn Cluster. Attempted connecting to these servers, but none are available: '%s'.");
public static final Client CLUSTER_REPLICA_NOT_PRIMARY =
new Client(10, "The replica is not the primary replica");

private static final String codePrefix = "CLI";
private static final String messagePrefix = "Illegal Client State";
Expand Down Expand Up @@ -87,8 +86,6 @@ public static class Query extends ErrorMessage {
new Query(3, "The answer type '%s' was not recognised.");
public static final Query MISSING_ANSWER =
new Query(4, "The required field 'answer' of type '%s' was not set.");
public static final Query ILLEGAL_CAST =
new Query(5, "Illegal casting operation to '%s'.");

private static final String codePrefix = "QRY";
private static final String messagePrefix = "Query Error";
Expand All @@ -97,4 +94,18 @@ public static class Query extends ErrorMessage {
super(codePrefix, number, messagePrefix, message);
}
}

public static class Internal extends ErrorMessage {
public static final Internal UNEXPECTED_INTERRUPTION =
new Internal(1, "Unexpected thread interruption!");
public static final Internal ILLEGAL_CAST =
new Internal(2, "Illegal casting operation to '%s'.");

private static final String codePrefix = "INT";
private static final String messagePrefix = "Internal Error";

Internal(int number, String message) {
super(codePrefix, number, messagePrefix, message);
}
}
}
11 changes: 9 additions & 2 deletions common/exception/GraknClientException.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,17 @@ public GraknClientException(String error) {
this.errorMessage = null;
}

public GraknClientException(ErrorMessage error) {
super(error.toString());
public GraknClientException(ErrorMessage error, Object... parameters) {
super(error.message(parameters));
assert !getMessage().contains("%s");
this.errorMessage = error;
}

public static GraknClientException of(StatusRuntimeException statusRuntimeException) {
if (statusRuntimeException.getStatus().getCode() == Status.Code.UNAVAILABLE) {
return new GraknClientException(ErrorMessage.Client.UNABLE_TO_CONNECT);
} else if (isReplicaNotPrimaryException(statusRuntimeException)) {
return new GraknClientException(ErrorMessage.Client.CLUSTER_REPLICA_NOT_PRIMARY);
}
return new GraknClientException(statusRuntimeException.getStatus().getDescription());
}
Expand All @@ -59,4 +61,9 @@ public String getName() {
public ErrorMessage getErrorMessage() {
return errorMessage;
}

// TODO: propagate exception from the server side in a less-brittle way
private static boolean isReplicaNotPrimaryException(StatusRuntimeException statusRuntimeException) {
return statusRuntimeException.getStatus().getCode() == Status.Code.INTERNAL && statusRuntimeException.getStatus().getDescription().contains("[RPL01]");
}
}
2 changes: 0 additions & 2 deletions common/tracing/TracingProtoBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
package grakn.client.common.tracing;

import grabl.tracing.client.GrablTracingThreadStatic;
import grakn.client.GraknOptions;
import grakn.protocol.OptionsProto;

import java.util.Collections;
import java.util.HashMap;
Expand Down
2 changes: 1 addition & 1 deletion concept/answer/Numeric.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import javax.annotation.Nullable;

import static grakn.client.common.exception.ErrorMessage.Query.BAD_ANSWER_TYPE;
import static grakn.client.common.exception.ErrorMessage.Query.ILLEGAL_CAST;
import static grakn.client.common.exception.ErrorMessage.Internal.ILLEGAL_CAST;

public class Numeric {
@Nullable
Expand Down
4 changes: 2 additions & 2 deletions dependencies/graknlabs/repositories.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ def graknlabs_dependencies():
def graknlabs_protocol():
git_repository(
name = "graknlabs_protocol",
remote = "https://github.com/graknlabs/protocol",
tag = "2.0.0-alpha-6", # sync-marker: do not remove this comment, this is used for sync-dependencies by @graknlabs_protocol
remote = "https://github.com/lolski/protocol",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Revert to graknlabs when typedb/typedb-protocol#110 is merged.

commit = "6d5a5e1b58d91fd001e06b2820363e7194f7fd3f", # sync-marker: do not remove this comment, this is used for sync-dependencies by @graknlabs_protocol
)

def graknlabs_behaviour():
Expand Down
Loading