Skip to content

Commit

Permalink
feat: add default user limit to model
Browse files Browse the repository at this point in the history
  • Loading branch information
astsiapanay committed Feb 2, 2024
1 parent 0a87d83 commit d306d52
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 16 deletions.
28 changes: 26 additions & 2 deletions sample/aidial.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,12 @@
"endpoint": "http://localhost:7003",
"key": "modelKey3"
}
]
],
"userRoles": ["role1", "role2"],
"defaultUserLimit": {
"minute": "100000",
"day": "10000000"
}
},
"embedding-ada": {
"type": "embedding",
Expand All @@ -55,7 +60,8 @@
"key": "modelKey4"
}
]
}
},
"userRoles": ["role3"]
},
"keys": {
"proxyKey1": {
Expand Down Expand Up @@ -85,5 +91,23 @@
"app": {}
}
}
},
"userRoles": {
"role1": {
"limits": {
"chat-gpt-35-turbo": {
"minute": "200000",
"day": "10000000"
}
}
},
"role2": {
"limits": {
"chat-gpt-35-turbo": {
"minute": "100000",
"day": "20000000"
}
}
}
}
}
1 change: 1 addition & 0 deletions src/main/java/com/epam/aidial/core/config/Model.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ public class Model extends Deployment {
private List<Upstream> upstreams = List.of();
// If set, this name replaces the model name in the request body sent to the model adapter.
private String overrideName;
private Limit defaultUserLimit;
}
32 changes: 18 additions & 14 deletions src/main/java/com/epam/aidial/core/limiter/RateLimiter.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import com.epam.aidial.core.config.Deployment;
import com.epam.aidial.core.config.Key;
import com.epam.aidial.core.config.Limit;
import com.epam.aidial.core.config.Model;
import com.epam.aidial.core.config.Role;
import com.epam.aidial.core.data.ResourceType;
import com.epam.aidial.core.service.ResourceService;
Expand All @@ -20,11 +21,14 @@

import java.util.List;
import java.util.Map;
import java.util.Optional;

@Slf4j
@RequiredArgsConstructor
public class RateLimiter {

private static final Limit DEFAULT_LIMIT = new Limit();

private final Vertx vertx;

private final ResourceService resourceService;
Expand Down Expand Up @@ -131,28 +135,21 @@ private Limit getLimitByApiKey(ProxyContext context) {

private Limit getLimitByUser(ProxyContext context) {
List<String> userRoles = context.getUserRoles();
Limit defaultUserLimit = getDefaultUserLimit(context.getDeployment());
if (userRoles.isEmpty()) {
return defaultUserLimit;
}
String deploymentName = context.getDeployment().getName();
Map<String, Role> userRoleToDeploymentLimits = context.getConfig().getUserRoles();
long minuteLimit = 0;
long dayLimit = 0;
for (String userRole : userRoles) {
Role role = userRoleToDeploymentLimits.get(userRole);
if (role == null) {
continue;
}
Limit limit = role.getLimits().get(deploymentName);
if (limit == null) {
continue;
}
Limit limit = Optional.ofNullable(userRoleToDeploymentLimits.get(userRole))
.map(role -> role.getLimits().get(deploymentName))
.orElse(defaultUserLimit);
minuteLimit = Math.max(minuteLimit, limit.getMinute());
dayLimit = Math.max(dayLimit, limit.getDay());
}
if (minuteLimit == 0) {
minuteLimit = Long.MAX_VALUE;
}
if (dayLimit == 0) {
dayLimit = Long.MAX_VALUE;
}
Limit limit = new Limit();
limit.setMinute(minuteLimit);
limit.setDay(dayLimit);
Expand All @@ -163,4 +160,11 @@ private static String getPath(String deploymentName) {
return String.format("%s/tokens", deploymentName);
}

/**
 * Resolves the fallback per-user limit for the given deployment.
 *
 * @param deployment the deployment being accessed
 * @return the model's own {@code defaultUserLimit} when {@code deployment} is a
 *         {@link Model} and that value is non-null; otherwise the shared
 *         {@code DEFAULT_LIMIT} instance
 */
private static Limit getDefaultUserLimit(Deployment deployment) {
    // Only models carry a per-deployment default; every other deployment type uses the global one.
    if (!(deployment instanceof Model model)) {
        return DEFAULT_LIMIT;
    }
    Limit configured = model.getDefaultUserLimit();
    return configured != null ? configured : DEFAULT_LIMIT;
}

}

0 comments on commit d306d52

Please sign in to comment.