Skip to content

Commit

Permalink
support for more than one NIC per socket in the enhanced machine mode…
Browse files Browse the repository at this point in the history
…l and fix some typos (flexflow#223)
  • Loading branch information
dycz0fx authored Aug 11, 2021
1 parent ccc9c22 commit 2bcb41a
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 46 deletions.
23 changes: 11 additions & 12 deletions include/simulator.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class MachineModel {
virtual int get_num_gpus() const = 0;
virtual float get_intra_node_gpu_bandwidth() const = 0;
virtual float get_inter_node_gpu_bandwidth() const = 0;
virtual std::vector<CommDevice *> get_comm_path(MemDevice *src_mem, MemDevice *tar_mem) const = 0;
virtual std::vector<CommDevice *> get_comm_path(MemDevice *src_mem, MemDevice *tar_mem) = 0;
virtual std::string to_string() const = 0;
int version;
};
Expand All @@ -120,7 +120,7 @@ class SimpleMachineModel : public MachineModel {
int get_num_gpus() const;
float get_intra_node_gpu_bandwidth() const;
float get_inter_node_gpu_bandwidth() const;
std::vector<CommDevice *> get_comm_path(MemDevice *src_mem, MemDevice *tar_mem) const;
std::vector<CommDevice *> get_comm_path(MemDevice *src_mem, MemDevice *tar_mem);
std::string to_string() const;
private:
int num_nodes;
Expand Down Expand Up @@ -152,10 +152,6 @@ class SimpleMachineModel : public MachineModel {
*/
class EnhancedMachineModel : public MachineModel {
public:
enum NicDistribution {
PER_NODE,
PER_SOCKET,
};
EnhancedMachineModel(std::string file, size_t gpu_fb_mem_capacity);
~EnhancedMachineModel();
int get_version() const;
Expand All @@ -168,10 +164,12 @@ class EnhancedMachineModel : public MachineModel {
MemDevice *get_gpu_fb_mem(int device_id) const;
MemDevice *get_gpu_fb_mem(int socket_id, int local_id) const;
CommDevice *get_nvlink(MemDevice *src_mem, MemDevice *tar_mem) const;
CommDevice *get_next_nic_in(int socket_id);
CommDevice *get_next_nic_out(int socket_id) const;
int get_num_gpus() const;
float get_intra_node_gpu_bandwidth() const;
float get_inter_node_gpu_bandwidth() const;
std::vector<CommDevice *> get_comm_path(MemDevice *src_mem, MemDevice *tar_mem) const;
std::vector<CommDevice *> get_comm_path(MemDevice *src_mem, MemDevice *tar_mem);
std::string to_string() const;
private:
int num_nodes;
Expand All @@ -188,7 +186,8 @@ class EnhancedMachineModel : public MachineModel {
float upi_bandwidth;
float nic_latency;
float nic_bandwidth;
NicDistribution nic_distribution;
int nic_persocket;
int cur_nic_local_id;
float pci_latency;
float pci_bandwidth;
float nvlink_latency;
Expand All @@ -214,8 +213,8 @@ class EnhancedMachineModel : public MachineModel {
std::vector<CommDevice *> membuses; // socket_id
std::vector<CommDevice *> upi_ins; // socket_id
std::vector<CommDevice *> upi_outs; // socket_id
std::vector<CommDevice *> nic_ins; // socket_id
std::vector<CommDevice *> nic_outs; // socket_id
std::vector<std::vector<CommDevice *>> nic_ins; // socket_id, local_id
std::vector<std::vector<CommDevice *>> nic_outs; // socket_id, local_id
std::vector<CommDevice *> pcis_to_host; // from gpu to main memory, socket_id
std::vector<CommDevice *> pcis_to_device; // from main memory to gpu, socket_id
std::vector<std::vector<CommDevice *>> nvlinks; // node_id, local_id
Expand All @@ -226,13 +225,13 @@ class EnhancedMachineModel : public MachineModel {
void add_gpus();
void add_membuses(float latency, float bandwidth);
void add_upis(float latency, float bandwidth);
void add_nics(float latency, float bandwidth, NicDistribution nic_distribution);
void add_nics(float latency, float bandwidth, int nic_persocket);
void add_pcis(float latency, float bandwidth);
void add_nvlinks(float latency, float bandwidth);
// attach a nvlink communication device to a pair of GPU framebuffer memories
void attach_nvlink(MemDevice *src_mem, MemDevice *tar_mem, CommDevice *comm);
// return a list of specific communication devices based on the descriptions of a communication path
void add_comm_path(std::vector<CommDevice::CommDevType> const &comm_device_list, MemDevice *src_mem, MemDevice *tar_mem, std::vector<CommDevice *> &ret) const;
void add_comm_path(std::vector<CommDevice::CommDevType> const &comm_device_list, MemDevice *src_mem, MemDevice *tar_mem, std::vector<CommDevice *> &ret);
};

class SimTask {
Expand Down
4 changes: 2 additions & 2 deletions machine_config_example
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ membus_bandwidth = 4.26623
# inter-socket links
upi_latency = 0.0004
upi_bandwidth = 10.14039
# inter-node links, the third argument means the distribution of the NICs (O means one NIC per node and 1 means one NIC per socket)
# inter-node links; nic_persocket is the number of NICs per socket (0 means one NIC per node)
nic_latency = 0.000507
nic_bandwidth = 10.9448431
nic_distribution = 0
nic_persocket = 0
# pci-e between CPU and GPU
pci_latency = 0.001
pci_bandwidth = 12.578468749999999
Expand Down
96 changes: 67 additions & 29 deletions src/runtime/machine_model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ float SimpleMachineModel::get_inter_node_gpu_bandwidth() const
}


std::vector<CommDevice *> SimpleMachineModel::get_comm_path(MemDevice *src_mem, MemDevice *tar_mem) const
std::vector<CommDevice *> SimpleMachineModel::get_comm_path(MemDevice *src_mem, MemDevice *tar_mem)
{
std::vector<CommDevice *> ret;
// on the same memory
Expand Down Expand Up @@ -222,9 +222,9 @@ EnhancedMachineModel::EnhancedMachineModel(std::string file, size_t gpu_fb_mem_c
nic_bandwidth = stof(words[2]);
printf("nic_bandwidth = %f\n", nic_bandwidth);
}
else if (words[0] == "nic_distribution") {
nic_distribution = static_cast<NicDistribution>(stoi(words[2]));
printf("nic_distribution = %d\n", nic_distribution);
else if (words[0] == "nic_persocket") {
nic_persocket = stoi(words[2]);
printf("nic_persocket = %d\n", nic_persocket);
}
else if (words[0] == "pci_latency") {
pci_latency = stof(words[2]);
Expand Down Expand Up @@ -345,15 +345,17 @@ EnhancedMachineModel::EnhancedMachineModel(std::string file, size_t gpu_fb_mem_c
num_sockets = num_nodes * num_sockets_per_node;
num_cpus = num_sockets * num_cpus_per_socket;
num_gpus = num_sockets * num_gpus_per_socket;
cur_nic_local_id = 0;
num_nvlinks_per_node = 0;
mem_to_nvlink.clear();
this->add_cpus();
this->add_gpus();
this->add_membuses(membus_latency, membus_bandwidth * 1024 * 1024);
this->add_upis(upi_latency / 2, upi_bandwidth * 2 * 1024 * 1024);
this->add_nics(nic_latency / 2, nic_bandwidth * 2 * 1024 * 1024, nic_distribution);
this->add_nics(nic_latency / 2, nic_bandwidth * 2 * 1024 * 1024, nic_persocket);
this->add_pcis(pci_latency, pci_bandwidth * 1024 * 1024);
this->add_nvlinks(nvlink_latency, nvlink_bandwidth * 1024 * 1024);
// printf("%s", this->to_string().c_str());
}

EnhancedMachineModel::~EnhancedMachineModel()
Expand Down Expand Up @@ -467,9 +469,9 @@ void EnhancedMachineModel::add_upis(float latency, float bandwidth)
}
}

void EnhancedMachineModel::add_nics(float latency, float bandwidth, NicDistribution nic_distribution)
void EnhancedMachineModel::add_nics(float latency, float bandwidth, int nic_persocket)
{
if (nic_distribution == PER_NODE) {
if (nic_persocket == 0) {
for (int i = 0; i < num_nodes; i++) {
int node_id = i;
for (int j = 0; j < num_sockets_per_node; j++) {
Expand All @@ -480,36 +482,41 @@ void EnhancedMachineModel::add_nics(float latency, float bandwidth, NicDistribut
if (j == 0) {
std::string nic_in_name = "NIC_IN " + std::to_string(device_id);
nic_in = new CommDevice(nic_in_name, CommDevice::NIC_IN_COMM, node_id, socket_id, device_id, latency, bandwidth);
nic_ins.push_back(nic_in);
nic_ins.push_back({});
nic_ins[socket_id].push_back(nic_in);
std::string nic_out_name = "NIC_OUT " + std::to_string(device_id);
nic_out = new CommDevice(nic_out_name, CommDevice::NIC_OUT_COMM, node_id, socket_id, device_id, latency, bandwidth);
nic_outs.push_back(nic_out);
nic_outs.push_back({});
nic_outs[socket_id].push_back(nic_out);
}
else {
nic_ins.push_back(nic_in);
nic_outs.push_back(nic_out);
nic_ins.push_back({});
nic_ins[socket_id].push_back(nic_in);
nic_outs.push_back({});
nic_outs[socket_id].push_back(nic_out);
}
}
}
}
else if (nic_distribution == PER_SOCKET) {
else {
for (int i = 0; i < num_nodes; i++) {
int node_id = i;
for (int j = 0; j < num_sockets_per_node; j++) {
int socket_id = i * num_sockets_per_node + j;
int device_id = socket_id;
std::string nic_in_name = "NIC_IN " + std::to_string(device_id);
CommDevice *nic_in = new CommDevice(nic_in_name, CommDevice::NIC_IN_COMM, node_id, socket_id, device_id, latency, bandwidth);
nic_ins.push_back(nic_in);
std::string nic_out_name = "NIC_OUT " + std::to_string(device_id);
CommDevice *nic_out = new CommDevice(nic_out_name, CommDevice::NIC_OUT_COMM, node_id, socket_id, device_id, latency, bandwidth);
nic_outs.push_back(nic_out);
nic_ins.push_back({});
nic_outs.push_back({});
for (int k = 0; k < nic_persocket; k++) {
int device_id = socket_id * nic_persocket + k;
std::string nic_in_name = "NIC_IN " + std::to_string(device_id);
CommDevice *nic_in = new CommDevice(nic_in_name, CommDevice::NIC_IN_COMM, node_id, socket_id, device_id, latency, bandwidth);
nic_ins[socket_id].push_back(nic_in);
std::string nic_out_name = "NIC_OUT " + std::to_string(device_id);
CommDevice *nic_out = new CommDevice(nic_out_name, CommDevice::NIC_OUT_COMM, node_id, socket_id, device_id, latency, bandwidth);
nic_outs[socket_id].push_back(nic_out);
}
}
}
}
else {
assert(false && "Unknown nic distribution type");
}
}

void EnhancedMachineModel::add_pcis(float latency, float bandwidth)
Expand Down Expand Up @@ -560,7 +567,7 @@ void EnhancedMachineModel::add_nvlinks(float latency, float bandwidth)
local_nvlink_id--;
}
attach_nvlink(src_gpu_fb_mem, tar_gpu_fb_mem, nvlinks[i][local_nvlink_id]);
printf("add nvlink: gdb_fb_mem %d , gou_fb_mem %d, nvlink %d %d\n", src_gpu_fb_mem->device_id, tar_gpu_fb_mem->device_id, node_id, local_nvlink_id);
printf("add nvlink: gpu_fb_mem %d , gpu_fb_mem %d, nvlink %d %d\n", src_gpu_fb_mem->device_id, tar_gpu_fb_mem->device_id, node_id, local_nvlink_id);
}
}
}
Expand Down Expand Up @@ -648,13 +655,42 @@ CommDevice *EnhancedMachineModel::get_nvlink(MemDevice *src_mem, MemDevice *tar_
}
}

// Returns the inbound NIC to use for the next transfer arriving at `socket_id`.
//
// When nic_persocket == 0 each socket's list holds a single NIC entry, so
// entry 0 is returned and the round-robin cursor is left untouched.
// Otherwise the NIC at cur_nic_local_id is returned and the cursor is advanced
// modulo nic_persocket. Note that cur_nic_local_id is a single cursor, not one
// per socket, so it advances regardless of which socket is requested.
//
// `socket_id` must be in [0, num_sockets). An out-of-range id is reported and
// asserted on; nullptr is returned if assertions are compiled out, so the
// function never flows off its end without a return value.
CommDevice *EnhancedMachineModel::get_next_nic_in(int socket_id)
{
  // Validate before any indexing so that the nic_persocket == 0 path is
  // protected as well, and negative ids are rejected.
  if (socket_id < 0 || socket_id >= num_sockets) {
    printf("MachineModel: get_next_nic_in - cannot find next nic_in socket_id %d cur_nic_local_id %d\n", socket_id, cur_nic_local_id);
    assert(false);
    return nullptr;
  }
  if (nic_persocket == 0) {
    return nic_ins[socket_id][0];
  }
  CommDevice *ret = nic_ins[socket_id][cur_nic_local_id];
  // Advance the round-robin cursor for the next request.
  cur_nic_local_id = (cur_nic_local_id + 1) % nic_persocket;
  return ret;
}

// Returns the outbound NIC to use for the next transfer leaving `socket_id`.
//
// When nic_persocket == 0 each socket's list holds a single NIC entry, so
// entry 0 is returned. Otherwise the NIC at the current round-robin position
// (cur_nic_local_id) is returned. This function only reads the cursor; it is
// advanced by get_next_nic_in().
//
// `socket_id` must be in [0, num_sockets). An out-of-range id is reported and
// asserted on; nullptr is returned if assertions are compiled out, so the
// function never flows off its end without a return value.
CommDevice *EnhancedMachineModel::get_next_nic_out(int socket_id) const
{
  // Validate before any indexing so that the nic_persocket == 0 path is
  // protected as well, and negative ids are rejected.
  if (socket_id < 0 || socket_id >= num_sockets) {
    printf("MachineModel: get_next_nic_out - cannot find next nic_out socket_id %d cur_nic_local_id %d\n", socket_id, cur_nic_local_id);
    assert(false);
    return nullptr;
  }
  if (nic_persocket == 0) {
    return nic_outs[socket_id][0];
  }
  return nic_outs[socket_id][cur_nic_local_id];
}
// Returns the total number of GPUs in the modeled machine (the num_gpus
// member, which the constructor sets to num_sockets * num_gpus_per_socket).
int EnhancedMachineModel::get_num_gpus() const
{
return num_gpus;
}

void EnhancedMachineModel::add_comm_path(std::vector<CommDevice::CommDevType> const &comm_device_list, MemDevice *src_mem,
MemDevice *tar_mem, std::vector<CommDevice *> &ret) const
MemDevice *tar_mem, std::vector<CommDevice *> &ret)
{
MemDevice *cur_mem = src_mem;
for (size_t i = 0; i < comm_device_list.size(); i++) {
Expand All @@ -672,10 +708,10 @@ void EnhancedMachineModel::add_comm_path(std::vector<CommDevice::CommDevType> co
break;
case CommDevice::NIC_IN_COMM:
cur_mem = tar_mem;
ret.emplace_back(nic_ins[cur_mem->socket_id]);
ret.emplace_back(get_next_nic_in(cur_mem->socket_id));
break;
case CommDevice::NIC_OUT_COMM:
ret.emplace_back(nic_outs[cur_mem->socket_id]);
ret.emplace_back(get_next_nic_out(cur_mem->socket_id));
break;
case CommDevice::PCI_TO_HOST_COMM:
ret.emplace_back(pcis_to_host[cur_mem->socket_id]);
Expand All @@ -692,7 +728,7 @@ void EnhancedMachineModel::add_comm_path(std::vector<CommDevice::CommDevType> co
}
}

std::vector<CommDevice *> EnhancedMachineModel::get_comm_path(MemDevice *src_mem, MemDevice *tar_mem) const
std::vector<CommDevice *> EnhancedMachineModel::get_comm_path(MemDevice *src_mem, MemDevice *tar_mem)
{
std::vector<CommDevice *> ret;
if (src_mem->device_id == tar_mem->device_id) {
Expand Down Expand Up @@ -790,10 +826,12 @@ std::string EnhancedMachineModel::to_string() const
s += membuses[socket_id]->name + '\n';
s += upi_ins[socket_id]->name + '\n';
s += upi_outs[socket_id]->name + '\n';
s += nic_ins[socket_id]->name + '\n';
s += nic_outs[socket_id]->name + '\n';
s += pcis_to_host[socket_id]->name + '\n';
s += pcis_to_device[socket_id]->name + '\n';
for (int k = 0; k < nic_persocket; k++) {
s += nic_ins[socket_id][k]->name + '\n';
s += nic_outs[socket_id][k]->name + '\n';
}
}
s += "------------------------------------------\n";
for (int j = 0; j < num_nvlinks_per_node * 2; j++) {
Expand Down
12 changes: 9 additions & 3 deletions src/runtime/simulator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ void Simulator::add_task_dependencies_with_xfer(SimTask* src_task,
{
std::vector<CommDevice *> path = machine->get_comm_path(src_task->mem, dst_task->mem);
// print the communication path
// printf("Path from %s to %s is: ", src_task->mem->name.c_str(), dst_task->mem->name.c_str());
// printf("Message: %zu B\nPath from %s to %s is: ", message_size, src_task->mem->name.c_str(), dst_task->mem->name.c_str());
// for (size_t i = 0; i < path.size(); i++) {
// printf("%s ", path[i]->name.c_str());
// }
Expand All @@ -243,6 +243,12 @@ void Simulator::add_task_dependencies_with_xfer(SimTask* src_task,
num_segment = max_num_segments;
seg_size = message_size / num_segment;
}
// optional optimization: can reduce the simulation time, but could also impact the accuracy of the simulation
// (a communication device can be occupied by one message for a long time without being usable by other concurrent communications)
// if (path.size() == 1) {
// num_segment = 1;
// seg_size = message_size;
// }
// Create all the comm tasks
// Divide messages into segments
for (size_t i = 0; i < path.size(); i++) {
Expand Down Expand Up @@ -277,10 +283,10 @@ void Simulator::add_task_dependencies_with_xfer(SimTask* src_task,
// overlap between upi_ins and upi_outs, and between nic_ins and nic_outs.
if (num_segment > 1 and path.size() >= 2) {
for (size_t i = 0; i < path.size(); i++) {
for (int j = 1; j < num_segment; j++) {
for (int j = 0; j < num_segment - 1; j++) {
if (((CommDevice *)all_tasks[i][j]->device)->comm_type == CommDevice::NIC_OUT_COMM or
((CommDevice *)all_tasks[i][j]->device)->comm_type == CommDevice::UPI_OUT_COMM) {
all_tasks[i+1][j-1]->add_next_task(all_tasks[i][j]);
all_tasks[i][j]->add_next_task(all_tasks[i-1][j+1]);
}
}
}
Expand Down

0 comments on commit 2bcb41a

Please sign in to comment.