-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtensor.h
executable file
·423 lines (317 loc) · 13.5 KB
/
tensor.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
#ifndef _TENSOR_H_
#define _TENSOR_H_
#include <assert.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <list>
#include <math.h>
#include <assert.h>
#include <iostream>
#include <algorithm>
#include <map>
#include <climits>
#include <set>
//#include "dltc_tensor.h"
#include "helper.h"
#include "grid.h"
#include <vector>
#define SYM_GROUP_0 0
#define SYM_GROUP_1 1
#define NON_SYM 3
#define CONTRACTED 4
namespace RRR {
using namespace std;
class Tensor
{
private:
// Declare TensorRedistributor as a friend class
friend class TensorRedistributor;
friend class Contraction;
friend class cost;
friend class GridRedistribute;
Grid* g;
int rank; // MPI rank
int* proc_addr; // Processor's multi-dimensional address
// Dimension of the tensor and the physical grid
int dims, grid_dims;
// Tensor string
std::string tensor_str;
// Tensor Size
int* tensor_size;
//stores the phase of each dimension of the tensor virtual blocks
int* index_phase;
//stores the range of local indices of each dimension
int* local_range;
//multi-dimensional size of block
int* block_range;
//stores the range of virtual indices of each dimension
int* tensor_range;
//holds the actual tensor data.
//stored as contiguous array of virtual blocks
double* tensor_tiles;
//stores the virtual index of each block stored by the class
//example tile_address[d][k] gives the virtual index of
//the d dimension for the k th block stored in tensor_tiles
int* tile_address;
//variable for correctness checking to be removed after debugging is over
int* is_touched;
//maximum blocks possible
int num_max_tiles;
//total blocks stored
int num_actual_tiles;
//maps a given dimension and local index to a list
//containing all the tiles with that dimension and local index
//example for a 4d tensor index_table[1][2]
//gives a list with tiles whose indices are
//[*,2*phase[1],*,*]
std::list<int> **index_table;
// Physical and Virtual grid sizes
int *pgrid, *vgrid;
int numSym; // Number of symmetry groups
int numNonSym; // Number of non-Symmetric dimensions
int sym_dims[2]; // Number of dimensions in each symmetry
int sym_ratio[2]; // Length of symmetric dimension of local tensor in each symmetry
// 1 if an index is contraction index 0 otherwise
int *cntr_map;
// all the indices and belonging to the same symmetry group have the same value
int *SG_index_map;
// all the indices and belonging to the same symmetry group have the same value
int *SG_index_map_permanent;
// maps tensor indices to physical qrid
int *index_dimension_map;
// maps processor grid dimension to tensor indices
int *reverse_index_map;
//maps block address to the location of that block in the tensor
std::map<int,int> block_addr_to_blk_num;
// Contraction string set
std::string* contr_dim_str;
//initializes the index table which is an array of array of lists
//storing offsets required to access local blocks for a given index
void init_index_table();
// fills up the index table
void fill_index_table();
// fills up the index table
void fill_index_table_tmp();
// Return a value based on the index passed
double default_get_value(int* &indices);
// Find the max index in current dimension based on the symmetry it is involved in and current indices of dimensions involved in the symmetry
int get_bound(int cur_dim, int *cur_indices, int* &idmap, int* &phy_grid);
// Generate data in tensor blocks
void generate_data(int cur_dim, int* &cur_indices, int &offset);
// Get number of tiles for a tensor at a particular rank and generate tile addresses for that
int get_num_tiles(int cur_dim, int* &cur_indices, int &offset, int* &idmap, int* &proc_address, int* &addresses, int* &phy_grid, int &num_tiles);
// Fill data in the newly created sub-tensor
void fill_data(double* &blocks, int* &addresses);
//stores virtual address of the data block given a local data block
void getVirtualAddress(int* &cur_indices, int* &virtual_address, int* &processor_addr, int* &idmap, int* &phy_grid);
//returns true if the data block is unique given the symmetry
bool isValid(int* &cur_indicies, int* &processor_addr, int* &idmap, int* &phy_grid);
// Assert if the idmap is valid while creating the tensor
void assert_idmap_validity();
void redistribute_broadcast(int* &new_idx_map,
int bcast_proc_count,
int* bcast_dims_sizes,
int* &old_repl_dims,
int* &new_repl_dims,
int old_repl_dims_count,
int new_repl_dims_count,
int* &bcast_dims,
int bcast_dims_count,
int* common_repl_dims,
int common_repl_dims_count,
int* non_common_repl_dims,
int non_common_repl_dims_count);
void redistribute_point_to_point(int* &new_idx_map,
int bcast_proc_count,
int* bcast_dims_sizes,
int* &old_repl_dims,
int* &new_repl_dims,
int old_repl_dims_count,
int new_repl_dims_count,
int* &bcast_dims,
int bcast_dims_count,
int* common_repl_dims,
int common_repl_dims_count,
int* non_common_repl_dims,
int non_common_repl_dims_count);
map<int, int> compile_senders_data(map<int, list<int> > proc_block_map,
double** &send_blocks,
int** &send_addr,
int* &num_send_blocks,
int* &old_repl_dims,
int old_repl_dims_count);
public:
//uses ints to idetify different indices. Each integer
//value corresponds to a particular index defined in ctce
//code
int* ctce_index_name;
//number of elements per block
int block_size;
// Default Constructor
Tensor(){};
//Main Constructor
Tensor(std::string tnsr_str, int* &idx_map, int* &tnsr_size, int* &virt_grid, Grid* &grid);
// Destructor
~Tensor();
//////////////////////////Interface is Global Arrays///////////////////////////
//void initialize_from_ga_tensor(DLTC::Tensor &t);
//void accumulate_to_ga_tensor(DLTC::Tensor &t);
///////////////////////////////////////////////////////////////////////////////
// Compute the number of maximum possible tiles in the tensor at this processor
int compute_num_max_tiles(int* &idmap, int* &phy_grid);
// Compute the number of maximum possible tiles in the tensor at this processor
int compute_num_max_tiles_rect(int* &idmap, int* &phy_grid);
//sets function to the get_value function
void set_get_value(double (*value_function)(int* &indices));
// Initializes the tensor (fills in values)
void initialize();
// Returns the number of tiles with address value indx_id in dimension dim
// Stores the tiles in tile_block and addresses in virt_addr
int getTiles(int dim, int indx_id, double* &tile_block, int* &virt_addr);
// Returns the number of tiles with address value indx_id in dimension dim
// Stores addresses in virt_addr tile location in tile_location
int getTileAddresses(int dim, int indx_id, int* &tile_location, int* &virt_addr);
int getTile(int* tile_address);
void printTiles(int dim, int indx_id);
// Marks the index as already contracted
void removecntrIndex(int indx);
// gives a list of indices that need to be bounced
// it is used by get bouncer to receive addresses of
//the nodes that will send bouncing data
std::list<int>* get_bounce_indx(int indx);
//outputs symmetry group and contraction index information
//contraction index is 1 if it is part of some symmetry group
//if it is not then it is either 1 or 2. The numbers are used
//to identify the contraction index. In case of symmetry,
//it is not important to distinguish them
void printInfo();
// Print all tile addresses
void print_all_tile_addr();
// Extract a sub-tensor from the parent tensor w.r.t given contracting dimension
Tensor* generate_tensor(int contr_dim, double* &data_blocks, int* &block_addresses, int num_tiles);
//Generate a new tensor from a parent tensor with different data block and addresses
Tensor* generate_tensor_with_new_tiles(double* &data_blocks,
int* &block_addresses,
int num_tiles);
// Generate a deserialized subtensor
Tensor* generate_deserialized_tensor(int num_des_dims, int* &des_tensor_dims, int* &des_grid_dims, double* &data_blocks, int* &block_addresses, int num_tiles);
// Generates a value for the given address
double get_value(int* &indices);
// Free the memory allocated for index table
void free_index_table();
//returns number of processor addresses from which data needs to be
//bounced.THe array of addresses are put in bouncers. my_address holds
//the invoking nodes address, index_dimension_map gives the mapping
//from index to processor dimension
int get_bouncers(int index, int** &bouncers);
//returns number of processor addresses from which data needs to be
//bounced, when the processor grid is not square .THe array of addresses
//are put in bouncers. my_address holds the invoking nodes address,
//index_dimension_map gives the mapping from index to processor
//dimension
int get_rect_bouncers(int contr_dim, int contr_idx, int** &bouncers);
// Get the number of blocks held by a processor
int get_tensor_size(int cur_dim, int* &cur_indices, int* &processor_addr);
// Gets receiver addresses based on current contraction dimension and index
int get_receivers(int contr_dim, int contr_idx, int** &receivers, int* &matching_indices);
// Gets matching indices based on current contraction
// dimension and index on a rectangular grid
int get_matching_indices_rect(int contr_dim, int contr_idx, int* &matching_indices);
// Gets receiver addresses based on current contraction
// dimension and index on a rectangular grid.
//for testing purpose, remove it after debugging
int get_receivers_rect(int contr_dim, int contr_idx, int** &receivers, int* &matching_indices, int &num_matches);
//returns block number using block address
//int get_block_number(int* &block_address);
//returns the pointer to the data block specified by block_address;
int get_tile_offset(int* &block_address);
// Check if the given tensor has the same tile addresses as this tensor
// in this processor
bool check_same_tiles(Tensor* &T);
// Check if the given tile exists in this tensor and return its index
int tile_exists(int* &search_addr);
//set the boolean for this address to true
//only for debugging. remove later
//asserts if address not found
int touch_address(int* &search_addr);
//sets boolean for all address to false
void untouch_all_address();
//checks if all addresses have been touched
int is_all_touched();
// Compare two tile addresses
bool compare_addresses(int* &addr_1, int* &addr_2);
// Compare two tile addresses
bool compare_addresses(int dims, int* &addr_1, int* &addr_2);
// Update physical grid
void update_pgrid(int d, int* phy_grid);
// Get a list of replicated dimensions in the given index dimension map
int get_replicated_dims(int* &idmap, int* &repl_dims);
// Redistribute this tensor as per the provided new index dimension map
void redistribute(int* &new_idx_map);
// Create generic proc address for broadcast receivers from the sender for redistribution
map<int, list<int> > get_generic_proc_addresses(
int* &tile_addresses,
int num_tiles,
int* &new_idx_map,
int non_common_repl_dims_count,
int* &non_common_repl_dims);
// Find senders for all the blocks this processor will hold after redistribution
// and return a map of sender rank with a list of block offsets that will be received from it
map<int, list<int> > get_recv_proc_block_map(
int* &new_idx_map,
int old_repl_dims_count,
int* &old_repl_dims,
int &num_new_tiles);
// Create broadcast group for redistribution from the sender
void get_bcast_groups(
map<int, list<int> > proc_block_map,
map<int, list<int> > recv_proc_block_map,
int old_repl_dims_count,
int* &old_repl_dims,
int bcast_dims_count,
int* &bcast_dims,
MPI_Comm &recv_comm,
MPI_Comm* &bcast_send_comm,
MPI_Comm* &bcast_recv_comm);
// Post MPI_Intercomm_Create from senders
void post_send_creates(
map<int, list<int> > recv_leader_block_map,
int bcast_dims_count,
int* &bcast_dims,
MPI_Comm &self_comm,
MPI_Comm &recv_comm,
MPI_Comm* &bcast_send_comm);
// Post sends for redistribute
void post_broadcast_sends(
map< int, list<int> > &proc_block_map,
MPI_Comm* &bcast_send_comm,
MPI_Comm &recv_intra_comm,
int intra_comm_rank,
double** &bcast_blocks,
int** &bcast_addr,
double* &bcast_recv_blocks,
int* &bcast_recv_addr,
int &offset);
void copy_bcast_send_data(map< int, list<int> > proc_block_map, double** &bcast_blocks, int** &bcast_addr);
// Check if the address satisfies tensor symmetry criterion
bool satisfies_sym(int* &addr);
void set_index_name(int* index_name);
// Getter functions
void set_cntr_map(int dim, int value) { cntr_map[dim] = value;}
int get_block_size() { return block_size;}
int* get_block_range() { return block_range;}
int get_dims() { return dims; }
int* get_SG_index_map() { return SG_index_map; }
int* get_index_dimension_map() { return index_dimension_map; }
int* get_vgrid() { return vgrid; }
int get_num_actual_tiles() { return num_actual_tiles;}
void set_num_actual_tiles(int num_tiles){ num_actual_tiles = num_tiles; }
string get_tensor_string() { return tensor_str;}
double* get_tensor_tiles() { return tensor_tiles;}
int* get_tile_addresses() { return tile_address;}
int get_rank() { return rank;}
};
}
#endif