-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathcpg.proto
596 lines (565 loc) · 18.5 KB
/
cpg.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
syntax = "proto3";
package cpg;
option go_package = "github.com/ShiftLeftSecurity/proto/cpg";
option java_package = "io.shiftleft.proto.cpg";
option java_outer_classname = "Cpg";
option csharp_namespace = "io.shiftleft.proto.cpg";
// Keys of the properties that can be attached to a node (the type of
// CpgStruct.Node.Property.name and DiffGraph.RemoveNodeProperty.name).
//
// The numbers are what gets serialized, so they must never be changed or
// reused. Entries without a comment are undocumented in this schema.
enum NodePropertyName {
  // Zero value; the proto3 default when no property name is set.
  UNKNOWN_NODE_PROPERTY = 0;
  NODE_LABEL = 105;
  CATEGORIES = 814;
  // Hash value of the artifact that this CPG is built from.
  HASH = 120;
  // The static types a TYPE_DECL inherits from. This property is matched
  // against the FULL_NAME of TYPE nodes and thus it is required to have at
  // least one TYPE node for each TYPE_FULL_NAME.
  INHERITS_FROM_TYPE_FULL_NAME = 53;
  // The FULL_NAME of a method. Used to link CALL and METHOD nodes. It is
  // required to have exactly one METHOD node for each METHOD_FULL_NAME.
  METHOD_FULL_NAME = 54;
  SYMBOL = 100;
  // The dispatch type of a call, which is either static or dynamic. See
  // dispatchTypes.
  DISPATCH_TYPE = 25;
  PARAMETER_INDEX = 113;
  // Column where the code starts.
  COLUMN_NUMBER = 11;
  EVAL_TYPE = 815;
  ANNOTATION_NAME = 107;
  METHOD_SHORT_NAME = 102;
  // Full path of canonical file that contained this node; will be linked into
  // corresponding FILE nodes. Possible for METHOD, TYPE_DECL and
  // NAMESPACE_BLOCK.
  FILENAME = 106;
  // Type full name of which a TYPE_DECL is an alias of.
  ALIAS_TYPE_FULL_NAME = 158;
  // Binary type signature.
  BINARY_SIGNATURE = 14;
  // Identifier which uniquely describes a CLOSURE_BINDING. This property is
  // used to match captured LOCAL nodes with the corresponding CLOSURE_BINDING
  // nodes.
  CLOSURE_BINDING_ID = 50;
  // Type name emitted by parser, only present for logical type UNKNOWN.
  PARSER_TYPE_NAME = 3;
  // Evaluation strategy for function parameters and return values. One of the
  // values in "evaluationStrategies".
  EVALUATION_STRATEGY = 15;
  // The depth first ordering number used to detect back edges in reducible
  // CFGs.
  DEPTH_FIRST_ORDER = 17;
  // AST-children of CALL nodes have an argument index, that is used to match
  // call-site arguments with callee parameters. Explicit parameters are
  // numbered from 1 to N, while index 0 is reserved for implicit self / this
  // parameter. CALLs without implicit parameter therefore have arguments
  // starting with index 1. AST-children of BLOCK nodes may have an argument
  // index as well; in this case, the last argument index determines the
  // return-value of a BLOCK expression.
  ARGUMENT_INDEX = 40;
  // The static type decl of a TYPE. This property is matched against the
  // FULL_NAME of TYPE_DECL nodes. It is required to have exactly one TYPE_DECL
  // for each different TYPE_DECL_FULL_NAME.
  TYPE_DECL_FULL_NAME = 52;
  // Full name of an element, e.g., the class name including its package (e.g.
  // "io.shiftleft.dataflowenging.layers.dataflows.DataFlowRunner.run"). In
  // theory, the FULL_NAME just needs to be unique and is used for linking
  // references, so a consecutive integer would be valid. In practice, this
  // should be human readable.
  FULL_NAME = 6;
  // The type of the AST parent. Since this is only used in some parts of the
  // graph the list does not include all possible parents by intention.
  // Possible parents: METHOD, TYPE_DECL, NAMESPACE_BLOCK.
  AST_PARENT_TYPE = 56;
  ANNOTATION_FULL_NAME = 108;
  // The code snippet the node represents.
  CODE = 21;
  // General ordering property, such that the children of each AST-node are
  // typically numbered from 1, ..., N (this is not enforced). The ordering has
  // no technical meaning, but is used for pretty printing and OUGHT TO reflect
  // order in the source code.
  ORDER = 4;
  // Line where the code starts.
  LINE_NUMBER = 2;
  CATEGORY = 117;
  // Marks that a method has at least one mapping defined from the policies.
  HAS_MAPPING = 23;
  // Canonical token of a FIELD_IDENTIFIER. Typically identical to the CODE
  // field, but canonicalized according to source language semantics. Human
  // readable names are preferable. FIELD_IDENTIFIERs must share identical
  // CANONICAL_NAME if and only if they alias, e.g. in C-style unions (if the
  // aliasing relationship is unknown or there are partial overlaps, then one
  // must make a reasonable guess, and trade off between false negatives and
  // false positives).
  CANONICAL_NAME = 2001092;
  // Method signature. The format is defined by the language front-end, and the
  // backend simply compares strings to resolve function overloading, i.e.
  // match call-sites to METHODs. In theory, consecutive integers would be
  // valid, but in practice this should be human readable.
  SIGNATURE = 22;
  CLASS_NAME = 104;
  // A version, given as a string.
  VERSION = 13;
  IS_STATIC = 110;
  PACKAGE_NAME = 103;
  // Line where the code ends.
  LINE_NUMBER_END = 12;
  // The FULL_NAME of the AST parent of an entity.
  AST_PARENT_FULL_NAME = 57;
  VAR_TYPE = 111;
  CLASS_SHORT_NAME = 132;
  // The programming language this graph originates from.
  LANGUAGE = 19;
  // True if the referenced method is never overridden by the subclasses and
  // false otherwise.
  IS_METHOD_NEVER_OVERRIDDEN = 1002;
  // Indicates whether a call was already resolved. If not set this means not
  // yet resolved.
  RESOLVED = 24;
  // Sub directories of the policy directory that should be loaded when
  // processing the CPG.
  POLICY_DIRECTORIES = 119;
  // Indicates the modifier which is represented by a MODIFIER node. See
  // modifierTypes.
  MODIFIER_TYPE = 26;
  // The static type of an entity. E.g. expressions, local, parameters etc.
  // This property is matched against the FULL_NAME of TYPE nodes and thus it
  // is required to have at least one TYPE node for each TYPE_FULL_NAME.
  TYPE_FULL_NAME = 51;
  // Indicates that the construct (METHOD or TYPE_DECL) is external, that is,
  // it is referenced but not defined in the code (applies both to insular
  // parsing and to library functions where we have header files only).
  IS_EXTERNAL = 7;
  // References to other nodes. This is not a real property; it exists here for
  // the sake of proto serialization only. valueType and cardinality are
  // meaningless.
  CONTAINED_REF = 2007161;
  SPID = 122;
  // Deprecated. The schema names no replacement. The value is kept (rather
  // than removed) so existing data still decodes; the option below makes
  // generated code flag new uses.
  METHOD_INST_FULL_NAME = 55 [deprecated = true];
  // Internal flags.
  INTERNAL_FLAGS = 78;
  // Name of represented object, e.g., method name (e.g. "run").
  NAME = 5;
  NODE_ID = 99;
  // Tag value.
  VALUE = 8;
  SOURCE_TYPE = 115;
  LITERALS_TO_SINK = 123;
  // Content of CONFIG_FILE node.
  CONTENT = 20;
  // The original name of the (potentially mangled) captured variable.
  CLOSURE_ORIGINAL_NAME = 159;
  // Names of overlays applied to this graph, in order of application.
  OVERLAYS = 118;
  // Column where the code ends.
  COLUMN_NUMBER_END = 16;
  FINGERPRINT = 114;
  KEY = 131;
  PATH = 121;
  // Type hint for the dynamic type.
  DYNAMIC_TYPE_HINT_FULL_NAME = 1591;
  EVALUATION_TYPE = 112;
  PATTERN = 813;
  // The group ID for a dependency.
  DEPENDENCY_GROUP_ID = 58;
  SINK_TYPE = 116;
}
// Keys of the properties that can be attached to an edge (the type of
// CpgStruct.Edge.Property.name and DiffGraph.RemoveEdgeProperty.property_name).
//
// The numbers are what gets serialized, so they must never be changed or
// reused.
enum EdgePropertyName {
  // Zero value; the proto3 default when no property name is set.
  // NOTE(review): the identifier is missing its trailing "Y". It is left
  // untouched because renaming an enum value changes the generated code of
  // every consumer.
  UNKNOWN_EDGE_PROPERT = 0;
  // Local name of referenced CONTAINED node. This key is deprecated.
  LOCAL_NAME = 6 [deprecated = true];
  // Defines whether a PROPAGATE edge creates an alias.
  // NOTE(review): the comment on the PROPAGATE edge type states that the ALIAS
  // property is deprecated; confirm and mark this value accordingly.
  ALIAS = 1;
  // Index of referenced CONTAINED node (0 based) - used together with
  // cardinality=list. This key is deprecated.
  INDEX = 8 [deprecated = true];
  // A variable propagated by a reaching-def edge.
  VARIABLE = 11;
}
// How a parameter or return value of a function is passed. The
// EVALUATION_STRATEGY node property is documented as taking one of these
// values.
enum EvaluationStrategies {
  // Zero value; the proto3 default when no strategy is set.
  UNKNOWN_EVALUATION_STRATEGY = 0;
  // A parameter or return of a function is passed by reference which means an
  // address is used behind the scenes.
  BY_REFERENCE = 1;
  // Only applicable to object parameter or return values. The pointer to the
  // object is passed by value but the object itself is not copied and changes
  // to it are thus propagated out of the method context.
  BY_SHARING = 2;
  // A parameter or return of a function passed by value which means a flat
  // copy is used.
  BY_VALUE = 3;
}
// How the target of a call is determined. The DISPATCH_TYPE node property
// refers to these values.
enum DispatchTypes {
  // Zero value; the proto3 default when no dispatch type is set.
  UNKNOWN_DISPATCH_TYPE = 0;
  // For statically dispatched calls the call target is known before program
  // execution.
  STATIC_DISPATCH = 1;
  // For dynamically dispatched calls the target is determined during runtime.
  DYNAMIC_DISPATCH = 2;
}
// Programming languages known to the schema; presumably the value set of the
// LANGUAGE node property ("the programming language this graph originates
// from") - TODO confirm.
//
// NOTE(review): the type name is all upper case, unlike the other enums in
// this file. It is kept because renaming changes the generated code of every
// consumer.
enum LANGUAGES {
  // Zero value; the proto3 default when no language is set.
  UNKNOWN_LANGUAGE = 0;
  JAVA = 1;
  JAVASCRIPT = 2;
  GOLANG = 3;
  CSHARP = 4;
  C = 5;
  PYTHON = 6;
  LLVM = 7;
  PHP = 8;
}
// Application frameworks known to the schema. The FRAMEWORK node type is
// documented as carrying one of these values in its name key.
//
// NOTE(review): the type name is all upper case, unlike the other enums in
// this file. It is kept because renaming changes the generated code of every
// consumer.
enum FRAMEWORKS {
  // Zero value; the proto3 default when no framework is set.
  UNKNOWN_FRAMEWORK = 0;
  // Java spring framework.
  SPRING = 3;
  // Microsoft ASP.NET MVC.
  ASP_NET_MVC = 11;
  // JAX-WS.
  JAXWS = 12;
  // Framework facilities directly provided by Java.
  JAVA_INTERNAL = 14;
  // Microsoft ASP.NET Web UI.
  ASP_NET_WEB_UI = 13;
  // JAX-RS.
  JAXRS = 7;
  // Dropwizard framework.
  DROPWIZARD = 15;
  // Play framework.
  PLAY = 1;
  // Spark micro web framework.
  SPARK = 8;
  // Polyglot event-driven framework.
  VERTX = 4;
  // JavaServer Faces.
  JSF = 5;
  // Microsoft ASP.NET Web API.
  ASP_NET_WEB_API = 10;
  // WCF HTTP and REST.
  WCF = 16;
  // Google web toolkit.
  GWT = 2;
  // Java Servlet based frameworks.
  SERVLET = 6;
  // Microsoft ASP.NET Core.
  ASP_NET_CORE = 9;
}
// Value of a node or edge property (see CpgStruct.Node.Property and
// CpgStruct.Edge.Property). At most one of the alternatives below is set.
message PropertyValue {
  oneof value {
    // Scalar alternatives.
    string string_value = 1;
    bool bool_value = 2;
    int32 int_value = 3;
    int64 long_value = 4;
    float float_value = 5;
    double double_value = 6;
    // List alternatives. A oneof cannot hold a repeated field directly, so
    // each list is wrapped in its own message.
    StringList string_list = 7;
    BoolList bool_list = 8;
    IntList int_list = 9;
    LongList long_list = 10;
    FloatList float_list = 11;
    DoubleList double_list = 12;
    // References to other nodes; presumably the payload of the CONTAINED_REF
    // node property - TODO confirm.
    ContainedRefs contained_refs = 13;
  }
}
// A named group of references, used as the `contained_refs` alternative of
// PropertyValue.
message ContainedRefs {
  // Name under which the references are stored.
  string local_name = 1;
  // The referenced items; presumably node keys (CpgStruct.Node.key) - TODO
  // confirm.
  repeated int64 refs = 2;
}
// Wrapper that lets PropertyValue carry a list of strings inside its oneof.
message StringList {
  repeated string values = 1;
}
// Wrapper that lets PropertyValue carry a list of booleans inside its oneof.
message BoolList {
  repeated bool values = 1;
}
// Wrapper that lets PropertyValue carry a list of 32-bit integers inside its
// oneof.
message IntList {
  repeated int32 values = 1;
}
// Wrapper that lets PropertyValue carry a list of 64-bit integers inside its
// oneof.
message LongList {
  repeated int64 values = 1;
}
// Wrapper that lets PropertyValue carry a list of single-precision floats
// inside its oneof.
message FloatList {
  repeated float values = 1;
}
// Wrapper that lets PropertyValue carry a list of double-precision floats
// inside its oneof.
message DoubleList {
  repeated double values = 1;
}
// A graph made of typed nodes and typed edges, each carrying a list of
// properties.
//
// All enum numbers and field numbers below are what gets serialized, so they
// must never be changed or reused. Enum entries without a comment are
// undocumented in this schema.
message CpgStruct {
  // A single node of the graph.
  message Node {
    // Key of the node. Edge.src / Edge.dst presumably refer to this value -
    // TODO confirm.
    int64 key = 1;

    // Logical node type.
    enum NodeType {
      // Zero value; the proto3 default when no node type is set.
      UNKNOWN_NODE_TYPE = 0;
      // A local variable.
      LOCAL = 23;
      // A comment.
      COMMENT = 511;
      // A method/function/procedure.
      METHOD = 1;
      // A string tag.
      TAG = 24;
      // Reference to a type/class.
      TYPE_REF = 335;
      // A literal value assigned to an ANNOTATION_PARAMETER.
      ANNOTATION_LITERAL = 49;
      // Indicates the existence of a call executed on a return value or out
      // parameter of a method after this method has been executed. This is
      // used to model framework code calling functors returned from user
      // code. The outgoing REF edge indicates on which returned entity the
      // call will happen.
      POST_EXECUTION_CALL = 3071;
      IOFLOW = 212;
      TRANSFORM = 211;
      READ = 209;
      // This node represents a formal parameter going towards the callee
      // side.
      METHOD_PARAMETER_IN = 34;
      // This node represents a namespace as a whole whereas the
      // NAMESPACE_BLOCK is used for each grouping occurrence of a namespace
      // in code. Single representing NAMESPACE node is required for easier
      // navigation in the query language.
      NAMESPACE = 40;
      VARIABLE_INFO = 206;
      FLOW = 207;
      // Represents the binding of a LOCAL or METHOD_PARAMETER_IN into the
      // closure of a method.
      CLOSURE_BINDING = 334;
      // A language-specific node.
      UNKNOWN = 44;
      // Formal annotation parameter.
      ANNOTATION_PARAMETER = 7;
      DETACHED_TRACKING_POINT = 1001;
      SENSITIVE_DATA_TYPE = 52;
      // Member of a class struct or union.
      MEMBER = 9;
      CALL_SITE = 201;
      // A method instance which always has to reference a method and may
      // have type argument children if the referred to method is a template.
      METHOD_INST = 32;
      // Configuration file contents. Might be in use by a framework.
      CONFIG_FILE = 50;
      SENSITIVE_REFERENCE = 55;
      LOCATION = 25;
      // Node to save meta data about the graph on its properties. Exactly
      // one node of this type per graph.
      META_DATA = 39;
      // A return instruction.
      RETURN = 30;
      TAG_NODE_PAIR = 208;
      // Attribute of a DOM node.
      DOM_ATTRIBUTE = 601;
      // Argument for a TYPE_PARAMETER that belongs to a TYPE. It binds
      // another TYPE to a TYPE_PARAMETER.
      TYPE_ARGUMENT = 48;
      SOURCE = 202;
      SENSITIVE_VARIABLE = 54;
      // A node that represents which field is accessed in a
      // <operator>.fieldAccess, in e.g. obj.field. The CODE part is used for
      // human display and matching to MEMBER nodes. The CANONICAL_NAME is
      // used for dataflow tracking; typically both coincide. However,
      // suppose that two fields foo and bar are a C-style union; then CODE
      // refers to whatever the programmer wrote (obj.foo or obj.bar), but
      // both share the same CANONICAL_NAME (e.g. GENERATED_foo_bar).
      FIELD_IDENTIFIER = 2001081;
      WRITE = 210;
      KEY_VALUE_PAIR = 217;
      // A node in a Document Object Model (Tree) as obtained from, e.g., an
      // HTML parser.
      DOM_NODE = 600;
      METHOD_SUMMARY = 199;
      // A reference to a namespace.
      NAMESPACE_BLOCK = 41;
      // Initialization construct for arrays.
      ARRAY_INITIALIZER = 14;
      PROGRAM_POINT = 205;
      // A method annotation.
      ANNOTATION = 5;
      SENSITIVE_MEMBER = 53;
      // An arbitrary identifier/reference.
      IDENTIFIER = 27;
      TRANSFORMATION = 213;
      // Node representing a source file - the root of the AST.
      FILE = 38;
      // This node represents a dependency.
      DEPENDENCY = 35;
      // This node records what package prefix is most common to all analysed
      // classes in the CPG.
      PACKAGE_PREFIX = 36;
      // An implicit call site hidden in a method indicated by METHOD_MAP
      // policy entries.
      IMPLICIT_CALL = 307;
      MATCH_INFO = 51;
      // A jump target made explicit in the code using a label.
      JUMP_TARGET = 340;
      // Indicates the usage of a framework. E.g. java spring. The name key
      // is one of the values from frameworks list.
      FRAMEWORK = 42;
      // A modifier, e.g., static, public, private.
      MODIFIER = 300;
      // A control structure such as if, while, or for.
      CONTROL_STRUCTURE = 339;
      // A structuring block in the AST.
      BLOCK = 31;
      SP_BLACKLIST = 216;
      // Data used by a framework.
      FRAMEWORK_DATA = 43;
      // Literal/Constant.
      LITERAL = 8;
      ROUTE = 215;
      // A formal method return.
      METHOD_RETURN = 3;
      // This node represents a formal parameter going towards the caller
      // side.
      METHOD_PARAMETER_OUT = 33;
      CALL_CHAIN = 204;
      // A type which always has to reference a type declaration and may have
      // type argument children if the referred to type declaration is a
      // template.
      TYPE = 45;
      // Reference to a method instance.
      METHOD_REF = 333;
      // A type declaration.
      TYPE_DECL = 46;
      SINK = 203;
      SP_ANNOTATION_PARAMETER = 200;
      FINDING = 214;
      // A (method)-call.
      CALL = 15;
      // Type parameter of TYPE_DECL or METHOD.
      TYPE_PARAMETER = 47;
      // Multiple tags.
      TAGS = 301;
      // Assignment of annotation argument to annotation parameter.
      ANNOTATION_PARAMETER_ASSIGN = 6;
      // A binding of a METHOD into a TYPE_DECL.
      BINDING = 146;
    }
    NodeType type = 2;

    // Node properties.
    message Property {
      // Which property this is.
      NodePropertyName name = 1;
      // The property's value.
      PropertyValue value = 2;
    }
    repeated Property property = 3;
  }
  // All nodes of the graph.
  repeated Node node = 1;

  // A single directed edge of the graph.
  message Edge {
    // Field number 5 and the field name "key" are reserved so that they
    // cannot be reused; presumably they belonged to a removed field.
    reserved 5;
    reserved "key";

    // Source node.
    int64 src = 1;
    // Destination node.
    int64 dst = 2;

    // Edge type.
    enum EdgeType {
      // Zero value; the proto3 default when no edge type is set.
      UNKNOWN_EDGE_TYPE = 0;
      // Relation between TYPE_DECL and BINDING node.
      BINDS = 155;
      // Indicates taint removal. Only present between corresponding
      // METHOD_PARAMETER_IN and METHOD_PARAMETER_OUT nodes.
      TAINT_REMOVE = 17;
      // Link to evaluation type.
      EVAL_TYPE = 21;
      // Alias relation between two TYPE_DECL.
      TYPE_DECL_ALIAS = 139;
      // Connection between a captured LOCAL and the corresponding
      // CLOSURE_BINDING.
      CAPTURED_BY = 41;
      // Points to dominated node in post DOM tree.
      POST_DOMINATE = 182;
      IS_SENSITIVE_DATA_OF_TYPE = 902;
      // Encodes propagation of data from one node to another. The ALIAS
      // property is deprecated.
      PROPAGATE = 1;
      IS_SENSITIVE_DATA_DESCR_OF_REF = 901;
      // Edge from control structure node to the expression that holds the
      // condition.
      CONDITION = 56;
      // The receiver of a method call which is either an object or a
      // pointer.
      RECEIVER = 55;
      // Represents the capturing of a variable into a closure.
      CAPTURE = 40;
      // Link between FRAMEWORK and FRAMEWORK_DATA nodes.
      ATTACHED_DATA = 18;
      // Relation between a CALL and its arguments and RETURN and the
      // returned expression.
      ARGUMENT = 156;
      // Edges from nodes to tags.
      TAGGED_BY = 11;
      // Alias relation between types.
      ALIAS_OF = 138;
      // Control dependency graph.
      CDG = 183;
      // Membership relation for a compound object. This edge is deprecated.
      // The value is kept so existing data still decodes; the option below
      // makes generated code flag new uses.
      CONTAINS_NODE = 9 [deprecated = true];
      // Links together corresponding METHOD_PARAMETER_IN and
      // METHOD_PARAMETER_OUT nodes.
      PARAMETER_LINK = 12;
      // Syntax tree edge.
      AST = 3;
      // A reference to e.g. a LOCAL.
      REF = 10;
      // Shortcut over multiple AST edges.
      CONTAINS = 28;
      // Indicates the dynamic type(s) of an entity. This comes initially
      // from the frontend provided DYNAMIC_TYPE_HINT_FULL_NAME property and
      // is extended by our type resolution.
      DYNAMIC_TYPE = 20;
      IS_SENSITIVE_DATA_DESCR_OF = 900;
      // Source file of a node, in which its LINE_NUMBER and COLUMN_NUMBER
      // are valid.
      SOURCE_FILE = 157;
      // Type argument binding to a type parameter.
      BINDS_TO = 22;
      // Indicates that a method is part of the vtable of a certain type
      // declaration.
      VTABLE = 30;
      // Inheritance relation between types.
      INHERITS_FROM = 23;
      // Reaching definition edge.
      REACHING_DEF = 137;
      // Points to dominated node in DOM tree.
      DOMINATE = 181;
      // Control flow edge.
      CFG = 19;
      // Referencing to e.g. a LOCAL.
      // NOTE(review): this wording nearly duplicates the REF comment and
      // does not mention calls, so it is probably a copy-paste leftover.
      // Confirm the intended meaning of this edge type.
      CALL = 6;
    }
    EdgeType type = 3;

    // Edge properties.
    message Property {
      // Which property this is.
      EdgePropertyName name = 1;
      // The property's value.
      PropertyValue value = 2;
    }
    repeated Property property = 4;
  }
  // All edges of the graph.
  repeated Edge edge = 2;
}
// A property to be attached to a node identified by id. Used by CpgOverlay
// and DiffGraph.Entry.
message AdditionalNodeProperty {
  // Id of the node the property belongs to; presumably matches
  // CpgStruct.Node.key - TODO confirm.
  int64 node_id = 1;
  // The property (name and value) to attach.
  CpgStruct.Node.Property property = 2;
}
// A property to be attached to an edge. Used by CpgOverlay and
// DiffGraph.Entry.
message AdditionalEdgeProperty {
  // NOTE(review): the comment on DiffGraph.RemoveEdge.propertiesHash says
  // edges have no ids, so the role of this field is unclear. The edge is
  // presumably identified by out_node_key, in_node_key and edge_type instead -
  // TODO confirm.
  int64 edge_id = 1;
  // The property (name and value) to attach.
  CpgStruct.Edge.Property property = 2;
  // Key of the node on the outgoing side of the edge; presumably the edge's
  // source (CpgStruct.Edge.src) - TODO confirm.
  int64 out_node_key = 3;
  // Key of the node on the incoming side of the edge; presumably the edge's
  // destination (CpgStruct.Edge.dst) - TODO confirm.
  int64 in_node_key = 4;
  // Type of the edge.
  CpgStruct.Edge.EdgeType edge_type = 5;
}
// Overlays can be stacked onto each other, therefore their node ids must be
// globally unique.
message CpgOverlay {
  // Nodes contributed by this overlay.
  repeated CpgStruct.Node node = 1;
  // Edges contributed by this overlay.
  repeated CpgStruct.Edge edge = 2;
  // Properties contributed by this overlay for nodes referenced by id.
  repeated AdditionalNodeProperty node_property = 3;
  // Properties contributed by this overlay for edges.
  repeated AdditionalEdgeProperty edge_property = 4;
}
// DiffGraphs can be created independently of each other and therefore when
// _adding_ nodes|edges, each DiffGraph has its own ID space. However, when
// removing nodes|edges, the nodeIds refer to the globally unique graph id
// space.
message DiffGraph {
  // Removes the node with the given key.
  message RemoveNode {
    int64 key = 1;
  }

  // Removes one property from the node with the given key.
  message RemoveNodeProperty {
    // Key of the node that owns the property.
    int64 key = 1;
    // Name of the property to remove.
    NodePropertyName name = 2;
    // NOTE(review): presumably only relevant for contained references (compare
    // ContainedRefs.local_name) - TODO confirm.
    string local_name = 3;
  }

  // Removes an edge. The edge is identified by its end points, its type and a
  // hash of its properties.
  message RemoveEdge {
    int64 out_node_key = 1;
    int64 in_node_key = 2;
    CpgStruct.Edge.EdgeType edge_type = 3;
    // NOTE(review): camelCase, unlike the snake_case fields around it. It is
    // kept because renaming changes generated accessors in some languages.
    bytes propertiesHash = 4; // used to identify edges (since our edges don't have ids)
  }

  // Removes one property from an edge. The edge is identified the same way as
  // in RemoveEdge.
  message RemoveEdgeProperty {
    int64 out_node_key = 1;
    int64 in_node_key = 2;
    CpgStruct.Edge.EdgeType edge_type = 3;
    // NOTE(review): camelCase, unlike the snake_case fields around it. It is
    // kept because renaming changes generated accessors in some languages.
    bytes propertiesHash = 4; // used to identify edges (since our edges don't have ids)
    // Name of the property to remove.
    EdgePropertyName property_name = 5;
  }

  // One change of the diff; at most one of the alternatives is set.
  message Entry {
    oneof value {
      // Additions.
      CpgStruct.Node node = 1;
      CpgStruct.Edge edge = 2;
      AdditionalNodeProperty node_property = 3;
      AdditionalEdgeProperty edge_property = 4;
      // Removals.
      RemoveNode remove_node = 5;
      RemoveNodeProperty remove_node_property = 6;
      RemoveEdge remove_edge = 7;
      RemoveEdgeProperty remove_edge_property = 8;
    }
  }
  // The changes that make up this diff.
  repeated Entry entries = 1;
}