-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDecoder.cpp
626 lines (514 loc) · 15.5 KB
/
Decoder.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
/************************************************************************
* DICOMLIB
* Copyright 2003 Sunnybrook and Women's College Health Science Center
* Implemented by Trevor Morgan ([email protected])
*
* See LICENSE.txt for copyright and licensing info.
*************************************************************************/
#include <iostream>
#include <boost/static_assert.hpp>
#include <boost/type_traits.hpp>
#include <boost/tokenizer.hpp>
#include "DataSet.hpp"
#include "Types.hpp"
#include "Decoder.hpp"
#include "VR.hpp"
#include "Exceptions.hpp"
#include "DataDictionary.hpp"
#include "ValueToStream.hpp"
#include "Dumper.hpp"
using namespace std;
namespace dicom{
struct Decoder
{
void Decode();
void DecodeElement();
Decoder(Buffer& buffer,DataSet& ds,TS ts):buffer_(buffer),dataset_(ds),ts_(ts){}
//!see 5/7.5.2
struct EndOfSequence{};
/**
This is to handle those images with DICOM standard compliance problems.
Trevor might dislike it. However it's an unfortunate fact that not every single element
in the header of a DICOM image always comply perfectly with the DICOM standard.
When the problem is not critical, the capbility of opening the image is almost always expected.
Mei Ge, June 19, 2006
*/
void NonStandardDecode();
private:
void DecodeVRAndLength(Tag tag, VR& vr, UINT32& length);
Buffer& buffer_;
DataSet& dataset_;
TS ts_;
void DecodeSequence(Tag tag, UINT32 length);
/*!
Extract one element onto dataset.
*/
template <VR vr>
void GetElementValue(Tag tag)
{
typename TypeFromVR<vr>::Type data;
buffer_>>data;
//the 'template' keyword in the following line is technically redundant
//but works round a known bug in gcc.
//See http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=9510
dataset_.template Put<vr>(tag,data);
}
/*!
Only use previous function directly if we're sure that multiplicity is 1.
(ie. if VR is one of SQ,OB,OW,or UN). Otherwise use this function, which
takes multiplicity into account.
*/
template <VR vr>
void GetElementValue(Tag tag, size_t length)
{
StaticMultiplicityCheck<vr>();
typedef typename TypeFromVR<vr>::Type DataType;
if(length==0)
{
dataset_.template Put<vr>(tag);
return;
}
Buffer::iterator end = buffer_.position()+length;
while(buffer_.position()!=end)
GetElementValue<vr>(tag);
}
/*!
slightly different than above, as string multiplicity is handled differently than
other types, using a backslash as a seperator.
*/
template <VR vr>
void DecodeString(Tag tag, size_t length)
{
BOOST_STATIC_ASSERT((boost::is_same<std::string,typename TypeFromVR<vr>::Type>::value));
string s(length,' ');
buffer_>>s;
if (length == 0) //Sam Shen put this block here to avoid missing the blank strings
{
dataset_.template Put<vr>(tag,s);
return;
}
StripTrailingWhitespace(s);
/*
Technically speaking, we shouldn't have to do this, but there seem to be
many people producing images with null characters (0x00) in strings.
*/
StripTrailingNull(s);
if (vr==VR_CS || vr==VR_AS || vr==VR_LT || vr==VR_ST || vr==VR_UT)
dataset_.template Put<vr>(tag,s);
else
{
// parse multiplicity using boost::tokenizer
typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
boost::char_separator<char> sep("\\","",boost::keep_empty_tokens);//strings are seperated by backslashes;
tokenizer tokens(s, sep);
for (tokenizer::iterator tok_iter = tokens.begin();tok_iter != tokens.end(); ++tok_iter)
dataset_.template Put<vr>(tag,*tok_iter);
}
/** Question: Is the multiplicity of CS 1 for the above block?
Mei June 09, 2006
*/
}
void DecodeUID(Tag tag, size_t length)
{
string s(length, ' ');
buffer_ >> s;
StripTrailingNull(s);
if(length==0)
{
UID uid(s);
//dataset_.template Put<VR_UI>(tag,uid);
dataset_.Put<VR_UI>(tag,uid);
return;
}
/*
We shouldn't have to do this, but some people are producing UIDS
with trailing whitespace instead of NULLs, so we'll be lenient!
*/
StripTrailingWhitespace(s);
typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
boost::char_separator<char> sep("\\","",boost::keep_empty_tokens);//UIDS are seperated by backslashes;
tokenizer tokens(s, sep);
for (tokenizer::iterator tok_iter = tokens.begin();tok_iter != tokens.end(); ++tok_iter)
//dataset_.template Put<VR_UI>(tag,UID(*tok_iter));
dataset_.Put<VR_UI>(tag,UID(*tok_iter));
}
void DecodeOB(Tag tag, size_t length)
{
if(length == UNDEFINED_LENGTH)
{
Enforce(TAG_PIXEL_DATA==tag,"only pixel data can be encoded");
/*
this probably means we're decoding encapsulated compressed
pixel data, see Part 5, Annex A.4.
-Transfer syntax should be one of the JPEG transfer syntaxes,
-tag should be 'Pixel Data'
-Data is encapsulated as shown in Part5, Table A.4-2
I think we could handle this by treating the fragments as multiple values
for this tag. We could throw away the offset table, sacrificing potential
speed gains for simplicity, but this might upset clients that expect
it to be persisted?
*/
Enforce(ts_.isEncapsulated(),"Undefined value length on non-encoded transfer syntax.");
//extract offset table, then pull out multiple elements until we hit a Sequence Delimiter Item
Tag offset_table_tag;
buffer_ >> offset_table_tag;
Enforce(TAG_ITEM==offset_table_tag,"Offset table must be defined in encoded data");
UINT32 length;
buffer_ >> length;
buffer_.Increment(length);//we ignore the offset table...
for(;;)
{
Tag tag;
UINT32 length;
buffer_ >> tag;
buffer_ >> length;
if(TAG_SEQ_DELIM_ITEM==tag)
break;
Enforce(TAG_ITEM==tag,"Tag must be sequence item");
TypeFromVR<VR_OB>::Type data(length,0);
buffer_>>data;
dataset_.Put<VR_OB>(TAG_PIXEL_DATA,data);
}
}
else
{
TypeFromVR<VR_OB>::Type data(length,0);
buffer_>>data;
dataset_.Put<VR_OB>(tag,data);
}
}
};
//!DICOM messages need to have even byte length.(Part 5 section 7.1)
/*!
	Throws dicom::exception if the lowest bit of ByteLength is set.

	The dynamic exception specification that used to be on this function has
	been dropped: such specifications were deprecated in C++11 and removed
	from the language in C++17.
*/
void CheckEven(int ByteLength)
{
	if(ByteLength & 1)
		throw dicom::exception("Byte length not even.");
}
/*!
	Work out the VR and value length of the element whose tag has just been read.

	Implicit VR: the VR comes from GetVR(tag) and a 4-byte length follows.
	For pixel data the VR is overridden to OB or OW when the dataset already
	holds a Bits Allocated value of 8 or 16 respectively.

	Explicit VR: two bytes are read and combined low byte first to form the VR,
	then either 2 unused bytes plus a 4-byte length (UN, SQ, OW, OB, UT) or a
	2-byte length (everything else) is read.
*/
void Decoder::DecodeVRAndLength(Tag tag, VR& vr, UINT32& length)
{
	if(!ts_.isExplicitVR())
	{
		vr=GetVR(tag);	//look up in data dictionary.

		//This is a hack to determine if it is OW or OB in case implicit vr and tag==TAG_PIXEL_DATA -Sam
		if(TAG_PIXEL_DATA==tag)
		{
			UINT16 bits_allocated=0;
			if(dataset_.exists(TAG_BITS_ALLOC))
				dataset_(TAG_BITS_ALLOC)>>bits_allocated;

			switch(bits_allocated)
			{
			case 8:
				vr = VR_OB;
				break;
			case 16:
				vr = VR_OW;
				break;
			default:
				//something wrong if reaching here, don't know what to do -Sam
				break;
			}
		}

		buffer_ >> length;	//4 bytes of length info.
		return;
	}

	//VR is in the stream, always little_endian -Sam
	BYTE low_byte,high_byte;
	buffer_ >> low_byte;
	buffer_ >> high_byte;
	UINT16 word = (UINT16(high_byte)<<8)|low_byte;
	vr=VR(word);

	//see Part5 / 7.1.2
	const bool has_long_length =
		vr == VR_UN || vr == VR_SQ || vr == VR_OW || vr == VR_OB || vr == VR_UT;

	//Either way two more bytes follow: unused padding in the long form,
	//the length itself in the short form.
	buffer_ >> word;
	if(has_long_length)
		buffer_ >> length;	//4 bytes of length info
	else
		length = word;		//only 2 bytes of length info
}
//TAG_NULL not handled?
/*!
This is described in Part 5, section 7.1
*/
void Decoder::DecodeElement()
{
Tag tag;
buffer_ >> tag;
if(tag==TAG_ITEM_DELIM_ITEM)
{
UINT32 dummy_length;
buffer_>> dummy_length; //deliminator item always has length 0x00000000;
throw EndOfSequence(); //ie jump to end of loop in Decoder::Decode()
//This is a bit of a hack, not an optimal approach to handling
//this kind of situation, as it's not really 'unexpected'.
}
VR vr;
UINT32 length;
DecodeVRAndLength(tag,vr,length);
if(tag==TAG_NULL)
{
//cout<< "null tag, length=" << length << endl;
//what am I supposed to do here?(I think this has something to do with group length, which we're ignoring)
//I think this gets sent at the beginning of a DICOM message. TODO
}
if (vr == VR_SQ || (vr==VR_UN && length == UNDEFINED_LENGTH))//See Part 5, section 6.2.2, Notes 4
//if (vr == VR_SQ)//is this correct?
{
return DecodeSequence(tag,length);
}
if(UNDEFINED_LENGTH!=length)
CheckEven(length);
if(TAG_DATA_SET_PADDING==tag)
{//throw away padding - we don't maintain it.
vector<unsigned char> dummy(length);
buffer_>>dummy;
return;
}
/*
use the 'switch' statement to explicitly instantiate instances
of GetElementValue templated on required VR.
*/
/*
Now we read the relevant data from the byte stream, transfrom it
to the correct C++ type and push it onto the data set.
*/
/*
Part5, Annex A actually says we have to do some magic on a bunch of the
OB/OW tags, depending on the transfer syntax, but currently we're ignoring
that, and letting the end user sort it out.
*/
switch(vr)
{
case VR_AT://attribute tag??
GetElementValue<VR_AT>(tag,length);
return;
case VR_US://Unsigned Short
GetElementValue<VR_US>(tag,length);
return;
case VR_SS://signed short
GetElementValue<VR_SS>(tag,length);
return;
case VR_OB://other byte string
return DecodeOB(tag,length);
case VR_OW://other word string
{
TypeFromVR<VR_OW>::Type data((length>>1),0);// divide length by 2 because this is an array of
// two-byte characters, and 'length' is the number
// of bytes.
buffer_>>data;
dataset_.Put<VR_OW>(tag,data);
}
break;
case VR_UL://unsigned long
GetElementValue<VR_UL>(tag,length);
return;
case VR_SL://signed long
GetElementValue<VR_SL>(tag,length);
return;
case VR_FL://float
GetElementValue<VR_FL>(tag,length);
return;
case VR_FD://double
GetElementValue<VR_FD>(tag,length);
return;
case VR_UI://unique identifier.
return DecodeUID(tag,length);
case VR_DA://date
return DecodeString<VR_DA>(tag,length);
case VR_UN:
{
if(length>0)///////////////////////////////////////////////////
{
vector <BYTE> v (length);
buffer_>>v;
dataset_.Put<VR_UN>(tag,v);
}
}
break;
//all the string types!
//(This list will get shorter as we introduce more intelligent types for
// some VR's, e.g. a proper Date type,)
case VR_AS:
return DecodeString<VR_AS>(tag,length);
case VR_CS:
return DecodeString<VR_CS>(tag,length);
case VR_DS:
return DecodeString<VR_DS>(tag,length);
case VR_DT:
return DecodeString<VR_DT>(tag,length);//really should be a datetime object!!
case VR_LO:
return DecodeString<VR_LO>(tag,length);
case VR_LT:
return DecodeString<VR_LT>(tag,length);
case VR_PN:
return DecodeString<VR_PN>(tag,length);
case VR_SH:
return DecodeString<VR_SH>(tag,length);
case VR_ST:
return DecodeString<VR_ST>(tag,length);
case VR_TM:
return DecodeString<VR_TM>(tag,length);
case VR_UT:
return DecodeString<VR_UT>(tag,length);
case VR_IS:
return DecodeString<VR_IS>(tag,length);
case VR_AE:
return DecodeString<VR_AE>(tag,length);
default:
cout << "Unknown VR: " << UINT32(vr) << " in DecodeElement()" << endl;
cout << "Tag is: " << UINT32(tag) << " dataset size is " << dataset_.size() << endl;
throw UnknownVR(vr);
}
}
/*!
	Sequences are described in Part 5, section 7.5.

	Reads items until either SequenceLength bytes have been accounted for or a
	sequence delimiter item is read, then puts the collected items onto the
	dataset under SequenceTag. Any tag other than an item or a sequence
	delimiter is reported and raises UnknownTag.

	NOTE(review): the item length read from the stream is not checked against
	the bytes remaining in the buffer or in the sequence before it is used -
	worth confirming how Buffer behaves for truncated or corrupt input.
*/
void Decoder::DecodeSequence(Tag SequenceTag,UINT32 SequenceLength)
{
	Sequence items;

	//Only meaningful when the sequence has a defined length; it stays at
	//UNDEFINED_LENGTH otherwise and the loop ends on the delimiter item.
	UINT32 remaining=SequenceLength;

	while(remaining>0)
	{
		//Every entry starts with a 4-byte tag and a 4-byte length.
		UINT16 group;
		UINT16 element;
		buffer_ >> group;
		buffer_ >> element;
		Tag item_tag=makeTag(group,element);

		UINT32 item_length;
		buffer_ >> item_length;

		if(remaining!=UNDEFINED_LENGTH)
			remaining -= 8;

		if(TAG_SEQ_DELIM_ITEM==item_tag)
		{
			//maybe check that length is 0x00000000
			if(item_length!=0)
				cout<<"sequence delimination item length should really be zero." << endl;
			break;	//we're done
		}

		if(TAG_ITEM!=item_tag)
		{
			cout << "some funny tag in DecodeSequence() :" << item_tag << endl;
			throw UnknownTag(item_tag);
		}

		DataSet item;
		if(UNDEFINED_LENGTH==item_length)
		{
			/*
				Decode straight from the current buffer; the nested decoder
				stops at the item delimiter, and the distance the read position
				moved tells us how much was consumed.
			*/
			Buffer::iterator item_start=buffer_.position();
			Decoder nested(buffer_,item,ts_);
			nested.Decode();
			UINT32 consumed=buffer_.position()-item_start;
			if(remaining!=UNDEFINED_LENGTH)
				remaining -= consumed;
		}
		else
		{
			/*
				Copy the item's bytes into a buffer of their own and decode that.
				This is time-intensive, but simplifies the code considerably.
			*/
			Buffer item_bytes(buffer_.GetEndian());
			std::copy(buffer_.position(),buffer_.position()+item_length,std::back_inserter(item_bytes));
			Decoder nested(item_bytes,item,ts_);
			nested.Decode();
			buffer_.Increment(item_length);
			if(remaining!=UNDEFINED_LENGTH)
				remaining -= item_length;
		}

		items.push_back(item);
	}

	dataset_.Put<VR_SQ>(SequenceTag,items);
}
void Decoder::Decode()
{
try
{
while(buffer_.position()!=buffer_.end())
DecodeElement();
}
catch(EndOfSequence)
{
return;
}
}
/**
When an exception is caught, just skip the tag element which causes the problem
and then jump to the next tag keeping the decode process going on.
More catch blocks might be added.
Mei Ge, June 2006
*/
void Decoder::NonStandardDecode()
{
try
{
while(buffer_.position()!=buffer_.end())
DecodeElement();
}
catch(ReadBeyondBuffer e)
{
cout<<e.what()<<endl;
return;
}
catch(EndOfSequence)
{
return;
}
catch(DecoderError e)
{
NonStandardDecode();
}
catch(UnknownTag e)
{
NonStandardDecode();
}
catch(InvalidUID e)
{
NonStandardDecode();
}
}
/** For Cumulus3, we definitely need to open those files with small problems.
* Also, it won't cause problems for you guys.
* Mei, Nov 2006
*/
// void ReadFromBuffer(Buffer& buffer, DataSet& data, TS transfer_syntax)
// {
// Decoder d(buffer,data,transfer_syntax);
// d.NonStandardDecode();
// }
/*
[email protected] / December 2012
For now I've removed the above hack, because I want to write a more robust mechanism
for handling 'incorrect' files. This should ideally be handled in the context of a user
option; for example a 'strict' mode and a 'lax' mode for the parser. Furthermore we
really need a solid suite of sample files to test against.
*/
/*!
	Decode the contents of 'buffer' onto 'data', using 'transfer_syntax'
	to interpret the encoding. Uses the strict Decoder::Decode() path.
*/
void ReadFromBuffer(Buffer& buffer, DataSet& data, TS transfer_syntax)
{
	Decoder(buffer,data,transfer_syntax).Decode();
}
/*!
	Decode a single element from 'buffer' onto 'ds', using 'transfer_syntax'
	to interpret the encoding.
*/
void ReadElementFromBuffer(Buffer& buffer, DataSet& ds,TS transfer_syntax)
{
	Decoder(buffer,ds,transfer_syntax).DecodeElement();
}
};//namespace dicom
/*
Some notes on date/time:
Part5/Table 6.2-1 specifies how VR_DT is encoded.
We're not currently converting this internally to a boost time object, but
hopefully we will in the future.
However boost doesn't have a mechanism for managing Coordinated Universal Time
offsets, so it might be safest to just leave it as a string. If we do that,
then we should probably also make VR_DA stay as a string, and get rid of
the dependency on boost::date_time entirely?
*/