diff --git a/docs/apireference.rst b/docs/apireference.rst
index 2634a9a..f3fef76 100644
--- a/docs/apireference.rst
+++ b/docs/apireference.rst
@@ -18,6 +18,15 @@ Reading TDMS Files
    :members:
    :exclude-members: group, channel, has_data, property, number_values
 
+.. autoclass:: DataChunk()
+   :members:
+
+.. autoclass:: GroupDataChunk()
+   :members:
+
+.. autoclass:: ChannelDataChunk()
+   :members:
+
 Writing TDMS Files
 ------------------
diff --git a/docs/reading.rst b/docs/reading.rst
index 99dbe00..7a53087 100644
--- a/docs/reading.rst
+++ b/docs/reading.rst
@@ -79,6 +79,23 @@ For example, to read 200 data points, beginning at offset 1,000::
     length = 200
     channel_data = channel.read_data(offset, length)
 
+Alternatively, you may have an application where you wish to stream all data chunk by chunk.
+:py:meth:`~nptdms.TdmsFile.data_chunks` is a generator that produces instances of
+:py:class:`~nptdms.DataChunk`, which can be used after opening a TDMS file with
+:py:meth:`~nptdms.TdmsFile.open`.
+For example, to compute the mean of a channel::
+
+    channel_sum = 0.0
+    channel_length = 0
+    with TdmsFile.open(tdms_file_path) as tdms_file:
+        for chunk in tdms_file.data_chunks():
+            channel_chunk = chunk[group_name][channel_name]
+            channel_length += len(channel_chunk)
+            channel_sum += channel_chunk[:].sum()
+    channel_mean = channel_sum / channel_length
+
+This approach can also be useful to stream TDMS data to another format on disk or into a data store.
+
 In cases where you don't need to read the file data and only need to
 read metadata, you can also use the static
 :py:meth:`~nptdms.TdmsFile.read_metadata` method::
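A further illustration of the streaming pattern documented above (not part of the diff): a minimal sketch that writes one channel to CSV chunk by chunk, assuming placeholder file, group and channel names and only the :py:meth:`~nptdms.TdmsFile.data_chunks` API added here::

    import csv

    from nptdms import TdmsFile

    # Placeholder path and names, purely for illustration
    tdms_file_path = "my_file.tdms"
    group_name = "group_1"
    channel_name = "channel_1"

    with TdmsFile.open(tdms_file_path) as tdms_file, \
            open("channel_1.csv", "w", newline="") as csv_file:
        writer = csv.writer(csv_file)
        for chunk in tdms_file.data_chunks():
            # Each DataChunk holds only one chunk of values per channel,
            # so memory use stays bounded however large the file is.
            channel_chunk = chunk[group_name][channel_name]
            writer.writerows([value] for value in channel_chunk[:])
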
diff --git a/docs/writing.rst b/docs/writing.rst
index 0620af3..e4d4eca 100644
--- a/docs/writing.rst
+++ b/docs/writing.rst
@@ -7,22 +7,22 @@ speeding up the writing of files and minimising file size are not
 implemented by npTDMS, but the basic functionality required
 to write TDMS files is available.
 
-To write a TDMS file, the :py:class:`nptdms.TdmsWriter` class is used, which
+To write a TDMS file, the :py:class:`~nptdms.TdmsWriter` class is used, which
 should be used as a context manager.
-The ``__init__`` method accepts the path to the file to create, or a file
+The :py:meth:`~nptdms.TdmsWriter.__init__` method accepts the path to the file to create, or a file
 that has already been opened in binary write mode::
 
     with TdmsWriter("my_file.tdms") as tdms_writer:
         # write data
 
-The :py:meth:`nptdms.TdmsWriter.write_segment` method is used to write
+The :py:meth:`~nptdms.TdmsWriter.write_segment` method is used to write
 a segment of data to the TDMS file. Because the TDMS file format is designed
 for streaming data applications, it supports writing data one segment at a
 time as data becomes available.
 If you don't require this functionality you can simple call ``write_segment``
 once with all of your data.
-The ``write_segment`` method takes a list of objects, each of which must be an
+The :py:meth:`~nptdms.TdmsWriter.write_segment` method takes a list of objects, each of which must be an
 instance of one of:
 
 - :py:class:`nptdms.RootObject`. This is the TDMS root object, and there may
   only be one root object in a segment.
@@ -69,7 +69,7 @@ is given below::
                                 channel_object])
 
 You could also read a TDMS file and then re-write it by passing
-:py:class:`nptdms.TdmsGroup` and :py:class:`nptdms.TdmsChannel`
+:py:class:`~nptdms.TdmsGroup` and :py:class:`~nptdms.TdmsChannel`
 instances to the ``write_segment`` method. If you want
 to only copy certain channels for example, you could do something like::
 
@@ -83,3 +83,6 @@ to only copy certain channels for example, you could do something like::
             root_object = RootObject(original_file.properties)
             channels_to_copy = [chan for chan in original_channels if include_channel(chan)]
             copied_file.write_segment([root_object] + original_groups + channels_to_copy)
+
+Note that this isn't suitable for copying channels with scaled data, as the channel data
+will already have scaling applied.
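The ``include_channel`` predicate used in the snippet above is left undefined in the docs; a minimal hypothetical definition, filtering channels by name (the names are placeholders), could look like::

    def include_channel(channel):
        # Keep only the channels we want to copy; any predicate over a
        # TdmsChannel (its name, properties, etc.) could be used instead.
        return channel.name in ("voltage", "current")
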
""" - def __init__(self, data, daqmx_data): - self.raw_data = data - self.daqmx_raw_data = daqmx_data + def __init__(self, channel_data): + self.channel_data = channel_data @staticmethod def empty(): - return DataChunk({}, {}) + return RawDataChunk({}) @staticmethod def channel_data(data): - return DataChunk(data, {}) + channel_chunks = { + path: RawChannelDataChunk.channel_data(d) + for (path, d) in data.items() + } + return RawDataChunk(channel_chunks) @staticmethod def scaler_data(data): - return DataChunk({}, data) + channel_chunks = { + path: RawChannelDataChunk.scaler_data(d) + for (path, d) in data.items() + } + return RawDataChunk(channel_chunks) -class ChannelDataChunk(object): +class RawChannelDataChunk(object): """Data read for a single channel from a single chunk in a TDMS segment - :ivar raw_data: Raw data in this chunk for a standard TDMS channel. - :ivar daqmx_raw_data: A dictionary of scaler data in this segment for + :ivar data: Raw data in this chunk for a standard TDMS channel. + :ivar scaler_data: A dictionary of scaler data in this segment for DAQmx raw data. Keys are the scaler id and values are data arrays. """ - def __init__(self, data, daqmx_data): - self.raw_data = data - self.daqmx_raw_data = daqmx_data + def __init__(self, data, scaler_data): + self.data = data + self.scaler_data = scaler_data + + def __len__(self): + if self.data is not None: + return len(self.data) + elif self.scaler_data is not None: + return next(len(d) for d in self.scaler_data.values()) + return 0 @staticmethod def empty(): - return ChannelDataChunk(None, None) + return RawChannelDataChunk(None, None) @staticmethod def channel_data(data): - return ChannelDataChunk(data, None) + return RawChannelDataChunk(data, None) @staticmethod def scaler_data(data): - return ChannelDataChunk(None, data) + return RawChannelDataChunk(None, data) def read_property(f, endianness="<"): diff --git a/nptdms/channel_data.py b/nptdms/channel_data.py index de967f7..191213a 100644 --- a/nptdms/channel_data.py +++ b/nptdms/channel_data.py @@ -25,7 +25,7 @@ def get_data_receiver(obj, num_values, memmap_dir=None): return DaqmxDataReceiver(obj, num_values, memmap_dir) if obj.data_type.nptype is None: - return ListDataReceiver() + return ListDataReceiver(obj) return NumpyDataReceiver(obj, num_values, memmap_dir) @@ -37,9 +37,13 @@ class ListDataReceiver(object): :ivar data: List of data points """ - def __init__(self): + def __init__(self, channel): """Initialise new data receiver for a TDMS object """ + if channel.data_type == types.String: + self._dtype = np.dtype('O') + else: + self._dtype = None self._data = [] self.scaler_data = {} @@ -50,7 +54,7 @@ def append_data(self, data): @property def data(self): - return np.array(self._data) + return np.array(self._data, dtype=self._dtype) class NumpyDataReceiver(object): diff --git a/nptdms/daqmx.py b/nptdms/daqmx.py index 6171c09..2c63a37 100644 --- a/nptdms/daqmx.py +++ b/nptdms/daqmx.py @@ -3,7 +3,7 @@ from nptdms import types from nptdms.base_segment import ( - BaseSegment, BaseSegmentObject, DataChunk, read_interleaved_segment_bytes) + BaseSegment, BaseSegmentObject, RawDataChunk, read_interleaved_segment_bytes) from nptdms.log import log_manager @@ -78,7 +78,7 @@ def _read_data_chunk(self, file, data_objects, chunk_index): scaler.data_type.nptype.newbyteorder(self.endianness)) scaler_data[obj.path][scaler.scale_id] = this_scaler_data - return DataChunk.scaler_data(scaler_data) + return RawDataChunk.scaler_data(scaler_data) class 
diff --git a/nptdms/channel_data.py b/nptdms/channel_data.py
index de967f7..191213a 100644
--- a/nptdms/channel_data.py
+++ b/nptdms/channel_data.py
@@ -25,7 +25,7 @@ def get_data_receiver(obj, num_values, memmap_dir=None):
         return DaqmxDataReceiver(obj, num_values, memmap_dir)
 
     if obj.data_type.nptype is None:
-        return ListDataReceiver()
+        return ListDataReceiver(obj)
 
     return NumpyDataReceiver(obj, num_values, memmap_dir)
 
@@ -37,9 +37,13 @@ class ListDataReceiver(object):
 
     :ivar data: List of data points
     """
 
-    def __init__(self):
+    def __init__(self, channel):
         """Initialise new data receiver for a TDMS object
         """
+        if channel.data_type == types.String:
+            self._dtype = np.dtype('O')
+        else:
+            self._dtype = None
         self._data = []
         self.scaler_data = {}
 
@@ -50,7 +54,7 @@ def append_data(self, data):
 
     @property
     def data(self):
-        return np.array(self._data)
+        return np.array(self._data, dtype=self._dtype)
 
 
 class NumpyDataReceiver(object):
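The object dtype matters because NumPy otherwise coerces Python strings to fixed-width unicode arrays; a quick NumPy-only illustration of the difference::

    import numpy as np

    values = ["a", "longer string"]

    coerced = np.array(values)                          # fixed-width unicode, dtype '<U13'
    preserved = np.array(values, dtype=np.dtype('O'))   # object dtype, keeps Python str values

    print(coerced.dtype, preserved.dtype)
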
diff --git a/nptdms/daqmx.py b/nptdms/daqmx.py
index 6171c09..2c63a37 100644
--- a/nptdms/daqmx.py
+++ b/nptdms/daqmx.py
@@ -3,7 +3,7 @@
 
 from nptdms import types
 from nptdms.base_segment import (
-    BaseSegment, BaseSegmentObject, DataChunk, read_interleaved_segment_bytes)
+    BaseSegment, BaseSegmentObject, RawDataChunk, read_interleaved_segment_bytes)
 from nptdms.log import log_manager
 
 
@@ -78,7 +78,7 @@ def _read_data_chunk(self, file, data_objects, chunk_index):
                     scaler.data_type.nptype.newbyteorder(self.endianness))
                 scaler_data[obj.path][scaler.scale_id] = this_scaler_data
 
-        return DataChunk.scaler_data(scaler_data)
+        return RawDataChunk.scaler_data(scaler_data)
 
 
 class DaqmxSegmentObject(BaseSegmentObject):
diff --git a/nptdms/export/hdf_export.py b/nptdms/export/hdf_export.py
index dadd2dd..77b9a5e 100644
--- a/nptdms/export/hdf_export.py
+++ b/nptdms/export/hdf_export.py
@@ -23,7 +23,6 @@ def from_tdms_file(tdms_file, filepath, mode='w', group='/'):
 
     h5file = h5py.File(filepath, mode)
 
-    container_group = None
     if group in h5file:
         container_group = h5file[group]
     else:
@@ -34,7 +33,8 @@ def from_tdms_file(tdms_file, filepath, mode='w', group='/'):
         container_group.attrs[property_name] = _hdf_attr_value(property_value)
 
     # Now iterate through groups and channels,
-    # writing the properties and data
+    # writing the properties and creating data sets
+    datasets = {}
     for group in tdms_file.groups():
         # Write the group's properties
         container_group.create_group(group.name)
@@ -47,26 +47,47 @@ def from_tdms_file(tdms_file, filepath, mode='w', group='/'):
 
             if channel.data_type is types.String:
                 # Encode as variable length UTF-8 strings
-                channel_data = container_group.create_dataset(
-                    channel_key, (len(channel.data),), dtype=h5py.string_dtype())
-                channel_data[...] = channel.data
+                datasets[channel.path] = container_group.create_dataset(
+                    channel_key, (len(channel),), dtype=h5py.string_dtype())
             elif channel.data_type is types.TimeStamp:
                 # Timestamps are represented as fixed length ASCII strings
                 # because HDF doesn't natively support timestamps
-                channel_data = container_group.create_dataset(
-                    channel_key, (len(channel.data),), dtype='S27')
-                string_data = np.datetime_as_string(channel.data, unit='us', timezone='UTC')
-                encoded_data = [s.encode('ascii') for s in string_data]
-                channel_data[...] = encoded_data
+                datasets[channel.path] = container_group.create_dataset(
+                    channel_key, (len(channel),), dtype='S27')
             else:
-                container_group[channel_key] = channel.data
+                datasets[channel.path] = container_group.create_dataset(
+                    channel_key, (len(channel),), dtype=channel.dtype)
 
             for prop_name, prop_value in channel.properties.items():
                 container_group[channel_key].attrs[prop_name] = _hdf_attr_value(prop_value)
 
+    # Set data
+    if tdms_file.data_read:
+        for group in tdms_file.groups():
+            for channel in group.channels():
+                datasets[channel.path][...] = _hdf_array(channel, channel.data)
+    else:
+        # Data hasn't been read into memory, stream it from disk
+        for chunk in tdms_file.data_chunks():
+            for group in chunk.groups():
+                for channel_chunk in group.channels():
+                    channel = tdms_file[group.name][channel_chunk.name]
+                    offset = channel_chunk.offset
+                    end = offset + len(channel_chunk)
+                    datasets[channel.path][offset:end] = _hdf_array(channel, channel_chunk[:])
+
     return h5file
 
 
+def _hdf_array(channel, data):
+    """ Convert data array into a format suitable for initialising HDF data
+    """
+    if channel.data_type is types.TimeStamp:
+        string_data = np.datetime_as_string(data, unit='us', timezone='UTC')
+        return [s.encode('ascii') for s in string_data]
+    return data
+
+
 def _hdf_attr_value(value):
     """ Convert a value into a format suitable for an HDF attribute
     """
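With the export rewritten to create the data sets up front and then fill them, HDF conversion of a large file can presumably be streamed from a file opened without reading data into memory; a minimal sketch, assuming a placeholder output path::

    from nptdms import TdmsFile

    with TdmsFile.open("my_file.tdms") as tdms_file:
        # data_read is False for a file opened this way, so from_tdms_file
        # takes the chunk-streaming branch rather than loading whole channels.
        h5_file = tdms_file.as_hdf("my_file.h5")
        h5_file.close()
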
diff --git a/nptdms/reader.py b/nptdms/reader.py
index c2f5b1f..c9a07c9 100644
--- a/nptdms/reader.py
+++ b/nptdms/reader.py
@@ -4,7 +4,7 @@ import numpy as np
 
 from nptdms.utils import Timer, OrderedDict
 from nptdms.tdms_segment import read_segment_metadata
-from nptdms.base_segment import ChannelDataChunk
+from nptdms.base_segment import RawChannelDataChunk
 from nptdms.log import log_manager
 
 log = log_manager.get_logger(__name__)
@@ -75,7 +75,7 @@ def read_metadata(self):
     def read_raw_data(self):
         """ Read raw data from all segments, chunk by chunk
 
-        :returns: A generator that yields DataChunk objects
+        :returns: A generator that yields RawDataChunk objects
        """
        if self._segments is None:
            raise RuntimeError(
@@ -92,7 +92,7 @@ def read_raw_data_for_channel(self, channel_path, offset=0, length=None):
         :param length: Number of values to attempt to read. If None, then all
            values starting from the offset will be read. Fewer values will be
            returned if attempting to read beyond the end of the available data.
-        :returns: A generator that yields ChannelDataChunk objects
+        :returns: A generator that yields RawChannelDataChunk objects
         """
         if self._segments is None:
             raise RuntimeError("Cannot read data unless metadata has first been read")
@@ -262,12 +262,12 @@ def __init__(self):
 def _trim_channel_chunk(chunk, skip=0, trim=0):
     if skip == 0 and trim == 0:
         return chunk
-    raw_data = None
-    daqmx_raw_data = None
-    if chunk.raw_data is not None:
-        raw_data = chunk.raw_data[skip:len(chunk.raw_data) - trim]
-    if chunk.daqmx_raw_data is not None:
-        daqmx_raw_data = {
+    data = None
+    scaler_data = None
+    if chunk.data is not None:
+        data = chunk.data[skip:len(chunk.data) - trim]
+    if chunk.scaler_data is not None:
+        scaler_data = {
             scale_id: d[skip:len(d) - trim]
-            for (scale_id, d) in chunk.daqmx_raw_data.items()}
-    return ChannelDataChunk(raw_data, daqmx_raw_data)
+            for (scale_id, d) in chunk.scaler_data.items()}
+    return RawChannelDataChunk(data, scaler_data)
diff --git a/nptdms/scaling.py b/nptdms/scaling.py
index 6b2eddb..6e7996d 100644
--- a/nptdms/scaling.py
+++ b/nptdms/scaling.py
@@ -61,6 +61,8 @@ def from_properties(properties, scale_index):
         return PolynomialScaling(coefficients, input_source)
 
     def scale(self, data):
+        # Ensure data is double type before scaling
+        data = data.astype(np.dtype('float64'), copy=False)
         return np.polynomial.polynomial.polyval(data, self.coefficients)
 
 
@@ -229,7 +231,7 @@ def scale(self, data):
         """ Convert voltage data to temperature in Kelvin
         """
         # Ensure data is double precision
-        data = data.astype(np.float64, copy=False)
+        data = data.astype(np.dtype('float64'), copy=False)
         if self.excitation_type == CURRENT_EXCITATION:
             r_t = data / self.excitation_value
         elif self.excitation_type == VOLTAGE_EXCITATION:
@@ -376,6 +378,26 @@ def scale(self, raw_channel_data):
         final_scale = len(self.scalings) - 1
         return self._compute_scaled_data(final_scale, raw_channel_data)
 
+    def get_dtype(self, raw_data_type, scaler_data_types):
+        """ Get the numpy dtype for scaled data
+        """
+        final_scale = len(self.scalings) - 1
+        return self._compute_scale_dtype(final_scale, raw_data_type, scaler_data_types)
+
+    def _compute_scale_dtype(self, scale_index, raw_data_type, scaler_data_types):
+        if scale_index == RAW_DATA_INPUT_SOURCE:
+            return raw_data_type.nptype
+        scaling = self.scalings[scale_index]
+        if isinstance(scaling, DaqMxScalerScaling):
+            return scaler_data_types[scaling.scale_id].nptype
+        elif isinstance(scaling, AddScaling) or isinstance(scaling, SubtractScaling):
+            return np.result_type(
+                self._compute_scale_dtype(scaling.left_input_source, raw_data_type, scaler_data_types),
+                self._compute_scale_dtype(scaling.right_input_source, raw_data_type, scaler_data_types))
+        else:
+            # Any other scaling type should produce double data
+            return np.dtype('float64')
+
     def _compute_scaled_data(self, scale_index, raw_channel_data):
         """ Compute output data from a single scale in the set of all
         scalings, computing any required input scales recursively.
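The add/subtract branch of ``_compute_scale_dtype`` above relies on NumPy's standard type promotion via ``np.result_type``; for example::

    import numpy as np

    # Combining differently typed inputs follows NumPy promotion rules,
    # while any other scaling type falls back to float64.
    print(np.result_type(np.dtype('int16'), np.dtype('float32')))  # float32
    print(np.result_type(np.dtype('int16'), np.dtype('int32')))    # int32
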
diff --git a/nptdms/tdms.py b/nptdms/tdms.py
index 54a4889..02d35da 100644
--- a/nptdms/tdms.py
+++ b/nptdms/tdms.py
@@ -3,16 +3,18 @@
 This module contains the public facing API for reading TDMS files
 """
 
+from collections import defaultdict
 import warnings
 import numpy as np
 
-from nptdms import scaling
+from nptdms import scaling, types
 from nptdms.utils import Timer, OrderedDict
 from nptdms.log import log_manager
 from nptdms.common import ObjectPath
 from nptdms.reader import TdmsReader
 from nptdms.channel_data import get_data_receiver
 from nptdms.export import hdf_export, pandas_export
+from nptdms.base_segment import RawChannelDataChunk
 
 log = log_manager.get_logger(__name__)
@@ -91,6 +93,7 @@ def __init__(self, file, memmap_dir=None, read_metadata_only=False, keep_open=Fa
         self._properties = {}
         self._channel_data = {}
         self._reader = None
+        self.data_read = False
 
         reader = TdmsReader(file)
         try:
@@ -144,6 +147,21 @@ def as_hdf(self, filepath, mode='w', group='/'):
         """
         return hdf_export.from_tdms_file(self, filepath, mode, group)
 
+    def data_chunks(self):
+        """ A generator that streams chunks of data from disk.
+        This method may only be used when the TDMS file was opened without reading all data immediately.
+
+        :rtype: Generator that yields :class:`DataChunk` objects
+        """
+        if self._reader is None:
+            raise RuntimeError(
+                "Cannot read data chunks after the underlying TDMS reader is closed")
+        channel_offsets = defaultdict(int)
+        for chunk in self._reader.read_raw_data():
+            yield DataChunk(self, chunk, channel_offsets)
+            for path, data in chunk.channel_data.items():
+                channel_offsets[path] += len(data)
+
     def close(self):
         """ Close the underlying file if it was opened by this TdmsFile
@@ -219,14 +237,13 @@ def _read_data(self, tdms_reader):
         with Timer(log, "Read data"):
             # Now actually read all the data
             for chunk in tdms_reader.read_raw_data():
-                for (path, data) in chunk.raw_data.items():
-                    channel_data = self._channel_data[path]
-                    channel_data.append_data(data)
-                for (path, data) in chunk.daqmx_raw_data.items():
+                for (path, data) in chunk.channel_data.items():
                     channel_data = self._channel_data[path]
-                    for scaler_id, scaler_data in data.items():
-                        channel_data.append_scaler_data(
-                            scaler_id, scaler_data)
+                    if data.data is not None:
+                        channel_data.append_data(data.data)
+                    elif data.scaler_data is not None:
+                        for scaler_id, scaler_data in data.scaler_data.items():
+                            channel_data.append_scaler_data(scaler_id, scaler_data)
 
         for group in self.groups():
             for channel in group.channels():
@@ -234,6 +251,8 @@ def _read_data(self, tdms_reader):
                 if channel_data is not None:
                     channel._set_raw_data(channel_data)
 
+        self.data_read = True
+
     def _read_channel_data(self, channel, offset=0, length=None):
         if offset < 0:
             raise ValueError("offset must be non-negative")
@@ -255,10 +274,10 @@ def _read_channel_data(self, channel, offset=0, length=None):
         with Timer(log, "Read data"):
             # Now actually read all the data
             for chunk in self._reader.read_raw_data_for_channel(channel.path, offset, length):
-                if chunk.raw_data is not None:
-                    channel_data.append_data(chunk.raw_data)
-                if chunk.daqmx_raw_data is not None:
-                    for scaler_id, scaler_data in chunk.daqmx_raw_data.items():
+                if chunk.data is not None:
+                    channel_data.append_data(chunk.data)
+                if chunk.scaler_data is not None:
+                    for scaler_id, scaler_data in chunk.scaler_data.items():
                         channel_data.append_scaler_data(scaler_id, scaler_data)
 
         return channel_data
@@ -489,6 +508,24 @@ def name(self):
         """
         return self._path.channel
 
+    @_property_builtin
+    def dtype(self):
+        """ NumPy data type of the channel data
+
+        For data with a scaling this is the data type of the scaled data
+
+        :rtype: numpy.dtype
+        """
+        if self.data_type is types.String:
+            return np.dtype('O')
+        elif self.data_type is types.TimeStamp:
+            return np.dtype('