From 6fa3355c15d1f2cada29ae7909e54b8583751f03 Mon Sep 17 00:00:00 2001 From: Sebastian Ehlert <28669218+awvwgk@users.noreply.github.com> Date: Sat, 27 Nov 2021 18:29:44 +0100 Subject: [PATCH] Add routines for saving arrays in npy format --- src/CMakeLists.txt | 1 + src/Makefile.manual | 5 + src/stdlib_io_npy.fypp | 219 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 225 insertions(+) create mode 100644 src/stdlib_io_npy.fypp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bcb4931b3..f0c14de3d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,6 +7,7 @@ set(fppFiles stdlib_bitsets_64.fypp stdlib_bitsets_large.fypp stdlib_io.fypp + stdlib_io_npy.fypp stdlib_kinds.fypp stdlib_linalg.fypp stdlib_linalg_diag.fypp diff --git a/src/Makefile.manual b/src/Makefile.manual index f573fa6cf..754ae5f08 100644 --- a/src/Makefile.manual +++ b/src/Makefile.manual @@ -4,6 +4,7 @@ SRCFYPP = \ stdlib_bitsets_large.fypp \ stdlib_bitsets.fypp \ stdlib_io.fypp \ + stdlib_io_npy.fypp \ stdlib_kinds.fypp \ stdlib_linalg.fypp \ stdlib_linalg_diag.fypp \ @@ -87,6 +88,10 @@ stdlib_io.o: \ stdlib_optval.o \ stdlib_kinds.o \ stdlib_ascii.o +stdlib_io_npy.o: \ + stdlib_error.o \ + stdlib_kinds.o \ + stdlib_strings.o stdlib_linalg.o: \ stdlib_kinds.o stdlib_linalg_diag.o: \ diff --git a/src/stdlib_io_npy.fypp b/src/stdlib_io_npy.fypp new file mode 100644 index 000000000..183dafdd9 --- /dev/null +++ b/src/stdlib_io_npy.fypp @@ -0,0 +1,219 @@ +! SPDX-Identifer: MIT + +#:include "common.fypp" +#:set RANKS = range(1, MAXRANK + 1) +#:set KINDS_TYPES = REAL_KINDS_TYPES + INT_KINDS_TYPES + CMPLX_KINDS_TYPES + +!> Description of the npy format taken from +!> https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html +!> +!>## Format Version 1.0 +!> +!> The first 6 bytes are a magic string: exactly \x93NUMPY. +!> +!> The next 1 byte is an unsigned byte: +!> the major version number of the file format, e.g. \x01. +!> +!> The next 1 byte is an unsigned byte: +!> the minor version number of the file format, e.g. \x00. +!> Note: the version of the file format is not tied to the version of the numpy package. +!> +!> The next 2 bytes form a little-endian unsigned short int: +!> the length of the header data HEADER_LEN. +!> +!> The next HEADER_LEN bytes form the header data describing the array’s format. +!> It is an ASCII string which contains a Python literal expression of a dictionary. +!> It is terminated by a newline (\n) and padded with spaces (\x20) to make the total +!> of len(magic string) + 2 + len(length) + HEADER_LEN be evenly divisible by 64 for +!> alignment purposes. +!> +!> The dictionary contains three keys: +!> +!> - “descr”: dtype.descr +!> An object that can be passed as an argument to the numpy.dtype constructor +!> to create the array’s dtype. +!> +!> - “fortran_order”: bool +!> Whether the array data is Fortran-contiguous or not. Since Fortran-contiguous +!> arrays are a common form of non-C-contiguity, we allow them to be written directly +!> to disk for efficiency. +!> +!> - “shape”: tuple of int +!> The shape of the array. +!> +!> For repeatability and readability, the dictionary keys are sorted in alphabetic order. +!> This is for convenience only. A writer SHOULD implement this if possible. A reader MUST +!> NOT depend on this. +!> +!> Following the header comes the array data. If the dtype contains Python objects +!> (i.e. dtype.hasobject is True), then the data is a Python pickle of the array. +!> Otherwise the data is the contiguous (either C- or Fortran-, depending on fortran_order) +!> bytes of the array. Consumers can figure out the number of bytes by multiplying the +!> number of elements given by the shape (noting that shape=() means there is 1 element) +!> by dtype.itemsize. +!> +!>## Format Version 2.0 +!> +!> The version 1.0 format only allowed the array header to have a total size of 65535 bytes. +!> This can be exceeded by structured arrays with a large number of columns. +!> The version 2.0 format extends the header size to 4 GiB. numpy.save will automatically +!> save in 2.0 format if the data requires it, else it will always use the more compatible +!> 1.0 format. +!> +!> The description of the fourth element of the header therefore has become: +!> “The next 4 bytes form a little-endian unsigned int: the length of the header data +!> HEADER_LEN.” +!> +!>## Format Version 3.0 +!> +!> This version replaces the ASCII string (which in practice was latin1) with a +!> utf8-encoded string, so supports structured types with any unicode field names. +module stdlib_io_npy + use stdlib_error, only : error_stop + use stdlib_kinds, only : int8, int16, int32, int64, sp, dp, xdp, qp + use stdlib_strings, only : to_string + implicit none + private + + public :: save_npy + + + !> Save multidimensional array in npy format + interface save_npy + #:for k1, t1 in KINDS_TYPES + #:for rank in RANKS + module procedure save_npy_${t1[0]}$${k1}$_${rank}$ + #:endfor + #:endfor + end interface save_npy + + + character(len=*), parameter :: nl = char(10) + + character(len=*), parameter :: & + type_iint8 = " Generate magic header string for npy format + pure function magic_header(major, minor) result(str) + !> Major version of npy format + integer, intent(in) :: major + !> Minor version of npy format + integer, intent(in) :: minor + !> Magic string for npy format + character(len=8) :: str + + str = magic_number // magic_string // char(major) // char(minor) + end function magic_header + + + !> Generate header for npy format + pure function npy_header(vtype, vshape) result(str) + !> Type of variable + character(len=*), intent(in) :: vtype + !> Shape of variable + integer, intent(in) :: vshape(:) + !> Header string for npy format + character(len=:), allocatable :: str + + integer, parameter :: len_v10 = 8 + 2, len_v20 = 8 + 4 + + str = & + "{'descr': '"//vtype//& + "', 'fortran_order': True, 'shape': "//& + shape_str(vshape)//", }" + + if (len(str) + len_v10 >= 65535) then + str = str // & + & repeat(" ", 16 - mod(len(str) + len_v20 + 1, 16)) // nl + str = magic_header(2, 0) // to_bytes_i4(int(len(str))) // str + else + str = str // & + & repeat(" ", 16 - mod(len(str) + len_v10 + 1, 16)) // nl + str = magic_header(1, 0) // to_bytes_i2(int(len(str))) // str + end if + end function npy_header + + !> Write integer as byte string in little endian encoding + pure function to_bytes_i4(val) result(str) + !> Integer value to convert to bytes + integer, intent(in) :: val + !> String of bytes + character(len=4), allocatable :: str + + str = char(mod(val, 2**8)) // & + & char(mod(val, 2**16) / 2**8) // & + & char(mod(val, 2**32) / 2**16) // & + & char(val / 2**32) + end function to_bytes_i4 + + + !> Write integer as byte string in little endian encoding, 2-byte truncated version + pure function to_bytes_i2(val) result(str) + !> Integer value to convert to bytes + integer, intent(in) :: val + !> String of bytes + character(len=2), allocatable :: str + + str = char(mod(val, 2**8)) // & + & char(mod(val, 2**16) / 2**8) + end function to_bytes_i2 + + + !> Print array shape as tuple of int + pure function shape_str(vshape) result(str) + !> Shape of variable + integer, intent(in) :: vshape(:) + !> Shape string for npy format + character(len=:), allocatable :: str + + integer :: i + + str = "(" + do i = 1, size(vshape) + str = str//to_string(vshape(i))//", " + enddo + str = str//")" + end function shape_str + + +#:for k1, t1 in KINDS_TYPES + #:for rank in RANKS + !> Save ${rank}$-dimensional array in npy format + subroutine save_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat) + character(len=*), intent(in) :: filename + ${t1}$, intent(in) :: array${ranksuffix(rank)}$ + integer, intent(out), optional :: iostat + character(len=*), parameter :: vtype = type_${t1[0]}$${k1}$ + + integer :: io, stat + + open(newunit=io, file=filename, form="unformatted", access="stream", iostat=stat) + if (stat == 0) then + write(io, iostat=stat) npy_header(vtype, shape(array)) + end if + if (stat == 0) then + write(io, iostat=stat) array + end if + close(io, iostat=stat) + + if (present(iostat)) then + iostat = stat + else if (stat /= 0) then + call error_stop("Failed to write array to file '"//filename//"'") + end if + + end subroutine save_npy_${t1[0]}$${k1}$_${rank}$ + #:endfor +#:endfor + + +end module stdlib_io_npy