@techreport{morton1966computer,
title={A computer oriented geodetic data base and a new technique in file sequencing},
author={Morton, Guy M.},
year={1966},
institution={International Business Machines Company},
address={New York}
}
@article{harris2020array,
title={Array programming with NumPy},
author={Harris, Charles R and Millman, K Jarrod and van der Walt, St{\'e}fan J and Gommers, Ralf and Virtanen, Pauli and Cournapeau, David and Wieser, Eric and Taylor, Julian and Berg, Sebastian and Smith, Nathaniel J and others},
journal={Nature},
volume={585},
number={7825},
pages={357--362},
year={2020},
publisher={Nature Publishing Group}
}
@inproceedings{lillaney2018building,
title={Building {NDS}tore through hierarchical storage management and microservice processing},
author={Lillaney, Kunal and Kleissas, Dean and Eusman, Alexander and Perlman, Eric and Roncal, William Gray and Vogelstein, Joshua T and Burns, Randal},
booktitle={2018 IEEE 14th International Conference on e-Science (e-Science)},
pages={223--233},
year={2018},
organization={IEEE}
}
@article{biguniverse,
author={Kremer, J. and Stensbo-Smidt, K. and Gieseke, F. and Pedersen, K. S. and Igel, C.},
journal={IEEE Intelligent Systems},
title={Big Universe, Big Data: Machine Learning and Image Analysis for Astronomy},
year={2017},
volume={32},
number={2},
pages={16--22}
}
@article{big_data_geology,
author={Zhang, Qi and Liu, Xuelong},
title={Big data: new methods and ideas in geological scientific research},
journal={Big Earth Data},
volume={3},
number={1},
pages={1--7},
year={2019},
publisher={Taylor \& Francis},
doi={10.1080/20964471.2018.1564478}
}
@inproceedings{hdf5,
title={An overview of the {HDF5} technology suite and its applications},
author={Folk, Mike and Heber, Gerd and Koziol, Quincey and Pourmal, Elena and Robinson, Dana},
booktitle={Proceedings of the EDBT/ICDT 2011 Workshop on Array Databases},
pages={36--47},
year={2011}
}
@article{bigdata_health,
title={An optimal big data workflow for biomedical image analysis},
journal={Informatics in Medicine Unlocked},
volume={11},
pages={68--74},
year={2018},
issn={2352-9148},
doi={10.1016/j.imu.2018.05.001},
url={http://www.sciencedirect.com/science/article/pii/S2352914818300844},
author={Aurelle {Tchagna Kouanou} and Daniel Tchiotsop and Romanic Kengne and Djoufack Tansaa Zephirin and Ngo Mouelas {Adele Armele} and Ren{\'e} Tchinda},
keywords={Biomedical images, Big data, Artificial intelligence, Machine learning, Hadoop/Spark},
abstract={Background and objective: In the medical field, data volume is increasingly growing, and traditional methods cannot manage it efficiently. In biomedical computation, the continuous challenges are: management, analysis, and storage of the biomedical data. Nowadays, big data technology plays a significant role in the management, organization, and analysis of data, using machine learning and artificial intelligence techniques. It also allows a quick access to data using the NoSQL database. Thus, big data technologies include new frameworks to process medical data in a manner similar to biomedical images. It becomes very important to develop methods and/or architectures based on big data technologies, for a complete processing of biomedical image data. Method: This paper describes big data analytics for biomedical images, shows examples reported in the literature, briefly discusses new methods used in processing, and offers conclusions. We argue for adapting and extending related work methods in the field of big data software, using Hadoop and Spark frameworks. These provide an optimal and efficient architecture for biomedical image analysis. This paper thus gives a broad overview of big data analytics to automate biomedical image diagnosis. A workflow with optimal methods and algorithm for each step is proposed. Results: Two architectures for image classification are suggested. We use the Hadoop framework to design the first, and the Spark framework for the second. The proposed Spark architecture allows us to develop appropriate and efficient methods to leverage a large number of images for classification, which can be customized with respect to each other. Conclusions: The proposed architectures are more complete, easier, and are adaptable in all of the steps from conception. The obtained Spark architecture is the most complete, because it facilitates the implementation of algorithms with its embedded libraries.}
}
@inproceedings{seqalgorithms,
author={Hayot-Sasson, V. and Gao, Y. and Yan, Y. and Glatard, T.},
booktitle={2017 IEEE International Conference on Big Data (Big Data)},
title={Sequential algorithms to split and merge ultra-high resolution 3D images},
year={2017},
pages={415--424},
month={Dec},
keywords={brain;data compression;image reconstruction;image resolution;medical image processing;merging;negative seeking;naive algorithms;ultra-high resolution images;seek times;access image chunks;data merging;parallel processing operations;image reconstruction;brain image;sequential algorithm analysis;ultra-high resolution 3D image splitting;ultra-high resolution 3D image merging;data splitting;distributed processing operations;3D blocks;clustered reads;multiple reads;clustered writes;multiple writes;memory buffering;image chunks;on-the-fly compression;Slabs;Image reconstruction;Three-dimensional displays;Memory management;Clustering algorithms;Merging;Image resolution},
doi={10.1109/BigData.2017.8257953}
}
@article{Amunts1472,
author = {Amunts, Katrin and Lepage, Claude and Borgeat, Louis and Mohlberg, Hartmut and Dickscheid, Timo and Rousseau, Marc-{\'E}tienne and Bludau, Sebastian and Bazin, Pierre-Louis and Lewis, Lindsay B. and Oros-Peusquens, Ana-Maria and Shah, Nadim J. and Lippert, Thomas and Zilles, Karl and Evans, Alan C.},
title = {BigBrain: An Ultrahigh-Resolution 3D Human Brain Model},
volume = {340},
number = {6139},
pages = {1472--1475},
year = {2013},
doi = {10.1126/science.1235381},
publisher = {American Association for the Advancement of Science},
abstract = {Reference brains have become a standard tool in human brain research. Reference brains presently in the public domain provide a spatial framework at the macroscopic level. Amunts et al. (p. 1472) present a high-resolution (20 {\textmu}m) three-dimensional reconstruction of a human brain. The tool will be freely available to help with interpreting functional neuroimaging studies, fiber tract analyses, and assigning molecular and gene expression data. Reference brains are indispensable tools in human brain mapping, enabling integration of multimodal data into an anatomically realistic standard space. Available reference brains, however, are restricted to the macroscopic scale and do not provide information on the functionally important microscopic dimension. We created an ultrahigh-resolution three-dimensional (3D) model of a human brain at nearly cellular resolution of 20 micrometers, based on the reconstruction of 7404 histological sections. {\textquotedblleft}BigBrain{\textquotedblright} is a free, publicly available tool that provides considerable neuroanatomical insight into the human brain, thereby allowing the extraction of microscopic data for modeling and simulation. BigBrain enables testing of hypotheses on optimal path lengths between interconnected cortical regions or on spatial organization of genetic patterning, redefining the traditional neuroanatomy maps such as those of Brodmann and von Economo.},
issn = {0036-8075},
URL = {https://science.sciencemag.org/content/340/6139/1472},
eprint = {https://science.sciencemag.org/content/340/6139/1472.full.pdf},
journal = {Science}
}
@inproceedings{openconnectomecluster,
author = {Burns, Randal and Lillaney, Kunal and Berger, Daniel R. and Grosenick, Logan and Deisseroth, Karl and Reid, R. Clay and Roncal, William Gray and Manavalan, Priya and Bock, Davi D. and Kasthuri, Narayanan and Kazhdan, Michael and Smith, Stephen J. and Kleissas, Dean and Perlman, Eric and Chung, Kwanghun and Weiler, Nicholas C. and Lichtman, Jeff and Szalay, Alexander S. and Vogelstein, Joshua T. and Vogelstein, R. Jacob},
title = {The Open Connectome Project Data Cluster: Scalable Analysis and Vision for High-Throughput Neuroscience},
year = {2013},
isbn = {9781450319218},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/2484838.2484870},
doi = {10.1145/2484838.2484870},
booktitle = {Proceedings of the 25th International Conference on Scientific and Statistical Database Management},
articleno = {27},
numpages = {11},
keywords = {connectomics, data-intensive computing},
location = {Baltimore, Maryland, USA},
series = {SSDBM}
}
@inproceedings{optimal_chuking,
author = {Otoo, E. J. and Rotem, Doron and Seshadri, Sridhar},
title = {Optimal Chunking of Large Multidimensional Arrays for Data Warehousing},
year = {2007},
isbn = {9781595938275},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/1317331.1317337},
doi = {10.1145/1317331.1317337},
booktitle = {Proceedings of the ACM Tenth International Workshop on Data Warehousing and OLAP},
pages = {25--32},
numpages = {8},
keywords = {multi-dimensional arrays, data warehousing, chunking},
location = {Lisbon, Portugal},
series = {DOLAP '07}
}
@inproceedings{matthew_rocklin-proc-scipy-2015,
author={Rocklin, Matthew},
title={Dask: Parallel Computation with Blocked Algorithms and Task Scheduling},
booktitle={Proceedings of the 14th Python in Science Conference},
pages={130--136},
year={2015},
editor={Kathryn Huff and James Bergstra}
}
@misc{collette_2014,
title={Datasets --- h5py 2.10.0 documentation},
author={Collette, Andrew},
year={2014},
howpublished={\url{http://docs.h5py.org/en/stable/high/dataset.html}},
note={Accessed: 2020-09-27}
}
@misc{rocklin_bourbeau_2019,
title={Best Practices --- Dask 2.17.2 documentation},
author={Rocklin, Matthew and Bourbeau, James},
year={2019},
month={May},
howpublished={\url{https://docs.dask.org/en/latest/array-best-practices.html}},
note={Accessed: 2020-09-27}
}
@inproceedings{hayot2019performance,
title={Performance Evaluation of Big Data Processing Strategies for Neuroimaging},
author={Hayot-Sasson, Val{\'e}rie and Brown, Shawn T and Glatard, Tristan},
booktitle={2019 19th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)},
pages={449--458},
year={2019},
organization={IEEE}
}