gnparser.bib

% Injects a definition of \noop (a macro that discards its single argument)
% ahead of the bibliography. \providecommand leaves an existing definition
% untouched instead of raising a "command already defined" error.
@Preamble{"\providecommand{\noop}[1]{}"}

% International Code of Phylogenetic Nomenclature (2010).
% The two authors are separated by `and` so each name is parsed individually.
@Book{ICPN,
  author       = "Cantino, P. D. and de Queiroz, K.",
  title        = "{International Code of Phylogenetic Nomenclature}",
  year         = "2010",
  pages        = "1--102",
}

% Cultivated-plant code, eighth edition (2009). Edited volume: the editor goes
% in `editor` (the style supplies "ed."/"et al."), the publisher in `publisher`.
@Book{ICNCP,
  editor       = "Brickell, C. D. and others",
  title        = "{International Code of Nomenclature for Cultivated
                 Plants}",
  publisher    = "ISHS",
  year         = "2009",
  pages        = "1--184",
  edition      = "Eighth",
  ISBN         = "978-0-643-09440-6",
}

% Ninth ICTV report (2011). Edited volume: editors are listed individually in
% `editor`; publisher and city are split into `publisher` and `address`.
@Book{ICTV,
  editor       = "King, A. M. Q. and Adams, M. J. and Carstens, E. B. and
                 Lefkowitz, E. J.",
  title        = "{Virus taxonomy: classification and nomenclature of
                 viruses: Ninth Report of the International Committee on
                 Taxonomy of Viruses}",
  publisher    = "Elsevier",
  address      = "San Diego",
  year         = "2011",
  pages        = "1--1338",
  ISBN         = "0-12-384684-6",
}

% Bacteriological Code, 1990 revision (published 1992). Edited volume: editors
% go in `editor`, with `and others` standing in for "et al.".
@Book{ICNB,
  editor       = "Lapage, S. P. and Sneath, P. H. A. and Lessel, E. F. and
                 others",
  title        = "{International Code of Nomenclature of Bacteria:
                 Bacteriological Code, 1990 Revision}",
  publisher    = "ASM Press",
  year         = "1992",
  pages        = "1--232",
  ISBN         = "1-55581-039-X",
}

% Zoological code, fourth edition (1999). The corporate author is wrapped in an
% extra brace pair so it is treated as one indivisible name.
@Book{ICZN,
  author       = "{International Commission on Zoological
                 Nomenclature}",
  title        = "{International Code of Zoological Nomenclature}",
  publisher    = "The International Trust for Zoological Nomenclature",
  year         = "1999",
  pages        = "1--336",
  edition      = "Fourth",
  ISBN         = "0-85301-006-4",
}

% Flora of North America, vol. 23 (2002); see `annote` for the name this entry
% is cited for. The editorial committee is a corporate editor, hence the braces.
@Book{FNA2002,
  annote       = "Citation for Carex scirpoidea subsp. convoluta
                 (K{\"{u}}k.) D.A. Dunlop",
  editor       = "{Flora of North America Editorial Committee}",
  publisher    = "Oxford University Press",
  address      = "New York",
  pages        = "551",
  title        = "{Flora of North America. Vol. 23, Magnoliophyta:
                 Commelinidae (in part): Cyperaceae}",
  year         = "2002",
}

% Aho and Ullman textbook (1992). The publisher name and its location are kept
% in separate fields.
@Book{aho1992foundations,
  title        = "Foundations of computer science",
  author       = "Alfred V Aho and Jeffrey D Ullman",
  volume       = "2",
  year         = "1992",
  publisher    = "Computer Science Press",
  address      = "New York, USA",
}

% Charniak's book on statistical language learning (1996).
% `address` holds the publisher's city, not just the country.
@Book{charniak1996statistical,
  author       = {Eugene Charniak},
  title        = {Statistical language learning},
  year         = {1996},
  publisher    = {{MIT} Press},
  address      = {Cambridge, MA, USA},
  isbn         = {978-0-262-53141-2},
}

% Aho and Ullman, "The Theory of Parsing, Translation, and Compiling" (1972).
@Book{aho1972theory,
  author       = "Aho, Alfred V. and Ullman, Jeffrey D.",
  title        = "The Theory of Parsing, Translation, and Compiling",
  publisher    = "Prentice-Hall, Inc.",
  address      = "Upper Saddle River, NJ, USA",
  year         = "1972",
  isbn         = "0-13-914556-7",
}

% IWPT'95 workshop paper. The author is given in "Last, First" form with
% space-separated initials so that each initial is handled as its own token.
@Inproceedings{asveld1995fuzzy,
  author       = {Asveld, P. R. J.},
  title        = {A Fuzzy Approach to Erroneous Inputs in Context-Free Language Recognition},
  booktitle    = {Proceedings of the Fourth International Workshop on Parsing Technologies IWPT'95},
  publisher    = {Institute of Formal and Applied Linguistics, Charles University},
  address      = {Prague, Czech Republic},
  year         = {1995},
  pages        = {14--25},
}


% Survey article on named entity recognition, Lingvisticae Investigationes 30(1).
@Article{nadeau2007survey,
  author       = "{Nadeau}, David and {Sekine}, Satoshi",
  title        = "A survey of named entity recognition and classification",
  journal      = "Lingvisticae Investigationes",
  publisher    = "John Benjamins publishing company",
  volume       = "30",
  number       = "1",
  pages        = "3--26",
  year         = "2007",
  ISSN         = "0378-4169",
  doi          = "10.1075/li.30.1.03nad",
}

% BiOnym taxon-name matching paper, Ecological Informatics 28 (2015).
% The compound surname "Vanden Berghe" is braced so it is not split.
@Article{VandenBerghe2015,
  abstract     = "In the domain of biological classification there are
                 several taxon name matching services that can search
                 for a species scientific name in a large collection of
                 taxonomic names. Many of these services are available
                 online, and many others run on computers of individual
                 scientists. While these systems may work very well,
                 most suffer from the fact that the list of names used
                 as a reference, and the criteria to decide on a match,
                 are hard-coded in the engine that performs the name
                 matching. In this paper we present BiOnym, a taxon name
                 matching system that separates reference namelists,
                 search criteria and matching engine. The user is
                 offered a choice of several taxonomic reference lists,
                 including the option to upload his/her own list onto
                 the system. Furthermore, BiOnym is a flexible workflow,
                 which embeds and combines techniques using lexical
                 matching algorithms as well as expert knowledge. It is
                 also an open platform allowing developers to contribute
                 with new techniques. In this paper we demonstrate the
                 benefits brought by this approach in terms of the
                 efficiency and effectiveness of the information
                 retrieval process with respect to other solutions.",
  author       = "Edward {Vanden Berghe} and Gianpaolo Coro and Nicolas
                 Bailly and Fabio Fiorellato and Caselyn Aldemita and
                 Anton Ellenbroek and Pasquale Pagano",
  doi          = "10.1016/j.ecoinf.2015.05.004",
  file         = ":home/dimus/dl/vanden berghe et al 2015.pdf:pdf",
  ISSN         = "1574-9541",
  journal      = "Ecological Informatics",
  keywords     = "Name matcher chain,Taxon name matching,Taxon name
                 parsing,Taxonomic Authority File,Taxonomic
                 nomenclature,Taxonomy",
  pages        = "29--41",
  title        = "{Retrieving taxa names from large biodiversity data
                 collections using a flexible matching workflow}",
  URL          = "http://www.sciencedirect.com/science/article/pii/S1574954115000825",
  volume       = "28",
  year         = "2015",
}

% "Parsing biomedical literature" (2005), Lecture Notes in Computer Science 3651.
% Both authors are given in "Last, First" form and joined with `and`; `volume`
% is the plain volume number.
@Article{LeaseM.2005,
  abstract     = "We present a preliminary study of several parser
                 adaptation techniques evaluated on the GENIA corpus of
                 MEDLINE abstracts [1,2]. We begin by observing that the
                 Penn Treebank (PTB) is lexically impoverished when
                 measured on various genres of scientific and technical
                 writing, and that this significantly impacts parse
                 accuracy. To resolve this without requiring in-domain
                 treebank data, we show how existing domain-specific
                 lexical resources may be leveraged to augment
                 PTB-training: part-of-speech tags, dictionary
                 collocations, and named-entities. Using a
                 state-of-the-art statistical parser [3] as our
                 baseline, our lexically-adapted parser achieves a
                 14.2{\%} reduction in error. With oracle-knowledge of
                 named-entities, this error reduction improves to
                 21.2{\%}. © Springer-Verlag Berlin Heidelberg 2005.",
  author       = "Lease, M. and Charniak, E.",
  doi          = "10.1007/11562214{\_}6",
  ISBN         = "3-540-29172-5; 978-3-540-29172-5",
  ISSN         = "0302-9743",
  journal      = "Lecture Notes in Computer Science",
  keywords     = "Bibliographic retrieval systems,Biomedical
                 engineering; Error analysis; Informatio,Dictionary
                 collocations; Error reduction; Lexical",
  pages        = "58--69",
  title        = "{Parsing biomedical literature}",
  URL          = "http://www.scopus.com/inward/record.url?eid=2-s2.0-33646016255{\&}partnerID=40{\&}md5=de9cc68600322e7a88fdd7fa23ea3345",
  volume       = "3651",
  year         = "2005",
}

% Overview article on deep learning, Neural networks 61 (2015).
@Article{schmidhuber2015deep,
  author       = "{Schmidhuber}, J{\"u}rgen",
  title        = "Deep learning in neural networks: An overview",
  journal      = "Neural networks",
  publisher    = "Elsevier",
  volume       = "61",
  pages        = "85--117",
  year         = "2015",
}

% Packrat parsing paper, with arXiv metadata. Old-style arXiv identifiers
% include the archive prefix, so `eprint` matches `arxivid` and the URL.
@Article{Ford2006,
  abstract     = "Packrat parsing is a novel technique for implementing
                 parsers in a lazy functional programming language. A
                 packrat parser provides the power and flexibility of
                 top-down parsing with backtracking and unlimited
                 lookahead, but nevertheless guarantees linear parse
                 time. Any language defined by an LL(k) or LR(k) grammar
                 can be recognized by a packrat parser, in addition to
                 many languages that conventional linear-time algorithms
                 do not support. This additional power simplifies the
                 handling of common syntactic idioms such as the
                 widespread but troublesome longest-match rule, enables
                 the use of sophisticated disambiguation strategies such
                 as syntactic and semantic predicates, provides better
                 grammar composition properties, and allows lexical
                 analysis to be integrated seamlessly into parsing. Yet
                 despite its power, packrat parsing shares the same
                 simplicity and elegance as recursive descent parsing;
                 in fact converting a backtracking recursive descent
                 parser into a linear-time packrat parser often involves
                 only a fairly straightforward structural change. This
                 paper describes packrat parsing informally with
                 emphasis on its use in practical applications, and
                 explores its advantages and disadvantages with respect
                 to the more conventional alternatives.",
  archiveprefix = "arXiv",
  arxivid      = "cs/0603077",
  author       = "Bryan Ford",
  doi          = "10.1145/581478.581483",
  eprint       = "cs/0603077",
  ISBN         = "1-58113-487-8",
  ISSN         = "03621340",
  journal      = "ICFP",
  keywords     = "backtracking,haskell,lexical analysis,memoization,parser
                 combinators,scannerless parsing,top-down parsing",
  pages        = "12",
  primaryclass = "cs",
  title        = "{Packrat Parsing: Simple, Powerful, Lazy, Linear
                 Time}",
  URL          = "http://arxiv.org/abs/cs/0603077",
  year         = "2006",
}

% New York University technical report TR2004-854 on the Rats! parser generator.
% Typed as a technical report, with institution and report number in their own
% fields; the URL uses plain (LaTeX-escaped) ampersands.
@TechReport{Grimm2004,
  abstract     = "A considerable number of research projects are
                 exploring how to extend object-oriented programming
                 languages such as Java with, for example, support for
                 generics, multiple dispatch, or pattern matching. To
                 keep up with these changes, language implementors need
                 appropriate tools. In this context, easily extensible
                 parser generators are especially important because
                 parsing program sources is a necessary first step for
                 any language processor, be it a compiler,
                 syntax-highlighting editor, or API documentation
                 generator. Unfortunately, context-free grammars and the
                 corresponding LR or LL parsers, while well understood
                 and widely used, are also unnecessarily hard to extend.
                 To address this lack of appropriate tools, we introduce
                 Rats!, a parser generator for Java that supports easily
                 modifiable grammars and avoids the complexities
                 associated with altering LR or LL grammars. Our work
                 builds on recent research on packrat parsers, which are
                 recursive descent parsers that perform backtracking but
                 also memoize all intermediate results (hence their
                 name), thus ensuring linear-time performance. Our work
                 makes this parsing technique, which has been developed
                 in the context of functional programming languages,
                 practical for object-oriented languages. Furthermore,
                 our parser generator supports simpler grammar
                 specifications and more convenient error reporting,
                 while also producing better performing parsers through
                 aggressive optimizations. In this paper, we motivate
                 the need for more easily extensible parsers, describe
                 our parser generator and its optimizations in detail,
                 and present the results of our experimental
                 evaluation.",
  author       = "Robert Grimm",
  institution  = "New York University, Dept. of Computer Science",
  number       = "TR2004-854",
  keywords     = "extensible programming languages,packrat
                 parsers,parser generators,parsing expression grammars",
  pages        = "12",
  title        = "{Practical packrat parsing}",
  URL          = "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.82.675{\&}rep=rep1{\&}type=ps",
  year         = "2004",
}

% Grune and Jacobs, "Parsing techniques" (2008); the abstract identifies this as
% the second edition. NOTE(review): publisher and address are inferred from the
% Springer DOI prefix; confirm against the title page.
@Book{Grune2008,
  abstract     = "Parsing, also referred to as syntax analysis, has been
                 and continues to be an essential part of computer
                 science and linguistics. Today, parsing techniques are
                 also implemented in a number of other disciplines,
                 including but not limited to, document preparation and
                 conversion, typesetting chemical formulae, and
                 chromosome recognition. This second edition presents
                 new developments and discoveries that have been made in
                 the field. Parsing techniques have grown considerably
                 in importance, both in computational linguistics where
                 such parsers are the only option, and computer science,
                 where advanced compilers often use general CF parsers.
                 Parsing techniques provide a solid basis for compiler
                 construction and contribute to all existing software:
                 enabling Web browsers to analyze HTML pages and
                 PostScript printers to analyze PostScript. Some of the
                 more advanced techniques are used in code generation in
                 compilers and in data compression. In linguistics, the
                 importance of formal grammars was recognized early on,
                 but only recently have the corresponding parsing
                 techniques been applied. Also their importance as
                 general pattern recognizers is slowly being
                 acknowledged. This text Parsing Techniques explores new
                 developments, such as generalized deterministic
                 parsing, linear-time substring parsing, parallel
                 parsing, parsing as intersection, non-canonical
                 methods, and non-Chomsky systems. To provide readers
                 with low-threshold access to the full field of parsing
                 techniques, this new edition uses a two-tiered
                 structure. The basic ideas behind the dozen or so
                 existing parsing techniques are explained in an
                 intuitive and narrative style, and problems are
                 presented at the conclusion of each chapter, allowing
                 the reader to step outside the bounds of the covered
                 material and explore parsing techniques at various
                 levels. The reader is also provided with an extensive
                 annotated bibliography as well as hints and partial
                 solutions to a number of problems. In the bibliography,
                 hundreds of realizations and improvements of parsing
                 techniques are explained in a much terser, yet still
                 informal, style, improving its readability and
                 usability. The reader should have an understanding of
                 algorithmic thinking, especially recursion; however,
                 knowledge of any particular programming language is not
                 required.",
  author       = "Dick Grune and Ceriel J H Jacobs",
  doi          = "10.1007/978-0-387-68954-8",
  edition      = "Second",
  ISBN         = "978-0-387-20248-8",
  pages        = "1--662",
  publisher    = "Springer",
  address      = "New York",
  title        = "{Parsing techniques: A practical guide}",
  year         = "2008",
}

% ZooKeys paper on semantic tagging (2010). The middle initial in "Robert A
% Morris" is uppercase so it is not read as a lowercase name particle.
% NOTE(review): `volume` follows the issue number embedded in the DOI
% (zookeys.50.538); confirm against the journal's own citation.
@Article{Penev2010,
  abstract     = "The concept of semantic tagging and its potential for
                 semantic enhancements to taxonomic papers is outlined
                 and illustrated by four exemplar papers published in
                 the present issue of ZooKeys. The four papers were
                 created in different ways: (i) written in Microsoft
                 Word and submitted as non-tagged manuscript (doi:
                 10.3897/zookeys.50.504); (ii) generated from
                 Scratchpads and submitted as XML-tagged manuscripts
                 (doi: 10.3897/zookeys.50.505 and doi:
                 10.3897/zookeys.50.506); (iii) generated from an
                 author's database (doi: 10.3897/zookeys.50.485) and
                 submitted as XML-tagged manuscript. XML tagging and
                 semantic enhancements were implemented during the
                 editorial process of ZooKeys using the Pensoft Mark Up
                 Tool (PMT), specially designed for this purpose. The
                 XML schema used was TaxPub, an extension to the
                 Document Type Definitions (DTD) of the US National
                 Library of Medicine Journal Archiving and Interchange
                 Tag Suite (NLM). The following innovative methods of
                 tagging, layout, publishing and disseminating the
                 content were tested and implemented within the ZooKeys
                 editorial workflow: (1) highly automated, fine-grained
                 XML tagging based on TaxPub; (2) final XML output of
                 the paper validated against the NLM DTD for archiving
                 in PubMedCentral; (3) bibliographic metadata embedded
                 in the PDF through XMP (Extensible Metadata Platform);
                 (4) PDF uploaded after publication to the Biodiversity
                 Heritage Library (BHL); (5) taxon treatments supplied
                 through XML to Plazi; (6) semantically enhanced HTML
                 version of the paper encompassing numerous internal and
                 external links and linkouts, such as: (i) vizualisation
                 of main tag elements within the text (e.g., taxon
                 names, taxon treatments, localities, etc.); (ii)
                 internal cross-linking between paper sections,
                 citations, references, tables, and figures; (iii)
                 mapping of localities listed in the whole paper or
                 within separate taxon treatments; (v) taxon names
                 autotagged, dynamically mapped and linked through the
                 Pensoft Taxon Profile (PTP) to large international
                 database services and indexers such as Global
                 Biodiversity Information Facility (GBIF), National
                 Center for Biotechnology Information (NCBI), Barcode of
                 Life (BOLD), Encyclopedia of Life (EOL), ZooBank,
                 Wikipedia, Wikispecies, Wikimedia, and others; (vi)
                 GenBank accession numbers autotagged and linked to
                 NCBI; (vii) external links of taxon names to references
                 in PubMed, Google Scholar, Biodiversity Heritage
                 Library and other sources. With the launching of the
                 working example, ZooKeys becomes the first taxonomic
                 journal to provide a complete XML-based editorial,
                 publication and dissemination workflow implemented as a
                 routine and cost-efficient practice. It is anticipated
                 that XML-based workflow will also soon be implemented
                 in botany through PhytoKeys, a forthcoming partner
                 journal of ZooKeys. The semantic markup and
                 enhancements are expected to greatly extend and
                 accelerate the way taxonomic information is published,
                 disseminated and used.",
  author       = "Lyubomir Penev and Donat Agosti and Teodor Georgiev
                 and Terry Catapano and Jeremy Miller and Vladimir
                 Blagoderov and David Roberts and Vincent S Smith and
                 Irina Brake and Simon Rycroft and Ben Scott and Norman
                 F Johnson and Robert A Morris and Guido Sautter and
                 Vishwas Chavan and Tim Robertson and David Remsen and
                 Pavel Stoev and Cynthia Parr and Sandra Knapp and W
                 John Kress and Chris F Thompson and Terry Erwin",
  doi          = "10.3897/zookeys.50.538",
  file         = ":home/dimus/.local/share/data/Mendeley Ltd./Mendeley
                 Desktop/Downloaded/Penev et al. - 2010 - Semantic
                 tagging of and semantic enhancements to systematics
                 papers ZooKeys working examples.pdf:pdf",
  ISSN         = "1313-2970",
  journal      = "ZooKeys",
  pages        = "1--16",
  pmid         = "21594113",
  title        = "{Semantic tagging of and semantic enhancements to
                 systematics papers: ZooKeys working examples}",
  URL          = "http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3088020{\&}tool=pmcentrez{\&}rendertype=abstract",
  volume       = "50",
  year         = "2010",
}

% Species-richness estimate paper, PLoS Biology 9(8), 2011.
% The linking ISSN is in `ISSN` and the electronic ISSN in `eissn`; a journal
% article carries no ISBN.
@Article{Mora2011,
  abstract     = "The diversity of life is one of the most striking
                 aspects of our planet; hence knowing how many species
                 inhabit Earth is among the most fundamental questions
                 in science. Yet the answer to this question remains
                 enigmatic, as efforts to sample the world's
                 biodiversity to date have been limited and thus have
                 precluded direct quantification of global species
                 richness, and because indirect estimates rely on
                 assumptions that have proven highly controversial. Here
                 we show that the higher taxonomic classification of
                 species (i.e., the assignment of species to phylum,
                 class, order, family, and genus) follows a consistent
                 and predictable pattern from which the total number of
                 species in a taxonomic group can be estimated. This
                 approach was validated against well-known taxa, and
                 when applied to all domains of life, it predicts ∼8.7
                 million (±1.3 million SE) eukaryotic species globally,
                 of which ∼2.2 million (±0.18 million SE) are marine.
                 In spite of 250 years of taxonomic classification and
                 over 1.2 million species already catalogued in a
                 central database, our results suggest that some 86{\%}
                 of existing species on Earth and 91{\%} of species in
                 the ocean still await description. Renewed interest in
                 further exploration and taxonomy is required if this
                 significant gap in our knowledge of life on Earth is to
                 be closed.",
  author       = "Camilo Mora and Derek P. Tittensor and Sina Adl and
                 Alastair G B Simpson and Boris Worm",
  doi          = "10.1371/journal.pbio.1001127",
  eissn        = "1545-7885",
  ISSN         = "1544-9173",
  journal      = "PLoS Biology",
  number       = "8",
  pages        = "1--8",
  pmid         = "21886479",
  title        = "{How many species are there on earth and in the
                 ocean?}",
  volume       = "9",
  year         = "2011",
}

% Melbourne Code (ICN), 2012. Each author is a separate `and`-delimited name;
% only the compound surname is braced. The series name and number are held in
% `series` and `volume` instead of the title.
@Book{ICN,
  abstract     = "This International Code of Nomenclature for algae,
                 fungi, and plants (ICN) replaces the formerly called
                 International Code of Botanical Nomenclature (ICBN).
                 The new code (Melbourne Code) took effect on July 23rd,
                 2011, when it was ratified by the International
                 Botanical Congress. - The title of the code was changed
                 to show that it does not apply to plants only, but also
                 to algae and fungi.",
  author       = "McNeill, J. and Barrie, F. R. and Buck, W. R. and
                 Demoulin, V. and Greuter, W. and Hawksworth, D. L. and
                 Herendeen, P. S. and Knapp, S. and Marhold, K. and
                 Prado, J. and {Prud'homme van Reine}, W. F. and
                 Smith, G. F. and Wiersema, J. H. and Turland, N. J.",
  file         = ":home/dimus/.local/share/data/Mendeley Ltd./Mendeley
                 Desktop/Downloaded/Mcneill et al. - 2012 -
                 International Code of Nomenclature for algae, fungi,
                 and plants (Melbourne Code). Regnum Vegetabile
                 154.pdf:pdf",
  pages        = "1--140",
  publisher    = "Koeltz Scientific Books",
  series       = "Regnum Vegetabile",
  title        = "{International Code of Nomenclature for algae, fungi,
                 and plants (Melbourne Code)}",
  volume       = "154",
  year         = "2012",
}

% Linnaeus, Systema Naturae (1767). The Latin wording of `title` and `journal`
% follows the full name quoted in this entry's abstract ("tria naturae,
% secundum classes").
% NOTE(review): the work is a book but is typed as an article, and the
% ISBN/ISSN/number values look like export artefacts; verify before relying on them.
@Article{Linnaeus1767,
  abstract     = "O Systema Naturae (de nome completo: Systema naturae
                 per regna tria naturae, secundum classes, ordines,
                 genera, species, cum characteribus differentiis,
                 synonymis, locis) foi um livro escrito por Lineu, no
                 qual o autor faz a delinea{\c{c}}{\~{a}}o das suas
                 ideias para uma classifica{\c{c}}{\~{a}}o
                 hier{\'{a}}rquica das esp{\'{e}}cies. Foi um livro
                 publicado em latim. A primeira edi{\c{c}}{\~{a}}o foi
                 de 1735. A primeira edi{\c{c}}{\~{a}}o continha apenas
                 10 p{\'{a}}ginas. Na sua 13ª edi{\c{c}}{\~{a}}o, em
                 1770, tinha j{\'{a}} 3000 p{\'{a}}ginas. A 10ª
                 edi{\c{c}}{\~{a}}o do Systema Naturae de Linnaeus, 1758
                 {\'{e}} o trabalho que iniciou a aplica{\c{c}}{\~{a}}o
                 geral da nomenclatura binomial zool{\'{o}}gica.
                 Portanto, esta data {\'{e}} aceita como ponto de
                 partida da nomeclatura zool{\'{o}}gica e da lei da
                 prioridade.",
  author       = "C Linnaeus",
  doi          = "10.5962/bhl.title.542",
  ISBN         = "0-00-567100-0",
  ISSN         = "1524-4539",
  journal      = "Systema Naturae per regna tria naturae, secundum
                 classes, ordines, genera, species, cum characteribus,
                 Differentiis, Synonymis, Locis",
  keywords     = "species",
  number       = "August",
  pages        = "586",
  title        = "{Systema Naturae per regna tria naturae, secundum
                 classes, ordines, genera, species, cum characteribus,
                 Differentiis, Synonymis, Locis}",
  volume       = "12",
  year         = "1767",
}

% Linnaeus, Systema naturae (1758). Publisher and place of publication are
% held in `publisher` and `address`, which book styles actually print.
@Book{Linnaeus1758,
  abstract     = "Vitulina.4 . P. dentibus laniariis tectis. Syst. nat.
                 6. N. I. Phoca f. Vitulus marinus. Gesn. aqu. 702.
                 Aldr. Pifc. 722. Fonft. Pisc. 44. Dodart. act. 191. t.
                 191. Raj. Quadr. 189. Habitat in mari Europaeo. Sial
                 Dormiunt in lapide ex aqua eminente, pariunt in glacie
                 , per quam adscendunt halitu , non descendunt. Mammas
                 retrahunt duas abdominis ; inter Halecum catervos
                 saginantur, a Laris provocatae exspuunt pisces.
                 Anatome. E. N. C. d. I. a. 9. obs. 98 {\&} d. 3. a. 7.
                 app.15.",
  author       = "C Linnaeus",
  publisher    = "L. Salvii",
  address      = "Holmiae",
  pages        = "824",
  title        = "{Systema naturae per regna tria naturae. 2 vols}",
  URL          = "http://scholar.google.com/scholar?hl=en{\&}btnG=Search{\&}q=intitle:systema+naturae+per+regna+tria+naturae{\#}6",
  year         = "1758",
}

% Linnaeus, Species plantarum (1753). The author is in "von Last, First" form
% so the particle is parsed as such.
% NOTE(review): the full title, publisher and address were completed from
% general knowledge of the work; confirm against the title page.
@Book{Linne1753,
  abstract     = "Updated version of the seminal taxonomic treatment of
                 plants. {"}36. Polygonum perfoliatum. P. foliis
                 triangularibus, caule aculeato, stipulis
                 perfoliato-foliosis patentibus subrotundis. Burm. ind.
                 90. t. 31 f. 2.{"}",
  author       = "von Linne, C.",
  keywords     = "Taxonomy",
  pages        = "583",
  publisher    = "L. Salvii",
  address      = "Holmiae",
  title        = "Species plantarum: exhibentes plantas rite cognitas ad
                 genera relatas cum differentiis specificis, nominibus
                 trivialibus, synonymis selectis, locis natalibus
                 secundum systema sexuale digestas",
  year         = "1753",
}

% Barcode Index Number (BIN) system paper, PLoS ONE 8(7), 2013.
% `URL` holds a single address (the publisher's article page); the journal's
% ISSN is recorded only in `ISSN`.
@Article{Ratnasingham2013,
  abstract     = "Because many animal species are undescribed, and
                 because the identification of known species is often
                 difficult, interim taxonomic nomenclature has often
                 been used in biodiversity analysis. By assigning
                 individuals to presumptive species, called operational
                 taxonomic units (OTUs), these systems speed
                 investigations into the patterning of biodiversity and
                 enable studies that would otherwise be impossible.
                 Although OTUs have conventionally been separated
                 through their morphological divergence, DNA-based
                 delineations are not only feasible, but have important
                 advantages. OTU designation can be automated, data can
                 be readily archived, and results can be easily compared
                 among investigations. This study exploits these
                 attributes to develop a persistent, species-level
                 taxonomic registry for the animal kingdom based on the
                 analysis of patterns of nucleotide variation in the
                 barcode region of the cytochrome c oxidase I (COI)
                 gene. It begins by examining the correspondence between
                 groups of specimens identified to a species through
                 prior taxonomic work and those inferred from the
                 analysis of COI sequence variation using one new (RESL)
                 and four established (ABGD, CROP, GMYC, jMOTU)
                 algorithms. It subsequently describes the
                 implementation, and structural attributes of the
                 Barcode Index Number (BIN) system. Aside from a
                 pragmatic role in biodiversity assessments, BINs will
                 aid revisionary taxonomy by flagging possible cases of
                 synonymy, and by collating geographical information,
                 descriptive metadata, and images for specimens that are
                 likely to belong to the same species, even if it is
                 undescribed. More than 274,000 BIN web pages are now
                 available, creating a biodiversity resource that is
                 positioned for rapid growth.",
  author       = "Sujeevan Ratnasingham and Paul D N Hebert",
  doi          = "10.1371/journal.pone.0066213",
  file         = ":home/dimus/.local/share/data/Mendeley Ltd./Mendeley
                 Desktop/Downloaded/Ratnasingham, Hebert - 2013 - A
                 DNA-based registry for all animal species the barcode
                 index number (BIN) system.pdf:pdf",
  ISSN         = "1932-6203",
  journal      = "PLoS ONE",
  keywords     = "Algorithms,Animals,Cluster Analysis,DNA
                 Barcoding,Databases as Topic,Lepidoptera,Lepidoptera:
                 classification,North America,Registries,Species
                 Specificity,Taxonomic,Taxonomic: methods,Time Factors",
  number       = "7",
  pages        = "e66213",
  pmid         = "23861743",
  publisher    = "Public Library of Science",
  title        = "{A DNA-based registry for all animal species: the
                 barcode index number (BIN) system}",
  URL          = "http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0066213",
  volume       = "8",
  year         = "2013",
}

@Article{Franz2009,
  abstract     = "Taxonomic concepts (sensu Berendsohn) embody the
                 underlying meanings of scientific names as stated in
                 a particular publication, thus offering a new way to
                 resolve semantic ambiguities that result from multiple
                 revisions of a taxonomic name. This paper presents a
                 comprehensive and powerful language for representing
                 the relationships among taxonomic concepts. The
                 language features terms and symbols for concept
                 relationships within a single taxonomic hierarchy, or
                 between two related but independently published
                 hierarchies. Taxonomic concepts pertaining to a single
                 hierarchy are characterised by parent/child
                 relationships, whereas those pertaining to two
                 independent hierarchies may have the following basic
                 relationships: congruence, inclusion (non-symmetrical,
                 relative to the side of comparison), overlap, and
                 exclusion. The relationships are asserted by
                 specialists who have the option to add or subtract
                 concepts on one or both sides of a relationship
                 equation in order to reconcile differences between
                 non-congruent taxonomic perspectives. The terms
                 `and', `or' and `not' are available,
                 respectively, to connect multiple simultaneously or
                 alternatively valid relationship assessments, or to
                 explicitly negate the validity of a relationship. The
                 language also permits the decomposition of a
                 relationship according to the intensional (property
                 referencing) and ostensive (member pointing) aspects of
                 the compared taxonomic concepts. Adopting the concept
                 relationship language will facilitate a more precise
                 documentation of similarities and differences in
                 multiple succeeding taxonomic perspectives, thereby
                 preparing the stage for an ontology-based integration
                 of taxonomic and related biological information.",
  author       = "N. M. Franz and R. K. Peet",
  doi          = "10.1017/S147720000800282X",
  ISSN         = "1477-2000",
  journal      = "Systematics and Biodiversity",
  keywords     = "biodiversity databases,data integration,linnaean
                 nomenclature,ontology,semantic web,taxonomic
                 concepts",
  number       = "1",
  pages        = "5--20",
  title        = "{Perspectives: Towards a language for mapping
                 relationships among taxonomic concepts}",
  volume       = "7",
  year         = "2009",
}

@Article{Samyn2012,
  abstract     = "In an interesting contribution Joppa et al. (2011)
                 revisit some aspects of the taxonomic impediment
                 (Evenhuis 2007; http://www.cbd.int/gti/) and come to
                 the conclusion that, contrary to the generally accepted
                 idea, both the rates of species description and the
                 number of taxonomists have increased exponentially
                 since the 1950's. Joppa et al. (2011) also note a
                 marked decline in the number of species described per
                 taxonomist which they attribute to the difficulty of
                 finding new species in an ever declining `missing
                 species pool'. Therefore, their results might be
                 interpreted that today's taxonomic workforce is
                 sufficient to describe the remaining (shallow) `pool
                 of missing species'. In this contribution, we
                 question if this is indeed the case and propose a
                 solution for speeding up taxonomic descriptions. We
                 feel that Joppa et al. (2011) are overly enthusiastic,
                 probably because their test cases represent a selection
                 of the better-studied taxa (flowering plants, Conus
                 snails, spiders, amphibians, birds and mammals).
                 Contradicting the findings of Joppa et al. (2011) and
                 using a much broader taxon sampling, which includes
                 many poorly studied groups, Costello et al. (2011)
                 observe that species description has roughly remained
                 constant since the second World War, at least for
                 terrestrial species. These results are congruent with
                 other counts of species descriptions (Bacher 2012;
                 Chapman 2009; May 2011; Zhang 2010;
                 http://species.asu.edu/SOS). So it seems that the
                 increasing rate of species description as reported by
                 Joppa et al. (2011) is taxon specific rather than
                 general. In stark contrast to the constant rate of
                 species description is the rate with which species are
                 discovered by using DNA sequence data. Exhaustive
                 bio-inventory initiatives to map diversity of poorly
                 explored areas around the world (e.g.
                 http://laplaneterevisitee.org/;
                 http://mooreabiocode.org) in combination with
                 large-scale barcoding efforts (Hajibabei et al. 2007;
                 Valentini et al., 2009) result in an explosion of
                 species/sequences in repositories such as Genbank and
                 Bold Systems that are not linked to known species. This
                 growing number of what have been called `dark taxa'
                 by Page (http://iphylo.blogspot.com/2011/04/
                 dark-taxa-genbank-in-post-taxonomic.html), makes one
                 question the use of proper taxonomic names at all.
                 Indeed, it can be argued that a lot of biology does not
                 per se require formally described taxon names. However,
                 the value of scientific names exceeds the field of
                 biology sensu stricto and matters",
  author       = "Yves Samyn and Olivier {De Clerck}",
  doi          = "10.5852/ejt.2012.10",
  file         = ":home/dimus/.local/share/data/Mendeley Ltd./Mendeley
                 Desktop/Downloaded/Samyn, De Clerck - 2012 - No Name,
                 No Game.pdf:pdf",
  ISSN         = "2118-9773",
  journal      = "European Journal of Taxonomy",
  pages        = "1--3",
  title        = "{No Name, No Game}",
  volume       = "10",
  year         = "2012",
}

@Article{Schindel2005,
  abstract     = "Sir --- The Consortium for the Barcode of Life (CBOL;
                 see www.barcoding.si.edu) is an international
                 initiative of natural history museums, herbaria, other
                 biodiversity research organizations, governmental
                 organizations and private companies which wish to
                 promote ...",
  author       = "David E Schindel and Scott E Miller",
  doi          = "10.1038/435017b",
  ISSN         = "0028-0836",
  journal      = "Nature",
  number       = "7038",
  pages        = "17",
  pmid         = "15874991",
  title        = "{DNA barcoding a useful tool for taxonomists}",
  volume       = "435",
  year         = "2005",
}

@Article{DeQueiroz1997,
  abstract     = "During the post-Darwinian history of taxonomy, the
                 Linnaean hierarchy has maintained its role as a means
                 for representing hierarchical taxonomic relationships.
                 During the same period, the principle of descent has
                 taken on an increasingly important role as the basis
                 for reformulated versions of fundamental taxonomic
                 concepts and principles. Early in this history, the
                 principle of descent provided an explanation for the
                 existence of taxa and implied a nested, hierarchical
                 structure for taxonomic relationships. Although an
                 evolutionary explanation for taxa contradicted the
                 Aristotelian context within which the Linnaean
                 hierarchy was developed, the nested, hierarchical
                 structure of taxonomic relationships implied by
                 evolution was compatible with the practical use of the
                 Linnaean hierarchy for conveying hierarchical
                 relationships and seems to have reinforced this
                 practice. Later changes associated with the development
                 of taxon concepts based on the principle of descent led
                 to changes in the interpretation of the Linnaean
                 categories as well as certain modifications related to
                 use of the Linnaean hierarchy in representing
                 phylogenetic relationships. Although some authors
                 questioned use of the Linnaean hierarchy in
                 phylogenetic taxonomies, most continued to use it in
                 one form or another. More recently, taxonomists have
                 considered the relevance of the principle of descent to
                 nomenclature. They have found fundamental
                 inconsistencies between concepts of taxa based on that
                 principle and methods currently used to define taxon
                 names, which are based on the Linnaean hierarchy.
                 Although these inconsistencies can be corrected without
                 totally eliminating the Linnaean hierarchy, the
                 necessary changes would greatly reduce the importance
                 of that hierarchy, particularly in the area of
                 nomenclature. Moreover, the earlier development of
                 taxon concepts based on the principle of descent
                 effectively proposed taxonomic categories of greater
                 theoretical significance than those of the Linnaean
                 hierarchy. The historical trend of granting increasing
                 importance to the principle of descent has reduced the
                 significance of the Linnaean hierarchy to the point
                 where it may no longer be worth retaining.",
  author       = "Kevin de Queiroz",
  journal      = "Aliso",
  number       = "2",
  pages        = "125--144",
  title        = "{The Linnaean Hierarchy and the Evolutionization of
                 Taxonomy, with Emphasis on the Problem of
                 Nomenclature}",
  volume       = "15",
  year         = "1997",
}

@Article{Schuh2003,
  abstract     = "The Linnaean system of nomenclature has been used and
                 adapted by biologists over a period of almost 250
                 years. Under the current system of codes, it is now
                 applied to more than 2 million species of organisms.
                 Inherent in the Linnaean system is the indication of
                 hierarchical relationships. The Linnaean system has
                 been justified primarily on the basis of stability.
                 Stability can be assessed on at least two grounds: the
                 absolute stability of names, irrespective of taxonomic
                 concept; and the stability of names under changing
                 concepts. Recent arguments have invoked conformity to
                 phylogenetic methods as the primary basis for choice of
                 nomenclatural systems, but even here stability of names
                 as they relate to monophyletic groups is stated as the
                 ultimate objective. The idea of absolute stability as
                 the primary justification for nomenclatural methods was
                 wrong from the start. The reasons are several. First,
                 taxa are concepts, no matter the frequency of
                 assertions to the contrary; as such, they are subject
                 to change at all levels and always will be, with the
                 consequence that to some degree the names we use to
                 refer to them will also be subject to change. Second,
                 even if the true nature of all taxa could be agreed
                 upon, the goal would require that we discover them all
                 and correctly recognize them for what they are. Much of
                 biology is far from that goal at the species level and
                 even further for supraspecific taxa. Nomenclature
                 serves as a tool for biology. Absolute stability of
                 taxonomic concepts---and nomenclature---would hinder
                 scientific progress rather than promote it. It can be
                 demonstrated that the scientific goals of systematists
                 are far from achieved. Thus, the goal of absolute
                 nomenclatural stability is illusory and misguided. The
                 primary strength of the Linnaean system is its ability
                 to portray hierarchical relationships; stability is
                 secondary. No single system of nomenclature can ever
                 possess all desirable attributes: i.e., convey
                 information on hierarchical relationships, provide
                 absolute stability in the names portraying those
                 relationships, and provide simplicity and continuity in
                 communicating the identities of the taxa and their
                 relationships. Aside from myriad practical problems
                 involved in its implementation, it must be concluded
                 that ``phylogenetic nomenclature'' would not provide
                 a more stable and effective system for communicating
                 information on biological classifications than does the
                 Linnaean system.",
  author       = "Randall T Schuh",
  doi          = "10.1663/0006-8101(2003)069[0059:TLSAIY]2.0.CO;2",
  ISSN         = "0006-8101",
  journal      = "The Botanical Review",
  number       = "1",
  pages        = "59--78",
  publisher    = "Springer",
  title        = "{The Linnaean System and Its 250-Year Persistence}",
  URL          = "http://www.bioone.org/doi/abs/10.1663/0006-8101{\%}282003{\%}29069{\%}5B0059{\%}3ATLSAIY{\%}5D2.0.CO{\%}3B2",
  volume       = "69",
  year         = "2003",
}

@Comment{NOTE(review) for Evenhuis2010: the title field is identical to
  booktitle, so the chapter's own title appears to be missing, and Andrew
  Polaszek is listed both as an author and as the editor. Confirm both
  against the book's table of contents before citing.}
@InCollection{Evenhuis2010,
  abstract     = "The advent of relational databasing and data storage
                 capacity, coupled with revolutionary advances in
                 molecular sequencing technology and specimen imaging,
                 have led to a taxonomic renaissance. Systema Naturae
                 250 - The Linnaean Ark maps the origins of this
                 renaissance, beginning with Linnaeus, through his
                 ``apostles'', via the great unsung hero Charles
                 Davies Sherbon --- arguably the father of biodiversity
                 informatics --- up to the present day with the
                 Planetary Biodiversity Inventories and into the future
                 with the Encyclopedia of Life and web-based taxonomy.
                 The book provides scientific, historical, and cultural
                 documentation of the evolution of taxonomy and the
                 successful adaptation of the Linnaean nomenclature
                 system to that evolution. It underscores the importance
                 of taxonomic accuracy, not only for the classification
                 of living organisms, but for a more complete
                 understanding of the living world and its biodiversity.
                 The book also examines the role of technologies such as
                 DNA sequencing, specimen imaging, and electronic data
                 storage. A celebration of 250 years of the scientific
                 naming of animals, Systema Naturae 250 - The Linnaean
                 Ark records and explores the history of zoological
                 nomenclature and taxonomy, detailing current and future
                 activity in these fields. Descriptive taxonomy has been
                 in decline, despite the fact that the classification of
                 organisms through taxonomic studies provides the
                 foundation of our understanding of life forms. Packed
                 with illustrations and tables, this book establishes a
                 vision for the future of descriptive taxonomy and marks
                 the beginning of a period of rapid growth of taxonomic
                 knowledge.",
  author       = "Neal L. Evenhuis and Thomas Pape and Adrian C. Pont
                 and F. Christian Thompson and Andrew Polaszek",
  booktitle    = "Systema Naturae 250 The Linnaean Ark",
  chapter      = "7",
  editor       = "Andrew Polaszek",
  ISBN         = "978-1-4200-9501-2",
  pages        = "75--83",
  publisher    = "CRC Press",
  title        = "{Systema Naturae 250 The Linnaean Ark}",
  year         = "2010",
}

@InProceedings{Ford2004,
  abstract     = "For decades we have been using Chomsky's generative
                 system of grammars, particularly context-free grammars
                 (CFGs) and regular expressions (REs), to express the
                 syntax of programming languages and protocols. The
                 power of generative grammars to express ambiguity is
                 crucial to their original purpose of modelling natural
                 languages, but this very power makes it unnecessarily
                 difficult both to express and to parse machine-oriented
                 languages using CFGs. Parsing Expression Grammars
                 (PEGs) provide an alternative, recognition-based formal
                 foundation for describing machine-oriented syntax,
                 which solves the ambiguity problem by not introducing
                 ambiguity in the first place. Where CFGs express
                 nondeterministic choice between alternatives, PEGs
                 instead use prioritized choice. PEGs address frequently
                 felt expressiveness limitations of CFGs and REs,
                 simplifying syntax definitions and making it
                 unnecessary to separate their lexical and hierarchical
                 components. A linear-time parser can be built for any
                 PEG, avoiding both the complexity and fickleness of LR
                 parsers and the inefficiency of generalized CFG
                 parsing. While PEGs provide a rich set of operators for
                 constructing grammars, they are reducible to two
                 minimal recognition schemas developed around 1970,
                 TS/TDPL and gTS/GTDPL, which are here proven equivalent
                 in effective recognition power.",
  author       = "Bryan Ford",
  booktitle    = "Proceedings of the 31st {ACM} {SIGPLAN-SIGACT}
                 Symposium on Principles of Programming Languages",
  doi          = "10.1145/964001.964011",
  keywords     = "bnf,context-free grammars,grammars,gtdpl,lexical
                 analysis,packrat parsing,parsing,parsing
                 expression,regular expressions,scannerless,syntactic
                 predicates,tdpl,unified grammars",
  pages        = "111--122",
  publisher    = "ACM",
  title        = "{Parsing Expression Grammars: A Recognition-Based
                 Syntactic Foundation}",
  year         = "2004",
}

@Article{Akella2012,
  abstract     = "BACKGROUND: A scientific name for an organism can be
                 associated with almost all biological data. Name
                 identification is an important step in many text mining
                 tasks aiming to extract useful information from
                 biological, biomedical and biodiversity text sources. A
                 scientific name acts as an important metadata element
                 to link biological information. RESULTS: We
                 present NetiNeti (Name Extraction from Textual
                 Information-Name Extraction for Taxonomic Indexing), a
                 machine learning based approach for recognition of
                 scientific names including the discovery of new species
                 names from text that will also handle misspellings, OCR
                 errors and other variations in names. The system
                 generates candidate names using rules for scientific
                 names and applies probabilistic machine learning
                 methods to classify names based on structural features
                 of candidate names and features derived from their
                 contexts. NetiNeti can also disambiguate scientific
                 names from other names using the contextual
                 information. We evaluated NetiNeti on legacy
                 biodiversity texts and biomedical literature (MEDLINE).
                 NetiNeti performs better (precision = 98.9{\%} and
                 recall = 70.5{\%}) compared to a popular dictionary
                 based approach (precision = 97.5{\%} and
                 recall = 54.3{\%}) on a 600-page biodiversity book
                 that was manually marked by an annotator. On a small
                 set of PubMed Central's full text articles annotated
                 with scientific names, the precision and recall values
                 are 98.5{\%} and 96.2{\%} respectively. NetiNeti found
                 more than 190,000 unique binomial and trinomial names
                 in more than 1,880,000 PubMed records when used on the
                 full MEDLINE database. NetiNeti also successfully
                 identifies almost all of the new species names
                 mentioned within web pages. CONCLUSIONS: We present
                 NetiNeti, a machine learning based approach for
                 identification and discovery of scientific names. The
                 system implementing the approach can be accessed at
                 http://namefinding.ubio.org.",
  author       = "Lakshmi Manohar Akella and Catherine N Norton and
                 Holly Miller",
  doi          = "10.1186/1471-2105-13-211",
  file         = ":home/dimus/.local/share/data/Mendeley Ltd./Mendeley
                 Desktop/Downloaded/Akella, Norton, Miller - 2012 -
                 NetiNeti discovery of scientific names from text using
                 machine learning methods.pdf:pdf",
  ISSN         = "1471-2105",
  journal      = "BMC Bioinformatics",
  keywords     = "Animals,Artificial Intelligence,Classification,Data
                 Mining,MEDLINE,PubMed",
  number       = "1",
  pages        = "211",
  pmid         = "22913485",
  title        = "{NetiNeti: discovery of scientific names from text
                 using machine learning methods}",
  URL          = "http://www.biomedcentral.com/1471-2105/13/211",
  volume       = "13",
  year         = "2012",
}

@Article{Patterson2006,
  abstract     = "Taxonomic indexing refers to a new array of
                 taxonomically intelligent network services that use
                 nomenclatural principles and elements of expert
                 taxonomic knowledge to manage information about
                 organisms. Taxonomic indexing was introduced to help
                 manage the increasing amounts of digital information
                 about biology. It has been designed to form a near
                 basal layer in a layered cyberinfrastructure that deals
                 with biological information. Taxonomic Indexing
                 accommodates the special problems of using names of
                 organisms to index biological material. It links
                 alternative names for the same entity (reconciliation),
                 and distinguishes between uses of the same name for
                 different entities (disambiguation), and names are
                 placed within an indefinite number of hierarchical
                 schemes. In order to access all information on all
                 organisms, Taxonomic indexing must be able to call on a
                 registry of all names in all forms for all organisms.
                 NameBank has been developed to meet that need.
                 Taxonomic indexing is an area of informatics that
                 overlaps with taxonomy, is dependent on the expert
                 input of taxonomists, and reveals the relevance of the
                 discipline to a wide audience.",
  author       = "David J Patterson and David Remsen and Marino,
                 William A. and Cathy Norton",
  doi          = "10.1080/10635150500541680",
  ISSN         = "1063-5157",
  journal      = "Systematic Biology",
  number       = "3",
  pages        = "367--373",
  pmid         = "16861205",
  title        = "{Taxonomic indexing---extending the role of
                 taxonomy}",
  volume       = "55",
  year         = "2006",
}

@Article{Leary2007,
  abstract     = "Web content syndication through standard formats such
                 as RSS and ATOM has become an increasingly popular
                 mechanism for publishers, news sources and blogs to
                 disseminate regularly updated content. These
                 standardized syndication formats deliver content
                 directly to the subscriber, allowing them to locally
                 aggregate content from a variety of sources instead of
                 having to find the information on multiple websites.
                 The uBioRSS application is a 'taxonomically
                 intelligent' service customized for the biological
                 sciences. It aggregates syndicated content from
                 academic publishers and science news feeds, and then
                 uses a taxonomic Named Entity Recognition algorithm to
                 identify and index taxonomic names within those data
                 streams. The resulting name index is cross-referenced
                 to current global taxonomic datasets to provide context
                 for browsing the publications by taxonomic group. This
                 process, called taxonomic indexing, draws upon services
                 developed specifically for biological sciences,
                 collectively referred to as 'taxonomic intelligence'.
                 Such value-added enhancements can provide biologists
                 with accelerated and improved access to current
                 biological content. AVAILABILITY:
                 http://names.ubio.org/rss/",
  author       = "Patrick R. Leary and David P. Remsen and Catherine N.
                 Norton and David J. Patterson and Indra Neil Sarkar",
  doi          = "10.1093/bioinformatics/btm109",
  ISSN         = "1367-4803",
  journal      = "Bioinformatics",
  number       = "11",
  pages        = "1434--1436",
  pmid         = "17392332",
  title        = "{uBioRSS: Tracking taxonomic literature using RSS}",
  volume       = "23",
  year         = "2007",
}

@Article{Road2012,
  abstract     = "The International Commission on Zoological
                 Nomenclature has voted in favour of a revised version
                 of the amendment to the International Code of
                 Zoological Nomenclature that was proposed in 2008. The
                 purpose of the amendment is to expand and refine the
                 methods of publication allowed by the Code,
                 particularly in relation to electronic publication. The
                 amendment establishes an Official Register of
                 Zoological Nomenclature (with ZooBank as its online
                 version), allows electronic publication after 2011
                 under certain conditions, and disallows publication on
                 optical discs after 2012. The requirements for
                 electronic publications are that the work be registered
                 in ZooBank before it is published, that the work itself
                 state the date of publication and contain evidence that
                 registration has occurred, and that the ZooBank
                 registration state both the name of an electronic
                 archive intended to preserve the work and the ISSN or
                 ISBN associated with the work. Registration of new
                 scientific names and nomenclatural acts is not
                 required. The Commission has confirmed that ZooBank is
                 ready to handle the requirements of the amendment.",
  author       = "{International Commission on Zoological
                 Nomenclature}",
  doi          = "10.3897/zookeys.219.3944",
  file         = ":home/dimus/.local/share/data/Mendeley Ltd./Mendeley
                 Desktop/Downloaded/Road - 2012 - Amendment of Articles
                 8,9, 10,21 and 78 of the International Code of
                 Zoological Nomenclature to expand and refine
                 methods.pdf:pdf",
  ISSN         = "1313-2989",
  journal      = "ZooKeys",
  keywords     = "Amendment,Archiving,Electronic
                 publication,International Code of Zoological
                 Nomenclature,Official Register of Zoological
                 Nomenclature,ZooBank",
  pages        = "1--10",
  pmid         = "22977348",
  title        = "{Amendment of Articles 8, 9, 10, 21 and 78 of the
                 International Code of Zoological Nomenclature to expand
                 and refine methods of publication}",
  volume       = "219",
  year         = "2012",
}

@Article{Ereshefsky1994,
  abstract     = "Most biologists use the Linnaean system for
                 constructing classifications of the organic world. The
                 Linnaean system, however, has lost its theoretical
                 basis due to the shift in biology from creationist and
                 essentialist tenets to evolutionary theory. As a
                 result, the Linnaean system is both cumbersome and
                 ontologically vacuous. This paper illustrates the
                 problems facing the Linnaean system, and ends with a
                 brief introduction to an alternative approach to
                 biological classification.",
  author       = "Marc Ereshefsky",
  doi          = "10.2307/188208",
  ISSN         = "0031-8248",
  journal      = "Philosophy of Science",
  number       = "2",
  pages        = "186--205",
  publisher    = "The University of Chicago Press",
  title        = "{Some Problems with the Linnaean Hierarchy}",
  URL          = "http://links.jstor.org/sici?sici=0031-8248(199406)61:2<186:SPWTLH>2.0.CO;2-8",
  volume       = "61",
  year         = "1994",
}

@Article{Pyle2003,
  abstract     = "Taxonomic research, as a field of biological sciences,
                 is fundamentally an exercise in information management.
                 Modern computer technology offers the potential for
                 both streamlining the taxonomic process, and
                 increasing its accuracy. Effective use of computer
                 technology to successfully manage taxonomic information
                 is predicated upon the implementation of data models
                 that accommodate the diverse forms of information
                 important to taxonomic researchers. Although
                 sophisticated data models have been developed to manage
                 some information relevant to taxonomic research
                 (e.g., natural history specimen information;
                 descriptive data relating to morphological and
                 molecular characters of specimens), similarly robust
                 models for managing information about taxonomic names
                 and how they are applied to taxonomic concepts, though
                 they exist, have not attained widespread use and
                 adoption. Herein I describe portions of a relational
                 data model developed to manage information relevant to
                 taxonomic names and concepts. The core entities of the
                 described portions of this model are Agents,
                 References, and Assertions (along with their associated
                 Protonyms). Agents (people and organizations) in this
                 context refer primarily to taxonomic authorities.
                 References are broadly defined as date-stamped
                 information (usually, but not exclusively, in the form
                 of a publication), as documented by the Agents who
                 serve as the Reference authors. Assertions consist of
                 basic elemental information about the treatment of
                 taxonomic names by taxonomic authorities as documented
                 in a particular Reference, and correspond to what many
                 authors refer to as taxon ``concepts''. Protonyms are
                 a special subset (subtype) of Assertions, which
                 constitute original descriptions of taxonomic names
                 (serving to unite multiple assertions pertaining to the
                 same taxonomic name), and include elements of botanical
                 Protologues and Basionyms. I also illustrate how these
                 core entities can serve as a foundation for taxonomic
                 names and concepts as integrated with other datasets,
                 such as biological specimens and observations (and, by
                 extension, geographic distributions and character
                 matrices). The broadest data content source used to
                 populate and test the data model is derived from a
                 systematic revision of the reef-fish family
                 Pomacanthidae (marine angelfishes). Additional datasets
                 used to test the implementation of the data model
                 include specimen data from the Department of Natural
                 Sciences, Bishop Museum; nomenclatural data from The
                 Catalog of Fishes; and nomenclatural and
                 biogeographic data from two published taxonomic catalogs
                 (insects and terrestrial mollusks in Hawai`i). An
                 intuitive, feature-rich software application based on
                 Microsoft Access{\textregistered} has also been
                 developed in conjunction with this data model, and will
                 be the topic of a future article.",
  author       = "Richard L. Pyle",
  journal      = "PhyloInformatics",
  pages        = "1--54",
  title        = "{Taxonomer: a Relational Data Model for managing
                 information relevant to taxonomic research}",
  URL          = "http://scholarspace.manoa.hawaii.edu/bitstream/handle/10125/6906/uhm\_phd\_4412\_r.pdf?sequence=2\#page=15",
  volume       = "1",
  year         = "2003",
}

@Article{Patterson2010,
  abstract     = "Those who seek answers to big, broad questions about
                 biology, especially questions emphasizing the organism
                 (taxonomy, evolution and ecology), will soon benefit
                 from an emerging names-based infrastructure. It will
                 draw on the almost universal association of organism
                 names with biological information to index and
                 interconnect information distributed across the
                 Internet. The result will be a virtual data commons,
                 expanding as further data are shared, allowing biology
                 to become more of a 'big science'. Informatics devices
                 will exploit this 'big new biology', revitalizing
                 comparative biology with a broad perspective to reveal
                 previously inaccessible trends and discontinuities, so
                 helping us to reveal unfamiliar biological truths.
                 Here, we review the first components of this freely
                 available, participatory and semantic Global Names
                 Architecture. {\copyright} 2010 Elsevier Ltd.",
  author       = "D. J. Patterson and J. Cooper and P. M. Kirk and R. L.
                 Pyle and D. P. Remsen",
  doi          = "10.1016/j.tree.2010.09.004",
  ISSN         = "0169-5347",
  journal      = "Trends in Ecology and Evolution",
  number       = "12",
  pages        = "686--691",
  pmid         = "20961649",
  publisher    = "Elsevier Ltd",
  title        = "{Names are key to the big new biology}",
  URL          = "http://dx.doi.org/10.1016/j.tree.2010.09.004",
  volume       = "25",
  year         = "2010",
}

@Article{Boyle2013,
  abstract     = "BACKGROUND: The digitization of biodiversity data is
                 leading to the widespread application of taxon names
                 that are superfluous, ambiguous or incorrect, resulting
                 in mismatched records and inflated species numbers. The
                 ultimate consequences of misspelled names and bad
                 taxonomy are erroneous scientific conclusions and
                 faulty policy decisions. The lack of tools for
                 correcting this 'names problem' has become a
                 fundamental obstacle to integrating disparate data
                 sources and advancing the progress of biodiversity
                 science. RESULTS: The TNRS, or
                 Taxonomic Name Resolution Service, is an online
                 application for automated and user-supervised
                 standardization of plant scientific names. The TNRS
                 builds upon and extends existing open-source
                 applications for name parsing and fuzzy matching. Names
                 are standardized against multiple reference taxonomies,
                 including the Missouri Botanical Garden's Tropicos
                 database. Capable of processing thousands of names in a
                 single operation, the TNRS parses and corrects
                 misspelled names and authorities, standardizes variant
                 spellings, and converts nomenclatural synonyms to
                 accepted names. Family names can be included to
                 increase match accuracy and resolve many types of
                 homonyms. Partial matching of higher taxa combined with
                 extraction of annotations, accession numbers and
                 morphospecies allows the TNRS to standardize taxonomy
                 across a broad range of active and legacy
                 datasets. CONCLUSIONS: We show
                 how the TNRS can resolve many forms of taxonomic
                 semantic heterogeneity, correct spelling errors and
                 eliminate spurious names. As a result, the TNRS can aid
                 the integration of disparate biological datasets.
                 Although the TNRS was developed to aid in standardizing
                 plant names, its underlying algorithms and design can
                 be extended to all organisms and nomenclatural codes.
                 The TNRS is accessible via a web interface at
                 http://tnrs.iplantcollaborative.org/ and as a RESTful
                 web service and application programming interface.
                 Source code is available at
                 https://github.com/iPlantCollaborativeOpenSource/TNRS/.",
  author       = "Brad Boyle and Nicole Hopkins and Zhenyuan Lu and Juan
                 Antonio {Raygoza Garay} and Dmitry Mozzherin and Tony
                 Rees and Naim Matasci and Martha L Narro and William H
                 Piel and Sheldon J McKay and Sonya Lowry and Chris
                 Freeland and Robert K Peet and Brian J Enquist",
  doi          = "10.1186/1471-2105-14-16",
  file         = ":home/dimus/.local/share/data/Mendeley Ltd./Mendeley
                 Desktop/Downloaded/Boyle et al. - 2013 - The taxonomic
                 name resolution service an online tool for automated
                 standardization of plant names.pdf:pdf",
  ISSN         = "1471-2105",
  journal      = "BMC Bioinformatics",
  keywords     = "Algorithms,Classification,Classification:
                 methods,Databases,
                 Factual,Internet,Names,Plants,Plants:
                 classification,Software,User-Computer Interface",
  number       = "1",
  pages        = "16",
  pmid         = "23324024",
  title        = "{The taxonomic name resolution service: an online tool
                 for automated standardization of plant names}",
  URL          = "http://www.biomedcentral.com/1471-2105/14/16",
  volume       = "14",
  year         = "2013",
}

@misc{gn-nsf-2011,
  key     = {NSF GN2},
  title   = {Collaborative Research: {ABI}: Innovation: The Global Names Architecture, an infrastructure for unifying taxonomic databases and services for managers of biological information},
  url     = {http://nsf.gov/awardsearch/showAward?AWD\_ID=1062387},
  urldate = {18 April 2017},
}

@misc{worms,
  key     = {WoRMS},
  title   = {Wo{RMS} - World Register of Marine Species},
  url     = {http://www.marinespecies.org/},
  urldate = {18 April 2017},
}

@misc{carms,
  key     = {CaRMS},
  title   = {Canadian Register of Marine Species},
  url     = {http://www.marinespecies.org/carms/},
  urldate = {18 April 2017},
}

@misc{iplant,
  key     = {iPlant},
  title   = {iPlant Taxonomic Name Resolution Service},
  url     = {http://tnrs.iplantcollaborative.org/},
  urldate = {18 April 2017},
}

@misc{eol,
  key     = {EOL},
  title   = {Encyclopedia of Life},
  url     = {http://eol.org/},
  urldate = {18 April 2017},
}

@Misc{gbif,
  key          = "GBIF",
  title        = "Global Biodiversity Information Facility",
  URL          = "http://gbif.org/",
  urldate      = "18 April 2017",
}

@misc{ala,
  key     = {ALA},
  title   = {Atlas of Living Australia -- sharing biodiversity knowledge},
  url     = {http://www.ala.org.au/},
  urldate = {18 April 2017},
}

@misc{ubio:parser,
  key     = {uBioParser},
  title   = {uBio Name Parser},
  url     = {http://www.ubio.org/tools/explode.php},
  urldate = {18 April 2017},
}

@misc{botsociety:parser,
  key     = {BotSocietyParser},
  title   = {Botanical Society of Britain and Ireland Taxon Name Parser},
  url     = {http://bsbidb.org.uk/taxonnameparser.php},
  urldate = {18 April 2017},
}

@misc{bold,
  key     = {BOLD},
  title   = {Barcode of Life},
  url     = {http://www.barcodeoflife.org/},
  urldate = {18 April 2017},
}

@misc{col,
  key     = {CoL},
  title   = {Catalogue of Life},
  url     = {http://catalogueoflife.com/},
  urldate = {18 April 2017},
}

@misc{idigbio,
  key     = {iDIGBIO},
  title   = {iDigBio},
  url     = {https://www.idigbio.org/},
  urldate = {18 April 2017},
}

@misc{vertnet,
  key     = {VertNet},
  title   = {VertNet},
  url     = {http://vertnet.org/},
  urldate = {18 April 2017},
}

@misc{treetop,
  key     = {Sobo, N.},
  title   = {Treetop},
  url     = {https://github.com/cjheath/treetop},
  urldate = {18 April 2017},
}

@misc{biodiversity,
  key     = {Mozzherin, D.},
  title   = {GlobalNamesArchitecture/biodiversity: Scientific Name Parser},
  url     = {https://github.com/GlobalNamesArchitecture/biodiversity},
  urldate = {18 April 2017},
}

@misc{gnrd,
  key     = {{Shorthouse, D.P., Mozzherin, D.}},
  title   = {Global Names Recognition and Discovery},
  url     = {https://gnrd.globalnames.org},
  urldate = {18 April 2017},
}

@misc{gn:index,
  key     = {{Mozzherin, D.}},
  title   = {Global Names Index},
  url     = {http://gni.globalnames.org},
  urldate = {18 April 2017},
}

@misc{gn:resolver,
  key     = {{Mozzherin, D., Shorthouse, D.P.}},
  title   = {Global Names Resolver},
  url     = {http://resolver.globalnames.org},
  urldate = {18 April 2017},
}

@misc{gnparser,
  key     = {{Mozzherin, D., Myltsev, A.}},
  title   = {GlobalNamesArchitecture/gnparser: Split scientific names to meaningful elements with meta information},
  url     = {https://github.com/GlobalNamesArchitecture/gnparser},
  urldate = {18 April 2017},
}

@misc{gnparser-web,
  key     = {{Mozzherin, D., Myltsev, A.}},
  title   = {Global Names Parser Web App},
  url     = {http://parser.globalnames.org},
  urldate = {18 April 2017},
}

@misc{gnparser-docker,
  key     = {{Mozzherin, D., Myltsev, A.}},
  title   = {Global Names Parser Docker Image},
  url     = {https://hub.docker.com/r/gnames/gnparser/},
  urldate = {18 April 2017},
}

@Misc{gbifNameParser,
  key          = "D{\"o}ring, M.",
  title        = "{GBIF} name-parser",
  URL          = "https://github.com/gbif/name-parser/releases/tag/name-parser-2.10",
  urldate      = "18 April 2017",
}

@misc{bdiv-downloads,
  key     = {BioGems},
  title   = {RubyGems --- biodiversity search},
  url     = {https://rubygems.org/search?utf8=\%E2\%9C\%93\&query=biodiversity},
  urldate = {18 April 2017},
}

@misc{gnparser-json,
  key     = {{Mozzherin, D., Myltsev, A.}},
  title   = {{JSON} schema for gnparser output},
  url     = {http://globalnames.org/schemas/gnparser.json},
  urldate = {18 April 2017},
}

@misc{globalnames-web,
  key     = {GN Web},
  title   = {Global Names Home},
  url     = {http://globalnames.org},
  urldate = {18 April 2017},
}

@misc{gnmatcher,
  key     = {Myltsev, A.},
  title   = {GlobalNamesArchitecture/gnmatcher: Fuzzy matching library for scientific names with emphasis on performance and scalability},
  url     = {https://github.com/GlobalNamesArchitecture/gnmatcher},
  urldate = {18 April 2017},
}

@misc{sysopia,
  key     = {{Wijesooriya, V., Mozzherin, D.}},
  title   = {{EOL}/sysopia: SysOp dashboard for 2-20 hosts},
  url     = {https://github.com/EOL/sysopia},
  urldate = {18 April 2017},
}

@misc{gn-crossmap,
  key     = {{Mozzherin, D.}},
  title   = {GlobalNamesArchitecture/gn\_crossmap: Ruby Gem which crossmaps a list of scientific names to names from a data source in {GN} Index},
  url     = {https://github.com/GlobalNamesArchitecture/gn\_crossmap},
  urldate = {18 April 2017},
}

@Misc{gn-idigbio,
  key          = "{Mozzherin, D., Traub, G.}",
  title        = "GlobalNamesArchitecture/idigbio\_client: Ruby wrapper
                 for iDigBio {API}",
  URL          = "https://github.com/GlobalNamesArchitecture/idigbio\_client",
  urldate      = "18 April 2017",
}

@misc{gnresolver,
  key     = {{Myltsev, A., Mozzherin, D.}},
  title   = {GlobalNamesArchitecture/gnresolver},
  url     = {https://github.com/GlobalNamesArchitecture/gnresolver},
  urldate = {18 April 2017},
}

@misc{dwca-hunter,
  key     = {Mozzherin, D.},
  title   = {DarwinCore Archive Hunter},
  url     = {https://github.com/GlobalNamesArchitecture/dwca-hunter},
  urldate = {18 April 2017},
}

@misc{dwc-archive,
  key     = {Mozzherin, D.},
  title   = {DarwinCore Archive Library},
  url     = {https://rubygems.org/gems/dwc-archive},
  urldate = {18 April 2017},
}

@misc{biogems,
  key     = {biogems},
  title   = {Ruby Libraries for Biology},
  url     = {http://biogems.info/},
  urldate = {18 April 2017},
}

@misc{gbifparser,
  key     = {Mozzherin, D.},
  title   = {gbifparser: v0.1.0},
  year    = {2015},
  month   = dec,
  doi     = {10.5281/zenodo.34848},
  url     = {http://dx.doi.org/10.5281/zenodo.34848},
  urldate = {18 April 2017},
}

@misc{parboiled2,
  key     = {{Doenitz, M., Myltsev, A.}},
  title   = {A macro-based {PEG} parser generator for Scala 2.10+},
  url     = {https://github.com/sirthias/parboiled2},
  urldate = {18 April 2017},
}

@misc{parboiled2-gna,
  key     = {{Doenitz, M., Myltsev, A.}},
  title   = {GlobalNamesArchitecture/parboiled2: A macro-based {PEG} parser generator for Scala 2.10+},
  doi     = {10.5281/zenodo.50340},
  url     = {https://github.com/GlobalNamesArchitecture/parboiled2},
  urldate = {18 April 2017},
}

@Misc{maven-globalnames,
  key          = "{Mozzherin, D., Myltsev, A.}",
  title        = "Maven Central: Global Names Artifacts",
  URL          = "https://search.maven.org/#search|ga|1|globalnames",
  urldate      = "18 April 2017",
}

% The citation key keeps the historical spelling so existing \cite commands
% still resolve; the author's surname is Bengtson.
@Article{Bengston1988,
  abstract     = "Open nomenclature plays an important role in taxonomic
                 decisions by palaeontologists, but usage and
                 interpretation of the signs employed vary considerably.
                 Prevailing fashion seems to favour aff. to indicate
                 affinity of a potentially new, as yet undescribed
                 species with a known species, whereas cf. and ?
                 indicate uncertainty. Use of aff., cf., and ? for
                 different degrees of uncertainty, as recommended by
                 some workers, leads to instability in interpretation.
                 Abbreviated taxonomic expressions such as 'Trichiurus
                 cf. lepturus' are unambiguous and are to be preferred
                 to 'Trichiurus cf. T. lepturus'. Careful, judicious use
                 of open nomenclature is to be encouraged and should be
                 covered by the International Code of Zoological
                 Nomenclature. A set of recommendations is given.",
  author       = "Peter Bengtson",
  ISSN         = "0031-0239",
  journal      = "Palaeontology",
  number       = "1",
  pages        = "223--227",
  title        = "{Open nomenclature}",
  volume       = "31",
  year         = "1988",
}

% Chapter in an edited handbook, not a journal article.
@InCollection{yu1997handbook,
  title        = "Regular Languages",
  author       = "Sheng Yu",
  booktitle    = "Handbook of Formal Languages",
  editor       = "Grzegorz Rozenberg and Arto Salomaa",
  volume       = "1",
  pages        = "41--110",
  year         = "1997",
  publisher    = "Springer Verlag",
}

@TechReport{odersky2004overview,
  title        = "An overview of the {Scala} programming language",
  author       = "Martin Odersky and Philippe Altherr and Vincent Cremet
                 and Burak Emir and Sebastian Maneth and St{\'e}phane
                 Micheloud and Nikolay Mihaylov and Michel Schinz and
                 Erik Stenman and Matthias Zenger",
  institution  = "{EPFL}",
  number       = "IC/2004/64",
  address      = "Lausanne, Switzerland",
  year         = "2004",
}

% Departmental technical report, not a journal article.
@TechReport{moors2008parser,
  title        = "Parser combinators in {Scala}",
  author       = "Adriaan Moors and Frank Piessens and Martin Odersky",
  year         = "2008",
  institution  = "Department of Computer Science, KU Leuven",
  number       = "CW491",
}

@inproceedings{Burmako:2013:SML:2489837.2489840,
  author    = {Burmako, Eugene},
  title     = {Scala Macros: Let Our Powers Combine!: On How Rich Syntax and Static Types Work with Metaprogramming},
  booktitle = {Proceedings of the 4th Workshop on Scala},
  series    = {SCALA '13},
  location  = {Montpellier, France},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2013},
  pages     = {3:1--3:10},
  articleno = {3},
  numpages  = {10},
  isbn      = {978-1-4503-2064-1},
  doi       = {10.1145/2489837.2489840},
  url       = {http://doi.acm.org/10.1145/2489837.2489840},
  acmid     = {2489840},
  keywords  = {Scala, compile-time metaprogramming, domain-specific languages, type classes},
}

@InProceedings{mikolov2013distributed,
  author       = "Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and
                 Corrado, Greg S and Dean, Jeff",
  title        = "Distributed representations of words and phrases and
                 their compositionality",
  booktitle    = "Advances in neural information processing systems 26",
  year         = "2013",
  pages        = "3111--3119",
  publisher    = "Curran Associates, Inc.",
  address      = "USA",
}

% The year uses the \noop command from the file preamble: the braced 2017 is
% used only for sorting, while "in preparation" is what gets printed.
@Article{Myltsev:inpress-a,
  author       = "Alexander Myltsev and Mathias Doenitz",
  title        = "parboiled2: a macro-based approach for effective
                 generators of parsing expressions grammars in {Scala}",
  year         = "\noop{2017}in preparation",
}

% IETF Request for Comments 7159, not a journal article.
@TechReport{bray2014javascript,
  title        = "The {JavaScript} Object Notation ({JSON}) Data
                 Interchange Format",
  author       = "Tim Bray",
  type         = "{RFC}",
  number       = "7159",
  institution  = "Internet Engineering Task Force",
  month        = mar,
  year         = "2014",
  doi          = "10.17487/RFC7159",
  URL          = "https://www.rfc-editor.org/rfc/rfc7159",
}

@Article{wampler2011scala,
  title        = "Scala web frameworks: Looking beyond {Lift}",
  author       = "Dean Wampler",
  journal      = "IEEE Internet Computing",
  volume       = "15",
  number       = "5",
  pages        = "87--94",
  year         = "2011",
  publisher    = "IEEE",
}

@Article{Rees2014,
  abstract     = "Misspellings of organism scientific names create
                 barriers to optimal storage and organization of
                 biological data, reconciliation of data stored under
                 different spelling variants of the same name, and
                 appropriate responses from user queries to taxonomic
                 data systems. This study presents an analysis of the
                 nature of the problem from first principles, reviews
                 some available algorithmic approaches, and describes
                 Taxamatch, an improved name matching solution for this
                 information domain. Taxamatch employs a custom Modified
                 Damerau-Levenshtein Distance algorithm in tandem with a
                 phonetic algorithm, together with a rule-based approach
                 incorporating a suite of heuristic filters, to produce
                 improved levels of recall, precision and execution time
                 over the existing dynamic programming algorithms
                 n-grams (as bigrams and trigrams) and standard edit
                 distance. Although entirely phonetic methods are faster
                 than Taxamatch, they are inferior in the area of recall
                 since many real-world errors are non-phonetic in
                 nature. Excellent performance of Taxamatch (as recall,
                 precision and execution time) is demonstrated against a
                 reference database of over 465,000 genus names and 1.6
                 million species names, as well as against a range of
                 error types as present at both genus and species levels
                 in three sets of sample data for species and four for
                 genera alone. An ancillary authority matching component
                 is included which can be used both for misspelled names
                 and for otherwise matching names where the associated
                 cited authorities are not identical.",
  author       = "Tony Rees",
  doi          = "10.1371/journal.pone.0107510",
  file         = ":home/dimus/.local/share/data/Mendeley Ltd./Mendeley
                 Desktop/Downloaded/Rees - 2014 - Taxamatch, an
                 algorithm for near ('fuzzy') matching of scientific
                 names in taxonomic databases.pdf:pdf",
  ISSN         = "1932-6203",
  journal      = "PLoS ONE",
  keywords     = "Algorithms,Classification,Classification:
                 methods,Databases, Factual",
  month        = sep,
  number       = "9",
  pages        = "e107510",
  pmid         = "25247892",
  publisher    = "Public Library of Science",
  title        = "{Taxamatch, an algorithm for near ('fuzzy') matching
                 of scientific names in taxonomic databases}",
  URL          = "http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0107510",
  volume       = "9",
  year         = "2014",
}

@Article{Pyle2016,
  abstract     = "For more than 250 years, the taxonomic enterprise has
                 remained almost unchanged. Certainly, the tools of the
                 trade have improved: months-long journeys aboard
                 sailing ships have been reduced to hours aboard jet
                 airplanes; advanced technology allows humans to access
                 environments that were once utterly inaccessible; GPS
                 has replaced crude maps; digital hi-resolution imagery
                 provides far more accurate renderings of organisms that
                 even the best commissioned artists of a century ago;
                 and primitive candle-lit microscopes have been replaced
                 by an array of technologies ranging from scanning
                 electron microscopy to DNA sequencing. But the basic
                 paradigm remains the same. Perhaps the most
                 revolutionary change of all -- which we are still in
                 the midst of, and which has not yet been fully realized
                 -- is the means by which taxonomists manage and
                 communicate the information of their trade. The rapid
                 evolution in recent decades of computer database
                 management software, and of information dissemination
                 via the Internet, have both dramatically improved the
                 potential for streamlining the entire taxonomic
                 process. Unfortunately, the potential still largely
                 exceeds the reality. The vast majority of taxonomic
                 information is either not yet digitized, or digitized
                 in a form that does not allow direct and easy access.
                 Moreover, the information that is easily accessed in
                 digital form is not yet seamlessly interconnected. In
                 an effort to bring reality closer to potential, a loose
                 affiliation of major taxonomic resources, including
                 GBIF, the Encyclopedia of Life, NBII, Catalog of Life,
                 ITIS, IPNI, ICZN, Index Fungorum, and many others have
                 been crafting a ``Global Names Architecture'' (GNA).
                 The intention of the GNA is not to replace any of the
                 existing taxonomic data initiatives, but rather to
                 serve as a dynamic index to interconnect them in a way
                 that streamlines the entire taxonomic enterprise: from
                 gathering specimens in the field, to publication of new
                 taxa and related data.",
  author       = "Richard Pyle",
  doi          = "10.3897/zookeys.550.10009",
  file         = ":home/dimus/.local/share/data/Mendeley Ltd./Mendeley
                 Desktop/Downloaded/Pyle - 2016 - Towards a Global Names
                 Architecture The future of indexing scientific
                 names.pdf:pdf",
  ISSN         = "1313-2970",
  journal      = "ZooKeys",
  keywords     = "Biodiversity Data,Carl Linnaeus,Charles Davies
                 Sherborn,Global Names Index,Global Names Usage
                 Bank,Taxonomy,ZooBank Biodiversity Library",
  language     = "en",
  month        = jan,
  pages        = "261--281",
  publisher    = "Pensoft Publishers",
  title        = "{Towards a Global Names Architecture: The future of
                 indexing scientific names}",
  URL          = "http://zookeys.pensoft.net/articles.php?id=6241\&display\_type=element\&element\_type=8\&element\_id=592\&element\_name=",
  volume       = "550",
  year         = "2016",
}

@article{Remsen2016,
  author    = {Remsen, David},
  title     = {{The use and limits of scientific names in biological informatics}},
  journal   = {ZooKeys},
  volume    = {550},
  pages     = {207--223},
  month     = jan,
  year      = {2016},
  publisher = {Pensoft Publishers},
  issn      = {1313-2970},
  doi       = {10.3897/zookeys.550.9546},
  url       = {http://zookeys.pensoft.net/articles.php?id=6234},
  language  = {en},
  file      = {:home/dimus/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Remsen - 2016 - The use and limits of scientific names in biological informatics.pdf:pdf},
}

@Article{Patterson2016,
  author       = "David Patterson and Dmitry Mozzherin and David
                 Shorthouse and Anne Thessen",
  doi          = "10.3897/BDJ.4.e8080",
  file         = ":home/dimus/.local/share/data/Mendeley Ltd./Mendeley
                 Desktop/Downloaded/Patterson et al. - 2016 - Challenges
                 with using names to link digital biodiversity
                 information.pdf:pdf",
  abstract     = "The need for a names-based cyber-infrastructure for
                 digital biology is based on the argument that
                 scientific names serve as a standardized metadata
                 system that has been used consistently and near
                 universally for 250 years. As we move towards
                 data-centric biology, name-strings can be called on to
                 discover, index, manage, and analyze accessible digital
                 biodiversity information from multiple sources. Known
                 impediments to the use of scientific names as metadata
                 include synonyms, homonyms, mis-spellings, and the use
                 of other strings as identifiers. We here compare the
                 name-strings in GenBank, Catalogue of Life (CoL), and
                 the Dryad Digital Repository (DRYAD) to assess the
                 effectiveness of the current names-management toolkit
                 developed by Global Names to achieve interoperability
                 among distributed data sources. New tools that have
                 been used here include Parser (to break name-strings
                 into component parts and to promote the use of
                 canonical versions of the names), a modified TaxaMatch
                 fuzzy-matcher (to help manage typographical,
                 transliteration, and OCR errors), and Cross-Mapper (to
                 make comparisons among data sets). The data sources
                 include scientific names at multiple ranks; vernacular
                 (common) names; acronyms; strain identifiers and other
                 surrogates including idiosyncratic abbreviations and
                 concatenations. About 40\% of the name-strings in
                 GenBank are scientific names representing about 400,000
                 species or infraspecies and their synonyms. Of the
                 formally-named terminal taxa (species and lower taxa)
                 represented, about 82\% have a match in CoL. Using a
                 subset of content in DRYAD, about 45\% of the
                 identifiers are names of species and infraspecies, and
                 of these only about a third have a match in CoL. With
                 simple processing, the extent of matching between DRYAD
                 and CoL can be improved to over 90\%. The findings
                 confirm the necessity for name-processing tools and the
                 value of scientific names as a mechanism to
                 interconnect distributed data, and identify specific
                 areas of improvement for taxonomic data sources. Some
                 areas of diversity (bacteria and viruses) are not well
                 represented by conventional scientific names, and they
                 and other forms of strings (acronyms, identifiers, and
                 other surrogates) that are used instead of names need
                 to be managed in reconciliation services (mapping
                 alternative name-strings for the same taxon together).
                 On-line resolution services will bring older scientific
                 names up to date or convert surrogate name-strings to
                 scientific names should such names exist. Examples are
                 given of many of the aberrant forms of `names' that
                 make their way into these databases. The occurrence of
                 scientific names with incorrect authors, such as
                 chresonyms within synonymy lists, is a quality-control
                 issue in need of attention. We propose a
                 future-proofing solution that will empower stakeholders
                 to take advantage of the name-based infrastructure at
                 little cost. This proposed infrastructure includes a
                 standardized system that adopts or creates UUIDs for
                 name-strings, software that can identify name-strings
                 in sources and apply the UUIDs, reconciliation and
                 resolution services to manage the name-strings, and an
                 annotation environment for quality control by users of
                 name-strings.",
  ISSN         = "1314-2828",
  journal      = "Biodiversity Data Journal",
  language     = "en",
  month        = may,
  pages        = "e8080",
  publisher    = "Pensoft Publishers",
  title        = "Challenges with using names to link digital
                 biodiversity information",
  URL          = "http://bdj.pensoft.net/articles.php?id=8080",
  volume       = "4",
  year         = "2016",
  webnote      = "We cross-map names from Catalogue of Life, DRYAD and
                 GenBank and show that while exact string-matching
                 allowed to connect less than 15\% of names, usage of
                 Global Names tools increased the success rate up to
                 90\%",
}

% Manuscript in preparation. The year field carries a hidden sort key:
% \noop (defined in the Preamble of this file) discards its argument when
% typeset, and the outer braces make it a BibTeX "special character" so
% that only "2016" (not the letters of the macro name) enters the sort key.
@Article{Mozzherin:inpress-a,
  author       = "Mozzherin, D. and Myltsev, A. and Patterson, D.",
  journal      = "BMC Bioinformatics",
  title        = "{GNParser}: a powerful scientific names parser",
  year         = "{\noop{2016}}in preparation",
}

@article{Zermoglio2016,
  author    = {Zermoglio, Paula F and Guralnick, Robert P and
               Wieczorek, John R},
  title     = {{A Standardized Reference Data Set for Vertebrate
               Taxon Name Resolution}},
  journal   = {PLoS One},
  year      = {2016},
  month     = jan,
  volume    = {11},
  number    = {1},
  pages     = {1--20},
  publisher = {Public Library of Science},
  issn      = {1932-6203},
  doi       = {10.1371/journal.pone.0146894},
  pmid      = {26760296},
  url       = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0146894},
  file      = {:home/dimus/.local/share/data/Mendeley Ltd./Mendeley
               Desktop/Downloaded/Zermoglio, Guralnick, Wieczorek -
               2016 - A Standardized Reference Data Set for Vertebrate
               Taxon Name Resolution.pdf:pdf},
  abstract  = {Taxonomic names associated with digitized
               biocollections labels have flooded into repositories
               such as GBIF, iDigBio and VertNet. The names on these
               labels are often misspelled, out of date, or present
               other problems, as they were often captured only once
               during accessioning of specimens, or have a history of
               label changes without clear provenance. Before records
               are reliably usable in research, it is critical that
               these issues be addressed. However, still missing is an
               assessment of the scope of the problem, the effort
               needed to solve it, and a way to improve effectiveness
               of tools developed to aid the process. We present a
               carefully human-vetted analysis of 1000 verbatim
               scientific names taken at random from those published
               via the data aggregator VertNet, providing the first
               rigorously reviewed, reference validation data set. In
               addition to characterizing formatting problems, human
               vetting focused on detecting misspelling, synonymy, and
               the incorrect use of Darwin Core. Our results reveal a
               sobering view of the challenge ahead, as less than
               47{\%} of name strings were found to be currently
               valid. More optimistically, nearly 97{\%} of name
               combinations could be resolved to a currently valid
               name, suggesting that computer-aided approaches may
               provide feasible means to improve digitized content.
               Finally, we associated names back to biocollections
               records and fit logistic models to test potential
               drivers of issues. A set of candidate variables
               (geographic region, year collected, higher-level clade,
               and the institutional digitally accessible data volume)
               and their 2-way interactions all predict the
               probability of records having taxon name issues, based
               on model selection approaches. We strongly encourage
               further experiments to use this reference data set as a
               means to compare automated or computer-aided taxon name
               tools for their ability to resolve and improve the
               existing wealth of legacy data.},
}

% NOTE(review): the exported doi (10.2307/1216144) and ISSN (00400262) did
% not match the journal named in this entry (the ISSN is that of Taxon);
% they are replaced with the values believed to belong to Biodiversity
% Informatics. Confirm both against the publisher page given in URL.
@Article{Sautter2006,
  abstract     = "Most of the literature on natural history is hidden in
                 millions of pages stacked up in our libraries. Various
                 initiatives aim now at making these publications
                 digitally accessible and searchable, applying xml-mark
                 up technologies. The unique biological names play a
                 crucial role to link content related to a particular
                 taxon. Thus discovering and marking them up is
                 extremely important. Since their manual extraction and
                 markup is cumbersome and time-intensive, it needs be
                 automated. In this paper, we present computational
                 linguistics techniques and evaluate how they can help
                 to extract taxonomic names automatically. We build on
                 an existing approach for extraction of such names
                 (Koning et al. 2005) and combine it with several other
                 learning techniques. We apply them to the texts
                 sequentially so that each technique can use the results
                 from the preceding ones. In particular, we use
                 structural rules, dynamic lexica with fuzzy lookups,
                 and word-level language recognition. We use legacy
                 documents from different sources and times as test bed
                 for our evaluation. The experimental results for our
                 combining approach (FAT) show greater than 99{\%}
                 precision and recall. They reveal the potential of
                 computational linguistics techniques towards an
                 automated markup of biosystematics publications.",
  author       = "Guido Sautter and Klemens B{\"{o}}hm and Donat
                 Agosti",
  doi          = "10.17161/bi.v3i0.34",
  file         = ":home/dimus/.local/share/data/Mendeley Ltd./Mendeley
                 Desktop/Downloaded/Sautter, B{\"{o}}hm, Agosti - 2006 -
                 A combining approach to find all taxon names
                 (FAT).pdf:pdf",
  ISSN         = "1546-9735",
  journal      = "Biodiversity Informatics",
  keywords     = "american,becoming a
                 major,bhl,biodiversity,biosystematics
                 literature,digital library,e,edu,g,heritage
                 library,issue,mass digitization,named entity
                 recognition,si,systematics,taxonomic name
                 extraction,www",
  language     = "en",
  month        = dec,
  pages        = "46--58",
  title        = "A combining approach to {Find All} taxon names ({FAT})
                 in legacy biosystematics literature",
  URL          = "https://journals.ku.edu/index.php/jbi/article/view/34/19",
  volume       = "3",
  year         = "2006",
}

@article{Kluyver2013,
  author    = {Kluyver, Thomas A. and Osborne, Colin P.},
  title     = {Taxonome: a software package for linking biological
               species data},
  journal   = {Ecology and Evolution},
  year      = {2013},
  volume    = {3},
  number    = {5},
  pages     = {1262--1265},
  issn      = {2045-7758},
  doi       = {10.1002/ece3.529},
  url       = {http://dx.doi.org/10.1002/ece3.529},
  keywords  = {Binomials, fuzzy matching, name matching, synonyms},
}