-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgnparser.bib
2082 lines (1988 loc) · 104 KB
/
gnparser.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% Preamble: defines \noop, a one-argument macro that expands to nothing.
% \providecommand is used so the definition is skipped (instead of raising
% an error) when \noop already exists, e.g. if the preamble is loaded twice.
@Preamble{"\providecommand{\noop}[1]{}"}
% International Code of Phylogenetic Nomenclature.
% The two authors are separated with " and " so each is parsed as its own name.
@Book{ICPN,
  author = {Cantino, P. D. and de Queiroz, K.},
  title  = {{International Code of Phylogenetic Nomenclature}},
  year   = {2010},
  pages  = {1--102},
}
% International Code of Nomenclature for Cultivated Plants, eighth edition.
% Edited volume: the names live in `editor` (the style prints "eds."), the
% remaining editors are expressed with "and others", and ISHS is the publisher.
@Book{ICNCP,
  editor    = {Brickell, C. D. and others},
  title     = {{International Code of Nomenclature for Cultivated
               Plants}},
  publisher = {ISHS},
  edition   = {Eighth},
  year      = {2009},
  pages     = {1--184},
  ISBN      = {978-0-643-09440-6},
}
% Ninth Report of the International Committee on Taxonomy of Viruses.
% Edited volume: editors are listed individually in `editor`; publisher and
% city are split into `publisher` / `address`; no trailing period in the title.
@Book{ICTV,
  editor    = {King, A. M. Q. and Adams, M. J. and Carstens, E. B. and
               Lefkowitz, E. J.},
  title     = {{Virus taxonomy: classification and nomenclature of
               viruses: Ninth Report of the International Committee on
               Taxonomy of Viruses}},
  publisher = {Elsevier},
  address   = {San Diego},
  year      = {2011},
  pages     = {1--1338},
  ISBN      = {0-12-384684-6},
}
% International Code of Nomenclature of Bacteria (1990 Revision).
% Edited volume: editors are in `editor` in "Last, First" form, the truncated
% list ends with "and others", and ASM Press is recorded as the publisher.
@Book{ICNB,
  editor    = {Lapage, S. P. and Sneath, P. H. A. and Lessel, E. F. and
               others},
  title     = {{International Code of Nomenclature of Bacteria:
               Bacteriological Code, 1990 Revision}},
  publisher = {ASM Press},
  year      = {1992},
  pages     = {1--232},
  ISBN      = {1-55581-039-X},
}
% International Code of Zoological Nomenclature, fourth edition.
% The author is a corporate body, so the name is wrapped in an extra brace
% pair to keep it as one indivisible name.
@Book{ICZN,
  author    = {{International Commission on Zoological
               Nomenclature}},
  title     = {{International Code of Zoological Nomenclature}},
  publisher = {The International Trust for Zoological Nomenclature},
  edition   = {Fourth},
  year      = {1999},
  pages     = {1--336},
  ISBN      = {0-85301-006-4},
}
% Flora of North America, Vol. 23 (Cyperaceae).
% The editing body is a corporate name (extra braces keep it whole) stored in
% `editor`; publisher and city are split into `publisher` / `address`.
@Book{FNA2002,
  editor    = {{Flora of North America Editorial Committee}},
  title     = {{Flora of North America. Vol. 23, Magnoliophyta:
               Commelinidae (in part): Cyperaceae}},
  publisher = {Oxford University Press},
  address   = {New York},
  year      = {2002},
  pages     = {551},
  annote    = {Citation for Carex scirpoidea subsp. convoluta
               (K{\"{u}}k.) D.A. Dunlop},
}
% Aho and Ullman, Foundations of Computer Science.
% The city is moved out of `publisher` into `address`; names use the
% unambiguous "Last, First" form, matching the aho1972theory entry.
@Book{aho1992foundations,
  author    = {Aho, Alfred V. and Ullman, Jeffrey D.},
  title     = {Foundations of Computer Science},
  volume    = {2},
  publisher = {Computer Science Press},
  address   = {New York, USA},
  year      = {1992},
}
% Charniak, Statistical language learning (book).
% "MIT" is brace-protected so styles do not lower-case the acronym.
@Book{charniak1996statistical,
  title     = "Statistical language learning",
  author    = "Charniak, Eugene",
  publisher = "{MIT} press",
  address   = "USA",
  year      = 1996,
  isbn      = "978-0-262-53141-2",
}
% Aho and Ullman, The Theory of Parsing, Translation, and Compiling (book).
@Book{aho1972theory,
  title     = "The Theory of Parsing, Translation, and Compiling",
  author    = "Alfred V. Aho and Jeffrey D. Ullman",
  publisher = "Prentice-Hall, Inc.",
  address   = "Upper Saddle River, NJ, USA",
  year      = 1972,
  isbn      = "0-13-914556-7",
}
% Asveld, fuzzy approach to erroneous inputs in context-free recognition
% (IWPT'95 paper). Initials are space-separated so BibTeX sees three given-name
% tokens instead of the single token "P.R.J.".
@Inproceedings{asveld1995fuzzy,
  author    = {Asveld, P. R. J.},
  title     = {A Fuzzy Approach to Erroneous Inputs in Context-Free Language Recognition},
  booktitle = {Proceedings of the Fourth International Workshop on Parsing Technologies IWPT'95},
  publisher = {Institute of Formal and Applied Linguistics, Charles University},
  address   = {Prague, Czech Republic},
  year      = {1995},
  pages     = {14--25},
}
% Nadeau and Sekine, survey of named entity recognition and classification
% (journal article).
@Article{nadeau2007survey,
  author    = "David {Nadeau} and Satoshi {Sekine}",
  title     = "A survey of named entity recognition and classification",
  journal   = "Lingvisticae Investigationes",
  publisher = "John Benjamins publishing company",
  year      = 2007,
  volume    = 30,
  number    = 1,
  pages     = "3--26",
  ISSN      = "0378-4169",
  doi       = "10.1075/li.30.1.03nad"
}
% Vanden Berghe et al., BiOnym taxon name matching workflow (journal article).
% "Vanden Berghe" is a two-word surname and is braced so it is not split.
% ISSN carries its hyphen; the title is single-braced (it has no words that
% need case protection).
@Article{VandenBerghe2015,
  author   = {{Vanden Berghe}, Edward and Coro, Gianpaolo and Bailly,
              Nicolas and Fiorellato, Fabio and Aldemita, Caselyn and
              Ellenbroek, Anton and Pagano, Pasquale},
  title    = {Retrieving taxa names from large biodiversity data
              collections using a flexible matching workflow},
  journal  = {Ecological Informatics},
  volume   = {28},
  pages    = {29--41},
  year     = {2015},
  doi      = {10.1016/j.ecoinf.2015.05.004},
  ISSN     = {1574-9541},
  URL      = {http://www.sciencedirect.com/science/article/pii/S1574954115000825},
  keywords = {Name matcher chain,Taxon name matching,Taxon name
              parsing,Taxonomic Authority File,Taxonomic
              nomenclature,Taxonomy},
  file     = {:home/dimus/dl/vanden berghe et al 2015.pdf:pdf},
  abstract = {In the domain of biological classification there are
              several taxon name matching services that can search
              for a species scientific name in a large collection of
              taxonomic names. Many of these services are available
              online, and many others run on computers of individual
              scientists. While these systems may work very well,
              most suffer from the fact that the list of names used
              as a reference, and the criteria to decide on a match,
              are hard-coded in the engine that performs the name
              matching. In this paper we present BiOnym, a taxon name
              matching system that separates reference namelists,
              search criteria and matching engine. The user is
              offered a choice of several taxonomic reference lists,
              including the option to upload his/her own list onto
              the system. Furthermore, BiOnym is a flexible workflow,
              which embeds and combines techniques using lexical
              matching algorithms as well as expert knowledge. It is
              also an open platform allowing developers to contribute
              with new techniques. In this paper we demonstrate the
              benefits brought by this approach in terms of the
              efficiency and effectiveness of the information
              retrieval process with respect to other solutions.},
}
% Lease and Charniak, Parsing biomedical literature (LNCS vol. 3651).
% The author field was scrambled by the exporter; it now lists the two
% authors in "Last, First" form separated by " and ". `volume` is the plain
% number, the journal name drops the subseries suffix, and the copyright sign
% in the abstract is a LaTeX macro instead of a raw non-ASCII character.
@Article{LeaseM.2005,
  author   = {Lease, M. and Charniak, E.},
  title    = {{Parsing biomedical literature}},
  journal  = {Lecture Notes in Computer Science},
  volume   = {3651},
  pages    = {58--69},
  year     = {2005},
  doi      = {10.1007/11562214{\_}6},
  ISBN     = {978-3-540-29172-5},
  ISSN     = {0302-9743},
  URL      = {http://www.scopus.com/inward/record.url?eid=2-s2.0-33646016255{\&}partnerID=40{\&}md5=de9cc68600322e7a88fdd7fa23ea3345},
  keywords = {Bibliographic retrieval systems,Biomedical
              engineering; Error analysis; Informatio,Dictionary
              collocations; Error reduction; Lexical},
  abstract = {We present a preliminary study of several parser
              adaptation techniques evaluated on the GENIA corpus of
              MEDLINE abstracts [1,2]. We begin by observing that the
              Penn Treebank (PTB) is lexically impoverished when
              measured on various genres of scientific and technical
              writing, and that this significantly impacts parse
              accuracy. To resolve this without requiring in-domain
              treebank data, we show how existing domain-specific
              lexical resources may be leveraged to augment
              PTB-training: part-of-speech tags, dictionary
              collocations, and named-entities. Using a
              state-of-the-art statistical parser [3] as our
              baseline, our lexically-adapted parser achieves a
              14.2{\%} reduction in error. With oracle-knowledge of
              named-entities, this error reduction improves to
              21.2{\%}. {\copyright} Springer-Verlag Berlin Heidelberg 2005.},
}
% Schmidhuber, overview of deep learning in neural networks (journal article).
@Article{schmidhuber2015deep,
  author    = "J{\"u}rgen {Schmidhuber}",
  title     = "Deep learning in neural networks: An overview",
  journal   = "Neural networks",
  publisher = "Elsevier",
  year      = 2015,
  volume    = 61,
  pages     = "85--117"
}
% Ford, Packrat Parsing (ICFP paper, also on arXiv as cs/0603077).
% Old-style arXiv identifiers include the archive prefix, so `eprint` is
% "cs/0603077" (matching the `arxivid` and `URL` fields). Keywords no longer
% contain fragments of the copyright notice; ISSN carries its hyphen.
@Article{Ford2006,
  author        = {Ford, Bryan},
  title         = {{Packrat Parsing: Simple, Powerful, Lazy, Linear
                   Time}},
  journal       = {ICFP},
  pages         = {12},
  year          = {2006},
  doi           = {10.1145/581478.581483},
  archiveprefix = {arXiv},
  eprint        = {cs/0603077},
  primaryclass  = {cs},
  arxivid       = {cs/0603077},
  ISBN          = {1-58113-487-8},
  ISSN          = {0362-1340},
  URL           = {http://arxiv.org/abs/cs/0603077},
  keywords      = {backtracking,haskell,lexical analysis,memoization,parser
                   combinators,scannerless parsing,top-down parsing},
  abstract      = {Packrat parsing is a novel technique for implementing
                   parsers in a lazy functional programming language. A
                   packrat parser provides the power and flexibility of
                   top-down parsing with backtracking and unlimited
                   lookahead, but nevertheless guarantees linear parse
                   time. Any language defined by an LL(k) or LR(k) grammar
                   can be recognized by a packrat parser, in addition to
                   many languages that conventional linear-time algorithms
                   do not support. This additional power simplifies the
                   handling of common syntactic idioms such as the
                   widespread but troublesome longest-match rule, enables
                   the use of sophisticated disambiguation strategies such
                   as syntactic and semantic predicates, provides better
                   grammar composition properties, and allows lexical
                   analysis to be integrated seamlessly into parsing. Yet
                   despite its power, packrat parsing shares the same
                   simplicity and elegance as recursive descent parsing;
                   in fact converting a backtracking recursive descent
                   parser into a linear-time packrat parser often involves
                   only a fairly straightforward structural change. This
                   paper describes packrat parsing informally with
                   emphasis on its use in practical applications, and
                   explores its advantages and disadvantages with respect
                   to the more conventional alternatives.},
}
% Grimm, Practical packrat parsing (NYU technical report TR2004-854).
% Filed as a technical report: the issuing body goes in `institution` and the
% report id in `number` instead of being packed into `journal`. The URL query
% separators are plain ampersands (the exporter had leaked HTML "amp;" text).
@TechReport{Grimm2004,
  author      = {Grimm, Robert},
  title       = {{Practical packrat parsing}},
  institution = {New York University, Dept. of Computer Science},
  number      = {TR2004-854},
  pages       = {12},
  year        = {2004},
  URL         = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.82.675{\&}rep=rep1{\&}type=ps},
  keywords    = {extensible programming languages,packrat
                 parsers,parser generators,parsing expression grammars},
  abstract    = {A considerable number of research projects are
                 exploring how to extend object-oriented programming
                 languages such as Java with, for example, support for
                 generics, multiple dispatch, or pattern matching. To
                 keep up with these changes, language implementors need
                 appropriate tools. In this context, easily extensible
                 parser generators are especially important because
                 parsing program sources is a necessary first step for
                 any language processor, be it a compiler,
                 syntax-highlighting editor, or API documentation
                 generator. Unfortunately, context-free grammars and the
                 corresponding LR or LL parsers, while well understood
                 and widely used, are also unnecessarily hard to extend.
                 To address this lack of appropriate tools, we introduce
                 Rats!, a parser generator for Java that supports easily
                 modifiable grammars and avoids the complexities
                 associated with altering LR or LL grammars. Our work
                 builds on recent research on packrat parsers, which are
                 recursive descent parsers that perform backtracking but
                 also memoize all intermediate results (hence their
                 name), thus ensuring linear-time performance. Our work
                 makes this parsing technique, which has been developed
                 in the context of functional programming languages,
                 practical for object-oriented languages. Furthermore,
                 our parser generator supports simpler grammar
                 specifications and more convenient error reporting,
                 while also producing better performing parsers through
                 aggressive optimizations. In this paper, we motivate
                 the need for more easily extensible parsers, describe
                 our parser generator and its optimizations in detail,
                 and present the results of our experimental
                 evaluation.},
}
% Grune and Jacobs, Parsing Techniques: A Practical Guide (book).
% The redundant `booktitle` (a copy of the title) is dropped. `edition`
% follows the abstract ("This second edition ..."). `publisher` is supplied
% because the book type requires it.
% NOTE(review): publisher inferred from the 10.1007 DOI prefix - confirm.
@Book{Grune2008,
  author    = {Grune, Dick and Jacobs, Ceriel J. H.},
  title     = {{Parsing techniques: A practical guide}},
  publisher = {Springer},
  edition   = {Second},
  year      = {2008},
  pages     = {1--662},
  doi       = {10.1007/978-0-387-68954-8},
  ISBN      = {978-0-387-20248-8},
  abstract  = {Parsing, also referred to as syntax analysis, has been
               and continues to be an essential part of computer
               science and linguistics. Today, parsing techniques are
               also implemented in a number of other disciplines,
               including but not limited to, document preparation and
               conversion, typesetting chemical formulae, and
               chromosome recognition. This second edition presents
               new developments and discoveries that have been made in
               the field. Parsing techniques have grown considerably
               in importance, both in computational linguistics where
               such parsers are the only option, and computer science,
               where advanced compilers often use general CF parsers.
               Parsing techniques provide a solid basis for compiler
               construction and contribute to all existing software:
               enabling Web browsers to analyze HTML pages and
               PostScript printers to analyze PostScript. Some of the
               more advanced techniques are used in code generation in
               compilers and in data compression. In linguistics, the
               importance of formal grammars was recognized early on,
               but only recently have the corresponding parsing
               techniques been applied. Also their importance as
               general pattern recognizers is slowly being
               acknowledged. This text Parsing Techniques explores new
               developments, such as generalized deterministic
               parsing, linear-time substring parsing, parallel
               parsing, parsing as intersection, non-canonical
               methods, and non-Chomsky systems. To provide readers
               with low-threshold access to the full field of parsing
               techniques, this new edition uses a two-tiered
               structure. The basic ideas behind the dozen or so
               existing parsing techniques are explained in an
               intuitive and narrative style, and problems are
               presented at the conclusion of each chapter, allowing
               the reader to step outside the bounds of the covered
               material and explore parsing techniques at various
               levels. The reader is also provided with an extensive
               annotated bibliography as well as hints and partial
               solutions to a number of problems. In the bibliography,
               hundreds of realizations and improvements of parsing
               techniques are explained in a much terser, yet still
               informal, style, improving its readability and
               usability. The reader should have an understanding of
               algorithmic thinking, especially recursion; however,
               knowledge of any particular programming language is not
               required.},
}
% Penev et al., semantic tagging of systematics papers (ZooKeys article).
% Author fixes: "Robert A Morris" (a lower-case "a" would be parsed as a
% von-particle of the surname) and the surname "Rycroft". The bogus ISBN is
% dropped (a journal article has none) and the title has no trailing period.
% NOTE(review): the issue number 50 is stored as `volume` and the former
% volume value 16 (which matched the last page) is dropped - confirm.
@Article{Penev2010,
  author   = {Lyubomir Penev and Donat Agosti and Teodor Georgiev
              and Terry Catapano and Jeremy Miller and Vladimir
              Blagoderov and David Roberts and Vincent S Smith and
              Irina Brake and Simon Rycroft and Ben Scott and Norman
              F Johnson and Robert A Morris and Guido Sautter and
              Vishwas Chavan and Tim Robertson and David Remsen and
              Pavel Stoev and Cynthia Parr and Sandra Knapp and W
              John Kress and Chris F Thompson and Terry Erwin},
  title    = {{Semantic tagging of and semantic enhancements to
              systematics papers: ZooKeys working examples}},
  journal  = {ZooKeys},
  volume   = {50},
  pages    = {1--16},
  year     = {2010},
  doi      = {10.3897/zookeys.50.538},
  ISSN     = {1313-2970},
  pmid     = {21594113},
  URL      = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3088020{\&}tool=pmcentrez{\&}rendertype=abstract},
  file     = {:home/dimus/.local/share/data/Mendeley Ltd./Mendeley
              Desktop/Downloaded/Penev et al. - 2010 - Semantic
              tagging of and semantic enhancements to systematics
              papers ZooKeys working examples.pdf:pdf},
  abstract = {The concept of semantic tagging and its potential for
              semantic enhancements to taxonomic papers is outlined
              and illustrated by four exemplar papers published in
              the present issue of ZooKeys. The four papers were
              created in different ways: (i) written in Microsoft
              Word and submitted as non-tagged manuscript (doi:
              10.3897/zookeys.50.504); (ii) generated from
              Scratchpads and submitted as XML-tagged manuscripts
              (doi: 10.3897/zookeys.50.505 and doi:
              10.3897/zookeys.50.506); (iii) generated from an
              author's database (doi: 10.3897/zookeys.50.485) and
              submitted as XML-tagged manuscript. XML tagging and
              semantic enhancements were implemented during the
              editorial process of ZooKeys using the Pensoft Mark Up
              Tool (PMT), specially designed for this purpose. The
              XML schema used was TaxPub, an extension to the
              Document Type Definitions (DTD) of the US National
              Library of Medicine Journal Archiving and Interchange
              Tag Suite (NLM). The following innovative methods of
              tagging, layout, publishing and disseminating the
              content were tested and implemented within the ZooKeys
              editorial workflow: (1) highly automated, fine-grained
              XML tagging based on TaxPub; (2) final XML output of
              the paper validated against the NLM DTD for archiving
              in PubMedCentral; (3) bibliographic metadata embedded
              in the PDF through XMP (Extensible Metadata Platform);
              (4) PDF uploaded after publication to the Biodiversity
              Heritage Library (BHL); (5) taxon treatments supplied
              through XML to Plazi; (6) semantically enhanced HTML
              version of the paper encompassing numerous internal and
              external links and linkouts, such as: (i) vizualisation
              of main tag elements within the text (e.g., taxon
              names, taxon treatments, localities, etc.); (ii)
              internal cross-linking between paper sections,
              citations, references, tables, and figures; (iii)
              mapping of localities listed in the whole paper or
              within separate taxon treatments; (v) taxon names
              autotagged, dynamically mapped and linked through the
              Pensoft Taxon Profile (PTP) to large international
              database services and indexers such as Global
              Biodiversity Information Facility (GBIF), National
              Center for Biotechnology Information (NCBI), Barcode of
              Life (BOLD), Encyclopedia of Life (EOL), ZooBank,
              Wikipedia, Wikispecies, Wikimedia, and others; (vi)
              GenBank accession numbers autotagged and linked to
              NCBI; (vii) external links of taxon names to references
              in PubMed, Google Scholar, Biodiversity Heritage
              Library and other sources. With the launching of the
              working example, ZooKeys becomes the first taxonomic
              journal to provide a complete XML-based editorial,
              publication and dissemination workflow implemented as a
              routine and cost-efficient practice. It is anticipated
              that XML-based workflow will also soon be implemented
              in botany through PhytoKeys, a forthcoming partner
              journal of ZooKeys. The semantic markup and
              enhancements are expected to greatly extend and
              accelerate the way taxonomic information is published,
              disseminated and used.},
}
% Mora et al., How many species are there on Earth and in the ocean?
% (PLoS Biology article). The former ISBN field held two ISSNs joined by an
% escaped newline; only the hyphenated linking ISSN is kept, in `ISSN`.
% Non-ASCII "approximately" and "plus-minus" signs in the abstract are written
% as LaTeX math so classic BibTeX toolchains handle them.
@Article{Mora2011,
  author   = {Mora, Camilo and Tittensor, Derek P. and Adl, Sina and
              Simpson, Alastair G. B. and Worm, Boris},
  title    = {{How many species are there on Earth and in the
              ocean?}},
  journal  = {PLoS Biology},
  volume   = {9},
  number   = {8},
  pages    = {1--8},
  year     = {2011},
  doi      = {10.1371/journal.pbio.1001127},
  ISSN     = {1544-9173},
  pmid     = {21886479},
  abstract = {The diversity of life is one of the most striking
              aspects of our planet; hence knowing how many species
              inhabit Earth is among the most fundamental questions
              in science. Yet the answer to this question remains
              enigmatic, as efforts to sample the world's
              biodiversity to date have been limited and thus have
              precluded direct quantification of global species
              richness, and because indirect estimates rely on
              assumptions that have proven highly controversial. Here
              we show that the higher taxonomic classification of
              species (i.e., the assignment of species to phylum,
              class, order, family, and genus) follows a consistent
              and predictable pattern from which the total number of
              species in a taxonomic group can be estimated. This
              approach was validated against well-known taxa, and
              when applied to all domains of life, it predicts {$\sim$}8.7
              million ({$\pm$}1.3 million SE) eukaryotic species globally,
              of which {$\sim$}2.2 million ({$\pm$}0.18 million SE) are marine.
              In spite of 250 years of taxonomic classification and
              over 1.2 million species already catalogued in a
              central database, our results suggest that some 86{\%}
              of existing species on Earth and 91{\%} of species in
              the ocean still await description. Renewed interest in
              further exploration and taxonomy is required if this
              significant gap in our knowledge of life on Earth is to
              be closed.},
}
% International Code of Nomenclature for algae, fungi, and plants
% (Melbourne Code). Each of the fourteen authors is its own "Last, First"
% name joined by " and "; the compound surname is braced so it stays whole.
% The series name and number are moved out of the title into `series` /
% `volume`, and trailing web-page residue is removed from the abstract.
@Book{ICN,
  author    = {McNeill, J. and Barrie, F. R. and Buck, W. R. and
               Demoulin, V. and Greuter, W. and Hawksworth, D. L. and
               Herendeen, P. S. and Knapp, S. and Marhold, K. and
               Prado, J. and {Prud'homme van Reine}, W. F. and
               Smith, G. F. and Wiersema, J. H. and Turland, N. J.},
  title     = {{International Code of Nomenclature for algae, fungi,
               and plants (Melbourne Code)}},
  series    = {Regnum Vegetabile},
  volume    = {154},
  publisher = {Koeltz Scientific Books},
  year      = {2012},
  pages     = {1--140},
  file      = {:home/dimus/.local/share/data/Mendeley Ltd./Mendeley
               Desktop/Downloaded/Mcneill et al. - 2012 -
               International Code of Nomenclature for algae, fungi,
               and plants (Melbourne Code). Regnum Vegetabile
               154.pdf:pdf},
  abstract  = {This International Code of Nomenclature for algae,
               fungi, and plants (ICN) replaces the formerly called
               International Code of Botanical Nomenclature (ICBN).
               The new code (Melbourne Code) took effect on July 23rd,
               2011, when it was ratified by the International
               Botanical Congress. - The title of the code was changed
               to show that it does not apply to plants only, but also
               to algae and fungi.},
}
% Linnaeus, Systema Naturae (1767 entry).
% Latin typos in `title` and `journal` are corrected to match the full name
% quoted in the abstract ("naturae", "secundum"); the ordinal indicator in the
% abstract is a LaTeX macro instead of a raw non-ASCII character.
% NOTE(review): entry type, `number`, ISBN, ISSN and DOI look like reference
% manager auto-fill for a 1767 work - verify against the actual edition.
@Article{Linnaeus1767,
  author   = {C Linnaeus},
  title    = {{Systema Naturae per regna tria naturae, secundum
              classes, ordines, genera, species, cum characteribus,
              Differentiis, Synonymis, Locis}},
  journal  = {Systema Naturae per regna tria naturae, secundum
              classes, ordines, genera, species, cum characteribus,
              Differentiis, Synonymis, Locis},
  volume   = {12},
  number   = {August},
  pages    = {586},
  year     = {1767},
  doi      = {10.5962/bhl.title.542},
  ISBN     = {0-00-567100-0},
  ISSN     = {1524-4539},
  keywords = {species},
  abstract = {O Systema Naturae (de nome completo: Systema naturae
              per regna tria naturae, secundum classes, ordines,
              genera, species, cum characteribus differentiis,
              synonymis, locis) foi um livro escrito por Lineu, no
              qual o autor faz a delinea{\c{c}}{\~{a}}o das suas
              ideias para uma classifica{\c{c}}{\~{a}}o
              hier{\'{a}}rquica das esp{\'{e}}cies. Foi um livro
              publicado em latim. A primeira edi{\c{c}}{\~{a}}o foi
              de 1735. A primeira edi{\c{c}}{\~{a}}o continha apenas
              10 p{\'{a}}ginas. Na sua 13{\textordfeminine} edi{\c{c}}{\~{a}}o, em
              1770, tinha j{\'{a}} 3000 p{\'{a}}ginas. A 10{\textordfeminine}
              edi{\c{c}}{\~{a}}o do Systema Naturae de Linnaeus, 1758
              {\'{e}} o trabalho que iniciou a aplica{\c{c}}{\~{a}}o
              geral da nomenclatura binomial zool{\'{o}}gica.
              Portanto, esta data {\'{e}} aceita como ponto de
              partida da nomenclatura zool{\'{o}}gica e da lei da
              prioridade.},
}
% Linnaeus, Systema naturae per regna tria naturae (1758, book).
% The imprint that was stored in `booktitle` is split into `publisher` and
% `address`, with the printer's name spelled "Salvii".
@Book{Linnaeus1758,
  author    = {C Linnaeus},
  title     = {{Systema naturae per regna tria naturae. 2 vols}},
  publisher = {Laurentii Salvii},
  address   = {Holmiae},
  year      = {1758},
  pages     = {824},
  URL       = {http://scholar.google.com/scholar?hl=en{\&}btnG=Search{\&}q=intitle:systema+naturae+per+regna+tria+naturae{\#}6},
  abstract  = {Vitulina.4 . P. dentibus laniariis tectis. Syst. nat.
               6. N. I. Phoca f. Vitulus marinus. Gesn. aqu. 702.
               Aldr. Pifc. 722. Fonft. Pisc. 44. Dodart. act. 191. t.
               191. Raj. Quadr. 189. Habitat in mari Europaeo. Sial
               Dormiunt in lapide ex aqua eminente, pariunt in glacie
               , per quam adscendunt halitu , non descendunt. Mammas
               retrahunt duas abdominis ; inter Halecum catervos
               saginantur, a Laris provocatae exspuunt pisces.
               Anatome. E. N. C. d. I. a. 9. obs. 98 {\&} d. 3. a. 7.
               app.15.},
}
% von Linne, Species plantarum (1753, book).
% The title had lost its first word and its ending; both are restored. The
% name is in "von Last, First" form so "von" is parsed as a particle, not as a
% given name. The junk `booktitle` value "Search" is dropped.
% NOTE(review): publisher/address added because the book type requires a
% publisher - confirm the imprint against the title page.
@Book{Linne1753,
  author    = {von Linn{\'e}, C.},
  title     = {{Species plantarum: exhibentes plantas rite cognitas ad
               genera relatas cum differentiis specificis, nominibus
               trivialibus, synonymis selectis, locis natalibus
               secundum systema sexuale digestas}},
  publisher = {Laurentii Salvii},
  address   = {Holmiae},
  year      = {1753},
  pages     = {583},
  abstract  = {Updated version of the seminal taxonomic treatment of
               plants. {"}36. Polygonum perfoliatum. P. foliis
               triangularibus, caule aculeato, stipulis
               perfoliato-foliosis patentibus subrotundis. Burm. ind.
               90. t. 31 f. 2.{"} Keywords: Taxonomy},
}
% Ratnasingham and Hebert, Barcode Index Number (BIN) system (PLoS ONE).
% `URL` holds a single address (the field previously contained two URLs
% joined by an escaped newline); the ISBN field, which only repeated the ISSN,
% is dropped; the title has no trailing period.
@Article{Ratnasingham2013,
  author    = {Ratnasingham, Sujeevan and Hebert, Paul D. N.},
  title     = {{A DNA-based registry for all animal species: the
               barcode index number (BIN) system}},
  journal   = {PLoS ONE},
  publisher = {Public Library of Science},
  volume    = {8},
  number    = {7},
  pages     = {e66213},
  year      = {2013},
  doi       = {10.1371/journal.pone.0066213},
  ISSN      = {1932-6203},
  pmid      = {23861743},
  URL       = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0066213},
  keywords  = {Algorithms,Animals,Cluster Analysis,DNA
               Barcoding,Databases as Topic,Lepidoptera,Lepidoptera:
               classification,North America,Registries,Species
               Specificity,Taxonomic,Taxonomic: methods,Time Factors},
  file      = {:home/dimus/.local/share/data/Mendeley Ltd./Mendeley
               Desktop/Downloaded/Ratnasingham, Hebert - 2013 - A
               DNA-based registry for all animal species the barcode
               index number (BIN) system.pdf:pdf},
  abstract  = {Because many animal species are undescribed, and
               because the identification of known species is often
               difficult, interim taxonomic nomenclature has often
               been used in biodiversity analysis. By assigning
               individuals to presumptive species, called operational
               taxonomic units (OTUs), these systems speed
               investigations into the patterning of biodiversity and
               enable studies that would otherwise be impossible.
               Although OTUs have conventionally been separated
               through their morphological divergence, DNA-based
               delineations are not only feasible, but have important
               advantages. OTU designation can be automated, data can
               be readily archived, and results can be easily compared
               among investigations. This study exploits these
               attributes to develop a persistent, species-level
               taxonomic registry for the animal kingdom based on the
               analysis of patterns of nucleotide variation in the
               barcode region of the cytochrome c oxidase I (COI)
               gene. It begins by examining the correspondence between
               groups of specimens identified to a species through
               prior taxonomic work and those inferred from the
               analysis of COI sequence variation using one new (RESL)
               and four established (ABGD, CROP, GMYC, jMOTU)
               algorithms. It subsequently describes the
               implementation, and structural attributes of the
               Barcode Index Number (BIN) system. Aside from a
               pragmatic role in biodiversity assessments, BINs will
               aid revisionary taxonomy by flagging possible cases of
               synonymy, and by collating geographical information,
               descriptive metadata, and images for specimens that are
               likely to belong to the same species, even if it is
               undescribed. More than 274,000 BIN web pages are now
               available, creating a biodiversity resource that is
               positioned for rapid growth.},
}
% Franz and Peet, language for mapping relationships among taxonomic
% concepts (Systematics and Biodiversity article).
% Line-break hyphenation left over from PDF extraction is repaired in the
% abstract and keywords, curly quotes are written as LaTeX quotes, and the
% ISBN field (which only repeated the ISSN) is dropped.
@Article{Franz2009,
  author   = {Franz, N. M. and Peet, R. K.},
  title    = {{Perspectives: Towards a language for mapping
              relationships among taxonomic concepts}},
  journal  = {Systematics and Biodiversity},
  volume   = {7},
  number   = {1},
  pages    = {5--20},
  year     = {2009},
  doi      = {10.1017/S147720000800282X},
  ISSN     = {1477-2000},
  keywords = {biodiversity databases,data integration,linnaean
              nomenclature,ontology,semantic web,taxonomic
              concepts},
  abstract = {Taxonomic concepts (sensu Berendsohn) embody the
              underlying meanings of scientific names as stated in
              a particular publication, thus offering a new way to
              resolve semantic ambiguities that result from multiple
              revisions of a taxonomic name. This paper presents a
              comprehensive and powerful language for representing
              the relationships among taxonomic concepts. The
              language features terms and symbols for concept
              relationships within a single taxonomic hierarchy, or
              between two related but independently published
              hierarchies. Taxonomic concepts pertaining to a single
              hierarchy are characterised by parent/child
              relationships, whereas those pertaining to two
              independent hierarchies may have the following basic
              relationships: congruence, inclusion (non-symmetrical,
              relative to the side of comparison), overlap, and
              exclusion. The relationships are asserted by
              specialists who have the option to add or subtract
              concepts on one or both sides of a relationship
              equation in order to reconcile differences between
              non-congruent taxonomic perspectives. The terms
              `and', `or' and `not' are available,
              respectively, to connect multiple simultaneously or
              alternatively valid relationship assessments, or to
              explicitly negate the validity of a relationship. The
              language also permits the decomposition of a
              relationship according to the intensional (property
              referencing) and ostensive (member pointing) aspects of
              the compared taxonomic concepts. Adopting the concept
              relationship language will facilitate a more precise
              documentation of similarities and differences in
              multiple succeeding taxonomic perspectives, thereby
              preparing the stage for an ontology-based integration
              of taxonomic and related biological information.},
}
@Article{Samyn2012,
  abstract = "In an interesting contribution Joppa et al. (2011)
              revisit some aspects of the taxonomic impediment
              (Evenhuis 2007; http://www.cbd.int/gti/) and come to
              the conclusion that, contrary to the generally accepted
              idea, both the rates of species description and the
              number of taxonomists have increased exponentially
              since the 1950's. Joppa et al. (2011) also note a
              marked decline in the number of species described per
              taxonomist which they attribute to the difficulty of
              finding new species in an ever declining `missing
              species pool'. Therefore, their results might be
              interpreted that today's taxonomic workforce is
              sufficient to describe the remaining (shallow) `pool
              of missing species'. In this contribution, we
              question if this is indeed the case and propose a
              solution for speeding up taxonomic descriptions. We
              feel that Joppa et al. (2011) are overly enthusiastic,
              probably because their test cases represent a selection
              of the better-studied taxa (flowering plants, Conus
              snails, spiders, amphibians, birds and mammals).
              Contradicting the findings of Joppa et al. (2011) and
              using a much broader taxon sampling, which includes
              many poorly studied groups, Costello et al. (2011)
              observe that species description has roughly remained
              constant since the second World War, at least for
              terrestrial species. These results are congruent with
              other counts of species descriptions (Bacher 2012;
              Chapman 2009; May 2011; Zhang 2010;
              http://species.asu.edu/SOS). So it seems that the
              increasing rate of species description as reported by
              Joppa et al. (2011) is taxon specific rather than
              general. In stark contrast to the constant rate of
              species description is the rate with which species are
              discovered by using DNA sequence data. Exhaustive
              bio-inventory initiatives to map diversity of poorly
              explored areas around the world (e.g.
              http://laplaneterevisitee.org/;
              http://mooreabiocode.org) in combination with
              large-scale barcoding efforts (Hajibabei et al. 2007;
              Valentini et al., 2009) result in an explosion of
              species/sequences in repositories such as Genbank and
              Bold Systems that are not linked to known species. This
              growing number of what have been called `dark taxa'
              by Page (http://iphylo.blogspot.com/2011/04/dark-taxa-genbank-in-post-taxonomic.html),
              makes one
              question the use of proper taxonomic names at all.
              Indeed, it can be argued that a lot of biology does not
              per se require formally described taxon names. However,
              the value of scientific names exceeds the field of
              biology sensu stricto and matters",
  author   = "Samyn, Yves and {De Clerck}, Olivier",
  doi      = "10.5852/ejt.2012.10",
  file     = ":home/dimus/.local/share/data/Mendeley Ltd./Mendeley
              Desktop/Downloaded/Samyn, De Clerck - 2012 - No Name,
              No Game.pdf:pdf",
  ISSN     = "2118-9773",
  journal  = "European Journal of Taxonomy",
  number   = "10",
  pages    = "1--3",
  title    = "No Name, No Game",
  year     = "2012",
}
@Article{Schindel2005,
  abstract = "Sir --- The Consortium for the Barcode of Life (CBOL;
              see www.barcoding.si.edu) is an international
              initiative of natural history museums, herbaria, other
              biodiversity research organizations, governmental
              organizations and private companies which wish to
              promote ...",
  author   = "Schindel, David E. and Miller, Scott E.",
  doi      = "10.1038/435017b",
  ISSN     = "0028-0836",
  journal  = "Nature",
  number   = "7038",
  pages    = "17",
  pmid     = "15874991",
  title    = "{DNA} Barcoding a Useful Tool for Taxonomists",
  volume   = "435",
  year     = "2005",
}
@Article{DeQueiroz1997,
  abstract = "During the post-Darwinian history of taxonomy, the
              Linnaean hierarchy has maintained its role as a means
              for representing hierarchical taxonomic relationships.
              During the same period, the principle of descent has
              taken on an increasingly important role as the basis
              for reformulated versions of fundamental taxonomic
              concepts and principles. Early in this history, the
              principle of descent provided an explanation for the
              existence of taxa and implied a nested, hierarchical
              structure for taxonomic relationships. Although an
              evolutionary explanation for taxa contradicted the
              Aristotelian context within which the Linnaean
              hierarchy was developed, the nested, hierarchical
              structure of taxonomic relationships implied by
              evolution was compatible with the practical use of the
              Linnaean hierarchy for conveying hierarchical
              relationships and seems to have reinforced this
              practice. Later changes associated with the development
              of taxon concepts based on the principle of descent led
              to changes in the interpretation of the Linnaean
              categories as well as certain modifications related to
              use of the Linnaean hierarchy in representing
              phylogenetic relationships. Although some authors
              questioned use of the Linnaean hierarchy in
              phylogenetic taxonomies, most continued to use it in
              one form or another. More recently, taxonomists have
              considered the relevance of the principle of descent to
              nomenclature. They have found fundamental
              inconsistencies between concepts of taxa based on that
              principle and methods currently used to define taxon
              names, which are based on the Linnaean hierarchy.
              Although these inconsistencies can be corrected without
              totally eliminating the Linnaean hierarchy, the
              necessary changes would greatly reduce the importance
              of that hierarchy, particularly in the area of
              nomenclature. Moreover, the earlier development of
              taxon concepts based on the principle of descent
              effectively proposed taxonomic categories of greater
              theoretical significance than those of the Linnaean
              hierarchy. The historical trend of granting increasing
              importance to the principle of descent has reduced the
              significance of the Linnaean hierarchy to the point
              where it may no longer be worth retaining.",
  author   = "de Queiroz, Kevin",
  journal  = "Aliso",
  number   = "2",
  pages    = "125--144",
  title    = "The {Linnaean} Hierarchy and the Evolutionization of
              Taxonomy, with Emphasis on the Problem of
              Nomenclature",
  volume   = "15",
  year     = "1997",
}
@Article{Schuh2003,
  abstract  = "The Linnaean system of nomenclature has been used and
               adapted by biologists over a period of almost 250
               years. Under the current system of codes, it is now
               applied to more than 2 million species of organisms.
               Inherent in the Linnaean system is the indication of
               hierarchical relationships. The Linnaean system has
               been justified primarily on the basis of stability.
               Stability can be assessed on at least two grounds: the
               absolute stability of names, irrespective of taxonomic
               concept; and the stability of names under changing
               concepts. Recent arguments have invoked conformity to
               phylogenetic methods as the primary basis for choice of
               nomenclatural systems, but even here stability of names
               as they relate to monophyletic groups is stated as the
               ultimate objective. The idea of absolute stability as
               the primary justification for nomenclatural methods was
               wrong from the start. The reasons are several. First,
               taxa are concepts, no matter the frequency of
               assertions to the contrary; as such, they are subject
               to change at all levels and always will be, with the
               consequence that to some degree the names we use to
               refer to them will also be subject to change. Second,
               even if the true nature of all taxa could be agreed
               upon, the goal would require that we discover them all
               and correctly recognize them for what they are. Much of
               biology is far from that goal at the species level and
               even further for supraspecific taxa. Nomenclature
               serves as a tool for biology. Absolute stability of
               taxonomic concepts---and nomenclature---would hinder
               scientific progress rather than promote it. It can be
               demonstrated that the scientific goals of systematists
               are far from achieved. Thus, the goal of absolute
               nomenclatural stability is illusory and misguided. The
               primary strength of the Linnaean system is its ability
               to portray hierarchical relationships; stability is
               secondary. No single system of nomenclature can ever
               possess all desirable attributes: i.e., convey
               information on hierarchical relationships, provide
               absolute stability in the names portraying those
               relationships, and provide simplicity and continuity in
               communicating the identities of the taxa and their
               relationships. Aside from myriad practical problems
               involved in its implementation, it must be concluded
               that ``phylogenetic nomenclature'' would not provide
               a more stable and effective system for communicating
               information on biological classifications than does the
               Linnaean system.",
  author    = "Schuh, Randall T.",
  doi       = "10.1663/0006-8101(2003)069[0059:TLSAIY]2.0.CO;2",
  ISSN      = "0006-8101",
  journal   = "The Botanical Review",
  number    = "1",
  pages     = "59--78",
  publisher = "Springer",
  title     = "The {Linnaean} System and Its 250-Year Persistence",
  URL       = "http://www.bioone.org/doi/abs/10.1663/0006-8101{\%}282003{\%}29069{\%}5B0059{\%}3ATLSAIY{\%}5D2.0.CO{\%}3B2",
  volume    = "69",
  year      = "2003",
}
@InCollection{Evenhuis2010,
  abstract      = "The advent of relational databasing and data storage
                   capacity, coupled with revolutionary advances in
                   molecular sequencing technology and specimen imaging,
                   have led to a taxonomic renaissance. Systema Naturae
                   250 - The Linnaean Ark maps the origins of this
                   renaissance, beginning with Linnaeus, through his
                   ``apostles'', via the great unsung hero Charles
                   Davies Sherborn --- arguably the father of biodiversity
                   informatics --- up to the present day with the
                   Planetary Biodiversity Inventories and into the future
                   with the Encyclopedia of Life and web-based taxonomy.
                   The book provides scientific, historical, and cultural
                   documentation of the evolution of taxonomy and the
                   successful adaptation of the Linnaean nomenclature
                   system to that evolution. It underscores the importance
                   of taxonomic accuracy, not only for the classification
                   of living organisms, but for a more complete
                   understanding of the living world and its biodiversity.
                   The book also examines the role of technologies such as
                   DNA sequencing, specimen imaging, and electronic data
                   storage. A celebration of 250 years of the scientific
                   naming of animals, Systema Naturae 250 - The Linnaean
                   Ark records and explores the history of zoological
                   nomenclature and taxonomy, detailing current and future
                   activity in these fields. Descriptive taxonomy has been
                   in decline, despite the fact that the classification of
                   organisms through taxonomic studies provides the
                   foundation of our understanding of life forms. Packed
                   with illustrations and tables, this book establishes a
                   vision for the future of descriptive taxonomy and marks
                   the beginning of a period of rapid growth of taxonomic
                   knowledge.",
  author        = "Evenhuis, Neal L. and Pape, Thomas and Pont, Adrian C.
                   and Thompson, F. Christian and Polaszek, Andrew",
  booktitle     = "Systema Naturae 250 -- The Linnaean Ark",
  chapter       = "7",
  editor        = "Polaszek, Andrew",
  internal-note = "REVIEW: (1) `title' duplicates `booktitle' and the
                   abstract is the blurb for the whole book; the chapter
                   title still has to be confirmed against the table of
                   contents (possibly `Flying after Linnaeus: Diptera
                   names since Systema Naturae (1758)' -- unverified).
                   (2) Andrew Polaszek is also the editor; confirm that he
                   is a chapter author. (3) `Francis Group' was removed
                   from the author list as a fragment of the publisher
                   name.",
  ISBN          = "978-1-4200-9501-2",
  pages         = "75--83",
  publisher     = "CRC Press, Taylor \& Francis Group",
  title         = "{Systema Naturae 250 The Linnaean Ark}",
  year          = "2010",
}
@InProceedings{Ford2004,
  abstract  = "For decades we have been using Chomsky's generative
               system of grammars, particularly context-free grammars
               (CFGs) and regular expressions (REs), to express the
               syntax of programming languages and protocols. The
               power of generative grammars to express ambiguity is
               crucial to their original purpose of modelling natural
               languages, but this very power makes it unnecessarily
               difficult both to express and to parse machine-oriented
               languages using CFGs. Parsing Expression Grammars
               (PEGs) provide an alternative, recognition-based formal
               foundation for describing machine-oriented syntax,
               which solves the ambiguity problem by not introducing
               ambiguity in the first place. Where CFGs express
               nondeterministic choice between alternatives, PEGs
               instead use prioritized choice. PEGs address frequently
               felt expressiveness limitations of CFGs and REs,
               simplifying syntax definitions and making it
               unnecessary to separate their lexical and hierarchical
               components. A linear-time parser can be built for any
               PEG, avoiding both the complexity and fickleness of LR
               parsers and the inefficiency of generalized CFG
               parsing. While PEGs provide a rich set of operators for
               constructing grammars, they are reducible to two
               minimal recognition schemas developed around 1970,
               TS/TDPL and gTS/GTDPL, which are here proven equivalent
               in effective recognition power.",
  author    = "Ford, Bryan",
  booktitle = "Proceedings of the 31st {ACM} {SIGPLAN-SIGACT} Symposium
               on Principles of Programming Languages",
  doi       = "10.1145/964001.964011",
  keywords  = "bnf,context-free grammars,grammars,gtdpl,lexical
               analysis,packrat parsing,parsing,parsing
               expression,regular expressions,scannerless,syntactic
               predicates,tdpl,unified grammars",
  pages     = "111--122",
  publisher = "ACM",
  title     = "Parsing Expression Grammars: A Recognition-Based
               Syntactic Foundation",
  year      = "2004",
}
@Article{Akella2012,
abstract = "BACKGROUND: A scientific name for an organism can be
associated with almost all biological data. Name
identification is an important step in many text mining
tasks aiming to extract useful information from
biological, biomedical and biodiversity text sources. A
scientific name acts as an important metadata element
to link biological
information.$\backslash$n$\backslash$nRESULTS: We
present NetiNeti (Name Extraction from Textual
Information-Name Extraction for Taxonomic Indexing), a
machine learning based approach for recognition of
scientific names including the discovery of new species
names from text that will also handle misspellings, OCR
errors and other variations in names. The system
generates candidate names using rules for scientific
names and applies probabilistic machine learning
methods to classify names based on structural features
of candidate names and features derived from their
contexts. NetiNeti can also disambiguate scientific
names from other names using the contextual
information. We evaluated NetiNeti on legacy
biodiversity texts and biomedical literature (MEDLINE).
NetiNeti performs better (precision = 98.9{\%} and
recall = 70.5{\%}) compared to a popular dictionary
based approach (precision = 97.5{\%} and
recall = 54.3{\%}) on a 600-page biodiversity book
that was manually marked by an annotator. On a small
set of PubMed Central's full text articles annotated
with scientific names, the precision and recall values
are 98.5{\%} and 96.2{\%} respectively. NetiNeti found
more than 190,000 unique binomial and trinomial names