-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Autogenerated literator docs for v0.1.0
- Loading branch information
1 parent
7d8216c
commit b343c0d
Showing
5 changed files
with
435 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
|
||
```scala | ||
package com.bio4j.data.enzyme | ||
|
||
/**
  Read-only interface to the fields of a single enzyme entry.

  It is a universal trait (`extends Any`), so value classes may implement it too.
  The field descriptions below are inferred from the member names only.
*/
trait AnyEntry extends Any {

  /** Identifier of this entry. */
  def ID: String

  /** Identifier of the sub-subclass this entry belongs to (presumably of the form `a.b.c.-`). */
  def subSubClassID: String

  /** Description of this entry. */
  def description: String

  /** Alternative names, one element per name. */
  def alternativeNames: Seq[String]

  /** Cofactors, one element per cofactor. */
  def cofactors: Seq[String]

  /** Catalytic activity, as a single string. */
  def catalyticActivity: String

  /** Comments, one element per comment. */
  def comments: Seq[String]
}
|
||
/**
  The levels of the enzyme classification that carry an ID and a description.

  The trait is sealed: the only implementations are [[EnzymeClass]], [[EnzymeSubClass]] and
  [[EnzymeSubSubClass]], so pattern matches over it are checked for exhaustiveness.
*/
sealed trait EnzymeClasses extends Any {

  /** Identifier of this level, e.g. `1.-.-.-`, `1.1.-.-` or `1.1.1.-`. */
  def ID: String

  /** Description of this level. */
  def description: String
}

// Case-class parameters are already public vals, so no explicit `val` is needed; the classes are
// final because extending a case class breaks its generated equals/copy.

/** A top-level class. */
final case class EnzymeClass(ID: String, description: String) extends EnzymeClasses

/** A subclass. */
final case class EnzymeSubClass(ID: String, description: String) extends EnzymeClasses

/** A sub-subclass. */
final case class EnzymeSubSubClass(ID: String, description: String) extends EnzymeClasses
|
||
``` | ||
|
||
|
||
|
||
|
||
[test/scala/EnzymeEntries.scala]: ../../test/scala/EnzymeEntries.scala.md | ||
[test/scala/EnzymeClasses.scala]: ../../test/scala/EnzymeClasses.scala.md | ||
[main/scala/entry.scala]: entry.scala.md | ||
[main/scala/flat/entry.scala]: flat/entry.scala.md | ||
[main/scala/flat/classes.scala]: flat/classes.scala.md |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
|
||
```scala | ||
package com.bio4j.data.enzyme.flat | ||
|
||
import com.bio4j.data.enzyme._ | ||
|
||
case class ClassLine(val line: String) { | ||
|
||
final def asEnzymeClass: EnzymeClasses = { | ||
|
||
ID match { | ||
// the order is important here | ||
case id if (id == classID) => EnzymeClass(id, description) | ||
case id if (id == subClassID) => EnzymeSubClass(id, description) | ||
case id if (id == subSubClassID) => EnzymeSubSubClass(id, description) | ||
} | ||
} | ||
``` | ||
|
||
|
||
In the `enzclass.txt` source file the ID always occupies the first 9 characters of a line, but it is padded with spaces that need to be removed. | ||
|
||
|
||
```scala | ||
private lazy val ID: String = | ||
line | ||
.take(9) | ||
.filter(_ != ' ') | ||
|
||
private lazy val IDFragments: (String,String,String,String) = { | ||
|
||
val fragments = ID.split('.').take(4) | ||
|
||
(fragments(0), fragments(1), fragments(2), fragments(3)) | ||
} | ||
|
||
private def classID: String = | ||
s"${IDFragments._1}.-.-.-" | ||
|
||
private def subClassID: String = | ||
s"${IDFragments._1}.${IDFragments._2}.-.-" | ||
|
||
private def subSubClassID: String = | ||
s"${IDFragments._1}.${IDFragments._2}.${IDFragments._3}.-" | ||
``` | ||
|
||
|
||
We don't want to store the description with a dot at the end! | ||
|
||
|
||
```scala | ||
private lazy val description: String = | ||
line | ||
.drop(9) | ||
.trim | ||
.stripSuffix(".") | ||
} | ||
|
||
case object enzymeClasses { | ||
``` | ||
|
||
|
||
The Enzyme source file `enzclass.txt` starts with: | ||
|
||
``` | ||
--------------------------------------------------------------------------- | ||
ENZYME nomenclature database | ||
SIB Swiss Institute of Bioinformatics; Geneva, Switzerland | ||
---------------------------------------------------------------------------- | ||
|
||
Description: Definition of enzyme classes, subclasses and sub-subclasses | ||
Name: enzclass.txt | ||
Release: 07-Sep-2016 | ||
|
||
---------------------------------------------------------------------------- | ||
|
||
1. -. -.- Oxidoreductases. | ||
1. 1. -.- Acting on the CH-OH group of donors. | ||
``` | ||
|
||
it also ends with: | ||
|
||
``` | ||
---------------------------------------------------------------------------- | ||
Copyrighted by the SIB Swiss Institute of Bioinformatics. | ||
There are no restrictions on its use by any institutions as long as | ||
its content is in no way modified. | ||
---------------------------------------------------------------------------- | ||
``` | ||
|
||
so we are only picking lines with a dot in the second char. | ||
|
||
Note that there are empty lines now and then, which need to be filtered out too. | ||
|
||
|
||
```scala | ||
def fromLines(lines: Iterator[String]): Iterator[EnzymeClasses] = | ||
lines | ||
.filter(_.nonEmpty) | ||
.collect { case line if(line(1) == '.') => ClassLine(line).asEnzymeClass } | ||
} | ||
|
||
``` | ||
|
||
|
||
|
||
|
||
[test/scala/EnzymeEntries.scala]: ../../../test/scala/EnzymeEntries.scala.md | ||
[test/scala/EnzymeClasses.scala]: ../../../test/scala/EnzymeClasses.scala.md | ||
[main/scala/entry.scala]: ../entry.scala.md | ||
[main/scala/flat/entry.scala]: entry.scala.md | ||
[main/scala/flat/classes.scala]: classes.scala.md |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
|
||
```scala | ||
package com.bio4j.data.enzyme.flat | ||
|
||
import com.bio4j.data.enzyme._ | ||
|
||
/**
  View over the raw lines of one entry.

  Each accessor delegates to a small wrapper built from the lines that start with the matching
  two-letter code (`ID`, `DE`, `AN`, `CF`, `CA`, `CC`). The wrappers are created lazily, on first
  access.

  @param lines the lines of a single entry
*/
case class Entry(val lines: Seq[String]) extends AnyEntry {

  /** Content of every line starting with `prefix`: the prefix is removed and the rest trimmed. */
  private def linesWith(prefix: String): Seq[String] =
    lines
      .filter(_.startsWith(prefix))
      .map(_.stripPrefix(prefix).trim)

  // Only the first ID line is used; `head` throws NoSuchElementException when there is none.
  // `new` is required here: a bare `ID(...)` would resolve to the `ID` method of this class.
  private lazy val idLine: ID  = new ID(linesWith("ID").head)
  private lazy val deLines: DE = DE(linesWith("DE"))
  private lazy val anLines: AN = AN(linesWith("AN"))
  private lazy val cfLines: CF = CF(linesWith("CF"))
  private lazy val caLines: CA = CA(linesWith("CA"))
  private lazy val ccLines: CC = CC(linesWith("CC"))

  def ID: String                    = idLine.value
  def subSubClassID: String         = idLine.subSubClassID
  def description: String           = deLines.description
  def alternativeNames: Seq[String] = anLines.alternativeNames
  def cofactors: Seq[String]        = cfLines.cofactors
  def catalyticActivity: String     = caLines.catalyticActivity
  def comments: Seq[String]         = ccLines.comments
}
|
||
/** The content of an ID line. */
private case class ID(val value: String) extends AnyVal {

  /** The value up to and including its last dot, followed by `-`; just `-` if there is no dot. */
  def subSubClassID: String = {
    val upToLastDot = value.substring(0, value.lastIndexOf('.') + 1)
    upToLastDot + "-"
  }
}
|
||
/** The content of the DE lines of an entry. */
private case class DE(val lines: Seq[String]) extends AnyVal {

  /** All lines, each trimmed and stripped of one trailing dot, joined with single spaces. */
  def description: String = {
    val cleaned = for (l <- lines) yield l.trim.stripSuffix(".")
    cleaned mkString " "
  }
}
|
||
/** The content of the AN lines of an entry. */
private case class AN(val lines: Seq[String]) extends AnyVal {

  /**
    The alternative names: the lines are joined with spaces and split at every dot.

    Each name is trimmed, since the joining space would otherwise lead every name but the first.
    Blank pieces are dropped, so an entry without AN lines yields an empty sequence rather than
    a single empty name.
  */
  def alternativeNames: Seq[String] =
    lines
      .mkString(" ")
      .split('.')
      .iterator
      .map(_.trim)
      .filter(_.nonEmpty)
      .toList
}
|
||
/** The content of the CF lines of an entry. */
private case class CF(val lines: Seq[String]) extends AnyVal {

  /**
    The cofactors: the lines are joined with spaces and split at every semicolon; each piece is
    trimmed and stripped of one trailing dot.

    Lines are joined with a space (as for the other line types) so that words on either side of a
    line break are not fused. Blank pieces are dropped, so an entry without CF lines yields an
    empty sequence.
  */
  def cofactors: Seq[String] =
    lines
      .mkString(" ")
      .split(';')
      .iterator
      .map(_.trim.stripSuffix("."))
      .filter(_.nonEmpty)
      .toList
}
|
||
/** The content of the CA lines of an entry. */
private case class CA(val lines: Seq[String]) extends AnyVal {

  /** All lines joined with single spaces; the empty string when there are no lines. */
  def catalyticActivity: String =
    lines.reduceOption { (joined, next) => joined + " " + next }.getOrElse("")
}
|
||
/** The content of the CC lines of an entry. */
private case class CC(val lines: Seq[String]) extends AnyVal {

  /**
    The comments: the lines are joined with spaces and split at every `-!-` marker; empty pieces
    are skipped, and the rest are trimmed and stripped of one trailing dot.
  */
  def comments: Seq[String] = {
    val pieces = lines.mkString(" ").split("-!-")

    pieces
      .filter(_.nonEmpty)
      .map(_.trim.stripSuffix("."))
  }
}
|
||
case object entries { | ||
``` | ||
|
||
|
||
The ENZYME entries file has a "header" consisting of CC lines and an end-of-entry `//` line. | ||
|
||
|
||
```scala | ||
def fromLines(lines: Seq[String]): Seq[Entry] = | ||
entryLines(lines.dropWhile( l => l.startsWith("CC") || l.startsWith("//") )).map { Entry(_) } | ||
|
||
def validFromLines(lines: Seq[String]): Seq[Entry] = | ||
fromLines(lines) filter isValid | ||
``` | ||
|
||
|
||
See ftp://ftp.expasy.org/databases/enzyme/enzuser.txt | ||
|
||
|
||
```scala | ||
private def isValid(entry: Entry): Boolean = | ||
!( entry.description.startsWith("Deleted entry") || entry.description.startsWith("Transferred entry") ) | ||
|
||
@annotation.tailrec | ||
private def entryLinesRec( | ||
currentLine: Option[String], | ||
linesLeft: Seq[String], | ||
entryAcc: Seq[String], | ||
acc: Seq[Seq[String]] | ||
) | ||
: Seq[Seq[String]] = | ||
currentLine match { | ||
case None => acc | ||
case Some(line) => { | ||
|
||
if(isEndLine(line)) | ||
entryLinesRec( | ||
currentLine = linesLeft.headOption, | ||
linesLeft = if(linesLeft.isEmpty) Seq() else linesLeft.tail, | ||
entryAcc = Seq(), | ||
acc = acc :+ entryAcc | ||
) | ||
else | ||
entryLinesRec( | ||
currentLine = linesLeft.headOption, | ||
linesLeft = if(linesLeft.isEmpty) Seq() else linesLeft.tail, | ||
entryAcc = entryAcc :+ line, | ||
acc = acc | ||
) | ||
} | ||
} | ||
|
||
private def entryLines(lines: Seq[String]): Seq[Seq[String]] = | ||
entryLinesRec( | ||
currentLine = lines.headOption, | ||
linesLeft = lines.tail, | ||
entryAcc = Seq(), | ||
acc = Seq() | ||
) | ||
|
||
private def isEndLine(line: String) = | ||
line.startsWith("//") | ||
} | ||
|
||
``` | ||
|
||
|
||
|
||
|
||
[test/scala/EnzymeEntries.scala]: ../../../test/scala/EnzymeEntries.scala.md | ||
[test/scala/EnzymeClasses.scala]: ../../../test/scala/EnzymeClasses.scala.md | ||
[main/scala/entry.scala]: ../entry.scala.md | ||
[main/scala/flat/entry.scala]: entry.scala.md | ||
[main/scala/flat/classes.scala]: classes.scala.md |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
|
||
```scala | ||
package com.bio4j.data.enzyme.test | ||
|
||
import org.scalatest.FunSuite | ||
|
||
import com.bio4j.data.enzyme._ | ||
|
||
class ParseEnzymeClasses extends FunSuite { | ||
|
||
def lines = | ||
io.Source.fromFile("enzclass.txt").getLines | ||
|
||
def allEnzymeClasses = flat.enzymeClasses.fromLines(lines) | ||
|
||
test("parse all enzyme classes") { | ||
|
||
allEnzymeClasses.foreach { e => | ||
|
||
val clazz = e | ||
} | ||
} | ||
``` | ||
|
||
|
||
This is unlikely to change | ||
|
||
|
||
```scala | ||
test("check first classes") { | ||
|
||
val firstFive = (allEnzymeClasses take 5).toList | ||
|
||
assert { | ||
|
||
firstFive === List[EnzymeClasses]( | ||
EnzymeClass("1.-.-.-", "Oxidoreductases"), | ||
EnzymeSubClass("1.1.-.-", "Acting on the CH-OH group of donors"), | ||
EnzymeSubSubClass("1.1.1.-", "With NAD(+) or NADP(+) as acceptor"), | ||
EnzymeSubSubClass("1.1.2.-", "With a cytochrome as acceptor"), | ||
EnzymeSubSubClass("1.1.3.-", "With oxygen as acceptor") | ||
) | ||
} | ||
} | ||
} | ||
|
||
``` | ||
|
||
|
||
|
||
|
||
[test/scala/EnzymeEntries.scala]: EnzymeEntries.scala.md | ||
[test/scala/EnzymeClasses.scala]: EnzymeClasses.scala.md | ||
[main/scala/entry.scala]: ../../main/scala/entry.scala.md | ||
[main/scala/flat/entry.scala]: ../../main/scala/flat/entry.scala.md | ||
[main/scala/flat/classes.scala]: ../../main/scala/flat/classes.scala.md |
Oops, something went wrong.