Skip to content

Commit

Permalink
Autogenerated literator docs for v0.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
eparejatobes committed Nov 5, 2016
1 parent 7d8216c commit b343c0d
Show file tree
Hide file tree
Showing 5 changed files with 435 additions and 0 deletions.
35 changes: 35 additions & 0 deletions docs/src/main/scala/entry.scala.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@

```scala
package com.bio4j.data.enzyme

/**
 * Read-only view of a single ENZYME entry.
 *
 * `flat.Entry` implements this trait on top of the raw text lines of an entry.
 */
trait AnyEntry extends Any {

/** Identifier of this entry (in `flat.Entry`, the content of the first `ID` line). */
def ID: String
/** Identifier of the enclosing sub-subclass (in `flat.Entry`, the ID with its last fragment replaced by `-`). */
def subSubClassID: String

/** Description of the entry (in `flat.Entry`, built from the `DE` lines). */
def description: String
/** Alternative names of the entry (in `flat.Entry`, built from the `AN` lines). */
def alternativeNames: Seq[String]
/** Cofactors of the entry (in `flat.Entry`, built from the `CF` lines). */
def cofactors: Seq[String]
/** Catalytic activity of the entry (in `flat.Entry`, built from the `CA` lines). */
def catalyticActivity: String
/** Comments attached to the entry (in `flat.Entry`, built from the `CC` lines). */
def comments: Seq[String]
}

/**
 * One level of the enzyme classification hierarchy: a class, a subclass or a
 * sub-subclass. The hierarchy is sealed, so these three cases are the only ones.
 */
sealed trait EnzymeClasses extends Any {

  /** Identifier of this level, e.g. `1.-.-.-`, `1.1.-.-` or `1.1.1.-`. */
  def ID: String

  /** Human-readable description of this level. */
  def description: String
}

/** A top-level enzyme class, e.g. `1.-.-.-`. */
case class EnzymeClass(ID: String, description: String) extends EnzymeClasses

/** An enzyme subclass, e.g. `1.1.-.-`. */
case class EnzymeSubClass(ID: String, description: String) extends EnzymeClasses

/** An enzyme sub-subclass, e.g. `1.1.1.-`. */
case class EnzymeSubSubClass(ID: String, description: String) extends EnzymeClasses

```




[test/scala/EnzymeEntries.scala]: ../../test/scala/EnzymeEntries.scala.md
[test/scala/EnzymeClasses.scala]: ../../test/scala/EnzymeClasses.scala.md
[main/scala/entry.scala]: entry.scala.md
[main/scala/flat/entry.scala]: flat/entry.scala.md
[main/scala/flat/classes.scala]: flat/classes.scala.md
112 changes: 112 additions & 0 deletions docs/src/main/scala/flat/classes.scala.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@

```scala
package com.bio4j.data.enzyme.flat

import com.bio4j.data.enzyme._

case class ClassLine(val line: String) {

/**
 * Turns this line into the matching level of the classification.
 *
 * Throws a `MatchError` carrying the ID when the ID equals none of the
 * class, subclass or sub-subclass IDs derived from it.
 */
final def asEnzymeClass: EnzymeClasses =
  // the order is important here: for a class-level ID such as `1.-.-.-` the
  // derived subclass and sub-subclass IDs are identical to it as well
  if (ID == classID) EnzymeClass(ID, description)
  else if (ID == subClassID) EnzymeSubClass(ID, description)
  else if (ID == subSubClassID) EnzymeSubSubClass(ID, description)
  else throw new MatchError(ID)
```


In the `enzclass.txt` source file the ID always occupies the first 9 characters of a line, but it is padded with spaces (e.g. `1. 1. -.-`), so we strip them out.


```scala
/** The first 9 characters of the line with every space removed. */
private lazy val ID: String =
  line.take(9).filterNot(_ == ' ')

/**
 * The first four dot-separated fragments of the ID.
 *
 * Throws an `ArrayIndexOutOfBoundsException` if the ID has fewer than four fragments.
 */
private lazy val IDFragments: (String,String,String,String) = {

  val parts = ID.split('.')

  (parts(0), parts(1), parts(2), parts(3))
}

/** ID of the class this line falls under: first fragment, then three `-` placeholders. */
private def classID: String =
  IDFragments._1 + ".-.-.-"

/** ID of the subclass this line falls under: first two fragments, then two `-` placeholders. */
private def subClassID: String =
  Seq(IDFragments._1, IDFragments._2, "-", "-").mkString(".")

/** ID of the sub-subclass this line falls under: first three fragments, then one `-` placeholder. */
private def subSubClassID: String = {
  val (first, second, third, _) = IDFragments
  s"${first}.${second}.${third}.-"
}
```


We don't want to store the description with a dot at the end!


```scala
/** Everything after the 9-character ID column, trimmed and without one trailing dot. */
private lazy val description: String = {
  val text = line.drop(9).trim
  if (text.endsWith(".")) text.dropRight(1) else text
}
}

case object enzymeClasses {
```


The Enzyme source file `enzclass.txt` starts with:

```
---------------------------------------------------------------------------
ENZYME nomenclature database
SIB Swiss Institute of Bioinformatics; Geneva, Switzerland
----------------------------------------------------------------------------

Description: Definition of enzyme classes, subclasses and sub-subclasses
Name: enzclass.txt
Release: 07-Sep-2016

----------------------------------------------------------------------------

1. -. -.- Oxidoreductases.
1. 1. -.- Acting on the CH-OH group of donors.
```

it also ends with:

```
----------------------------------------------------------------------------
Copyrighted by the SIB Swiss Institute of Bioinformatics.
There are no restrictions on its use by any institutions as long as
its content is in no way modified.
----------------------------------------------------------------------------
```

so we are only picking lines with a dot in the second char.

Note that there are empty lines now and then, which need to be filtered out too.


```scala
/**
 * Parses the lines of `enzclass.txt` into enzyme classes, subclasses and
 * sub-subclasses, lazily and in input order.
 *
 * Only lines whose second character is a dot are parsed; every other line
 * (header, footer, separators, blank lines) is skipped.
 *
 * @param lines the raw lines of the file
 * @return one parsed value per classification line
 */
def fromLines(lines: Iterator[String]): Iterator[EnzymeClasses] =
  lines.collect {
    // The length guard skips empty lines and also one-character lines, for
    // which `line(1)` would throw a StringIndexOutOfBoundsException.
    case line if line.length > 1 && line(1) == '.' => ClassLine(line).asEnzymeClass
  }
}

```




[test/scala/EnzymeEntries.scala]: ../../../test/scala/EnzymeEntries.scala.md
[test/scala/EnzymeClasses.scala]: ../../../test/scala/EnzymeClasses.scala.md
[main/scala/entry.scala]: ../entry.scala.md
[main/scala/flat/entry.scala]: entry.scala.md
[main/scala/flat/classes.scala]: classes.scala.md
170 changes: 170 additions & 0 deletions docs/src/main/scala/flat/entry.scala.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@

```scala
package com.bio4j.data.enzyme.flat

import com.bio4j.data.enzyme._

/**
 * An ENZYME entry backed by its raw text lines.
 *
 * Each accessor is derived from the lines starting with the corresponding
 * two-letter code (`ID`, `DE`, `AN`, `CF`, `CA`, `CC`). The per-code parsers are
 * built lazily on first use.
 *
 * `ID` and `subSubClassID` throw a `NoSuchElementException` if the entry has
 * no line starting with `ID`.
 */
case class Entry(lines: Seq[String]) extends AnyEntry {

  def ID: String = idLine.value

  def subSubClassID: String = idLine.subSubClassID

  def description: String = deLines.description

  def alternativeNames: Seq[String] = anLines.alternativeNames

  def cofactors: Seq[String] = cfLines.cofactors

  def catalyticActivity: String = caLines.catalyticActivity

  def comments: Seq[String] = ccLines.comments

  // `new` is required here: a bare `ID(...)` would resolve to the `ID` method above
  private lazy val idLine: ID = new ID(contentOf("ID").head)

  private lazy val deLines: DE = DE(contentOf("DE"))

  private lazy val anLines: AN = AN(contentOf("AN"))

  private lazy val cfLines: CF = CF(contentOf("CF"))

  private lazy val caLines: CA = CA(contentOf("CA"))

  private lazy val ccLines: CC = CC(contentOf("CC"))

  /** The lines starting with `code`, with that code removed and the rest trimmed. */
  private def contentOf(code: String): Seq[String] =
    lines
      .filter(_.startsWith(code))
      .map(_.stripPrefix(code).trim)
}

/** The content of an `ID` line, i.e. the identifier of an entry. */
private case class ID(value: String) extends AnyVal {

  /**
   * The identifier with everything after its last dot replaced by `-`
   * (just `-` when the identifier contains no dot).
   */
  def subSubClassID: String = {
    // lastIndexOf yields -1 when there is no dot, which gives an empty prefix
    val upToLastDot = value.substring(0, value.lastIndexOf('.') + 1)
    upToLastDot + "-"
  }
}

/** The contents of the `DE` (description) lines of an entry. */
private case class DE(lines: Seq[String]) extends AnyVal {

  /** All lines, each trimmed and stripped of one trailing dot, joined by single spaces. */
  def description: String = {
    val cleaned = for (l <- lines) yield l.trim.stripSuffix(".")
    cleaned mkString " "
  }
}

/** The contents of the `AN` (alternative name) lines of an entry. */
private case class AN(val lines: Seq[String]) extends AnyVal {

  /**
   * The alternative names, obtained by joining the lines with spaces and
   * splitting the result on dots.
   *
   * Each name is trimmed, so the space that follows a dot does not end up at
   * the start of the next name, and blank pieces are dropped, so an entry
   * without `AN` lines yields an empty sequence instead of `Seq("")`.
   *
   * NOTE(review): a name that itself contains a dot is still split at that dot.
   */
  def alternativeNames: Seq[String] =
    lines
      .mkString(" ")
      .split('.')
      .map(_.trim)
      .filter(_.nonEmpty)
      .toSeq
}

/** The contents of the `CF` (cofactor) lines of an entry. */
private case class CF(val lines: Seq[String]) extends AnyVal {

  /**
   * The cofactors, obtained by joining the lines and splitting on `;`.
   *
   * Lines are joined with a space, as for the other line types, so that a
   * cofactor wrapping onto the next line does not get its words glued
   * together; each piece is trimmed afterwards, so this adds no stray
   * whitespace. Blank pieces are dropped, so an entry without `CF` lines
   * yields an empty sequence instead of `Seq("")`.
   */
  def cofactors: Seq[String] =
    lines
      .mkString(" ")
      .split(';')
      .map(_.trim.stripSuffix("."))
      .filter(_.nonEmpty)
      .toSeq
}

/** The contents of the `CA` (catalytic activity) lines of an entry. */
private case class CA(lines: Seq[String]) extends AnyVal {

  /** All lines joined by single spaces; the empty string when there are none. */
  def catalyticActivity: String =
    lines.reduceOption(_ + " " + _).getOrElse("")
}

/** The contents of the `CC` (comment) lines of an entry. */
private case class CC(lines: Seq[String]) extends AnyVal {

  /**
   * The individual comments: the lines are joined by spaces and split on the
   * `-!-` marker; empty pieces are dropped, the rest are trimmed and stripped
   * of one trailing dot.
   */
  def comments: Seq[String] = {
    val pieces = lines.mkString(" ").split("-!-")
    pieces
      .filter(_.nonEmpty)
      .map(piece => piece.trim.stripSuffix("."))
  }
}

case object entries {
```


The ENZYME entries file has a "header" consisting of CC lines followed by an end-of-entry `//` line.


```scala
/**
 * Parses the lines of the ENZYME entries file into entries.
 *
 * Leading lines starting with `CC` or `//` (the file header) are skipped
 * before the remaining lines are grouped into entries.
 */
def fromLines(lines: Seq[String]): Seq[Entry] = {
  def isHeader(l: String): Boolean = l.startsWith("CC") || l.startsWith("//")
  val body = lines dropWhile isHeader
  for (chunk <- entryLines(body)) yield Entry(chunk)
}

/** Same as `fromLines`, keeping only the entries accepted by `isValid`. */
def validFromLines(lines: Seq[String]): Seq[Entry] =
  fromLines(lines).filter(entry => isValid(entry))
```


See ftp://ftp.expasy.org/databases/enzyme/enzuser.txt


```scala
/** An entry is valid unless its description starts with "Deleted entry" or "Transferred entry". */
private def isValid(entry: Entry): Boolean = {
  val text = entry.description
  !text.startsWith("Deleted entry") && !text.startsWith("Transferred entry")
}

/**
 * Tail-recursive worker behind `entryLines`.
 *
 * @param currentLine the line being examined, or `None` once the input is exhausted
 * @param linesLeft   the lines that come after `currentLine`
 * @param entryAcc    the lines collected so far for the entry in progress
 * @param acc         the entries completed so far
 * @return the completed entries; lines after the last end-of-entry line are dropped
 */
@annotation.tailrec
private def entryLinesRec(
  currentLine: Option[String],
  linesLeft: Seq[String],
  entryAcc: Seq[String],
  acc: Seq[Seq[String]]
)
: Seq[Seq[String]] =
  currentLine match {
    case None => acc
    case Some(line) =>
      // An end-of-entry line closes the entry in progress and is not stored itself.
      val closesEntry = isEndLine(line)

      entryLinesRec(
        currentLine = linesLeft.headOption,
        // drop(1) is safe on an empty sequence, unlike tail
        linesLeft   = linesLeft.drop(1),
        entryAcc    = if (closesEntry) Vector.empty else entryAcc :+ line,
        acc         = if (closesEntry) acc :+ entryAcc else acc
      )
  }

/**
 * Groups lines into entries, each entry being the run of lines that precedes
 * an end-of-entry line.
 *
 * Empty input yields no entries (previously `lines.tail` threw on it). The
 * accumulators are Vectors so that appending stays cheap on large inputs.
 */
private def entryLines(lines: Seq[String]): Seq[Seq[String]] =
  entryLinesRec(
    currentLine = lines.headOption,
    linesLeft   = lines.drop(1),
    entryAcc    = Vector.empty,
    acc         = Vector.empty
  )

/** Whether `line` is an end-of-entry line, i.e. starts with `//`. */
private def isEndLine(line: String): Boolean =
  line.take(2) == "//"
}

```




[test/scala/EnzymeEntries.scala]: ../../../test/scala/EnzymeEntries.scala.md
[test/scala/EnzymeClasses.scala]: ../../../test/scala/EnzymeClasses.scala.md
[main/scala/entry.scala]: ../entry.scala.md
[main/scala/flat/entry.scala]: entry.scala.md
[main/scala/flat/classes.scala]: classes.scala.md
56 changes: 56 additions & 0 deletions docs/src/test/scala/EnzymeClasses.scala.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@

```scala
package com.bio4j.data.enzyme.test

import org.scalatest.FunSuite

import com.bio4j.data.enzyme._

class ParseEnzymeClasses extends FunSuite {

/**
 * The lines of `enzclass.txt`, read from the working directory.
 *
 * The file is read eagerly so that the underlying source can be closed before
 * returning; every call still yields a fresh iterator.
 */
def lines: Iterator[String] = {
  val source = io.Source.fromFile("enzclass.txt")
  try source.getLines().toList.iterator
  finally source.close()
}

/** Everything parsed out of `lines`; re-reads and re-parses the file on every call. */
def allEnzymeClasses: Iterator[EnzymeClasses] =
  flat.enzymeClasses fromLines lines

test("parse all enzyme classes") {

  // Walking the whole iterator forces every line to be parsed; the test
  // passes as long as no line makes the parser throw.
  allEnzymeClasses.foreach(_ => ())
}
```


This is unlikely to change


```scala
test("check first classes") {

  // the first five classification lines, in file order
  val expected: List[EnzymeClasses] = List(
    EnzymeClass("1.-.-.-", "Oxidoreductases"),
    EnzymeSubClass("1.1.-.-", "Acting on the CH-OH group of donors"),
    EnzymeSubSubClass("1.1.1.-", "With NAD(+) or NADP(+) as acceptor"),
    EnzymeSubSubClass("1.1.2.-", "With a cytochrome as acceptor"),
    EnzymeSubSubClass("1.1.3.-", "With oxygen as acceptor")
  )

  val firstFive = allEnzymeClasses.take(5).toList

  assert(firstFive === expected)
}
}

```




[test/scala/EnzymeEntries.scala]: EnzymeEntries.scala.md
[test/scala/EnzymeClasses.scala]: EnzymeClasses.scala.md
[main/scala/entry.scala]: ../../main/scala/entry.scala.md
[main/scala/flat/entry.scala]: ../../main/scala/flat/entry.scala.md
[main/scala/flat/classes.scala]: ../../main/scala/flat/classes.scala.md
Loading

0 comments on commit b343c0d

Please sign in to comment.