diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000000..d0c3eaf8ee --- /dev/null +++ b/.coveragerc @@ -0,0 +1,13 @@ +[run] +source = + dffml + tests +branch = True + +[report] +exclude_lines = + no cov + no qa + noqa + pragma: no cover + if __name__ == .__main__.: diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000..8a9c8b85f6 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,6 @@ +.venv/ +.eggs/ +.mypy_cache/ +*/*.venv/ +*/*.eggs/ +*/*.mypy_cache/ diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000..dfe0770424 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Auto detect text files and perform LF normalization +* text=auto diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..748a6bcb33 --- /dev/null +++ b/.gitignore @@ -0,0 +1,19 @@ +*.log +*.pyc +.cache/ +.coverage +.idea/ +.vscode/ +*.egg-info/ +build/ +dist/ +docs/build/ +venv/ +wheelhouse/ +*.png +*.eggs +*.swp +.mypy_cache/ +*.egg-info/ +htmlcov/ +.venv/ diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000000..d92bda802a --- /dev/null +++ b/.pylintrc @@ -0,0 +1,3 @@ +[MESSAGES CONTROL] +disable=too-few-public-methods, + bad-continuation diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000000..c31a693270 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,26 @@ +language: python +dist: xenial +python: + - "3.7" +addons: + apt: + packages: + - git + - subversion + - cloc +env: + global: + - LOGGING=debug + matrix: + - PLUGIN=. 
+ - PLUGIN=model/tensorflow + - PLUGIN=feature/git +before_install: + # Update cloc (xenial has bad version) + - | + curl -o /tmp/cloc -sSL https://github.com/AlDanial/cloc/raw/1.80/cloc + sudo cp /tmp/cloc /usr/bin/cloc + sudo chmod 755 /usr/bin/cloc + - | +script: + - python setup.py install && cd $PLUGIN && python setup.py test diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000000..144a787d3c --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,18 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +- Nothing yet... + +## [0.1.0] - 2019-03-07 +### Added +- Feature class to collect a feature in a dataset +- Git features to collect feature data from Git repos +- Model class to wrap implementations of machine learning models +- Tensorflow DNN model for generic usage of the DNN estimator +- CLI interface and framework +- Source class to manage dataset storage diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000..b46d255267 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,10 @@ +FROM python:3.7 + +WORKDIR /usr/src/dffml + +COPY . . +RUN pip install --no-cache-dir . 
&& \ + cp scripts/docker-entrypoint.sh /usr/bin/ && \ + chmod 755 /usr/bin/docker-entrypoint.sh + +ENTRYPOINT ["/usr/bin/docker-entrypoint.sh"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000..8ce5aa9e27 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +Copyright (c) 2017-2019 Intel + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000000..9d5d250d09 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include LICENSE +include README.rst diff --git a/README.rst b/README.rst new file mode 100644 index 0000000000..3dfa732252 --- /dev/null +++ b/README.rst @@ -0,0 +1,260 @@ +Data Flow Facilitator for Machine Learning (dffml) +================================================== + +.. image:: https://travis-ci.org/intel/dffml.svg + :target: https://travis-ci.org/intel/dffml +.. 
image:: https://bestpractices.coreinfrastructure.org/projects/2594/badge + :target: https://bestpractices.coreinfrastructure.org/projects/2594 + +Is DFFML Right For Me? +---------------------- + +If you answer yes to any of these questions DFFML can make your life easier. + +- Dataset Generation + + - Need to generate a dataset + - Need to run asynchronous operations in order to gather dataset (http + requests, interaction with command line utilities, etc.) + +- Models + + - Want to quickly prototype how machine learning could be used on a dataset + without writing a model + - Need to write a finely tuned model by interacting with low level APIs of + popular machine learning frameworks. + +- Storage + + - Need a way to use datasets which could be stored in different locations or + formats. + +About +----- + +DFFML facilitates data generation, model creation, and use of models via +services. See `Architecture`_ to learn how it works. + +- Facilitates data collection, model creation, and use of models via services. +- Provides plumbing to facilitate the collection of feature data to create + datasets. +- Allows developers to define their ML models via a standardized API. + + - This let's users try different libraries / models to compare performance. + +- Plugin based + + - Features which gather feature data (Number of Git Authors, etc.) + - Models which expose ML models via the standard API (Tensorflow, Scikit, + etc.) + - Sources which load and store feature data (CSV, JSON, MySQL, etc.) + +The plumbing DFFML provides enables users to swap out models and features, +in order to quickly prototype. + +Installation +------------ + +DFFML currently should work with Python 3.6. However, only Python 3.7 is +officially supported. This is because there are a lot of nice helper methods +Python 3.7 implemented that we intend to use instead of re-implementing. + +.. 
code-block:: bash + + python3.7 -m pip install -U dffml + +You can also install the Features for Git Version Control, and Models for +Tensorflow Library all at once. + +- `DFFML Features for Git Version Control `_ +- `DFFML Models for Tensorflow Library `_ + +If you want a quick how to on the iris dataset head to the +`DFFML Models for Tensorflow Library `_ repo. + +.. code-block:: bash + + python3.7 -m pip install -U dffml[git,tensorflow] + +Docker Build +------------ + +This is a good option if you don't want to deal with installing Python 3.7. + +.. code-block:: bash + + docker build -t dffml . + +You can then alias dffml to run the docker container. + +.. code-block:: bash + + alias dffml="docker run --rm -ti -v $HOME/.local:/home/$USER/.local/ -v $PWD:/workdir -w /workdir -e UID=$(id -u) -e USER=$USER dffml" + +This creates an alias that takes your current working directory and mounts it +into ``/workdir`` as well as your ``$HOME/.local`` to the same in the container. + +With the alias, you can run ``dffml`` commands as you would if installed via +``pip``. + +.. code-block:: bash + + dffml list + +Keep in mind that if you're working on files they can only be ones in your +current working directory, and if you want to access network resources and they +are on your host, you'll have to talk to ``172.17.0.1`` (docker0 inet address) +instead of ``localhost`` or ``127.0.0.1``. + +The purpose of mounting ``$HOME/.local`` is so that if you want to +``pip install`` anything, you can, and it will persist between invocations due +to that being on the host. + +If you wan to run ``pip`` you can put it after ``dffml``. + +.. code-block:: bash + + dffml pip install example + +Hacking +------- + +Then install in development mode to the virtualenv and development dependencies. + +.. 
code-block:: bash + + git clone git@github.com:intel/dffml + cd dffml + pip install --user -e .[git,tensorflow] + +Usage +----- + +See `DFFML Models for Tensorflow Library `_ repo +until documentation here is updated with a generic example. + +Testing +------- + +.. code-block:: bash + + python3.7 setup.py test + +Architecture +------------ + +When applying Machine Learning to a new problem developers must first collect +data for models to train on. DFFML facilitates the collection of feature data +to create datasets for models to learn on. + +.. image:: https://github.com/intel/dffml/raw/master/docs/arch.png + +DFFML's architecture can be thought of similarly to a search engine. Each +**Feature** a developer defines searches for data associated with the unique key +its provided with. Once the data is found it is added to a **Repo** (repository) +associated with that unique key. A **Feature**'s search for data is dubbed +*evaluation*. A **Repo** holds the results of each **Feature**'s evaluation. +Results are stored under their respective **Feature** names. + +To define machine learning a model within DFFML, users create a **Model**. +Models are responsible for training, assessing accuracy, and making +predictions. After evaluation a **Repo** can be used by a **Model** for any of +those tasks. Defining a machine learning model as a **Model** allows users to +quickly compare accuracy of various models on their gathered dataset. + +Once the best most accurate model is known, users can easily integrate use of +the model into existing applications via the Python API, or a **Service**. +Services provide applications with ways to access the DFFML API over various +protocols and deployment scenarios. + +Repo +---- + +A repo is a repository of information. It is instantiated with a source URL +which represents or points to where more information on it can be found. + +Every repo has (or wants) a classification. 
Those which already have +classifications can be used to train Models. The classification of the repo is +what Education will ask it's models to make predictions on. + +Feature +------ + +Features are given a repo, containing at the minimum a source URL for it, +and produce a list of results which represent the evaluation of that feature. + +Not all methods are applicable to all repos. As such, all Features implement the +``applicable`` method. + +Feature is the abstract base class for all features. New features must be +derived from this class and implement the fetch, parse, and calc methods. These +methods are always called in order by the evaluator. However, they are executed +in parallel with the same stages of other features. + +A feature is provided with a repo +and is expected to fetch any data it needs to calculate itself when fetch +is called. All data fetched should be stored in tempdir() if it must reside +on disk. + +Once the appropriate data is fetched the parse method is responsible for +storing the parts of that data which will be used to calculate in the +subclass + +.. code-block:: python + + from dffml.feature import Feature + + class StringByFT(Feature): + + async def fetch(self): + self.__value = '42' + + async def parse(self): + self.__value = int(self.__value) + +The calc method then uses variables set in parse to calculate the feature. + +.. code-block:: python + + async def calc(self): + return self.__value * 42 + +.. code-block:: python + + entry_points={ + 'dffml.feature': [ + 'string_by_42 = mypackage.string_by_42:StringByFT', + ], + }, + +Source +------ + +Repos come from a source. Sources may contain more information on a repo than +just it's source URL. Sources are responsible for providing the repos they +contain and updating those repos upon request. + +Model +------- + +Models are feed classified repos from which they learn from during their +training phase. 
After training they can be used to make a prediction about the +classification of a repo. + +License +------- + +dffml is distributed under the MIT License, see ``LICENSE``. + +Legal +----- + +.. + + This software is subject to the U.S. Export Administration Regulations and + other U.S. law, and may not be exported or re-exported to certain countries + (Cuba, Iran, Crimea Region of Ukraine, North Korea, Sudan, and Syria) or to + persons or entities prohibited from receiving U.S. exports (including + Denied Parties, Specially Designated Nationals, and entities on the Bureau + of Export Administration Entity List or involved with missile technology or + nuclear, chemical or biological weapons). diff --git a/dffml/__init__.py b/dffml/__init__.py new file mode 100644 index 0000000000..fc390d5f20 --- /dev/null +++ b/dffml/__init__.py @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +''' +Education (dffml) is a package and tool for doing machine learning. + +It uses the setuptools dynamic discovery of services and plugins [1] to +evaluate a package based on the installed features. 
+ +[1]: http://setuptools.readthedocs.io/en/latest/setuptools.html +''' +from .feature import Feature + +# Used to declare our namespace for resource discovery +__import__('pkg_resources').declare_namespace(__name__) diff --git a/dffml/accuracy.py b/dffml/accuracy.py new file mode 100644 index 0000000000..1a164bcfbc --- /dev/null +++ b/dffml/accuracy.py @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +class Accuracy(float): + + def __str__(self): + return '%.02f' % (float(self) * 100.0) diff --git a/dffml/cli.py b/dffml/cli.py new file mode 100644 index 0000000000..545b1f5b7a --- /dev/null +++ b/dffml/cli.py @@ -0,0 +1,293 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +''' +Command line interface evaluates packages given their source URLs +''' +import os +import sys +import pdb +import json +import asyncio +import logging +import inspect +import argparse +import pkg_resources + +from .log import LOGGER +from .version import VERSION +from .port import Port +from .feature import Feature, Features, Data +from .source import Source, Sources, SubsetSources +from .model import Model +from .util.cli import CMD, Arg, SourcesCMD, FeaturesCMD, ModelCMD, PortCMD, \ + KeysCMD, ListEntrypoint, ParseSourcesAction + +class Version(CMD): + ''' + Print version and exit + ''' + + async def run(self): + LOGGER.debug('Reporting version') + print(VERSION) + +class Edit(SourcesCMD, KeysCMD): + ''' + Edit each specified repo + ''' + + async def run(self): + async with self.sources as sources: + for key in self.keys: + repo = await sources.repo(key) + pdb.set_trace() + await sources.update(repo) + +class ListRepos(SourcesCMD): + ''' + List repos stored in sources + ''' + + async def run(self): + async with self.sources as sources: + async for repo in sources.repos(): + print(repo) + +class ListFeatures(ListEntrypoint): + ''' + List installed features + ''' + + ENTRYPOINT = Feature + + def display(self, cls): + if not 
cls.__doc__ is None: + print('%s(%s):' % (cls.NAME, cls.__qualname__)) + print(cls.__doc__.rstrip()) + else: + print('%s(%s)' % (cls.NAME, cls.__qualname__)) + print() + +class ListServices(ListEntrypoint): + ''' + List installed services + ''' + + async def run(self): + for i in pkg_resources.iter_entry_points('dffml.service.cli'): + loaded = i.load() + if issubclass(loaded, CMD): + self.display(loaded) + +class ListSources(ListEntrypoint): + ''' + List installed sources + ''' + + ENTRYPOINT = Source + +class ListModels(ListEntrypoint): + ''' + List installed models + ''' + + ENTRYPOINT = Model + +class ListPorts(ListEntrypoint): + ''' + List installed ports + ''' + + ENTRYPOINT = Port + +class List(CMD): + ''' + List repos and installed interfaces + ''' + + repos = ListRepos + features = ListFeatures + sources = ListSources + models = ListModels + services = ListServices + ports = ListPorts + +class Applicable(FeaturesCMD): + + arg_key = Arg('-key', help='Check if features is applicable for this key', + required=True) + + async def run(self): + async with self.features as features: + return await features.applicable(Data(self.key)) + +class Merge(CMD): + ''' + Merge repo data between sources + ''' + + arg_dest = Arg(name='dest', help='Sources merge repos into', + action=ParseSourcesAction) + arg_src = Arg('src', help='Sources to pull repos from', + action=ParseSourcesAction) + + async def run(self): + async with self.src, self.dest: + async for repo in self.src.repos(): + repo.merge(await self.dest.repo(repo.src_url)) + await self.dest.update(repo) + +class EvaluateCMD(FeaturesCMD, SourcesCMD): + + arg_sources = SourcesCMD.arg_sources.modify(required=False) + arg_caching = Arg('-caching', help='Re-evaluate or use last', + required=False, default=False, action='store_true') + arg_parallel = Arg('-parallel', help='Evaluate in parallel', + required=False, default=1, type=int) + arg_cacheless = Arg('-cacheless', + help='Do not re-evaluate if these features are 
missing', + required=False, default=[], nargs='+') + +class EvaluateAll(EvaluateCMD): + '''Evaluate all repos in sources''' + + arg_update = Arg('-update', help='Update repo with sources', required=False, + default=False, action='store_true') + + async def evaluate(self, sources, features): + async for repo in features.evaluate_repos(sources.repos(), + features=[name for name in features.names() \ + if not name in self.cacheless], + num_workers=self.parallel, caching=self.caching): + yield repo + if self.update: + await sources.update(repo) + + async def run(self): + async with self.sources as sources, self.features as features: + async for repo in self.evaluate(sources, features): + yield repo + +class EvaluateRepo(EvaluateAll, KeysCMD): + '''Evaluate features on individual repos''' + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.sources = SubsetSources(*self.sources, keys=self.keys) + +class Evaluate(CMD): + '''Evaluate features against repos''' + + repo = EvaluateRepo + _all = EvaluateAll + +class MLCMD(ModelCMD, FeaturesCMD, SourcesCMD): + ''' + Commands which use models share many similar arguments. 
+ ''' + + arg_classifications = Arg('-classifications', nargs='+', required=True, + default=[]) + +class Train(MLCMD): + '''Train a model on data from given sources''' + + arg_steps = Arg('-steps', help='Number of steps', required=True, type=int, + default=5000) + arg_num_epochs = Arg('-num_epochs', help='Number of epochs', required=True, + type=int, default=30) + + async def run(self): + if not self.model_dir is None and not os.path.isdir(self.model_dir): + os.makedirs(self.model_dir) + async with self.sources as sources, self.features as features: + return await self.model.train(sources, features, + self.classifications, self.steps, self.num_epochs) + +class Accuracy(MLCMD): + '''Assess model accuracy on data from given sources''' + + async def run(self): + async with self.sources as sources, self.features as features: + return float(await self.model.accuracy(sources, features, + self.classifications)) + +class PredictAll(EvaluateAll, MLCMD): + '''Predicts for all sources''' + + async def predict(self, sources, features, repos): + async for repo, classification, confidence in \ + self.model.predict(repos, features, self.classifications): + repo.predicted(classification, confidence) + yield repo + if self.update: + await sources.update(repo) + + async def run(self): + async with self.sources as sources, self.features as features: + async for repo in self.predict(sources, features, + self.evaluate(sources, features)): + yield repo + +class PredictRepo(PredictAll, EvaluateRepo): + '''Predictions for individual repos''' + pass + +class Predict(CMD): + '''Evaluate features against repos and produce a prediction''' + + repo = PredictRepo + _all = PredictAll + +class ImportExportCMD(PortCMD, SourcesCMD): + '''Shared import export arguments''' + + arg_filename = Arg('filename', type=str) + +class Import(ImportExportCMD): + '''Imports repos''' + + async def run(self): + async with self.sources as sources: + return await self.port.import_from_file(sources, self.filename) + 
+class Export(ImportExportCMD): + '''Exports repos''' + + async def run(self): + async with self.sources as sources: + return await self.port.export_to_file(sources, self.filename) + +def services(): + ''' + Loads dffml.services.cli entrypoint and creates a CMD class incorporating + all of the loaded CLI versions of services as subcommands. + ''' + class Service(CMD): + ''' + Expose various functionalities of dffml + ''' + pass + for i in pkg_resources.iter_entry_points('dffml.service.cli'): + loaded = i.load() + if issubclass(loaded, CMD): + setattr(Service, i.name, loaded) + return Service + +class CLI(CMD): + ''' + CLI interface for dffml + ''' + + version = Version + _list = List + edit = Edit + merge = Merge + _import = Import + export = Export + train = Train + accuracy = Accuracy + predict = Predict + evaluate = Evaluate + service = services() + applicable = Applicable diff --git a/dffml/feature/__init__.py b/dffml/feature/__init__.py new file mode 100644 index 0000000000..36da2e2a2d --- /dev/null +++ b/dffml/feature/__init__.py @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +''' +All features registered to the dffml.feature entry point using setuptools are +derived from the Feature class. To add a feature, create a module which has a +setup.py which specifies where to find your Feature subclass within your module. + +>>> setup( +>>> name='myfeatures', +... 
+>>> entry_points={ +>>> 'dffml.feature': [ +>>> 'numfiles = myfeatures:NumFilesFeature', +>>> ], +>>> }, +>>> ) +''' +from .feature import Data, Feature, Features, LoggingDict, DefFeature + +# Declares dffml.feature is a namespace package +__import__('pkg_resources').declare_namespace(__name__) diff --git a/dffml/feature/feature.py b/dffml/feature/feature.py new file mode 100644 index 0000000000..54c3fe2309 --- /dev/null +++ b/dffml/feature/feature.py @@ -0,0 +1,412 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +''' +Feature subclasses are responsible for generating an integer value given an open +feature project's feature URL. +''' +import abc +import pydoc +import asyncio +import traceback +import concurrent.futures as futures +import pkg_resources +from functools import singledispatch, partial +from typing import Optional, List, Dict, Type, AsyncIterator, Any, Callable + +from .log import LOGGER +from ..repo import Repo +from ..util.monitor import Monitor, Task +from ..util.entrypoint import Entrypoint +from ..util.asynchelper import AsyncContextManagerList + +class Frequency(object): + ''' + Frequency in months + ''' + MONTHS: int = 0 + +class Quarterly(Frequency): + ''' + Evaluate on a quarterly basis (every 3 months). + ''' + MONTHS = 3 + +class Yearly(Frequency): + ''' + Evaluate on a yearly basis. 
+ ''' + MONTHS = 12 + +class LoggingDict(object): + + def __init__(self, data: 'Data') -> None: + self.__data = data + self.__dict: Dict = {} + self.ignore = (asyncio.Lock,) + + async def get(self, key, default=None): + val = self.__dict.get(key, default) + return val + + async def set(self, key, value): + self.__dict[key] = value + if not isinstance(value, self.ignore): + await self.__data.update({key: value}, event='set') + + async def inc(self, key, default=None, by=1): + value = await self.get(key, default=default) + value += by + await self.set(key, value) + return value + +class Data(Task): + ''' + Passed to each feature during evaluation. Shared between all features a repo + is being evaluated with + ''' + + LOGGER = LOGGER.getChild('Data') + + def __init__(self, src_url: str) -> None: + super().__init__(key=src_url) + self.src_url = src_url + self.lock: asyncio.Lock = asyncio.Lock() + self.temp: Dict[str, Any] = {} + self.data: LoggingDict = LoggingDict(self) + self.results: Dict[str, Any] = {} + self.locks: Dict[str, Any] = {} + + async def mklock(self, name: str) -> asyncio.Lock: + ''' + Return a lock stored in data under the key `name`. Create the lock if it + does not exist. + ''' + async with self.lock: + lock = self.locks.get(name, None) + if lock is None: + lock = asyncio.Lock() + self.locks[name] = lock + return lock + + async def result(self): + results = await self.complete() + self.results = results + self.LOGGER.debug('Data got results: %r', results) + return results + +class Feature(abc.ABC, Entrypoint): + ''' + Abstract base class for all features. New features must be derived from this + class and implement the fetch, parse, and calc methods. These methods are + always expected to be called in order. Anything you add to your feature + subclass in fetch or parse is accessible in calc. 
+ + A feature is provided with the feature URL of the package (in self._src_url) + and is expected to fetch any data it needs to calculate itself when fetch + is called. All data fetched should be stored in tempdir() if it must reside + on disk. + + Once the appropriate data is fetched the parse method is responsible for + storing the parts of that data which will be used to calculate in the + subclass + + >>> self.__example_parsed_value_name = example_value + + The calc method then uses variables set in parse to output an integer value. + + >>> def calc(self): + >>> return self.__example_parsed_value_name + + Full example of a feature implementation: + + >>> import glob + >>> from dffml.feature import Feature + >>> + >>> class NumFilesFeature(Feature): + >>> + >>> @abc.abstractmethod + >>> def fetch(self, data): + >>> self._downloader.vcs(self._src_url, self.tempdir('src')) + >>> + >>> @abc.abstractmethod + >>> def parse(self, data): + >>> self.__num_files = glob.glob(self.tempdir(), recursive=True) + >>> + >>> @abc.abstractmethod + >>> def calc(self, data): + >>> return self.__num_files + ''' + + LOGGER = LOGGER.getChild('Feature') + + NAME: str = '' + # LENGTH: int = 10 + # FREQUENCY: Type[Frequency] = Quarterly + ENTRY_POINT = 'dffml.feature' + + def __str__(self): + return '%s(%s)' % (self.NAME, self.__class__.__qualname__) + + def __repr__(self): + return '%s[%r, %d]' % (self.__str__(), self.dtype(), self.length()) + + def dtype(self) -> Type: + ''' + Models need to know a Feature's datatype. + ''' + self.LOGGER.warning('%s dtype unimplemented', self) + return int + + def length(self) -> int: + ''' + Models need to know a Feature's length, 1 means single value, more than + that is the length of the array calc returns. 
+ ''' + self.LOGGER.warning('%s length unimplemented', self) + return 1 + + async def applicable(self, data) -> bool: + return True + + async def fetch(self, data): + ''' + Fetch retrieves any additional information about the software we are + evaluating. Any data fetched should be stored in tempdir(). + ''' + pass + + async def parse(self, data): + ''' + Parse the data we downloaded in fetch() into a usable form. + ''' + pass + + async def calc(self, data): + ''' + Calculates the score for this feature based on data found by parse(). + ''' + return False + + async def setUp(self, data): + ''' + Preform setup + ''' + pass + + async def tearDown(self, data, error=False): + ''' + Release any post calculation resources + ''' + pass + + async def open(self): + ''' + Opens any resources needed + ''' + pass + + async def close(self): + ''' + Closes any opened resources + ''' + pass + + async def __aenter__(self): + await self.open() + # TODO Context management + return self + + async def __aexit__(self, exc_type, exc_value, traceback): + await self.close() + +def DefFeature(name, dtype, length): + + class DefinedFeature(Feature): + + LOGGER = LOGGER.getChild('DefFeature') + + def __init__(self, name: str = '', dtype: Type = int, length: int = 1) \ + -> None: + super().__init__() + self.NAME = name + self._dtype = dtype + self._length = length + + def dtype(self) -> Type: + ''' + Models need to know a Feature's datatype. + ''' + return self._dtype + + def length(self) -> int: + ''' + Models need to know a Feature's length, 1 means single value, more than + that is the length of the array calc returns. 
+ ''' + return self._length + + return DefinedFeature(name=name, dtype=dtype, length=length) + +class Features(AsyncContextManagerList, Monitor): + + TIMEOUT: int = 60 * 2 + + LOGGER = LOGGER.getChild('Features') + + def __init__(self, *args: Feature, timeout: int = None) -> None: + super().__init__(*args) + Monitor.__init__(self) + self.timeout = timeout if not timeout is None \ + else self.TIMEOUT + + def names(self) -> List[str]: + return list(({feature.NAME: True for feature in self}).keys()) + + async def evaluate(self, src: str, task: Task = None) -> Dict[str, Any]: + return await asyncio.wait_for(self._evaluate(src, task=task), + self.timeout) + + async def _evaluate(self, src: str, task: Task = None) -> Dict[str, Any]: + ''' + Evaluates all repos passed to it. + Args: + src: src of repo to be evaluated + caching: If `True` sources will NOT be re-evaluated if they have + features + Returns: + A `dict` containing source URLs and their repos + ''' + toreDown = False + data: Data = Data(src) + if not task is None: + data = task # type: ignore + features: Dict[str, Feature] = {} + results: Dict[str, Any] = {} + try: + applicable = await self.applicable(data) + self.LOGGER.debug('Applicable[%r]: %r', data.src_url, applicable) + await applicable.on_all('setUp', data) + await applicable.on_all('fetch', data) + await applicable.on_all('parse', data) + await applicable.run_calc(results, data) + await applicable.on_all('tearDown', data) + toreDown = True + except futures._base.CancelledError as err: + if not toreDown: + await applicable.on_all('tearDown', data) + return {} + data.results.update(results) + return results + + async def applicable(self, data: Data) -> 'Features': + return self.__class__(*[feature for feature in self \ + if feature.NAME and await feature.applicable(data)]) + + async def on_all(self, method_name: str, data: Data): + await asyncio.gather(*[self.run_feature_method( + feature, getattr(feature, method_name), data) \ + for feature in self]) + + 
async def run_calc(self, results: Dict[str, Any], data: Data): + await asyncio.gather(*[self._run_calc(feature, results, data) \ + for feature in self]) + + async def _run_calc(self, feature: Feature, results: Dict[str, Any], + data: Data) -> Any: + results[feature.NAME] = await self.run_feature_method(feature, + feature.calc, data) + + async def run_feature_method(self, feature: Feature, + method: Callable[[Data], Any], data: Data) -> Any: + error: Exception = Exception('Not an error') + try: + self.LOGGER.debug('%s %s(%s).%s', data.src_url, feature.NAME, + feature.__class__.__qualname__, method.__name__) + return await method(data) + except futures._base.CancelledError as err: + raise + except Exception as err: + error = err + self.LOGGER.error('Error evaluating %s: %s: %s', data.src_url, err, + traceback.format_exc().strip()) + if str(error) != 'Not an error': + if method.__name__ != 'tearDown': + await feature.tearDown(data) + self.remove(feature) + + def mktask(self, func, key): + data = Data(key) + Task.__init__(data, func, key) + return data + + async def evaluate_repo(self, repo: Repo, *, + features: List[str] = [], caching: bool = False): + results: Dict[str, Any] = repo.features(features) + if caching and results: + return repo + try: + results = await self.evaluate(repo.src_url) + if results: + repo.evaluated(results) + except futures._base.TimeoutError: + self.LOGGER.warning('Evaluation timed out: %s', repo.src_url) + return repo + + async def evaluate_repos(self, repos: AsyncIterator[Repo], *, + features: Optional[List[str]] = None, caching: bool = False, + num_workers: int = 1): + if features is None: + features = self.names() + sem = asyncio.Semaphore(value=num_workers) + async def with_sem(sem, func, *args, **kwargs): + async with sem: + return await func(*args, **kwargs) + evaluate_repo = partial(with_sem, sem, self.evaluate_repo, + features=features, caching=caching) + for repo in await asyncio.gather(*[evaluate_repo(repo) \ + async for repo in 
repos]): + yield repo + + async def submit(self, src: str): + return await super().start(partial(self.evaluate, src), src, + mktask=self.mktask) + + @classmethod + def load(cls, *these: str): + ''' + Loads all installed loading and returns them as a list. Sources to be + loaded should be registered to ENTRY_POINT via setuptools. + ''' + these, loading_classes = cls.load_defs(*these) + for i in pkg_resources.iter_entry_points(Feature.ENTRY_POINT): + loaded = i.load() + if issubclass(loaded, Feature) and loaded.NAME in these: + loading_classes.append(loaded()) + self = cls(*loading_classes) + for name in these: + if not name in self.names(): + raise KeyError('%s was not found in (%s)' % \ + (repr(name), ', '.join(map(str, loading_classes)))) + if not self.names(): + raise KeyError('No features were loaded') + return self + + @classmethod + def load_defs(cls, *args: str): + defs = [] + no_def = [arg for arg in args if not arg.startswith('def:')] + for arg in args: + if arg.startswith('def:'): + defs.append(cls.load_def(*arg.replace('def:', '').split(':'))) + return no_def, defs + + @classmethod + def load_def(cls, name: str, dtype: str, length: str): + return DefFeature(name, cls.convert_dtype(dtype), int(length)) + + @classmethod + def convert_dtype(cls, dtype: str): + found = pydoc.locate(dtype) + if found is None: + raise TypeError('Failed to convert_dtype %r' % (dtype,)) + return found diff --git a/dffml/feature/log.py b/dffml/feature/log.py new file mode 100644 index 0000000000..d153e40552 --- /dev/null +++ b/dffml/feature/log.py @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +'''Logging''' +import logging +LOGGER = logging.getLogger(__package__) diff --git a/dffml/log.py b/dffml/log.py new file mode 100644 index 0000000000..d153e40552 --- /dev/null +++ b/dffml/log.py @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +'''Logging''' +import logging +LOGGER = 
logging.getLogger(__package__) diff --git a/dffml/model/__init__.py b/dffml/model/__init__.py new file mode 100644 index 0000000000..4e2783a7b6 --- /dev/null +++ b/dffml/model/__init__.py @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +''' +All models registered to the dffml.model entry point using setuptools are +derived from the Model class. To add a model, create a module which has a +setup.py which specifies where to find your Model subclass within your module. + +>>> setup( +>>> name='mymodel', +... +>>> entry_points={ +>>> 'dffml.model': [ +>>> 'mymodel = mymodel:MyModel', +>>> ], +>>> }, +>>> ) +''' +from .model import Model + +# Declares dffml.model as a namespace package +__import__('pkg_resources').declare_namespace(__name__) diff --git a/dffml/model/log.py b/dffml/model/log.py new file mode 100644 index 0000000000..d153e40552 --- /dev/null +++ b/dffml/model/log.py @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +'''Logging''' +import logging +LOGGER = logging.getLogger(__package__) diff --git a/dffml/model/model.py b/dffml/model/model.py new file mode 100644 index 0000000000..c16a5546ac --- /dev/null +++ b/dffml/model/model.py @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +''' +Model subclasses are responsible for training themselves on repos, making +predictions about the classifications of repos, and assessing their prediction +accuracy. +''' +import abc +from typing import AsyncIterator, Tuple, Any, List, Optional + +from ..repo import Repo +from ..source import Sources +from ..feature import Features +from ..accuracy import Accuracy +from ..util.entrypoint import Entrypoint + +class Model(abc.ABC, Entrypoint): + ''' + Abstract base class which should be derived from and implmented using + various machine learning frameworks or concepts. 
+ ''' + + ENTRY_POINT = 'dffml.model' + + def __init__(self, model_dir: Optional[str] = None) -> None: + super().__init__() + self.model_dir = model_dir + + @abc.abstractmethod + async def train(self, sources: Sources, features: Features, + classifications: List[Any], steps: int, num_epochs: int): + ''' + Train using repos as the data to learn from. + ''' + raise NotImplementedError() + + @abc.abstractmethod + async def accuracy(self, sources: Sources, features: Features, + classifications: List[Any]) -> Accuracy: + ''' + Evaluates the accuracy of our model after training using the input repos + as test data. + ''' + raise NotImplementedError() + + @abc.abstractmethod + async def predict(self, repos: AsyncIterator[Repo], features: Features, + classifications: List[Any]) -> \ + AsyncIterator[Tuple[Repo, Any, float]]: + ''' + Uses trained data to make a prediction about the quality of a repo. + ''' + raise NotImplementedError() + yield (Repo(''), '', 0.0) + + @classmethod + def installed(cls): + return {key: model() for key, model in cls.load().items()} diff --git a/dffml/port/__init__.py b/dffml/port/__init__.py new file mode 100644 index 0000000000..28b9141a86 --- /dev/null +++ b/dffml/port/__init__.py @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +''' +All ports registered to the dffml.port entry point using setuptools are +derived from the Port class. To add a port, create a module which has a +setup.py which specifies where to find your Port subclass within your module. + +>>> setup( +>>> name='myport', +... 
class JSON(Port):
    '''
    Imports and exports repos in JSON format
    '''

    async def export_fd(self, source: Source, fd):
        '''
        Serialize every repo in the source to ``fd`` as a single JSON object
        keyed by each repo's src_url.
        '''
        json.dump({repo.src_url: repo.dict() async for repo in source.repos()},
                  fd)

    async def import_fd(self, source: Source, fd):
        '''
        Load a JSON object of ``{src_url: repo_data}`` from ``fd`` and update
        the source with a Repo built from each entry.
        '''
        # BUG FIX: iterating a dict directly yields only its keys, so the
        # original ``for src_url, data in json.load(fd)`` tried to unpack
        # each key string as a (key, value) pair. Use .items() so both the
        # src_url and its repo data are received.
        for src_url, data in json.load(fd).items():
            await source.update(Repo(src_url, data=data))
+''' +import abc + +from ..source import Source +from ..util.entrypoint import Entrypoint + +class Port(abc.ABC, Entrypoint): + ''' + Port repos into the format the porter understands + ''' + + ENTRY_POINT = 'dffml.port' + + @abc.abstractmethod + async def export_fd(self, source: Source, fd): + ''' + Export repos + ''' + + @abc.abstractmethod + async def import_fd(self, source: Source, fd): + ''' + Import repos + ''' + + async def export_to_file(self, source: Source, filename: str): + with open(filename, 'w') as fd: + await self.export_fd(source, fd) + + async def import_from_file(self, source: Source, filename: str): + with open(filename, 'r') as fd: + await self.import_fd(source, fd) diff --git a/dffml/repo.py b/dffml/repo.py new file mode 100644 index 0000000000..b4a7b1fe43 --- /dev/null +++ b/dffml/repo.py @@ -0,0 +1,210 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +''' +Information on the software to evaluate is stored in a Repo instance. +''' +import os +from datetime import datetime +from typing import Optional, List, Dict, Any, AsyncIterator + +from .log import LOGGER + +LOGGER = LOGGER.getChild('repo') + +class RepoPrediction(dict): + + EXPORTED = ['classification', 'confidence'] + + def __init__(self, *, + confidence: float = 0.0, + classification: Any = '') -> None: + self['confidence'] = confidence + self['classification'] = classification + + @property + def confidence(self): + return self['confidence'] + + @property + def classification(self): + return self['classification'] + + def dict(self): + if not self: + return [] + return self + + def __len__(self): + if self['confidence'] == 0.0 and not self['classification']: + return 0 + return 2 + + def __bool__(self): + return bool(len(self)) + __nonzero__ = __bool__ + +class RepoData(object): + + DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ' + EXPORTED = ['src_url', 'features', 'classification', 'prediction'] + + def __init__(self, *, + src_url: Optional[str] = None, + features: 
Optional[Dict[str, Any]] = None, + classification: Optional[str] = None, + prediction: Optional[RepoPrediction] = None, + last_updated: Optional[datetime] = None) -> None: + # If the repo is not evaluated or predicted then don't report out a new + # value for last_updated + self.last_updated_default = datetime.now() + if src_url is None: + src_url = '' + if features is None: + features = {} + if classification is None: + classification = '' + if prediction is None: + prediction = RepoPrediction() + if last_updated is None: + last_updated = self.last_updated_default + if isinstance(last_updated, str): + last_updated = datetime.strptime(last_updated, self.DATE_FORMAT) + self.src_url = src_url + self.features = features + self.classification = classification + self.prediction = RepoPrediction(**prediction) + self.last_updated = last_updated + + def dict(self): + data = {key: getattr(self, key, []) for key in self.EXPORTED \ + if len(getattr(self, key, []))} + # Do not report if there has been no change since instantiation to + # a default time value + if self.last_updated != self.last_updated_default: + data['last_updated'] = self.last_updated.strftime(self.DATE_FORMAT) + return data + + def __repr__(self): + return str(self.dict()) + +class Repo(object): + ''' + Manages feature independent information and actions for a repo. 
+ ''' + + REPO_DATA = RepoData + + def __init__(self, src_url: str, *, + data: Optional[Dict[str, Any]] = None, + extra: Optional[Dict[str, Any]] = None) -> None: + if data is None: + data = {} + if extra is None: + extra = {} + data['src_url'] = src_url + if 'extra' in data: + # Prefer extra from init arguments to extra stored in data + data['extra'].update(extra) + extra = data['extra'] + del data['extra'] + self.data = self.REPO_DATA(**data) + self.extra = extra + + def dict(self): + data = self.data.dict() + data['extra'] = self.extra + return data + + def __repr__(self): + return str(self.dict()) + + def __str__(self): + if not self.data.prediction: + confidence, classification = (0.0, 'Undetermined') + else: + confidence, classification = (self.data.prediction.confidence, + self.data.prediction.classification) + header = ('%-11s (%2.1f%% confidence) %s' % \ + (classification, 100.0 * confidence, self.src_url)) + if self.classified(): + header += ' classified as: %s' % (self.classification(),) + if len(self.extra.keys()): + header += ' ' + str(self.extra) + return '\n'.join([header] + \ + [('%-30s%s' % (feature, str(results))) \ + for feature, results in self.features().items()]).rstrip() + + def merge(self, repo: 'Repo'): + data = self.data.dict() + data.update(repo.data.dict()) + self.data = self.REPO_DATA(**data) + self.extra.update(repo.extra) # type: ignore + + @property + def src_url(self) -> str: + return self.data.src_url + + def evaluated(self, results: Dict[str, Any], overwrite=False): + ''' + Updates features with the result dict + ''' + if overwrite: + self.data.features = results + else: + self.data.features.update(results) + self.data.last_updated = datetime.now() + LOGGER.info('Evaluated %s %r', self.data.src_url, self.data.features) + + def features(self, subset: List[str] = []) -> Dict[str, Any]: + ''' + Returns all features for the repo or the subset specified. 
+ ''' + if not subset: + return self.data.features + for name in subset: + if not name in self.data.features or self.data.features[name] is None: + return {} + return {name: self.data.features[name] for name in subset} + + def predicted(self, classification: Any, confidence: float): + ''' + Set the prediction for this repo + ''' + self.data.prediction = RepoPrediction( + classification=classification, + confidence=float(confidence)) + self.data.last_updated = datetime.now() + + def prediction(self) -> RepoPrediction: + ''' + Get the prediction for this repo + ''' + return self.data.prediction + + def classify(self, classification): + ''' + Set the classification for the repo + ''' + self.data.classification = classification + + def classified(self): + ''' + Return True if the repo has a classification + ''' + if self.data.classification == '': + return False + return True + + def classification(self): + ''' + Repo classification or value error if unclassified + ''' + if not self.classified(): + raise ValueError('Unclassified') + return self.data.classification + + async def asyncgen(self) -> AsyncIterator['Repo']: + ''' + Async gen for a single repo + ''' + yield self diff --git a/dffml/source/__init__.py b/dffml/source/__init__.py new file mode 100644 index 0000000000..28d6a8f026 --- /dev/null +++ b/dffml/source/__init__.py @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +''' +All sources registered to the dffml.source entry point using setuptools are +derived from the Source class. To add a source, create a module which has a +setup.py which specifies where to find your Source subclass within your module. + +>>> setup( +>>> name='mysource', +... 
class CSVSource(FileSource, MemorySource):
    '''
    Uses a CSV file as the source of repo feature data.

    Each CSV row becomes one Repo keyed by its zero-based row index; a
    'classification' column, when present, is stored as the repo's
    classification rather than a feature.
    '''

    async def load_fd(self, fd):
        '''
        Parses a CSV stream into Repo instances
        '''
        self.mem = {}
        for i, data in enumerate(csv.DictReader(fd, dialect='strip')):
            for key, value in data.items():
                # Interpret each cell as a Python literal where possible
                # (ints, floats, lists, ...), otherwise keep the raw string.
                try:
                    data[key] = ast.literal_eval(value)
                except (SyntaxError, ValueError):
                    data[key] = value
            if data.get('classification') is not None:
                # Classification is metadata, not a feature: remove it from
                # the feature dict before building the repo.
                classification = data.pop('classification')
                repo = Repo(str(i), data={'features': data,
                                          'classification': str(classification)})
            else:
                repo = Repo(str(i), data={'features': data})
            self.mem[repo.src_url] = repo
        LOGGER.debug('%r loaded %d records', self, len(self.mem))

    async def _close(self):
        # Saving back to CSV is not implemented; overriding FileSource._close
        # keeps the default dump path from rewriting the input file.
        LOGGER.debug('%r save to file not implemented', self)

    async def dump_fd(self, fd):
        # Intentionally a no-op until CSV saving is implemented (see _close).
        pass
class FileSource(Source):
    '''
    FileSource reads from and writes to a file on open / close.

    The source string may carry a ':ro' suffix to mark the backing file
    read-only, in which case nothing is written back on close.
    '''

    @property
    def readonly(self) -> bool:
        '''
        True when the source string ends with the ':ro' (read-only) suffix.
        '''
        # Idiom fix: the original reversed both strings and used startswith
        # (``self.src[::-1].startswith((':ro')[::-1])``); endswith expresses
        # exactly the same check directly.
        return self.src.endswith(':ro')

    @property
    def filename(self):
        '''
        Path to the file used for storage on disk, with any ':ro' suffix
        stripped. (Docstring fix: this is not necessarily a JSON file; the
        on-disk format is decided by the load_fd/dump_fd implementation.)
        '''
        if self.readonly:
            return self.src[:-3]
        return self.src

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__qualname__, self.filename)

    async def open(self):
        # Shield so cancellation cannot interrupt a partially-completed load
        await asyncio.shield(self._open())

    async def _open(self):
        # A missing file is not an error: start with an empty dataset.
        # NOTE(review): assumes a MemorySource-style ``mem`` mapping is mixed
        # in by subclasses — confirm against CSVSource/JSONSource.
        if not os.path.isfile(self.filename):
            self.mem = {}
            return
        with open(self.filename, 'r') as fd:
            await self.load_fd(fd)

    async def close(self):
        # Shield so cancellation cannot interrupt a partially-completed save
        await asyncio.shield(self._close())

    async def _close(self):
        if not self.readonly:
            with open(self.filename, 'w') as fd:
                await self.dump_fd(fd)

    @abc.abstractmethod
    async def load_fd(self, fd):
        '''
        Parse the open file object into repos
        '''
        pass # pragma: no cover

    @abc.abstractmethod
    async def dump_fd(self, fd):
        '''
        Serialize repos to the open file object
        '''
        pass # pragma: no cover
class MemorySource(Source):
    '''
    Stores repos in a dict in memory
    '''

    def __init__(self, src: str) -> None:
        super().__init__(src)
        # Mapping of src_url -> Repo; mutations are guarded by self.lock
        self.mem: Dict[str, Repo] = {}
        self.lock = asyncio.Lock()

    async def update(self, repo):
        '''
        Insert or replace the repo keyed by its src_url
        '''
        async with self.lock:
            self.mem[repo.src_url] = repo

    async def repos(self) -> AsyncIterator[Repo]:
        '''
        Yield every repo currently held in memory
        '''
        # NOTE No lock used here because sometimes we iterate and update
        # Feel free to debate this by opening an issue.
        for stored in self.mem.values():
            yield stored

    async def repo(self, src_url: str) -> Repo:
        '''
        Return the stored repo for src_url, or a fresh empty Repo
        '''
        async with self.lock:
            return self.mem.get(src_url, Repo(src_url))
+ ''' + + def __init__(self, *args: Repo, src: str = '') -> None: + super().__init__(src) + self.mem = {repo.src_url: repo for repo in args} diff --git a/dffml/source/source.py b/dffml/source/source.py new file mode 100644 index 0000000000..ad16509a1f --- /dev/null +++ b/dffml/source/source.py @@ -0,0 +1,163 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +''' +Source subclasses are responsible for generating an integer value given an open +source project's source URL. +''' +import abc +import asyncio +from typing import AsyncIterator, Dict, List, Optional, Callable + +from .log import LOGGER +from ..repo import Repo, RepoData +from ..util.asynchelper import AsyncContextManagerList +from ..util.entrypoint import Entrypoint + +class Source(abc.ABC, Entrypoint): + ''' + Abstract base class for all sources. New sources must be derived from this + class and implement the repos method. + ''' + + ENTRY_POINT = 'dffml.source' + + def __init__(self, src: str) -> None: + self.src = src + + @abc.abstractmethod + async def update(self, repo: Repo): + ''' + Updates a repo for a source + ''' + + @abc.abstractmethod + async def repos(self) -> AsyncIterator[Repo]: + ''' + Returns a list of repos retrieved from self.src + ''' + # mypy ignores AsyncIterator[Repo], therefore this is needed + yield Repo('') # pragma: no cover + + @abc.abstractmethod + async def repo(self, src_url: str): + ''' + Get a repo from the source or add it if it doesn't exist + ''' + + @classmethod + def load_from_dict(cls, sources: Dict[str, str]): + ''' + Loads each source requested and instantiates it with its src_url. 
+ ''' + loaded: Dict[str, Source] = {} + for src_url, name in sources.items(): + loaded[src_url] = cls.load(name)(src_url) + return loaded + + def __repr__(self): + return '%s(%r)' % (self.__class__.__qualname__, self.src) + + async def open(self): + return + + async def close(self): + return + + async def __aenter__(self): + await self.open() + # TODO Context management + return self + + async def __aexit__(self, exc_type, exc_value, traceback): + await self.close() + +class Sources(AsyncContextManagerList): + + async def update(self, repo: Repo): + ''' + Updates a repo for a source + ''' + LOGGER.debug('Updating %r: %r', repo.src_url, repo.dict()) + for source in self: + await source.update(repo) + + async def repos(self, validation: Optional[Callable[[Repo], bool]] = None) \ + -> AsyncIterator[Repo]: + ''' + Retrieves repos from all sources + ''' + for source in self: + async for repo in source.repos(): + if validation is None or validation(repo): + yield repo + + async def repo(self, src_url: str): + ''' + Retrieve and or register repo will all sources + ''' + repo = Repo(src_url) + for source in self: + repo.merge(await source.repo(src_url)) + return repo + + async def classified_with_features(self, + features: List[str]) -> AsyncIterator[Repo]: + ''' + Returns all classified repos which have the requested features + ''' + async for repo in self.repos(lambda repo: \ + bool(repo.features(features) and repo.classified())): + yield repo + + async def unclassified_with_features(self, + features: List[str]) -> AsyncIterator[Repo]: + ''' + Returns all unclassified repos which have the requested features + ''' + async for repo in self.repos(lambda repo: \ + bool(repo.features(features) and not repo.classified())): + yield repo + + async def with_features(self, features: List[str]) -> AsyncIterator[Repo]: + ''' + Returns all repos which have the requested features + ''' + async for repo in self.repos(lambda repo: bool(repo.features(features))): + yield repo + +class 
SubsetSources(Sources): + ''' + Restricts access to a subset of repos during iteration based on their keys. + ''' + + def __init__(self, *args: Source, keys: Optional[List[str]] = None) \ + -> None: + super().__init__(*args) + if keys is None: + keys = [] + self.keys = keys + + async def repos(self, validation: Optional[Callable[[Repo], bool]] = None) \ + -> AsyncIterator[Repo]: + for key in self.keys: + repo = await self.repo(key) + if validation is None or validation(repo): + yield repo + +class ValidationSources(Sources): + ''' + Restricts access to a subset of repos during iteration based on a validation + function. + ''' + + def __init__(self, *args: Source, validation: Callable[[Repo], bool]) \ + -> None: + super().__init__(*args) + self.validation = validation + + async def repos(self, validation: Optional[Callable[[Repo], bool]] = None) \ + -> AsyncIterator[Repo]: + async for repo in super().repos(): + if self.validation(repo) \ + and (validation is None or validation(repo)): + yield repo diff --git a/dffml/util/__init__.py b/dffml/util/__init__.py new file mode 100644 index 0000000000..5bbefb030a --- /dev/null +++ b/dffml/util/__init__.py @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation diff --git a/dffml/util/asynchelper.py b/dffml/util/asynchelper.py new file mode 100644 index 0000000000..e684f92e04 --- /dev/null +++ b/dffml/util/asynchelper.py @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +''' +WARNING: concurrent can be much slower for quick tasks. It is best used for long +running concurrent tasks. 
class AsyncContextManagerList(list):
    '''
    A list whose items are themselves async context managers: entering the
    list enters every item in order, and exiting it exits every item in
    order.
    '''

    def __init__(self, *args):
        # list() accepts any iterable, so the args tuple can be passed
        # straight through.
        super().__init__(args)

    async def __aenter__(self):
        for managed in self:
            await managed.__aenter__()
        # TODO Context management
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        for managed in self:
            await managed.__aexit__(exc_type, exc_value, traceback)
+ ''' + methods = inspect.getmembers(self, predicate=inspect.ismethod) + for name, method in methods: + if inspect.iscoroutinefunction(method) \ + and (name.startswith('test_') \ + or name in ['setUp', 'tearDown']): + setattr(self, name, self.async_wrapper(method)) + return super().run(result=result) diff --git a/dffml/util/cli.py b/dffml/util/cli.py new file mode 100644 index 0000000000..659c2782ad --- /dev/null +++ b/dffml/util/cli.py @@ -0,0 +1,241 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +import os +import sys +import copy +import json +import asyncio +import inspect +import logging +import argparse +from typing import Optional + +from .log import LOGGER +from ..repo import Repo +from ..port import Port +from ..feature import Feature, Features +from ..source import Source, Sources, JSONSource +from ..model import Model + +LOGGER = LOGGER.getChild('cli') + +class ParseSourcesAction(argparse.Action): + + def __call__(self, parser, namespace, values, option_string=None): + if not isinstance(values, list): + values = [values] + parse = dict(map(lambda source: source.split('=', maxsplit=2)[::-1], + values)) + values = Sources(*list(Source.load_from_dict(parse).values())) + setattr(namespace, self.dest, values) + +class ParseFeaturesAction(argparse.Action): + + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, self.dest, Features.load(*values)) + +class ParseModelAction(argparse.Action): + + def __call__(self, parser, namespace, value, option_string=None): + setattr(namespace, self.dest, Model.load(value)()) + +class ParsePortAction(argparse.Action): + + def __call__(self, parser, namespace, value, option_string=None): + setattr(namespace, self.dest, Port.load(value)()) + +class ParseLoggingAction(argparse.Action): + + def __call__(self, parser, namespace, value, option_string=None): + setattr(namespace, self.dest, + getattr(logging, value.upper(), logging.INFO)) + 
class Arg(dict):
    '''
    A command line argument: stores the flag name plus the keyword
    arguments that will be forwarded to argparse's add_argument.
    '''

    def __init__(self, name: str, **kwargs) -> None:
        super().__init__(**kwargs)
        self.name = name

    def modify(self, name: Optional[str] = None, **kwargs):
        '''
        Return a copy of this Arg with updated keyword arguments and,
        optionally, a new flag name. The original is left untouched.
        '''
        updated = copy.copy(self)
        updated.update(kwargs)
        if name is not None:
            updated.name = name
        return updated

class JSONEncoder(json.JSONEncoder):
    '''
    Encodes dffml types to JSON representation.
    '''

    def default(self, obj):
        # Repos serialize to their dict form, Features to their NAME
        if isinstance(obj, Repo):
            return obj.dict()
        elif isinstance(obj, Feature):
            return obj.NAME
        return json.JSONEncoder.default(self, obj)

class CMD(object):
    '''
    Base class for CLI commands. Class attributes named arg_* which are
    Arg instances are collected into argparse arguments; matching keyword
    arguments passed to __init__ are set as instance attributes.
    '''

    JSONEncoder = JSONEncoder

    arg_log = Arg('-log', help='Logging level', action=ParseLoggingAction,
                  required=False, default=logging.INFO)

    def __init__(self, **kwargs) -> None:
        for name, method in [(name.lower().replace('arg_', ''), method)
                             for name, method in inspect.getmembers(self)
                             if isinstance(method, Arg)]:
            # Accept either the attribute-derived name or the raw flag name
            if name not in kwargs and method.name in kwargs:
                name = method.name
            # Fall back to the Arg's declared default when not supplied
            if name not in kwargs and 'default' in method:
                kwargs[name] = method['default']
            if name in kwargs:
                LOGGER.debug('Setting %s.%s = %r', self, name, kwargs[name])
                setattr(self, name, kwargs[name])
            else:
                LOGGER.debug('Ignored %s.%s', self, name)

    async def __aenter__(self):
        pass

    async def __aexit__(self, exc_type, exc_value, traceback):
        pass

    @classmethod
    async def parse_args(cls, *args):
        '''
        Build a Parser for this command tree and parse args with it.
        '''
        parser = Parser()
        parser.add_subs(cls)
        return parser, parser.parse_args(args)

    @classmethod
    async def cli(cls, *args):
        '''
        Parse arguments, instantiate the selected sub command, and run it.
        Returns the command's result, or None when only help was printed.
        '''
        self = cls()
        parser, args = await self.parse_args(*args)
        if getattr(args, 'cmd', None) is None:
            parser.print_help()
            return None
        if getattr(args.cmd, 'run', None) is None:
            args.parser.print_help()
            return None
        cmd = args.cmd(**self.sanitize_args(vars(args)))
        async with cmd:
            # Async generator commands are drained into a list
            if inspect.isasyncgenfunction(cmd.run):
                return [res async for res in cmd.run()]
            else:
                return await cmd.run()

    def sanitize_args(self, args):
        '''
        Remove CMD internals from arguments passed to subclasses of CMD.
        '''
        for rm in ['cmd', 'parser', 'log']:
            if rm in args:
                del args[rm]
        return args

    @classmethod
    def main(cls, loop=None, argv=None):
        '''
        Runs cli commands in asyncio loop and outputs in appropriate format
        '''
        # Resolve defaults at call time: evaluating asyncio.get_event_loop()
        # or sys.argv in the signature would bind them once at import time
        if loop is None:
            loop = asyncio.get_event_loop()
        if argv is None:
            argv = sys.argv
        result = None
        try:
            result = loop.run_until_complete(cls.cli(*argv[1:]))
        except KeyboardInterrupt: # pragma: no cover
            pass # pragma: no cover
        loop.run_until_complete(loop.shutdown_asyncgens())
        loop.close()
        if result is not None:
            json.dump(result, sys.stdout, sort_keys=True, indent=4,
                      separators=(',', ': '), cls=cls.JSONEncoder)
            print()

class Parser(argparse.ArgumentParser):

    def add_subs(self, add_from: CMD):
        '''
        Add sub commands and arguments recursively
        '''
        # Only one subparser should be created even if multiple sub commands
        subparsers = None
        for name, method in [(name.lower().replace('_', ''), method)
                             for name, method in inspect.getmembers(add_from)]:
            if inspect.isclass(method) and issubclass(method, CMD):
                if subparsers is None: # pragma: no cover
                    subparsers = self.add_subparsers() # pragma: no cover
                parser = subparsers.add_parser(name, help=None
                        if method.__doc__ is None else method.__doc__.strip())
                parser.set_defaults(cmd=method)
                parser.set_defaults(parser=parser)
                parser.add_subs(method) # type: ignore
            elif isinstance(method, Arg):
                self.add_argument(method.name, **method)
class ListEntrypoint(CMD):
    '''
    Subclass this with an Entrypoint to display all registered classes.
    '''

    def display(self, cls):
        '''
        Print out the loaded but uninstantiated class
        '''
        # Show the docstring alongside the name when one is present
        if not cls.__doc__ is None:
            print('%s:' % (cls.__qualname__))
            print(cls.__doc__.rstrip())
        else:
            print('%s' % (cls.__qualname__))
        print()

    async def run(self):
        '''
        Display all classes registered with the entrypoint
        '''
        for cls in self.ENTRYPOINT.load():
            self.display(cls)

class FeaturesCMD(CMD):
    '''
    Set timeout for features
    '''

    # -features parses into a Features collection; -timeout bounds how long
    # each feature evaluation may run
    arg_features = Arg('-features', nargs='+', required=True,
                       default=Features(), action=ParseFeaturesAction)
    arg_timeout = Arg('-timeout', help='Feature evaluation timeout',
                      required=False, default=Features.TIMEOUT, type=int)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Propagate the parsed timeout onto the features collection
        self.features.timeout = self.timeout

class SourcesCMD(CMD):
    '''
    Commands which load and save repos via one or more sources.
    '''

    # Defaults to a JSON source stored in the user's cache directory
    arg_sources = Arg('-sources', help='Sources for loading and saving',
                      nargs='+', default=Sources(JSONSource(os.path.join(
                          os.path.expanduser('~'), '.cache', 'dffml.json'))),
                      action=ParseSourcesAction)
class ModelCMD(CMD):
    '''
    Set a models model dir.
    '''

    arg_model = Arg('-model', help='Model used for ML',
                    action=ParseModelAction, required=True)
    # Where the model stores trained state; defaults to the user's cache
    arg_model_dir = Arg('-model_dir', help='Model directory for ML',
                        default=os.path.join(os.path.expanduser('~'),
                                             '.cache', 'dffml'))

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Propagate the parsed directory onto the model instance
        self.model.model_dir = self.model_dir

class PortCMD(CMD):
    '''
    Commands which take a positional Port implementation.
    '''

    arg_port = Arg('port', action=ParsePortAction)

class KeysCMD(CMD):
    '''
    Commands which operate on one or more repo keys.
    '''

    arg_keys = Arg('-keys', help='Key used for source lookup and evaluation',
                   nargs='+', required=True)

class Entrypoint(object):
    '''
    Uses the pkg_resources.iter_entry_points on the ENTRY_POINT of the class
    '''

    ENTRY_POINT = 'util.entrypoint'

    @classmethod
    def load(cls, loading=None):
        '''
        Loads all installed loading and returns them as a list. Sources to be
        loaded should be registered to ENTRY_POINT via setuptools.

        When ``loading`` is given, return only the entry point registered
        under that name; raise KeyError when no such entry point exists.
        '''
        loading_classes = []
        for i in pkg_resources.iter_entry_points(cls.ENTRY_POINT):
            loaded = i.load()
            # Only entry points which subclass this class are considered
            if issubclass(loaded, cls):
                loading_classes.append(loaded)
                if loading is not None and i.name == loading:
                    return loaded
        if loading is not None:
            raise KeyError('%s was not found in (%s)' % \
                    (repr(loading), ', '.join(list(map(str, loading_classes)))))
        return loading_classes

    @classmethod
    def load_multiple(cls, to_load: List[str]):
        '''
        Loads each class requested without instantiating it.
        '''
        return {name: cls.load(name) for name in to_load}
class Watchdog(object):
    '''
    Collects (event, msg) pairs on an internal queue and replays them to a
    consumer via events(). The stream is terminated by a 'done' event.
    '''

    LOGGER = LOGGER.getChild('Watchdog')

    def __init__(self) -> None:
        '''
        Create the unbounded queue of (event, msg) tuples.
        '''
        self.queue: asyncio.Queue = asyncio.Queue()

    async def enqueue(self, event, msg):
        '''
        Queue one (event, msg) pair for delivery to the consumer.
        '''
        self.LOGGER.debug('put: %r', (event, msg))
        await self.queue.put((event, msg))

    async def events(self):
        '''
        Yield queued (event, msg) pairs in order. The 'done' event is
        yielded last, after the queue has been fully joined.
        '''
        while True:
            event, msg = await self.queue.get()
            self.LOGGER.debug('got: %r', (event, msg))
            self.queue.task_done()
            if event == 'done':
                # Wait for all producers before emitting the final event
                await self.queue.join()
                yield event, msg
                return
            yield event, msg
class Task(object):
    '''
    A unit of work whose progress can be observed by any number of
    Watchdogs. Every event is recorded so watchdogs added late still
    receive the full history.
    '''

    LOGGER = LOGGER.getChild('Task')

    def __init__(self, func = None, key: Any = '') -> None:
        # func, when given, is called with task=self to build the coroutine
        coro = None
        if not func is None:
            coro = func(task=self)
        if not key:
            key = coro
        self.__key = key
        self.__coro = coro
        self.__lock = asyncio.Lock()
        # Previous updates so added watchdogs get all updates ever
        self.__events: List[Any] = []
        self.__watchdogs: List[Watchdog] = []

    @property
    def key(self):
        # Identifier used by Monitor to deduplicate running tasks
        return self.__key

    @property
    def coro(self):
        # The coroutine Monitor awaits to run this task
        return self.__coro

    async def add_watchdog(self, watchdog: Watchdog):
        '''
        Register a watchdog and replay every event recorded so far to it.
        '''
        async with self.__lock:
            self.__watchdogs.append(watchdog)
            self.LOGGER.debug('[%r] adding watcher', self.__key)
            self.LOGGER.debug('[%r] adding watcher backlog: %r', self.__key,
                              self.__events)
            self.LOGGER.debug('[%r] watchers: %r', self.__key,
                              self.__watchdogs)
            async for event, msg in self.get_events():
                await watchdog.enqueue(event, msg)

    async def completed(self, result):
        '''
        Record the final result, notify all watchdogs, and release them.
        '''
        async with self.__lock:
            self.LOGGER.debug('[%r] completed', self.__key)
            await self.append_event('done', result)
            for watchdog in self.__watchdogs:
                await watchdog.enqueue('done', result)
            self.__watchdogs = []

    async def update(self, msg, event='update'):
        '''
        Record an event and fan it out to all registered watchdogs.
        '''
        async with self.__lock:
            self.LOGGER.debug('[%r] sending %s: %r', self.__key, event, msg)
            await self.append_event(event, msg)
            for watchdog in self.__watchdogs:
                await watchdog.enqueue(event, msg)

    async def log(self, fmt, *args):
        # Convenience wrapper emitting a 'log' event
        await self.update(fmt % args, event='log')

    async def append_event(self, event, msg):
        # Record history so late watchdogs can be brought up to date
        self.__events.append((event, msg))

    async def get_events(self):
        # Yield the recorded history in order
        for event, msg in self.__events:
            yield event, msg

    async def complete(self):
        '''
        Wait until the task finishes and return its result.
        '''
        async for event, msg in self.events():
            if event == 'done':
                self.LOGGER.debug('[%r] complete %r', self.__key, msg)
                return msg

    async def events(self):
        '''
        Yield all events for this task, past and future, ending with 'done'.
        '''
        watchdog = Watchdog()
        await self.add_watchdog(watchdog)
        async for event, msg in watchdog.events():
            self.LOGGER.debug('[%r] got event %r: %r', self.__key, event, msg)
            yield event, msg

    async def status(self):
        # Yield only 'update' payloads until the task is done
        async for event, msg in self.events():
            if event == 'done':
                break
            elif event == 'update':
                yield msg

    async def statuses(self):
        # All status updates as a list (blocks until done)
        return [msg async for msg in self.status()]

    async def logs(self):
        # All 'log' payloads as a list (blocks until done)
        return [msg async for event, msg in self.events() if event == 'log']
class Monitor(object):
    '''
    Tracks running Tasks by key, letting callers await results or observe
    progress without starting duplicate work.
    '''

    LOGGER = LOGGER.getChild('Monitor')

    def __init__(self):
        # Maps task key -> running Task; guarded by self.lock
        self.in_progress = {}
        self.lock = asyncio.Lock()
        self.log_lock = asyncio.Lock()

    async def task(self, key: Any):
        '''
        Look up the running task for key; None when not running.
        '''
        task = None
        async with self.lock:
            task = self.in_progress.get(key, None)
            if task is None:
                return
        return task

    async def complete(self, key: Any):
        '''
        Wait until the task registered under key has finished.
        '''
        task = await self.task(key)
        if task is None:
            return
        await task.complete()

    async def events(self, key: Any):
        # Forward the task's event stream; ends immediately if not running
        task = await self.task(key)
        if task is None:
            return
        async for event, msg in task.events():
            yield event, msg

    async def status(self, key: Any):
        # Yield status updates for the task registered under key
        task = None
        async with self.lock:
            task = self.in_progress.get(key, None)
            if task is None:
                return
        async for msg in task.status():
            yield msg

    async def statuses(self, key: Any):
        # All status updates as a list (blocks until the task is done)
        return [msg async for msg in self.status(key)]

    async def log_status(self, key: Any):
        # Forward status updates, logging each one as it passes through
        async for msg in self.status(key):
            self.LOGGER.debug('status [%r]: %r', key, msg)
            yield msg
        self.LOGGER.debug('log status [%r] is done', key)

    async def run_task(self, task: Task):
        '''
        Await the task's coroutine, then record its result and deregister it.
        '''
        self.LOGGER.debug('Started running %r', task.key)
        result = await task.coro # type: ignore
        self.LOGGER.debug('Done running %r', task.key)
        async with self.lock:
            await task.completed(result)
            del self.in_progress[task.key]
            self.LOGGER.debug('Removed running %r', task.key)

    async def start(self, func, key: Any = '', mktask = Task):
        '''
        Start func as a task under key unless one is already running.
        Returns the new Task, or None when key is already in progress.
        '''
        async with self.lock:
            if key in self.in_progress:
                self.LOGGER.debug('Already running %r', key)
                return
            task = mktask(func, key)
            self.in_progress[task.key] = task
            asyncio.ensure_future(self.run_task(task))
            return task
class TempDir(object):
    '''
    Creates and deletes temporary directories. Removes any created directories
    when the program using this class terminates (see rmtempdirs for details).
    '''

    # No suffix by default; subclasses may override with a str.
    # Was annotated `SUFFIX: str = None`, which contradicted the value.
    SUFFIX = None
    PREFIX: str = 'dffml_'

    def __init__(self):
        self.suffix = self.__class__.SUFFIX
        self.prefix = self.__class__.PREFIX
        # Directories created so far, removed by rmtempdirs()
        self.dirs: List[str] = []

    def mktempdir(self):
        '''
        Creates a temporary directory using TempDir's SUFFIX and PREFIX.
        Adds the directory to the to be deleted queue.
        '''
        dirname = tempfile.mkdtemp(suffix=self.suffix, prefix=self.prefix)
        LOGGER.debug('Created directory %r', dirname)
        self.dirs.append(dirname)
        return dirname

    def rmtempdirs(self):
        '''
        Removes all created temporary directories. Intended to be invoked on
        termination (for example via __aexit__ or atexit.register) so no
        created directories are left behind.
        '''
        for rmdir in self.dirs:
            LOGGER.debug('Removing directory %r', rmdir)
            # OSError 39 sometimes if removal isn't attempted twice
            shutil.rmtree(rmdir, ignore_errors=True)
            shutil.rmtree(rmdir, ignore_errors=True)
        # Forget removed directories so a second call is a no-op
        self.dirs = []

    async def __aenter__(self):
        # Return self so `async with TempDir() as tempdir` binds the
        # instance instead of None
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        self.rmtempdirs()
b/feature/git/.gitattributes @@ -0,0 +1,2 @@ +# Auto detect text files and perform LF normalization +* text=auto diff --git a/feature/git/.gitignore b/feature/git/.gitignore new file mode 100644 index 0000000000..070ee81c83 --- /dev/null +++ b/feature/git/.gitignore @@ -0,0 +1,20 @@ +*.log +*.pyc +.cache/ +.coverage +.idea/ +.vscode/ +*.egg-info/ +build/ +dist/ +docs/build/ +venv/ +wheelhouse/ +*.egss +.mypy_cache/ +*.swp +.venv/ +.eggs/ +*.modeldir +*.db +htmlcov/ diff --git a/feature/git/LICENSE b/feature/git/LICENSE new file mode 100644 index 0000000000..8ce5aa9e27 --- /dev/null +++ b/feature/git/LICENSE @@ -0,0 +1,21 @@ +Copyright (c) 2017-2019 Intel + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
DFFML Features For Git Version Control are distributed under the
`MIT License `_
class GitAuthorsFeature(GitFeature):
    '''
    Counts the number of unique authors within the frequency.
    '''

    NAME: str = 'authors'

    async def git_parse(self, data):
        authors = []
        # Walk backwards through history one FREQUENCY period at a time
        for current in range(0, self.LENGTH * self.FREQUENCY.MONTHS,
                             self.FREQUENCY.MONTHS):
            current_authors = {}
            # %aN prints just the author name, one line per commit
            proc = await data.git.create('log',
                    '--pretty=format:%aN',
                    '--date', 'relative',
                    '--before', '%d months' % (current),
                    '--after', '%d months' % (current + \
                        self.FREQUENCY.MONTHS))
            while not proc.stdout.at_eof():
                line = await proc.stdout.readline()
                line = line.decode(errors='ignore').strip()
                if line != '':
                    # Only the set of keys matters; the 0 value is never read
                    current_authors.setdefault(line, 0)
            await stop(proc)
            # Unique author count for this period
            authors.append(len(current_authors))
        data.temp.setdefault(self.NAME, authors)
        await data.data.set(self.NAME, await self.calc(data))

    async def calc(self, data):
        return data.temp.get(self.NAME)
class GitClocFeature(GitMonthlyFeature):
    '''
    Count Lines Of Code

    Calculates, per month, the percentage of comment lines relative to
    comment plus code lines.
    '''

    NAME: str = 'cloc'
    BINARY: str = 'cloc'
    # Compatible but faster line counters, preferred when installed
    FASTER_THAN_CLOC = ['tokei']

    def __init__(self):
        super().__init__()
        self.binary = self.BINARY
        for binary in self.FASTER_THAN_CLOC:
            if inpath(binary):
                self.binary = binary

    async def applicable(self, data):
        # Needs both the counter binary on PATH and a git repo
        return inpath(self.binary) \
                and await GitMonthlyFeature.applicable(self, data)

    async def git_parse(self, data):
        if not data.temp.get('cloc_data', False):
            # One independent dict per month. The previous
            # [{'sum': 0}] * self.LENGTH aliased a SINGLE dict across every
            # slot, so each month's parse overwrote all of the others.
            data.temp.setdefault('cloc_data',
                                 [{'sum': 0} for _ in range(self.LENGTH)])
        await super().git_parse(data)

    async def month_parse(self, data, i):
        '''
        Run the counter binary on the checked out tree and store
        per-language line counts for month i.
        '''
        parsed = data.temp.get('cloc_data')
        proc = await create(self.binary, data.git.cwd)
        cols = []
        while not proc.stdout.at_eof():
            line = (await proc.stdout.readline()).decode().split()
            # Skip blank lines and horizontal rule rows
            if not line or line[0].startswith('-'):
                continue
            LOGGER.debug('%s line: %r', self.binary, line)
            if line[0].lower().startswith('lang'):
                # Header row names the numeric columns
                cols = [cat.lower() for cat in line[1:]]
                # Tokei -> cloc compatibility
                if 'comments' in cols:
                    cols[cols.index('comments')] = 'comment'
                continue
            if cols:
                # Leading non-numeric words form the row label
                header_cols = [word for word in line if not word.isdigit()]
                header = ''.join([c for c in '_'.join(header_cols).lower() \
                        if c.isalpha() or c == '_'])
                # Tokei -> cloc compatibility
                if header == 'total':
                    header = 'sum'
                parsed[i][header] = dict(zip(cols,
                        map(int, line[len(header_cols):])))
                LOGGER.debug('parsed[%d]: %r', i, parsed[i])
        await stop(proc)

    async def calc(self, data):
        '''
        Percentage of comment lines out of comment plus code lines, per
        month. Zero for every month when there are no lines at all.
        '''
        try:
            return [int(100 * month['sum']['comment'] / \
                    (month['sum']['comment'] + month['sum']['code']))
                    for month in (data.temp.get('cloc_data'))]
        except ZeroDivisionError:
            return [0 for month in (data.temp.get('cloc_data'))]
class GitCommitsFeature(GitFeature):
    '''
    Counts the number of commits within the frequency.
    '''

    NAME: str = 'commits'

    async def git_parse(self, data):
        commits = []
        # Count commit lines for each FREQUENCY period going back in time
        for current in range(0, self.LENGTH * self.FREQUENCY.MONTHS,
                             self.FREQUENCY.MONTHS):
            lines = 0
            proc = await data.git.create('log',
                    '--oneline', '--date', 'relative',
                    '--before', '%d months' % (current),
                    '--after', '%d months' % (current + \
                        self.FREQUENCY.MONTHS))
            # --oneline prints exactly one line per commit
            while not proc.stdout.at_eof():
                if (await proc.stdout.readline()) != b'':
                    lines += 1
            commits.append(lines)
            await stop(proc)
        data.temp.setdefault('commits', commits)
        await data.data.set('commits', await self.calc(data))

    async def calc(self, data):
        return data.temp.get('commits')
check_output(self.binary, *args, **kwargs, cwd=self.cwd) + + async def checkout(self, branch: str = ''): + if not branch: + branch = self.main_branch + return await self.check_output('checkout', '-f', branch) + + async def ls_remote(self, src_url): + return (await self._ls_remote(src_url) or await self._svn_info(src_url)) + + async def _svn_info(self, src_url): + env = os.environ.copy() + env['git_askpass'] = 'echo' + proc = await create('svn', 'info', src_url, env=env) + done, pending = await asyncio.wait( + [proc.stdout.read(), proc.stderr.read()], + timeout=self.TIMEOUT, + return_when=asyncio.FIRST_COMPLETED) + [fut.cancel() for fut in pending] + first = ''.join([fut.result().decode(errors='ignore') \ + for fut in done]) + LOGGER.debug('svn info result: %r', first) + try: + proc.kill() + except: + pass + exit_code = await proc.wait() + if exit_code != 0: + return False + return True + + async def _ls_remote(self, src_url): + with tempfile.TemporaryDirectory(prefix='git_') as tempdir: + env = os.environ.copy() + env['git_askpass'] = 'echo' + proc = await create(self.binary, 'ls-remote', '--exit-code', + src_url, '-h', 'HEAD', env=env, cwd=tempdir) + done, pending = await asyncio.wait( + [proc.stdout.read(8), proc.stderr.read(5)], + timeout=self.TIMEOUT, + return_when=asyncio.FIRST_COMPLETED) + [fut.cancel() for fut in pending] + first = ''.join([fut.result().decode(errors='ignore') \ + for fut in done]) + LOGGER.debug('ls-remote result: %r', first) + if first.startswith('fatal'): + LOGGER.debug('ls-remote result: fatal%s', await + proc.stderr.read()) + await proc.wait() + return False + elif first.startswith('Username'): + LOGGER.debug('ls-remote got auth challenge') + proc.kill() + await proc.wait() + return False + # TODO Configurable ls-remote timeout + done, pending = await asyncio.wait([proc.wait()], + timeout=10) + cancelled = bool(len([fut.cancel() for fut in pending])) + if cancelled: + try: + proc.kill() + except: + pass + await proc.wait() + return 
False + else: + exit_code = [fut.result() for fut in done][0] + if exit_code != 0: + return False + return True + + async def clone(self, src_url: str): + if not (await self._clone(src_url) or await self._svn_clone(src_url)): + return False + self.main_branch = await self.infer_main_branch() + LOGGER.debug('main branch for %r is %r', src_url, self.main_branch) + return await self.check_output('log', '-n', '1') + + async def _svn_clone(self, src_url: str): + if not await self._svn_info(src_url): + return False + env = os.environ.copy() + env['git_askpass'] = 'echo' + if self.cwd is False or not len(self.cwd): + self.cwd = self.tempdir.mktempdir() + proc = await self.create('svn', 'clone', src_url, self.cwd, env=env) + await self._handle_clone_stream(proc, src_url) + return True + + async def _clone(self, src_url: str): + ''' + Downloads a git repo using the git binary. This requires that the git + binary be in the PATH environment variable. + ''' + if not await self._ls_remote(src_url): + return False + env = os.environ.copy() + env['git_askpass'] = 'echo' + if self.cwd is False or not len(self.cwd): + self.cwd = self.tempdir.mktempdir() + proc = await self.create('clone', src_url, self.cwd, env=env) + await self._handle_clone_stream(proc, src_url) + return True + + async def _handle_clone_stream(self, proc, src_url: str): + error = Exception('No errors') + try: + done, pending = await asyncio.wait( + [proc.stdout.read(8), proc.stderr.read(5)], + timeout=self.TIMEOUT, + return_when=asyncio.FIRST_COMPLETED) + [fut.cancel() for fut in pending] + first = ''.join([str(fut.result()) for fut in done]) + LOGGER.debug('clone result: %s', first) + if first == 'fatal': + raise RuntimeError(await proc.stderr.readline()) + elif first == 'Username': + raise RuntimeError('Requires authentication') + stream = '' + while proc.returncode is None: + done, pending = await asyncio.wait( + [proc.stdout.readline(), + proc.stderr.readline()], + timeout=self.TIMEOUT, + 
    async def infer_main_branch(self):
        '''
        Determine the remote's default branch from `git branch -r`; falls
        back to DEFAULT_MAIN_BRANCH on any failure.
        '''
        try:
            branches = (await self.check_output('branch', '-r')).split('\n')
            # The 'origin/HEAD -> origin/<name>' line names the default branch
            main = [branch for branch in branches \
                    if '->' in branch][0].split()[-1]
            # Strip the remote name prefix
            main = '/'.join(main.split('/')[1:])
        except Exception as error:
            LOGGER.error('Infering main branch: %s', error)
            return self.DEFAULT_MAIN_BRANCH
        return main

class GitFeature(Feature):
    '''
    Git repo based features
    '''

    NAME: str = 'git'
    INAPPLICABLE_MESSAGE = 'Not a git repo'
    # Number of FREQUENCY periods of history to collect
    LENGTH: int = 10
    FREQUENCY: int = Quarterly # type: ignore

    def dtype(self) -> Type:
        return int

    def length(self) -> int:
        return self.LENGTH

    async def applicable(self, data):
        async with (await data.mklock('git_lock')):
            # Count number of git features so that only the last feature removes
            # the directory on tearDown
            num_git_features = data.temp.get('num_git_features', 0)
            num_git_features += 1
            data.temp['num_git_features'] = num_git_features
            # If is_git_repo has been set to False then src is not a git repo
            is_git_repo = data.temp.get('is_git_repo', None)
            if not is_git_repo is None:
                return is_git_repo
            # Create an instance of the git helper so we can run git commands
            data.git = Git(TempDir())
            await data.log('Git start ls-remote')
            is_git_repo = await data.git.ls_remote(data.src_url)
            await data.log('Git ls-remote complete')
            data.temp.setdefault('is_git_repo', is_git_repo)
            return is_git_repo

    async def fetch(self, data):
        # Only clone once; subsequent features reuse the working directory
        async with (await data.mklock('git_lock')):
            if not os.path.isdir(data.git.cwd):
                await data.log('Git start clone')
                await data.git.clone(data.src_url)
                await data.log('Git clone complete')
                LOGGER.debug('Cloned to: %s', data.git.cwd)

    async def parse(self, data):
        # Serialize working-tree access: checkout mutates shared state
        async with (await data.mklock('git_lock')):
            LOGGER.debug('%s took git_lock', self.__class__.__qualname__)
            await data.git.checkout()
            return await self.git_parse(data)

    async def tearDown(self, data):
        # Last git feature standing removes the temporary clone
        async with (await data.mklock('git_lock')):
            data.temp['num_git_features'] -= 1
            if data.temp['num_git_features'] == 0:
                data.git.tempdir.rmtempdirs()
class GitLangsFeature(GitClocFeature):
    '''
    Language usage by percentage for a git repo
    '''

    NAME: str = 'langs'

    def dtype(self):
        return Dict[str, float]

    def length(self):
        return 1

    def percentage_of(self, numbers):
        '''
        Normalize a mapping of counts to fractions of their total, dropping
        aggregate 'sum'/'total' entries first. Mutates and returns numbers.
        '''
        for key in ['sum', 'total']:
            if key in numbers:
                del numbers[key]
        whole = sum(numbers.values())
        for key in numbers.keys():
            numbers[key] /= whole
        return numbers

    async def calc(self, data):
        # Fraction of code lines per language for the most recent month
        return self.percentage_of({lang: numbers['code'] for lang, numbers in \
                (data.temp.get('cloc_data'))[0].items()})

class GitLangFeature(GitLangsFeature):
    '''
    Most used language for a git repo
    '''

    NAME: str = 'lang'

    def dtype(self):
        return str

    async def calc(self, data):
        # Pick the language with the highest usage fraction
        langs_percentages = await super().calc(data)
        return sorted(langs_percentages,
                      key=langs_percentages.__getitem__)[::-1][0]
class GitMonthlyFeature(GitFeature):
    '''
    Checkout the repo each month
    '''

    # NOTE(review): shares NAME with GitClocFeature — confirm intended
    NAME: str = 'cloc'

    async def git_parse(self, data):
        # Walk back through history, checking out the last commit before
        # each period boundary and letting the subclass parse the tree
        i = -1
        for current in range(0, self.LENGTH * self.FREQUENCY.MONTHS,
                             self.FREQUENCY.MONTHS):
            last_commit = (await data.git.check_output('log',
                    '--pretty=oneline',
                    '--no-abbrev-commit', '-n', '1', '--date', 'relative',
                    '--before', '%d months' % (current))).strip()
            i += 1
            # No commits exist before this point in time; skip the period
            if len(last_commit) == 0:
                continue
            # First word of the oneline format is the commit hash
            last_commit = last_commit.split()[0]
            await data.git.check_output('reset', '--hard', last_commit)
            await data.git.checkout(last_commit)
            await self.month_parse(data, i)

    @abc.abstractmethod
    async def month_parse(self, data: Data, i: int):
        '''
        Parse the git repo this month
        '''
        pass
class GitReleaseFeature(GitFeature):
    '''
    Was there a release within the last LAST months of each period
    '''

    NAME = 'release'
    # Number of months since last release
    LAST: int = 18

    def valid_version(self, tag):
        '''
        True when tag looks like a version number (e.g. v1.0, 1_2_3).
        '''
        # Remove v from v1 to make isnumeric return True
        tag = tag.replace('v', '')
        # Make the only seperator . instead of - or _
        for replace in ['-', '_']:
            tag = tag.replace(replace, '.')
        # Make sure there is at least one number in the tag when split by .
        return bool(sum([1 for num in tag.split('.') if num.isnumeric()]))

    async def git_parse(self, data):
        release = [0] * self.LENGTH
        releases = []
        # Parse log: each tagged commit prints '<timestamp> <refnames>'
        proc = await data.git.create('log', '--tags',
                '--simplify-by-decoration', '--pretty=format:%at %D')
        while not proc.stdout.at_eof():
            line = await proc.stdout.readline()
            line = line.decode(errors='ignore').strip().split()
            LOGGER.debug('%r %s: %r', self, data.src_url, line)
            # Skip lines whose last ref does not look like a version tag
            if not line or not self.valid_version(line[-1]):
                continue
            releases.append(datetime.fromtimestamp(int(line[0])))
        await stop(proc)
        # Check if there was a release within LAST months of each period
        current = datetime.now()
        for i in range(0, self.LENGTH):
            # Renamed from six_months_from_current: the window is LAST (18)
            # months wide, not six
            window_start = current - relativedelta(months=self.LAST)
            for date in releases:
                if date < current and date > window_start:
                    release[i] = 1
            current -= relativedelta(months=self.FREQUENCY.MONTHS)
        data.temp.setdefault(self.NAME, release)
        await data.data.set(self.NAME, await self.calc(data))

    async def calc(self, data):
        return data.temp.get(self.NAME)
dataset of interest represent the same type. + ''' + if len(args) < 2: + return 0 + def __n_times_n_minus_1(number): + return number * (number - 1) + try: + return int(round((1.0 - (float(sum(map(__n_times_n_minus_1, args))) \ + / float(sum(args) * (sum(args) - 1)))) * 100.0)) + except ZeroDivisionError: + return 0 + +class GitWorkFeature(GitFeature): + ''' + Calculates the spread of authors and returns an integer between 0 and 10 + representing how varying the authorship of code is. For example a repo with + two authors where one commits 90% of the lines of code would calculates to + a 1. Equal work would calculate to a 10. + ''' + + NAME: str = 'work' + + async def git_parse(self, data): + work = [] + for current in range(0, self.LENGTH * self.FREQUENCY.MONTHS, + self.FREQUENCY.MONTHS): + author = '' + current_work = {} + proc = await data.git.create('log', + '--pretty=format:Author:%aN', '--numstat', + '--before', '%d months' % (current), + '--after', '%d months' % (current + \ + self.FREQUENCY.MONTHS)) + while not proc.stdout.at_eof(): + line = await proc.stdout.readline() + line = line.decode(errors='ignore').rstrip() + if line.startswith('Author:'): + author = line.split(':')[1] + if author and author not in current_work: + current_work[author] = 0 + elif line and author in current_work and \ + line.split()[0].isdigit(): + current_work[author] += int(line.split()[0]) + work.append(current_work) + await stop(proc) + data.temp.setdefault(self.NAME, work) + + async def calc(self, data): + return [simpsons_diversity_index(*authorship.values()) \ + for authorship in data.temp.get(self.NAME)] diff --git a/feature/git/dffml_feature_git/log.py b/feature/git/dffml_feature_git/log.py new file mode 100644 index 0000000000..283f375316 --- /dev/null +++ b/feature/git/dffml_feature_git/log.py @@ -0,0 +1,3 @@ +'''Logging''' +import logging +LOGGER = logging.getLogger(__package__) diff --git a/feature/git/dffml_feature_git/util/__init__.py 
b/feature/git/dffml_feature_git/util/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/feature/git/dffml_feature_git/util/log.py b/feature/git/dffml_feature_git/util/log.py new file mode 100644 index 0000000000..283f375316 --- /dev/null +++ b/feature/git/dffml_feature_git/util/log.py @@ -0,0 +1,3 @@ +'''Logging''' +import logging +LOGGER = logging.getLogger(__package__) diff --git a/feature/git/dffml_feature_git/util/proc.py b/feature/git/dffml_feature_git/util/proc.py new file mode 100644 index 0000000000..6562fd0e6e --- /dev/null +++ b/feature/git/dffml_feature_git/util/proc.py @@ -0,0 +1,56 @@ +''' +Asynchronous subprocess interaction. +''' +import os +import asyncio.subprocess + +from .log import LOGGER + +def inpath(binary): + return any(list(map(lambda dirname: os.path.isfile(os.path.join(dirname, + binary)), os.environ.get('PATH', '').split(':')))) + +async def stop(proc): + ''' + Stops a subprocess + ''' + exit_code = await proc.wait() + if exit_code != 0: + raise RuntimeError('\'%s\' exited with code %d: \'%s\'' \ + % (getattr(proc, 'name', 'subprocess'), exit_code, + getattr(proc, 'data', '').rstrip())) + return exit_code, proc + +async def create(*args, **kwargs): + ''' + Runs a subprocess using asyncio.create_subprocess_exec and returns the + process. + ''' + LOGGER.debug('proc.create: %r', args) + proc = await asyncio.create_subprocess_exec(*args, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + start_new_session=True, + **kwargs) + proc.name = args[0] + proc.args = args[1:] + return proc + +async def get_output(proc): + ''' + Combines stdout and stderr + ''' + stderr = (await proc.stderr.read()).decode(errors='ignore') + stdout = (await proc.stdout.read()).decode(errors='ignore') + proc.data = stdout + stderr + return stdout, stderr + +async def check_output(*args, **kwargs): + ''' + Runs a subprocess using asyncio.create_subprocess_exec and returns either + its standard error or output. 
+ ''' + proc = await create(*args, **kwargs) + stdout, stderr = await get_output(proc) + await stop(proc) + return stdout or stderr diff --git a/feature/git/dffml_feature_git/version.py b/feature/git/dffml_feature_git/version.py new file mode 100644 index 0000000000..856ce1d12d --- /dev/null +++ b/feature/git/dffml_feature_git/version.py @@ -0,0 +1 @@ +VERSION = '0.1.2' diff --git a/feature/git/pyproject.toml b/feature/git/pyproject.toml new file mode 100644 index 0000000000..22002d6625 --- /dev/null +++ b/feature/git/pyproject.toml @@ -0,0 +1,17 @@ +[metadata] +name = 'wllearn' +version = '0.0.1' +description = '' +author = 'U.N. Owen' +author_email = 'me@un.known' +license = 'MIT/Apache-2.0' +url = 'https://github.com/_/wllearn' + +[requires] +python_version = ['2.7', '3.5', '3.6', 'pypy', 'pypy3'] + +[build-system] +requires = ['setuptools', 'wheel'] + +[tool.hatch.commands] +prerelease = 'hatch build' diff --git a/feature/git/setup.py b/feature/git/setup.py new file mode 100644 index 0000000000..12e5442656 --- /dev/null +++ b/feature/git/setup.py @@ -0,0 +1,65 @@ +import os +import ast +from io import open + +from setuptools import find_packages, setup + +self_path = os.path.dirname(os.path.realpath(__file__)) + +with open(os.path.join(self_path, 'dffml_feature_git', 'version.py'), + 'r') as f: + for line in f: + if line.startswith('VERSION'): + version = ast.literal_eval(line.strip().split('=')[-1].strip()) + break + +with open(os.path.join(self_path, 'README.rst'), 'r', encoding='utf-8') as f: + readme = f.read() + +INSTALL_REQUIRES = [ + "python-dateutil>=2.7.3" + ] + +setup( + name='dffml_feature_git', + version=version, + description='', + long_description=readme, + author='John Andersen', + author_email='john.s.andersen@intel.com', + url='https://github.com/intel/dffml/blob/master/feature/git/README.rst', + license='MIT', + + keywords=[ + '', + ], + + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Developers', + 'License :: OSI 
Approved :: MIT License', + 'License :: OSI Approved :: Apache Software License', + 'Natural Language :: English', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: Implementation :: CPython', + 'Programming Language :: Python :: Implementation :: PyPy', + ], + + install_requires=INSTALL_REQUIRES, + tests_require=[], + + packages=find_packages(), + entry_points={ + 'dffml.feature': [ + 'git = dffml_feature_git.feature.git:GitFeature', + 'work = dffml_feature_git.feature.work:GitWorkFeature', + 'cloc = dffml_feature_git.feature.cloc:GitClocFeature', + 'lang = dffml_feature_git.feature.lang:GitLangFeature', + 'langs = dffml_feature_git.feature.lang:GitLangsFeature', + 'commits = dffml_feature_git.feature.commits:GitCommitsFeature', + 'authors = dffml_feature_git.feature.authors:GitAuthorsFeature', + 'release = dffml_feature_git.feature.release:GitReleaseFeature', + ], + }, +) diff --git a/feature/git/tests/__init__.py b/feature/git/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/feature/git/tests/test_feature.py b/feature/git/tests/test_feature.py new file mode 100644 index 0000000000..a48e3af765 --- /dev/null +++ b/feature/git/tests/test_feature.py @@ -0,0 +1,79 @@ +# pylint: disable=missing-docstring,no-self-use +import unittest + +from dffml.feature import Feature, Features +from dffml.source import MemorySource +from dffml.util.asynctestcase import AsyncTestCase + +# Git Repo based features +from dffml_feature_git.feature.git import GitFeature +from dffml_feature_git.feature.cloc import GitClocFeature +from dffml_feature_git.feature.lang import GitLangsFeature, GitLangFeature +from dffml_feature_git.feature.work import GitWorkFeature +from dffml_feature_git.feature.release import GitReleaseFeature +from dffml_feature_git.feature.commits import GitCommitsFeature +from dffml_feature_git.feature.authors import GitAuthorsFeature + +FEATURES = [ + # Git repo features 
+ GitCommitsFeature, + GitAuthorsFeature, + GitWorkFeature, + GitClocFeature, + GitReleaseFeature, +] +GIT_FEATURES = Features( + *[feature() for feature in FEATURES if issubclass(feature, GitFeature)]) + +class TestFeature(unittest.TestCase): + + def test_load_builtin_features(self): + features = Feature.load() + for mustLoad in FEATURES: + with self.subTest(mustLoad=mustLoad): + self.assertIn(mustLoad, features) + +class TestGitFeatures(AsyncTestCase): + + async def test_git_features(self): + async with GIT_FEATURES: + for src_url in ['https://github.com/tpm2-software/tpm2-tss', + 'https://github.com/github/gitignore']: + with self.subTest(src_url=src_url): + features = await GIT_FEATURES.evaluate(src_url) + self.assertEqual(len(features.values()), len(GIT_FEATURES)) + for results in features.values(): + self.assertEqual(len(results), 10) + + async def test_git_feature_fail(self): + async with GIT_FEATURES: + for src_url in ['https://github.com/github/nope', + 'https://google.com']: + with self.subTest(src_url=src_url): + features = await GIT_FEATURES.evaluate(src_url) + self.assertEqual(len(features.values()), 0) + +class TestLangs(AsyncTestCase): + + def setUp(self): + self.src_url = 'https://github.com/tpm2-software/tpm2-tss' + self.features = Features(GitLangsFeature()) + + async def test_langs(self): + async with self.features: + features = await self.features.evaluate(self.src_url) + self.assertIn('langs', features) + self.assertIn('c', features['langs']) + self.assertGreater(features['langs']['c'], 0.1) + +class TestLang(AsyncTestCase): + + def setUp(self): + self.src_url = 'https://github.com/tpm2-software/tpm2-tss' + self.features = Features(GitLangFeature()) + + async def test_lang(self): + async with self.features: + features = await self.features.evaluate(self.src_url) + self.assertIn('lang', features) + self.assertEqual('c', features['lang']) diff --git a/feature/git/tests/test_git.py b/feature/git/tests/test_git.py new file mode 100644 index 
0000000000..5f6958b77c --- /dev/null +++ b/feature/git/tests/test_git.py @@ -0,0 +1,111 @@ +# pylint: disable=missing-docstring,no-self-use +import shutil +import random +import os.path +import unittest +import subprocess + +from dffml.util.tempdir import TempDir +from dffml.util.asynctestcase import AsyncTestCase + +from dffml_feature_git.feature.git import Git + +def has_git_svn() -> bool: + ''' + Travis installs git from the maintainers ppa the xenial git-svn does not + work with, and therefore does not install. + ''' + try: + subprocess.check_output(['git', 'svn'], stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as error: + if b'clone' in error.output: + return True + return False + +def mkgitrepo(gitdir): + subprocess.check_output(['git', 'init'], cwd=gitdir) + with open(os.path.join(gitdir, 'README.md'), 'w') as handle: + handle.write('# Hello World') + subprocess.check_output(['git', 'add', '-A'], cwd=gitdir) + subprocess.check_output(['git', 'commit', '-m', 'Initial Commit'], + cwd=gitdir) + +def mksvnrepo(gitdir): + return + +class TestGit(AsyncTestCase): + + async def setUp(self): + self.tempdir = TempDir() + await self.tempdir.__aenter__() + self.gcreated = self.tempdir.mktempdir() + self.screated = self.tempdir.mktempdir() + mkgitrepo(self.gcreated) + mksvnrepo(self.screated) + + async def tearDown(self): + await self.tempdir.__aexit__(None, None, None) + + async def test_git_clone(self): + git = Git(self.tempdir) + self.assertTrue(await git.clone(self.gcreated)) + shutil.rmtree(git.cwd, ignore_errors=True) + + @unittest.skipUnless(has_git_svn() and os.getenv('LONG_TESTS', '') != '', + 'Long SVN clone') + async def test_git_clone_svn(self): + git = Git(self.tempdir) + self.assertTrue( + len(await git.clone('https://svn.code.sf.net/p/lame/svn/trunk/lame'))) + shutil.rmtree(git.cwd, ignore_errors=True) + + async def test_no_repo(self): + git = Git(self.tempdir) + self.assertFalse(await git.clone(str(random.random()))) + 
self.assertFalse(os.path.isdir(git.cwd)) + + async def test_not_a_git_repo(self): + git = Git(self.tempdir) + self.assertFalse(await git.clone('https://example.com')) + self.assertFalse(os.path.isdir(git.cwd)) + + async def test_ls_remote_no_repo(self): + git = Git(self.tempdir) + self.assertFalse(await git.ls_remote(str(random.random()))) + + async def test_ls_remote(self): + git = Git(self.tempdir) + self.assertTrue(await git.ls_remote(self.gcreated)) + + @unittest.skipUnless(has_git_svn() and os.getenv('LONG_TESTS', '') != '', + 'Long SVN ls-remote') + async def test_ls_remote_svn(self): + git = Git(self.tempdir) + self.assertTrue(await git.ls_remote('https://svn.code.sf.net/p/lame/svn/trunk/lame')) + + @unittest.skipUnless(os.getenv('LONG_TESTS', '') != '', 'Hanging test') + async def test_ls_remote_forever(self): + ''' + Test case for a repo which hangs for a long time to make sure we git + ls-remote eventually. + ''' + git = Git(self.tempdir) + self.assertFalse(await git.ls_remote('git://java.net/jax-rs-spec~api')) + + async def test_infer_main_branch(self): + gitdir = self.tempdir.mktempdir() + subprocess.check_output(['git', 'init'], cwd=gitdir) + with open(os.path.join(gitdir, 'README.md'), 'w') as handle: + handle.write('# Hello World') + subprocess.check_output(['git', 'add', '-A'], cwd=gitdir) + subprocess.check_output(['git', 'checkout', '-b', 'v2'], cwd=gitdir) + subprocess.check_output(['git', 'commit', '-m', 'Initial Commit'], + cwd=gitdir) + for src_url, branch in [ + (self.gcreated, 'master'), + (gitdir, 'v2')]: + git = Git(self.tempdir) + with self.subTest(src_url=src_url, branch=branch): + self.assertTrue(await git.clone(src_url)) + self.assertEqual(git.main_branch, branch) + shutil.rmtree(git.cwd, ignore_errors=True) diff --git a/feature/git/tests/test_release.py b/feature/git/tests/test_release.py new file mode 100644 index 0000000000..65d04cf3aa --- /dev/null +++ b/feature/git/tests/test_release.py @@ -0,0 +1,31 @@ +# pylint: 
disable=missing-docstring,no-self-use +import unittest + +from dffml_feature_git.feature.release import GitReleaseFeature + +class TestReleaseFeature(unittest.TestCase): + + VALID = [ + '1.0.0', + 'v1.0.0', + 'curl-7_19_7', + 'miniupnpc_2_1', + '2_7_5', + ] + NOT_VALID = [ + 'asdf1', + 'as.df1', + ] + + def setUp(self): + self.feature = GitReleaseFeature() + + def test_valid(self): + for line in self.VALID: + with self.subTest(line=line): + self.assertTrue(self.feature.valid_version(line)) + + def test_not_valid(self): + for line in self.NOT_VALID: + with self.subTest(line=line): + self.assertFalse(self.feature.valid_version(line)) diff --git a/feature/git/tox.ini b/feature/git/tox.ini new file mode 100644 index 0000000000..e25dc9a70c --- /dev/null +++ b/feature/git/tox.ini @@ -0,0 +1,18 @@ +[tox] +envlist = + py27, + py35, + py36, + pypy, + pypy3, + +[testenv] +passenv = * +deps = + coverage + pytest +commands = + python setup.py --quiet clean develop + coverage run --parallel-mode -m pytest + coverage combine --append + coverage report -m diff --git a/model/tensorflow/.coveragerc b/model/tensorflow/.coveragerc new file mode 100644 index 0000000000..5b22b1dcfa --- /dev/null +++ b/model/tensorflow/.coveragerc @@ -0,0 +1,13 @@ +[run] +source = + dffml_model_tensorflow + tests +branch = True + +[report] +exclude_lines = + no cov + no qa + noqa + pragma: no cover + if __name__ == .__main__.: diff --git a/model/tensorflow/.gitattributes b/model/tensorflow/.gitattributes new file mode 100644 index 0000000000..dfe0770424 --- /dev/null +++ b/model/tensorflow/.gitattributes @@ -0,0 +1,2 @@ +# Auto detect text files and perform LF normalization +* text=auto diff --git a/model/tensorflow/.gitignore b/model/tensorflow/.gitignore new file mode 100644 index 0000000000..070ee81c83 --- /dev/null +++ b/model/tensorflow/.gitignore @@ -0,0 +1,20 @@ +*.log +*.pyc +.cache/ +.coverage +.idea/ +.vscode/ +*.egg-info/ +build/ +dist/ +docs/build/ +venv/ +wheelhouse/ +*.egss +.mypy_cache/ 
+*.swp +.venv/ +.eggs/ +*.modeldir +*.db +htmlcov/ diff --git a/model/tensorflow/LICENSE b/model/tensorflow/LICENSE new file mode 100644 index 0000000000..8ce5aa9e27 --- /dev/null +++ b/model/tensorflow/LICENSE @@ -0,0 +1,21 @@ +Copyright (c) 2017-2019 Intel + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/model/tensorflow/MANIFEST.in b/model/tensorflow/MANIFEST.in new file mode 100644 index 0000000000..a5021c60e3 --- /dev/null +++ b/model/tensorflow/MANIFEST.in @@ -0,0 +1,2 @@ +include README.rst +include LICENSE diff --git a/model/tensorflow/README.rst b/model/tensorflow/README.rst new file mode 100644 index 0000000000..d7ca81816c --- /dev/null +++ b/model/tensorflow/README.rst @@ -0,0 +1,53 @@ +DFFML Models for Tensorflow Library +=================================== + +About +----- + +DFFML models backed by Tensorflow. + +Install +------- + +.. code-block:: console + + virtualenv -p python3.7 .venv + . 
.venv/bin/activate + python3.7 -m pip install --user -U dffml[tensorflow] + +Usage +----- + +.. code-block:: console + + wget http://download.tensorflow.org/data/iris_training.csv + wget http://download.tensorflow.org/data/iris_test.csv + head iris_training.csv + sed -i 's/.*setosa,versicolor,virginica/SepalLength,SepalWidth,PetalLength,PetalWidth,classification/g' *.csv + head iris_training.csv + dffml train \ + -model dnn \ + -sources csv=iris_training.csv \ + -classifications 0 1 2 \ + -features \ + def:SepalLength:float:1 \ + def:SepalWidth:float:1 \ + def:PetalLength:float:1 \ + def:PetalWidth:float:1 \ + -num_epochs 3000 \ + -steps 20000 + dffml accuracy \ + -model dnn \ + -sources csv=iris_training.csv \ + -classifications 0 1 2 \ + -features \ + def:SepalLength:float:1 \ + def:SepalWidth:float:1 \ + def:PetalLength:float:1 \ + def:PetalWidth:float:1 + +License +------- + +DFFML Tensorflow Models are distributed under the terms of the `MIT License +`_ diff --git a/model/tensorflow/dffml_model_tensorflow/__init__.py b/model/tensorflow/dffml_model_tensorflow/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/model/tensorflow/dffml_model_tensorflow/model/__init__.py b/model/tensorflow/dffml_model_tensorflow/model/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/model/tensorflow/dffml_model_tensorflow/model/dnn.py b/model/tensorflow/dffml_model_tensorflow/model/dnn.py new file mode 100644 index 0000000000..081634c7ea --- /dev/null +++ b/model/tensorflow/dffml_model_tensorflow/model/dnn.py @@ -0,0 +1,241 @@ +''' +Uses Tensorflow to create a generic DNN which learns on all of the features in a +repo. 
+''' +import os +import asyncio +import hashlib +import numpy as np +import tensorflow +from typing import List, Dict, Any, AsyncIterator, Tuple, Optional + +from dffml.repo import Repo +from dffml.feature import Feature, Features +from dffml.source import Sources +from dffml.model import Model +from dffml.accuracy import Accuracy + +from .log import LOGGER + +LOGGER = LOGGER.getChild('dnn') + +class DNN(Model): + ''' + Model using tensorflow to make predictions. Handels creation of feature + columns for real valued, string, and list of real valued features. + ''' + + def __init__(self): + super().__init__() + self._model = None + # Load packages with lots of dependencies durring instantiation so that + # users can choose to install these or not. + self.__np = np + self._tf = tensorflow + + def mkclassifications(self, classifications): + classifications = {value: key for key, value in \ + self.mkcids(classifications).items()} + LOGGER.debug('classifications(%d): %r', len(classifications), + classifications) + return classifications + + def mkcids(self, classifications): + cids = dict(zip(range(0, len(classifications)), + sorted(classifications))) + LOGGER.debug('cids(%d): %r', len(cids), cids) + return cids + + async def applicable_features(self, features: Features): + usable = await self.features(features) + return [name for name in features.names() if name in usable] + + async def training_input_fn(self, sources: Sources, features: Features, + classifications: List[Any], + batch_size=20, shuffle=False, num_epochs=1, **kwargs): + ''' + Uses the numpy input function with data from repo features. 
+ ''' + classifications = self.mkclassifications(classifications) + features = await self.applicable_features(features) + LOGGER.debug('Training on features: %r', features) + x_cols: Dict[str, Any] = {feature: [] for feature in features} + y_cols = [] + for repo in [repo async for repo in \ + sources.classified_with_features(features) \ + if repo.classification() in classifications]: + for feature, results in repo.features(features).items(): + x_cols[feature].append(self.__np.array(results)) + y_cols.append(classifications[repo.classification()]) + presplit = len(y_cols) + if not presplit: + raise ValueError('No repos to train on') + split = 0.7 + split = int(float(presplit) * split) + y_cols = self.__np.array(y_cols[:split]) + for feature in x_cols: + x_cols[feature] = self.__np.array(x_cols[feature][:split]) + LOGGER.info('------ Repo Data ------') + LOGGER.info('total: %d', presplit) + LOGGER.info('x_cols: %d', len(list(x_cols.values())[0])) + LOGGER.info('y_cols: %d', len(y_cols)) + LOGGER.info('-----------------------') + input_fn = self._tf.estimator.inputs.numpy_input_fn(x_cols, + y_cols, batch_size=batch_size, + shuffle=shuffle, num_epochs=num_epochs, **kwargs) + return input_fn + + async def accuracy_input_fn(self, sources: Sources, features: Features, + classifications: List[Any], + batch_size=20, shuffle=False, num_epochs=1, **kwargs): + ''' + Uses the numpy input function with data from repo features. 
+ ''' + features = await self.applicable_features(features) + classifications = self.mkclassifications(classifications) + x_cols: Dict[str, Any] = {feature: [] for feature in features} + y_cols = [] + for repo in [repo async for repo in \ + sources.classified_with_features(features) \ + if repo.classification() in classifications]: + for feature, results in repo.features(features).items(): + x_cols[feature].append(self.__np.array(results)) + y_cols.append(classifications[repo.classification()]) + presplit = len(y_cols) + split = 0.7 + split = int(float(presplit) * split) + y_cols = self.__np.array(y_cols[split:]) + for feature in x_cols: + x_cols[feature] = self.__np.array(x_cols[feature][split:]) + LOGGER.info('------ Repo Data ------') + LOGGER.info('total: %d', presplit) + LOGGER.info('x_cols: %d', len(list(x_cols.values())[0])) + LOGGER.info('y_cols: %d', len(y_cols)) + LOGGER.info('-----------------------') + input_fn = self._tf.estimator.inputs.numpy_input_fn(x_cols, + y_cols, batch_size=batch_size, + shuffle=shuffle, num_epochs=num_epochs, **kwargs) + return input_fn + + async def predict_input_fn(self, repos: AsyncIterator[Repo], + features: Features, classifications: List[Any], **kwargs): + ''' + Uses the numpy input function with data from repo features. 
+ ''' + features = await self.applicable_features(features) + classifications = self.mkclassifications(classifications) + x_cols: Dict[str, Any] = {feature: [] for feature in features} + ret_repos = [] + async for repo in repos: + if not repo.features(features): + continue + ret_repos.append(repo) + for feature, results in repo.features(features).items(): + x_cols[feature].append(self.__np.array(results)) + for feature in x_cols: + x_cols[feature] = self.__np.array(x_cols[feature]) + LOGGER.info('------ Repo Data ------') + LOGGER.info('x_cols: %d', len(list(x_cols.values())[0])) + LOGGER.info('-----------------------') + input_fn = self._tf.estimator.inputs.numpy_input_fn(x_cols, + shuffle=False, num_epochs=1, **kwargs) + return input_fn, ret_repos + + async def features(self, features: Features): + ''' + Converts repos into training data + ''' + cols: Dict[str, Any] = {} + for feature in features: + col = self.feature_feature_column(feature) + if not col is None: + cols[feature.NAME] = col + return cols + + def feature_feature_column(self, feature: Feature): + ''' + Creates a feature column for a feature + ''' + dtype = feature.dtype() + if dtype is int or issubclass(dtype, int) \ + or dtype is float or issubclass(dtype, float): + return self._tf.feature_column.numeric_column(feature.NAME, + shape=feature.length()) + LOGGER.warning('Unknown dtype %r. Cound not create column' % (dtype)) + return None + + def model_dir_path(self, features: Features): + ''' + Creates the path to the model dir by using the provided model dir and + the sha256 hash of the concatenated feature names. 
+ ''' + if self.model_dir is None: + return None + model = hashlib.sha256(''.join(features.names()).encode('utf-8'))\ + .hexdigest() + if not os.path.isdir(self.model_dir): + raise NotADirectoryError('%s is not a directory' % (self.model_dir)) + return os.path.join(self.model_dir, model) + + async def model(self, features: Features, classifications: List[Any]): + ''' + Generates or loads a model + ''' + if self._model is not None: + return self._model + # Build 3 layer DNN with 10, 20, 10 units respectively. + # 2 classifications whitelist or blacklist + LOGGER.debug('Loading model with classifications(%d): %r', + len(classifications), classifications) + self._model = self._tf.estimator.DNNClassifier( + feature_columns=list((await self.features(features)).values()), + hidden_units=[10, 20, 10], + n_classes=len(classifications), + model_dir=self.model_dir_path(features)) + return self._model + + async def train(self, sources: Sources, features: Features, + classifications: List[Any], steps: int, num_epochs: int): + ''' + Train on data submitted via classify. + ''' + input_fn = await self.training_input_fn(sources, features, + classifications, + batch_size=20, shuffle=True, num_epochs=num_epochs) + (await self.model(features, classifications))\ + .train(input_fn=input_fn, steps=steps) + + async def accuracy(self, sources: Sources, features: Features, + classifications: List[Any]) -> Accuracy: + ''' + Evaluates the accuracy of our model after training using the input repos + as test data. 
+ ''' + if not os.path.isdir(self.model_dir_path(features)): + raise NotADirectoryError('Model not trained') + input_fn = await self.accuracy_input_fn(sources, features, + classifications, + batch_size=20, shuffle=False, num_epochs=1) + accuracy_score = (await self.model(features, classifications))\ + .evaluate(input_fn=input_fn) + return Accuracy(accuracy_score['accuracy']) + + async def predict(self, repos: AsyncIterator[Repo], features: Features, + classifications: List[Any]) -> \ + AsyncIterator[Tuple[Repo, Any, float]]: + ''' + Uses trained data to make a prediction about the quality of a repo. + ''' + if not os.path.isdir(self.model_dir_path(features)): + raise NotADirectoryError('Model not trained') + cids = self.mkcids(classifications) + # Create the input function + input_fn, predict = await self.predict_input_fn(repos, features, + classifications) + # Makes predictions on classifications + predictions = (await self.model(features, classifications))\ + .predict(input_fn=input_fn) + for repo, pred_dict in zip(predict, predictions): + class_id = pred_dict['class_ids'][0] + probability = pred_dict['probabilities'][class_id] + yield repo, cids[class_id], probability diff --git a/model/tensorflow/dffml_model_tensorflow/model/log.py b/model/tensorflow/dffml_model_tensorflow/model/log.py new file mode 100644 index 0000000000..283f375316 --- /dev/null +++ b/model/tensorflow/dffml_model_tensorflow/model/log.py @@ -0,0 +1,3 @@ +'''Logging''' +import logging +LOGGER = logging.getLogger(__package__) diff --git a/model/tensorflow/dffml_model_tensorflow/version.py b/model/tensorflow/dffml_model_tensorflow/version.py new file mode 100644 index 0000000000..856ce1d12d --- /dev/null +++ b/model/tensorflow/dffml_model_tensorflow/version.py @@ -0,0 +1 @@ +VERSION = '0.1.2' diff --git a/model/tensorflow/setup.py b/model/tensorflow/setup.py new file mode 100644 index 0000000000..bc0230be3a --- /dev/null +++ b/model/tensorflow/setup.py @@ -0,0 +1,58 @@ +import os +import ast 
+from io import open + +from setuptools import find_packages, setup + +self_path = os.path.dirname(os.path.realpath(__file__)) + +with open(os.path.join(self_path, 'dffml_model_tensorflow', 'version.py'), + 'r') as f: + for line in f: + if line.startswith('VERSION'): + version = ast.literal_eval(line.strip().split('=')[-1].strip()) + break + +with open(os.path.join(self_path, 'README.rst'), 'r', encoding='utf-8') as f: + readme = f.read() + +INSTALL_REQUIRES = [ + "tensorflow>=1.13.1,<2.0.0" + ] + +setup( + name='dffml-model-tensorflow', + version=version, + description='', + long_description=readme, + author='John Andersen', + author_email='john.s.andersen@intel.com', + url='https://github.com/intel/dffml/blob/master/model/tensorflow/README.rst', + license='MIT', + + keywords=[ + '', + ], + + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'License :: OSI Approved :: Apache Software License', + 'Natural Language :: English', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 3 :: Only', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: Implementation :: CPython', + 'Programming Language :: Python :: Implementation :: PyPy', + ], + + install_requires=INSTALL_REQUIRES, + + packages=find_packages(), + entry_points={ + 'dffml.model': [ + 'dnn = dffml_model_tensorflow.model.dnn:DNN', + ], + }, +) diff --git a/model/tensorflow/tests/__init__.py b/model/tensorflow/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/model/tensorflow/tests/test_dnn.py b/model/tensorflow/tests/test_dnn.py new file mode 100644 index 0000000000..ee8567a01f --- /dev/null +++ b/model/tensorflow/tests/test_dnn.py @@ -0,0 +1,68 @@ +import random +import tempfile +from typing import Type + +from dffml.repo import Repo, RepoData +from dffml.source import Sources, RepoSource +from dffml.feature import Data, Feature, Features +from 
dffml.util.asynctestcase import AsyncTestCase + +from dffml_model_tensorflow.model.dnn import DNN + +class StartsWithA(Feature): + + NAME: str = 'starts_with_a' + + def dtype(self) -> Type: + return int + + def length(self) -> int: + return 1 + + async def calc(self, data: Data) -> int: + return 1 if data.src_url.lower().startswith('a') \ + else 0 + +class TestDNN(AsyncTestCase): + + @classmethod + def setUpClass(cls): + cls.model_dir = tempfile.TemporaryDirectory() + cls.model = DNN() + cls.model.model_dir = cls.model_dir.name + cls.feature = StartsWithA() + cls.features = Features(cls.feature) + cls.classifications = ['a', 'not a'] + cls.repos = [Repo('a' + str(random.random()), + data={'features': {cls.feature.NAME: 1}, + 'classification': 'a'}) for _ in range(0, 1000)] + cls.repos += [Repo('b' + str(random.random()), + data={'features': {cls.feature.NAME: 0}, + 'classification': 'not a'}) for _ in range(0, 1000)] + cls.sources = Sources(RepoSource(*cls.repos)) + + @classmethod + def tearDownClass(cls): + cls.model_dir.cleanup() + + async def test_00_train(self): + async with self.sources as sources, self.features as features: + await self.model.train(sources, features, + self.classifications, steps=1000, + num_epochs=30) + + async def test_01_accuracy(self): + async with self.sources as sources, self.features as features: + res = await self.model.accuracy(sources, features, + self.classifications) + self.assertGreater(res, 0.9) + + async def test_02_predict(self): + a = Repo('a', data={'features': {self.feature.NAME: 1}}) + sources = Sources(RepoSource(a)) + async with sources as sources, self.features as features: + res = [repo async for repo in self.model.predict(sources.repos(), + features, self.classifications)] + self.assertEqual(len(res), 1) + self.assertEqual(res[0][0].src_url, a.src_url) + self.assertTrue(res[0][1]) diff --git a/scripts/docker-entrypoint.sh b/scripts/docker-entrypoint.sh new file mode 100755 index 0000000000..a3f02cac0a --- /dev/null 
+++ b/scripts/docker-entrypoint.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +echo "#!/usr/bin/env bash" > /usr/bin/cmd.sh +chmod 755 /usr/bin/cmd.sh +runit () { + exec /usr/bin/cmd.sh +} +if [ "x$USER" != "x" ] && [ "x$UID" != "x" ]; then + export HOME="/home/$USER" + mkdir -p $HOME/.cache + useradd -o -u $UID $USER + chown $UID $HOME + chown $UID $HOME/.cache + runit () { + exec su - $USER -m -s /usr/bin/cmd.sh + } +fi + +if [ "$1" == "pip" ]; then + # Run pip. Used in case the user want to install something + echo "$@" >> /usr/bin/cmd.sh +else + # Run dffml otherwise. + echo "dffml $@" >> /usr/bin/cmd.sh +fi + +runit diff --git a/setup.py b/setup.py new file mode 100644 index 0000000000..c6f193f123 --- /dev/null +++ b/setup.py @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +import ast +from io import open +from setuptools import find_packages, setup + +with open('dffml/version.py', 'r') as f: + for line in f: + if line.startswith('VERSION'): + version = ast.literal_eval(line.strip().split('=')[-1].strip()) + break + +with open('README.rst', 'r', encoding='utf-8') as f: + readme = f.read() + +setup( + name='dffml', + version=version, + description='Data Flow Facilitator for Machine Learning', + long_description=readme, + author='John Andersen', + author_email='john.s.andersen@intel.com', + maintainer='John Andersen', + maintainer_email='john.s.andersen@intel.com', + url='https://github.com/intel/dffml', + license='MIT', + keywords=[ + '', + ], + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Natural Language :: English', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 3 :: Only', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: Implementation :: CPython', + 'Programming Language :: Python :: Implementation :: PyPy', + ], + packages=find_packages(), + extras_require={ + 
'tensorflow': ['dffml-model-tensorflow'], + 'git': ['dffml-feature-git'], + }, + entry_points={ + 'console_scripts': [ + 'dffml = dffml.cli:CLI.main', + ], + 'dffml.source': [ + 'csv = dffml.source.csvfile:CSVSource', + 'json = dffml.source.json:JSONSource', + 'memory = dffml.source.memory:MemorySource', + ], + 'dffml.port': [ + 'json = dffml.port.json:JSON', + ], + }, +) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000000..5bbefb030a --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation diff --git a/tests/source/__init__.py b/tests/source/__init__.py new file mode 100644 index 0000000000..5bbefb030a --- /dev/null +++ b/tests/source/__init__.py @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation diff --git a/tests/source/test_file.py b/tests/source/test_file.py new file mode 100644 index 0000000000..26898d7ec8 --- /dev/null +++ b/tests/source/test_file.py @@ -0,0 +1,84 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +import os +import io +import atexit +import shutil +import random +import inspect +import asyncio +import logging +import tempfile +import unittest +import collections +from unittest.mock import patch, mock_open +from functools import wraps +from contextlib import contextmanager +from typing import List, Dict, Any, Optional, Tuple, AsyncIterator + +from dffml.repo import Repo +from dffml.source import Sources, FileSource +from dffml.util.asynctestcase import AsyncTestCase + +class FakeFileSource(FileSource): + + async def update(self, repo: Repo): + pass # pragma: no cover + + async def repos(self) -> AsyncIterator[Repo]: + yield Repo('') # pragma: no cover + + async def repo(self, src_url: str): + pass # pragma: no cover + + async def load_fd(self, fd): + pass # pragma: no cover + + async def dump_fd(self, fd): + pass # pragma: no cover + +class TestFileSource(AsyncTestCase): + + 
def test_readonly(self) -> bool: + self.assertTrue(FakeFileSource('testfile:ro').readonly) + self.assertFalse(FakeFileSource('testfile').readonly) + + def test_filename(self) -> bool: + self.assertEqual(FakeFileSource('testfile').filename, + 'testfile') + + def test_filename_readonly(self) -> bool: + self.assertEqual(FakeFileSource('testfile:ro').filename, + 'testfile') + + def test_repr(self): + self.assertEqual(repr(FakeFileSource('testfile')), + 'FakeFileSource(\'testfile\')') + + async def test_open(self): + source = FakeFileSource('testfile') + m_open = mock_open() + with patch('os.path.isfile', return_value=True), \ + patch('builtins.open', m_open): + await source.open() + m_open.assert_called_once_with('testfile', 'r') + + async def test_open_no_file(self): + source = FakeFileSource('testfile') + with patch('os.path.isfile', return_value=False): + await source.open() + self.assertTrue(isinstance(source.mem, dict)) + + async def test_close(self): + source = FakeFileSource('testfile') + m_open = mock_open() + with patch('builtins.open', m_open): + await source.close() + m_open.assert_called_once_with('testfile', 'w') + + async def test_close_readonly(self): + source = FakeFileSource('testfile:ro') + m_open = mock_open() + with patch('builtins.open', m_open): + await source.close() + m_open.assert_not_called() diff --git a/tests/test_accuracy.py b/tests/test_accuracy.py new file mode 100644 index 0000000000..7c4a8ac295 --- /dev/null +++ b/tests/test_accuracy.py @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +import unittest + +from dffml.accuracy import Accuracy + +class TestAccuracry(unittest.TestCase): + + def test_str(self): + self.assertEqual(str(Accuracy(0.04242)), '4.24') diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000000..de27d04728 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,150 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +import os 
+import io +import atexit +import shutil +import random +import inspect +import asyncio +import logging +import tempfile +import unittest +import collections +from unittest.mock import patch +from functools import wraps +from contextlib import contextmanager +from typing import List, Dict, Any, Optional, Tuple, AsyncIterator + +from dffml.repo import Repo +from dffml.feature import Feature, Features +from dffml.source import Sources, RepoSource +from dffml.model import Model +from dffml.accuracy import Accuracy as AccuracyType +from dffml.util.asynctestcase import AsyncTestCase + +from dffml.cli import EvaluateAll, EvaluateRepo, \ + Train, Accuracy, PredictAll, PredictRepo + +class ReposTestCase(AsyncTestCase): + + def setUp(self): + self.repos = [Repo(str(random.random())) for _ in range(0, 10)] + self.sources = Sources(RepoSource(*self.repos)) + self.features = Features(FakeFeature()) + +class FakeFeature(Feature): + + NAME: str = 'fake' + + def dtype(self): + return float # pragma: no cov + + def length(self): + return 1 # pragma: no cov + + async def applicable(self, data): + return True + + async def fetch(self, data): + pass + + async def parse(self, data): + pass + + async def calc(self, data): + return float(data.src_url) + +class FakeModel(Model): + + async def train(self, sources: Sources, features: Features, + classifications: List[Any], steps: int, num_epochs: int): + pass + + async def accuracy(self, sources: Sources, features: Features, + classifications: List[Any]) -> AccuracyType: + return AccuracyType(1.00) + + async def predict(self, repos: AsyncIterator[Repo], features: Features, + classifications: List[Any]) -> \ + AsyncIterator[Tuple[Repo, Any, float]]: + async for repo in repos: + yield repo, '', 1.0 + +class TestEvaluateAll(ReposTestCase): + + def setUp(self): + super().setUp() + self.cli = EvaluateAll(sources=self.sources, features=self.features) + + async def test_run(self): + repos = {repo.src_url: repo async for repo in self.cli.run()} + 
self.assertEqual(len(repos), len(self.repos)) + for repo in self.repos: + self.assertIn(repo.src_url, repos) + self.assertIn('fake', repos[repo.src_url].features()) + self.assertEqual(float(repo.src_url), + repos[repo.src_url].features(['fake'])['fake']) + +class TestEvaluateRepo(ReposTestCase): + + def setUp(self): + super().setUp() + self.subset = self.repos[int(len(self.repos) / 2):] + self.cli = EvaluateRepo(sources=self.sources, features=self.features, + keys=[repo.src_url for repo in self.subset]) + + async def test_run(self): + repos = {repo.src_url: repo async for repo in self.cli.run()} + self.assertEqual(len(repos), len(self.subset)) + for repo in self.subset: + self.assertIn(repo.src_url, repos) + self.assertIn('fake', repos[repo.src_url].features()) + self.assertEqual(float(repo.src_url), + repos[repo.src_url].features(['fake'])['fake']) + +class TestTrain(AsyncTestCase): + + def setUp(self): + self.cli = Train(model=FakeModel(), model_dir=None, + sources=Sources(RepoSource()), features=Features()) + + async def test_run(self): + await self.cli.run() + +class TestAccuracy(AsyncTestCase): + + def setUp(self): + self.cli = Accuracy(model=FakeModel(), + sources=Sources(RepoSource()), features=Features()) + + async def test_run(self): + self.assertEqual(1.0, await self.cli.run()) + +class TestPredictAll(ReposTestCase): + + def setUp(self): + super().setUp() + self.cli = PredictAll(model=FakeModel(), sources=self.sources, + features=self.features) + + async def test_run(self): + repos = {repo.src_url: repo async for repo in self.cli.run()} + self.assertEqual(len(repos), len(self.repos)) + for repo in self.repos: + self.assertIn(repo.src_url, repos) + +class TestPredictRepo(ReposTestCase): + + def setUp(self): + super().setUp() + self.subset = self.repos[int(len(self.repos) / 2):] + self.cli = PredictRepo(model=FakeModel(), sources=self.sources, + features=self.features, + keys=[repo.src_url for repo in self.subset]) + + async def test_run(self): + repos = 
{repo.src_url: repo async for repo in self.cli.run()} + self.assertEqual(len(repos), len(self.subset)) + for repo in self.subset: + self.assertIn(repo.src_url, repos) diff --git a/tests/test_feature.py b/tests/test_feature.py new file mode 100644 index 0000000000..9e7cfe588c --- /dev/null +++ b/tests/test_feature.py @@ -0,0 +1,189 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +import asyncio +import unittest +from unittest.mock import patch + +from dffml.feature import Data, Feature, Features, LoggingDict, DefFeature +from dffml.util.asynctestcase import AsyncTestCase + +class SingleFeature(Feature): + + def dtype(self): + return bool # pragma: no cov + + def length(self): + return 1 # pragma: no cov + +class OneFeatureTester(SingleFeature): + NAME: str = 'one' + +class TwoFeatureTester(SingleFeature): + NAME: str = 'two' + + async def calc(self, data: Data) -> bool: + return True + +class TwoBFeatureTester(SingleFeature): + pass + +class ThreeFeatureTester(SingleFeature): + NAME: str = 'three' + + async def applicable(self, data: Data) -> bool: + return False + +class ProgessFeatureTester(SingleFeature): + NAME: str = 'progress' + + async def calc(self, data: Data) -> bool: + await data.log('Hi') + return True + +class TestLoggingDict(AsyncTestCase): + + def setUp(self): + self.data = Data('test') + self.ldict = LoggingDict(self.data) + + def ginternal(self, key): + return getattr(self.ldict, + '_%s__dict' % (self.ldict.__class__.__qualname__,))[key] + + async def test_get(self): + self.assertEqual(await self.ldict.get('feed', default='face'), 'face') + + async def test_set(self): + await self.ldict.set('dead', 'beef') + self.assertEqual(self.ginternal('dead'), 'beef') + + async def test_set_ignored(self): + lock = asyncio.Lock() + await self.ldict.set('dead', lock) + + async def test_inc(self): + await self.ldict.set('babe', 0) + self.assertEqual(self.ginternal('babe'), 0) + await self.ldict.inc('babe') + 
self.assertEqual(self.ginternal('babe'), 1) + +class TestData(AsyncTestCase): + + def setUp(self): + self.data = Data('test') + + async def test_mklock_new(self): + self.assertNotIn('feed', self.data.locks) + await self.data.mklock('feed') + self.assertIn('feed', self.data.locks) + + async def test_mklock_exists(self): + self.data.locks['feed'] = asyncio.Lock() + self.assertIn('feed', self.data.locks) + await self.data.mklock('feed') + self.assertIn('feed', self.data.locks) + + async def test_results(self): + async def complete(*args): + return 'face' + with patch.object(self.data, 'complete', complete): + await self.data.result() + self.assertEqual(self.data.results, 'face') + +class TestFeature(AsyncTestCase): + + def setUp(self): + self.feature = Feature() + + def test_default_dtype(self): + self.assertEqual(self.feature.dtype(), int) + + def test_default_length(self): + self.assertEqual(self.feature.length(), 1) + + async def test_default_applicable(self): + self.assertEqual(await self.feature.applicable(Data('test')), True) + +class TestDefFeature(AsyncTestCase): + + def test_deffeature(self): + feature = DefFeature('test', float, 10) + self.assertEqual(feature.NAME, 'test') + self.assertEqual(feature.dtype(), float) + self.assertEqual(feature.length(), 10) + +class TestFeatures(AsyncTestCase): + + def setUp(self): + self.one = OneFeatureTester() + self.two = TwoFeatureTester() + self.three = ThreeFeatureTester() + self.features = Features(self.one, self.two, self.three) + + async def test_names(self): + async with self.features: + names = self.features.names() + for check in ['one', 'two', 'three']: + self.assertIn(check, names) + + async def test_applicable(self): + async with self.features: + applicable = await self.features.applicable('test') + self.assertIn(self.one, applicable) + self.assertIn(self.two, applicable) + self.assertNotIn(self.three, applicable) + + async def test_evaluate(self): + async with self.features: + results = await 
self.features.evaluate('test') + self.assertIn(self.one.NAME, results) + self.assertIn(self.two.NAME, results) + self.assertNotIn(self.three.NAME, results) + self.assertEqual(results[self.one.NAME], False) + self.assertEqual(results[self.two.NAME], True) + + async def test_one_applicable_other_not(self): + twob = TwoBFeatureTester() + features = Features(self.two, twob) + async with features: + results = await features.evaluate('test') + self.assertIn(self.two.NAME, results) + self.assertEqual(len(results), 1) + self.assertEqual(results[self.two.NAME], True) + + async def test_monitor_progess(self): + progress = ProgessFeatureTester() + features = Features(progress) + async with features: + data = await features.submit('test') + logs = await data.logs() + results = await data.result() + self.assertTrue(logs) + self.assertIn('Hi', logs) + self.assertIn(progress.NAME, results) + self.assertEqual(len(results), 1) + self.assertEqual(results[progress.NAME], True) + + def test_load_def(self): + feature = Features.load_def('test', 'float', 10) + self.assertEqual(feature.NAME, 'test') + self.assertEqual(feature.dtype(), float) + self.assertEqual(feature.length(), 10) + + def test_load_defs(self): + no_def, (one, two) = Features.load_defs('na', 'def:one:float:10', + 'def:two:bool:1') + self.assertEqual(no_def, ['na']) + self.assertEqual(one.NAME, 'one') + self.assertEqual(one.dtype(), float) + self.assertEqual(one.length(), 10) + self.assertEqual(two.NAME, 'two') + self.assertEqual(two.dtype(), bool) + self.assertEqual(two.length(), 1) + + def test_convert_dtype(self): + self.assertEqual(Features.convert_dtype('float'), float) + + def test_convert_dtype_invalid(self): + with self.assertRaisesRegex(TypeError, 'Failed to convert'): + Features.convert_dtype('not a python data type') diff --git a/tests/test_monitor.py b/tests/test_monitor.py new file mode 100644 index 0000000000..c8588b9c7d --- /dev/null +++ b/tests/test_monitor.py @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: 
MIT +# Copyright (c) 2019 Intel Corporation +import asyncio +import unittest + +from dffml.util.monitor import Monitor, Task +from dffml.util.asynctestcase import AsyncTestCase + +async def test_task(task=Task()): + for i in range(0, 10): + await asyncio.sleep(0.01) + await task.update(i) + +async def log_task(task=Task()): + for i in range(0, 10): + await task.log('i is now %d', i) + +async def recv_statuses(status, sleep): + log = [] + await asyncio.sleep(sleep) + async for msg in status: + log.append(msg) + return log + +class TestMonitor(AsyncTestCase): + + def setUp(self): + self.monitor = Monitor() + + async def test_00_await_complete(self): + await self.monitor.complete((await self.monitor.start(test_task)).key) + + async def test_01_single_watching_status(self): + task = await self.monitor.start(test_task) + statuses = await recv_statuses(self.monitor.status(task.key), 0.05) + self.assertEqual(len(statuses), 10) + for i in range(0, 10): + self.assertEqual(statuses[i], i) + + async def test_02_multiple_watching(self): + task = await self.monitor.start(test_task) + res = await asyncio.gather( + *[recv_statuses(self.monitor.status(task.key), i * 0.01) + for i in range(0, 5)]) + for statuses in res: + self.assertEqual(len(statuses), 10) + for i in range(0, 10): + self.assertEqual(statuses[i], i) + + async def test_03_log(self): + await self.monitor.complete((await self.monitor.start(log_task)).key) + + async def test_04_already_complete(self): + task = await self.monitor.start(log_task) + await self.monitor.complete(task.key) + await self.monitor.complete(task.key) + + async def test_05_already_complete_status(self): + task = await self.monitor.start(log_task) + await self.monitor.complete(task.key) + self.assertFalse([msg async for msg in self.monitor.status(task.key)]) + + async def test_06_log_status(self): + i = 0 + async for msg in self.monitor.log_status( + (await self.monitor.start(test_task)).key): + self.assertEqual(msg, i) + i += 1 + 
self.assertEqual(i, 10) + + async def test_07_already_running(self): + task = await self.monitor.start(test_task) + await self.monitor.start(task, task.key) + await self.monitor.complete(task.key) diff --git a/tests/test_repo.py b/tests/test_repo.py new file mode 100644 index 0000000000..6d828e6831 --- /dev/null +++ b/tests/test_repo.py @@ -0,0 +1,136 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +import unittest + +from dffml.repo import RepoPrediction, RepoData, Repo + +class TestRepoPrediction(unittest.TestCase): + + def setUp(self): + self.confidence = 0.42 + self.classification = 'good' + self.full = RepoPrediction(confidence=self.confidence, + classification=self.classification) + self.null = RepoPrediction() + + def test_full_property_confidence(self): + self.assertEqual(self.confidence, self.full['confidence']) + self.assertEqual(self.full.confidence, + self.full['confidence']) + + def test_full_property_classification(self): + self.assertEqual(self.classification, self.full['classification']) + self.assertEqual(self.full.classification, + self.full['classification']) + + def test_full_dict_returns_self(self): + self.assertEqual(self.full, self.full.dict()) + + def test_full_len_2(self): + self.assertEqual(2, len(self.full)) + + def test_full_bool_true(self): + self.assertTrue(self.full) + + def test_null_dict_empty_array(self): + self.assertEqual([], self.null.dict()) + + def test_null_len_0(self): + self.assertEqual(0, len(self.null)) + + def test_null_bool_false(self): + self.assertFalse(self.null) + +class TestRepoData(unittest.TestCase): + + def setUp(self): + self.full = RepoData( + src_url=None, + features=None, + classification=None, + prediction=None, + last_updated=None) + self.null = RepoData() + + def test_null_dict_no_prediction(self): + self.assertNotIn('prediction', self.null.dict()) + +class TestRepo(unittest.TestCase): + + def setUp(self): + self.null = Repo('null') + self.full = Repo('full', + 
data=dict(features=dict(dead='beef'), + extra=dict(extra='read all about it')), + extra=dict(half=True)) + + def test_dict(self): + data = self.full.dict() + self.assertIn('extra', data) + + def test_repr(self): + repr(self.full) + + def test_str(self): + self.full.prediction = RepoPrediction() + self.assertIn('Undetermined', str(self.full)) + self.full.data.prediction = RepoPrediction(classification='Good') + self.assertIn('Good', str(self.full)) + self.full.data.classification = 'Great' + self.assertIn('Great', str(self.full)) + self.full.extra.update(dict(hi=5)) + self.assertIn('5', str(self.full)) + self.full.extra = dict() + self.assertNotIn('5', str(self.full)) + + def test_merge(self): + null = Repo('null') + null.merge(self.full) + self.assertIn('half', null.extra) + self.assertTrue(null.extra['half']) + + def test_src_url(self): + return self.full.data.src_url + + def test_evaluated(self): + old_last_updated = self.full.data.last_updated + results = {'new': 'feature'} + self.full.evaluated({'feed': 'face'}) + self.assertIn('feed', self.full.data.features) + self.assertEqual('face', self.full.data.features['feed']) + self.full.evaluated(results, overwrite=True) + self.assertEqual(self.full.data.features, results) + self.assertNotEqual(old_last_updated, self.full.data.last_updated) + + def test_features(self): + self.assertIn('dead', self.full.features()) + self.assertIn('dead', self.full.features(['dead'])) + self.assertFalse(self.full.features(['dead', 'beaf'])) + + def test_predicted(self): + old_prediction = self.full.data.prediction + old_last_updated = self.full.data.last_updated + self.full.predicted('feed', 1.00) + self.assertNotEqual(old_prediction, self.full.data.prediction) + self.assertNotEqual(old_last_updated, self.full.data.last_updated) + + def test_prediction(self): + self.full.predicted('feed', 1.00) + self.assertTrue(self.full.prediction()) + + def test_classify(self): + self.full.classify('face') + 
self.assertEqual(self.full.data.classification, 'face') + + def test_classified(self): + self.full.classify('') + self.assertFalse(self.full.classified()) + self.full.classify(True) + self.assertTrue(self.full.classified()) + + def test_classification(self): + self.full.classify(True) + self.assertTrue(self.full.classification()) + self.full.classify('') + with self.assertRaisesRegex(ValueError, 'Unclassified'): + self.full.classification() diff --git a/tests/util/__init__.py b/tests/util/__init__.py new file mode 100644 index 0000000000..5bbefb030a --- /dev/null +++ b/tests/util/__init__.py @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation diff --git a/tests/util/test_asynccontextmanager.py b/tests/util/test_asynccontextmanager.py new file mode 100644 index 0000000000..5fce878a1c --- /dev/null +++ b/tests/util/test_asynccontextmanager.py @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +import unittest + +from dffml.util.asynchelper import AsyncContextManagerList +from dffml.util.asynctestcase import AsyncTestCase + +class OpenCloseTester(object): + + def __init__(self): + self.isopen = False + + async def __aenter__(self): + self.isopen = True + + async def __aexit__(self, exc_type, exc_value, traceback): + self.isopen = False + +class TestAsyncContextManagerList(AsyncTestCase): + + async def test_open_close_all(self): + test_list = AsyncContextManagerList(OpenCloseTester(), + OpenCloseTester()) + for listel in test_list: + self.assertFalse(listel.isopen) + async with test_list: + for listel in test_list: + self.assertTrue(listel.isopen) + for listel in test_list: + self.assertFalse(listel.isopen) diff --git a/tests/util/test_cli.py b/tests/util/test_cli.py new file mode 100644 index 0000000000..73988ff0b9 --- /dev/null +++ b/tests/util/test_cli.py @@ -0,0 +1,252 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +import sys +import json +import asyncio +import 
logging +import unittest +from unittest.mock import patch + +from dffml.repo import Repo +from dffml.port import Port +from dffml.feature import Feature, Features +from dffml.source import Source, Sources +from dffml.model import Model + +from dffml.util.cli import \ + ParseSourcesAction, \ + ParseFeaturesAction, \ + ParseModelAction, \ + ParsePortAction, \ + ParseLoggingAction, \ + Arg, \ + JSONEncoder, \ + CMD, \ + Parser, \ + ListEntrypoint, \ + FeaturesCMD, \ + ModelCMD + +from dffml.util.asynctestcase import AsyncTestCase + +def Namespace(**kwargs): + class MakeNamespace(object): + pass + for key, value in kwargs.items(): + setattr(MakeNamespace, key, value) + return MakeNamespace + +class TestParseActions(unittest.TestCase): + + def test_sources(self): + def load_from_dict(toload): + return toload + namespace = Namespace(sources=False) + with patch.object(Source, 'load_from_dict', + new=load_from_dict) \ + as mock_method: + action = ParseSourcesAction(dest='sources', option_strings='') + action(None, namespace, ['first=src0', 'second=src1']) + self.assertEqual(len(namespace.sources), 2) + self.assertEqual(namespace.sources[0], 'first') + self.assertEqual(namespace.sources[1], 'second') + action(None, namespace, 'second=src2') + self.assertEqual(len(namespace.sources), 1) + self.assertEqual(namespace.sources[0], 'second') + + def test_features(self): + dest, cls, parser = ('features', Features, ParseFeaturesAction) + namespace = Namespace(**{dest: False}) + with patch.object(cls, 'load') as mock_method: + action = parser(dest=dest, option_strings='') + action(None, namespace, 'fake_%s' % (dest,)) + mock_method.assert_called_once_with(*('fake_%s' % (dest,))) + self.assertTrue(getattr(namespace, dest, False)) + + def test_features_model_port(self): + for dest, cls, parser in [('model', Model, ParseModelAction), + ('port', Port, ParsePortAction)]: + namespace = Namespace(**{dest: False}) + with self.subTest(dest=dest, cls=cls, parser=parser): + with 
patch.object(cls, 'load', + return_value=lambda: True) as mock_method: + action = parser(dest=dest, option_strings='') + action(None, namespace, 'fake_%s' % (dest,)) + mock_method.assert_called_once_with('fake_%s' % (dest,)) + self.assertTrue(getattr(namespace, dest, False)) + + def test_logging(self): + namespace = Namespace(log=False) + action = ParseLoggingAction(dest='log', option_strings='') + with patch.object(logging, 'basicConfig') as mock_method: + action(None, namespace, 'DEBUG') + mock_method.assert_called_once_with(level=logging.DEBUG) + with patch.object(logging, 'basicConfig') as mock_method: + action(None, namespace, 'WARNING') + mock_method.assert_called_once_with(level=logging.WARNING) + +class TestArg(unittest.TestCase): + + def test_init(self): + arg = Arg('-test', key='value') + self.assertEqual(arg.name, '-test') + self.assertIn('key', arg) + self.assertEqual(arg['key'], 'value') + + def test_modify(self): + arg = Arg('-test', key='value') + first = arg.modify(name='-first') + second = arg.modify(key='new_value') + self.assertEqual(arg.name, '-test') + self.assertEqual(first.name, '-first') + self.assertEqual(second.name, '-test') + self.assertEqual(second['key'], 'new_value') + +class TestJSONEncoder(unittest.TestCase): + + def test_default(self): + class UnregisteredObject(object): + pass + with self.assertRaisesRegex(TypeError, 'not JSON serializable'): + json.dumps(UnregisteredObject, cls=JSONEncoder) + + def test_repo(self): + self.assertIn('face', json.dumps(Repo('face'), cls=JSONEncoder)) + + def test_feature(self): + class FaceFeature(Feature): + NAME = 'face' + self.assertIn('face', json.dumps(FaceFeature(), cls=JSONEncoder)) + +class TestCMD(AsyncTestCase): + + def test_init(self): + class CMDTest(CMD): + arg_nope_present = Arg('nope', default=False) + arg_ignored = Arg('ignored') + cmd = CMDTest(nope=True) + self.assertTrue(getattr(cmd, 'log', False)) + self.assertTrue(getattr(cmd, 'nope', False)) + + async def 
test_async_context_management(self): + async with CMD(): + pass + + async def test_parse_args(self): + with patch.object(Parser, 'add_subs') as mock_method: + await CMD.parse_args() + mock_method.assert_called_once_with(CMD) + + async def test_cli_no_sub_command(self): + with patch.object(Parser, 'print_help') as mock_method: + await CMD.cli() + mock_method.assert_called_once() + + async def test_cli_sub_command_without_run(self): + class Secondary(CMD): + pass + class Primary(CMD): + secondary = Secondary + with patch.object(Parser, 'print_help') as mock_method: + await Primary.cli('secondary') + mock_method.assert_called_once() + + async def test_cli_run_sub_command_asyncgen(self): + class Secondary(CMD): + async def run(self): + yield 1 + class Primary(CMD): + secondary = Secondary + self.assertEqual(sum(await Primary.cli('secondary')), 1) + + async def test_cli_run_sub_command(self): + class Secondary(CMD): + async def run(self): + return 2 + class Primary(CMD): + secondary = Secondary + self.assertEqual(await Primary.cli('secondary'), 2) + + def test_sanitize_args(self): + args = {'cmd': True, 'non_internal': True} + args = CMD().sanitize_args(args) + self.assertNotIn('cmd', args) + self.assertIn('non_internal', args) + + def test_main_result_none(self): + class Secondary(CMD): + async def run(self): + return None + class Primary(CMD): + secondary = Secondary + Primary.main(loop=asyncio.new_event_loop(), argv=['t', 'secondary']) + + def test_main_result_not_none(self): + class Secondary(CMD): + async def run(self): + return True + class Primary(CMD): + secondary = Secondary + with patch.object(json, 'dump') as mock_method: + Primary.main(loop=asyncio.new_event_loop(), argv=['t', 'secondary']) + mock_method.assert_called_once() + +class TestParser(unittest.TestCase): + + def test_add_subs(self): + class FakeSubCMD(CMD): + arg_test = Arg('-test') + class FakeCMD(CMD): + sub_cmd = FakeSubCMD + parser = Parser() + with patch.object(parser, 'add_subparsers') as 
mock_method: + parser.add_subs(FakeCMD) + mock_method.assert_called_once() + parser = Parser() + with patch.object(parser, 'add_subparsers') as mock_method: + parser.add_subs(FakeSubCMD) + with self.assertRaisesRegex(AssertionError, 'Called 0 times'): + mock_method.assert_called_once() + +class TestListEntrypoint(AsyncTestCase): + + def test_display_no_docstring(self): + class FakeClass(CMD): + pass + with patch.object(sys.stdout, 'write') as mock_method: + ListEntrypoint().display(FakeClass) + with self.assertRaisesRegex(AssertionError, 'call not found'): + mock_method.assert_any_call('docstring!') + + def test_display_docstring(self): + class FakeClass(CMD): + 'docstring!' + with patch.object(sys.stdout, 'write') as mock_method: + ListEntrypoint().display(FakeClass) + mock_method.assert_any_call('docstring!') + + async def test_run(self): + class FakeClass(CMD): + 'docstring!' + class FakeEntrypoint(object): + @classmethod + def load(cls): + return [FakeClass] + class FakeListEntrypoint(ListEntrypoint): + ENTRYPOINT = FakeEntrypoint + with patch.object(sys.stdout, 'write') as mock_method: + await FakeListEntrypoint().run() + mock_method.assert_any_call('docstring!') + +class TestFeaturesCMD(unittest.TestCase): + + def test_set_timeout(self): + cmd = FeaturesCMD(timeout=5) + self.assertEqual(cmd.features.timeout, 5) + +class TestModelCMD(unittest.TestCase): + + def test_set_model_dir(self): + with patch.multiple(Model, __abstractmethods__=set()): + cmd = ModelCMD(model_dir='feed', model=Model) + self.assertEqual(cmd.model.model_dir, 'feed') diff --git a/tests/util/test_entrypoint.py b/tests/util/test_entrypoint.py new file mode 100644 index 0000000000..de03a8e9f4 --- /dev/null +++ b/tests/util/test_entrypoint.py @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +import os +import unittest +import pkg_resources +from unittest.mock import patch +from typing import Type + +from dffml.util.entrypoint import Entrypoint + +class 
Loadable(object): + + def __init__(self, name: str, parent_class: Type[object]): + self.name = name + self.parent_class = parent_class + + def load(self): + class NewClass(self.parent_class): + name = self.name + return NewClass + +class FakeEntrypoint(Entrypoint): + + ENTRY_POINT = 'fake' + +class TestEntrypoint(unittest.TestCase): + + FAKE_ITER = [ + Loadable('one', FakeEntrypoint), + Loadable('two', object), + Loadable('three', FakeEntrypoint) + ] + + def test_load_only_subclasses(self): + with patch.object(pkg_resources, 'iter_entry_points', + return_value=self.FAKE_ITER) as mock_method: + loaded = FakeEntrypoint.load() + self.assertTrue(loaded) + names = [i.name for i in loaded] + for should_load in ['one', 'three']: + with self.subTest(should_load=should_load): + self.assertIn(should_load, names) + with self.subTest(should_not_load='two'): + self.assertNotIn('two', names) + + def test_load_given_name(self): + with patch.object(pkg_resources, 'iter_entry_points', + return_value=self.FAKE_ITER) as mock_method: + loaded = FakeEntrypoint.load('three') + self.assertEqual('three', loaded.name) + + def test_load_no_found(self): + with patch.object(pkg_resources, 'iter_entry_points', + return_value=self.FAKE_ITER) as mock_method: + with self.assertRaisesRegex(KeyError, 'was not found in'): + FakeEntrypoint.load('four') + + def test_load_multiple(self): + with patch.object(pkg_resources, 'iter_entry_points', + return_value=self.FAKE_ITER) as mock_method: + loaded = FakeEntrypoint.load_multiple(['one', 'three']) + self.assertTrue(loaded) + self.assertIn('one', loaded) + self.assertNotIn('two', loaded) + self.assertIn('three', loaded) diff --git a/tests/util/test_tempdir.py b/tests/util/test_tempdir.py new file mode 100644 index 0000000000..81f221b38c --- /dev/null +++ b/tests/util/test_tempdir.py @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2019 Intel Corporation +import os +import unittest +from typing import List + +from dffml.util.tempdir import 
TempDir +from dffml.util.asynctestcase import AsyncTestCase + +class TestTempDir(unittest.TestCase): + + def test_mktempdir(self): + dirname = TempDir().mktempdir() + self.assertEqual(os.path.isdir(dirname), True) + os.rmdir(dirname) + + def test_rmtempdirs(self): + tempdir = TempDir() + dirname = tempdir.mktempdir() + self.assertEqual(os.path.isdir(dirname), True) + tempdir.rmtempdirs() + self.assertEqual(os.path.isdir(dirname), False) + +class TestTempDirAsyncContextManager(AsyncTestCase): + + async def test_removes_on_aexit(self): + length: int = 10 + dirs: List[str] = [] + tempdir: TempDir = TempDir() + async with tempdir: + for _i in range(0, length): + dirs.append(tempdir.mktempdir()) + self.assertTrue(os.path.isdir(dirs[-1])) + self.assertEqual(len(dirs), length) + for dirname in dirs: + self.assertFalse(os.path.exists(dirname))