diff --git a/helpers/report_hunter.py b/helpers/report_hunter.py
index 0158b3e5..4800f527 100644
--- a/helpers/report_hunter.py
+++ b/helpers/report_hunter.py
@@ -20,7 +20,7 @@
 from cpg_utils.config import get_config
 from metamist.graphql import gql, query
 
-from reanalysis.utils import get_logger
+from reanalysis.static_values import get_logger
 
 JINJA_TEMPLATE_DIR = Path(__file__).absolute().parent / 'templates'
 PROJECT_QUERY = gql(
diff --git a/reanalysis/hail_filter_sv.py b/reanalysis/hail_filter_sv.py
index c68a88c8..0a041878 100644
--- a/reanalysis/hail_filter_sv.py
+++ b/reanalysis/hail_filter_sv.py
@@ -22,7 +22,8 @@
     ONE_INT,
     MISSING_INT,
 )
-from reanalysis.utils import get_logger, read_json_from_path
+from reanalysis.utils import read_json_from_path
+from reanalysis.static_values import get_logger
 
 
 def filter_matrix_by_af(
diff --git a/reanalysis/interpretation_runner.py b/reanalysis/interpretation_runner.py
index f708195e..2dae2d1f 100644
--- a/reanalysis/interpretation_runner.py
+++ b/reanalysis/interpretation_runner.py
@@ -41,11 +41,10 @@
     seqr_loader,
 )
 from reanalysis.utils import (
-    FileTypes,
     identify_file_type,
-    get_granular_date,
-    get_logger,
 )
+from reanalysis.models import FileTypes
+from reanalysis.static_values import get_granular_date, get_logger
 
 # region: CONSTANTS
 # exact time that this run occurred
diff --git a/reanalysis/models.py b/reanalysis/models.py
index 11779a7a..25696e2c 100644
--- a/reanalysis/models.py
+++ b/reanalysis/models.py
@@ -4,13 +4,33 @@
 
 from enum import Enum
 from pydantic import BaseModel, Field
-from reanalysis.utils import get_granular_date
-
+from reanalysis.static_values import get_granular_date
 
 NON_HOM_CHROM = ['X', 'Y', 'MT', 'M']
 CHROM_ORDER = list(map(str, range(1, 23))) + NON_HOM_CHROM
 
 
+class VariantType(Enum):
+    """
+    enumeration of permitted variant types
+    """
+
+    SMALL = 'SMALL'
+    SV = 'SV'
+
+
+class FileTypes(Enum):
+    """
+    enumeration of permitted input file types
+    """
+
+    HAIL_TABLE = '.ht'
+    MATRIX_TABLE = '.mt'
+    VCF = '.vcf'
+    VCF_GZ = '.vcf.gz'
+    VCF_BGZ = '.vcf.bgz'
+
+
 class Coordinates(BaseModel):
     """
     A representation of genomic coordinates
@@ -61,24 +81,13 @@ def __eq__(self, other) -> bool:
         )
 
 
-class VariantType(Enum):
-    """
-    enumeration of permitted variant types
-    """
-
-    SMALL = 'SMALL'
-    SV = 'SV'
-
-
 class Variant(BaseModel):
     """
     the abstracted representation of a variant from any source
-    todo move some more of the parsing logic into here as an init?
     """
 
     coordinates: Coordinates = Field(repr=True)
-    info: dict[str, str | int | float] = Field(default_factory=dict)
-    categories: list[str] = Field(default_factory=list)
+    info: dict[str, str | int | float | list[str] | bool] = Field(default_factory=dict)
     het_samples: set[str] = Field(default_factory=set, exclude=True)
     hom_samples: set[str] = Field(default_factory=set, exclude=True)
     boolean_categories: list[str] = Field(default_factory=list, exclude=True)
@@ -153,7 +162,9 @@ def sample_support_only(self, sample_id: str) -> bool:
         Returns:
             True if support only
         """
-        return self.has_support and not self.sample_categorised_check(sample_id)
+        return self.has_support and not (
+            self.category_non_support or self.sample_categorised_check(sample_id)
+        )
 
     def category_values(self, sample: str) -> list[str]:
         """
@@ -227,8 +238,9 @@ def sample_category_check(self, sample_id: str, allow_support: bool = True) -> b
 class SmallVariant(Variant):
     depths: dict[str, int] = Field(default_factory=dict, exclude=True)
     ab_ratios: dict[str, float] = Field(default_factory=dict, exclude=True)
-    transcript_consequences: list[dict[str, str]] = Field(default_factory=list)
-    var_type: str = VariantType.SMALL.value
+    transcript_consequences: list[dict[str, str | float | int]] = Field(
+        default_factory=list
+    )
 
     def get_sample_flags(self, sample: str) -> list[str]:
         """
@@ -274,9 +286,6 @@ def check_ab_ratio(self, sample: str) -> list[str]:
 
 
 class StructuralVariant(Variant):
-
-    var_type: str = VariantType.SV.value
-
     def check_ab_ratio(self, *args, **kwargs) -> list[str]:
         """
         dummy method for AB ratio checking - not implemented for SVs
@@ -314,14 +323,7 @@ class ReportVariant(BaseModel):
     phenotypes: list[str] = Field(default_factory=list)
     labels: list[str] = Field(default_factory=list)
     first_seen: str = Field(default=get_granular_date())
-    independent: bool = False
-
-    @property
-    def is_independent(self):
-        """
-        check if this variant acts independently
-        """
-        return len(self.support_vars) == 0
+    independent: bool = Field(default=False)
 
     def __eq__(self, other):
         """
diff --git a/reanalysis/moi_tests.py b/reanalysis/moi_tests.py
index e287dacb..ef30ba64 100644
--- a/reanalysis/moi_tests.py
+++ b/reanalysis/moi_tests.py
@@ -12,14 +12,8 @@
 
 from cpg_utils.config import get_config
 
-from reanalysis.utils import (
-    AbstractVariant,
-    CompHetDict,
-    MinimalVariant,
-    ReportedVariant,
-    VariantType,
-    X_CHROMOSOME,
-)
+from reanalysis.models import SmallVariant, StructuralVariant, ReportVariant
+from reanalysis.utils import CompHetDict, X_CHROMOSOME
 
 # config keys to use for dominant MOI tests
 CALLSET_AF_SV_DOMINANT = 'callset_af_sv_dominant'
@@ -38,7 +32,7 @@
 
 def check_for_second_hit(
     first_variant: str, comp_hets: CompHetDict, sample: str
-) -> list[AbstractVariant]:
+) -> list[SmallVariant | StructuralVariant]:
     """
     checks for a second hit partner in this gene
 
@@ -46,10 +40,10 @@ def check_for_second_hit(
     {
         "SampleID": {
             "12-52287177-T-C": [
-                AbstractVariant(12-52287180-TGG-T)
+                Variant(12-52287180-TGG-T)
             ],
             "12-52287180-TGG-T": [
-                AbstractVariant(12-52287177-T-C)
+                Variant(12-52287177-T-C)
             ]
         } ...
     }
@@ -124,7 +118,7 @@ def run(
         principal_var,
         comp_het: CompHetDict | None = None,
         partial_pen: bool = False,
-    ) -> list[ReportedVariant]:
+    ) -> list[ReportVariant]:
         """
         run method - triggers each relevant inheritance model
 
@@ -165,10 +159,10 @@ def __init__(self, pedigree: Ped, applied_moi: str):
     @abstractmethod
     def run(
         self,
-        principal: AbstractVariant,
+        principal: SmallVariant | StructuralVariant,
         comp_het: CompHetDict | None = None,
         partial_pen: bool = False,
-    ) -> list[ReportedVariant]:
+    ) -> list[ReportVariant]:
         """
         run all applicable inheritance patterns and finds good fits
         """
@@ -215,12 +209,12 @@ def check_familial_inheritance(
         return True
 
     def get_family_genotypes(
-        self, variant: AbstractVariant, sample_id: str
+        self, variant: SmallVariant | StructuralVariant, sample_id: str
     ) -> dict[str, str]:
         """
 
         Args:
-            variant (AbstractVariant):
+            variant (SmallVariant | StructuralVariant):
             sample_id (str): the sample ID to gather genotypes for
 
         Returns:
@@ -238,7 +232,7 @@ def get_sample_genotype(member_id: str, sex: str) -> str:
                 str: text representation of this genotype
             """
 
-            if variant.coords.chrom in X_CHROMOSOME:
+            if variant.coordinates.chrom in X_CHROMOSOME:
                 if sex == 'male' and (
                     member_id in variant.het_samples or member_id in variant.hom_samples
                 ):
@@ -281,7 +275,10 @@ def check_frequency_passes(info: dict, thresholds: dict[str, int | float]) -> bo
         return all({info.get(key, 0) <= test for key, test in thresholds.items()})
 
     def check_comp_het(
-        self, sample_id: str, variant_1: AbstractVariant, variant_2: AbstractVariant
+        self,
+        sample_id: str,
+        variant_1: SmallVariant | StructuralVariant,
+        variant_2: SmallVariant | StructuralVariant,
     ) -> bool:
         """
         use parents to accept or dismiss the comp-het
@@ -293,8 +290,8 @@ def check_comp_het(
 
         Args:
             sample_id (str): sample ID to check for
-            variant_1 (AbstractVariant): first variant of comp-het pair
-            variant_2 (AbstractVariant): second variant of comp-het pair
+            variant_1 (SmallVariant | StructuralVariant): first variant of comp-het pair
+            variant_2 (SmallVariant | StructuralVariant): second variant of comp-het pair
 
         Returns:
             bool: True if these two variants form a comp-het
@@ -339,12 +336,12 @@ def __init__(
 
         # prepare the AF test dicts
         self.freq_tests = {
-            VariantType.SMALL: {key: self.hom_threshold for key in INFO_HOMS}
+            SmallVariant.__name__: {key: self.hom_threshold for key in INFO_HOMS}
             | {
                 'gnomad_ac': self.ac_threshold,
                 'gnomad_af': self.ad_threshold,
             },
-            VariantType.SV: {
+            StructuralVariant.__name__: {
                 'af': self.sv_af_threshold,
                 SV_AF_KEY: self.sv_af_threshold,
             },
@@ -353,10 +350,10 @@ def __init__(
 
     def run(
         self,
-        principal: AbstractVariant,
+        principal: SmallVariant | StructuralVariant,
         comp_het: CompHetDict | None = None,
         partial_pen: bool = False,
-    ) -> list[ReportedVariant]:
+    ) -> list[ReportVariant]:
         """
         Simplest MOI, exclusions based on HOM count and AF
         Args:
@@ -370,7 +367,7 @@ def run(
         # reject support for dominant MOI, apply checks based on var type
         if principal.support_only or not (
             self.check_frequency_passes(
-                principal.info, self.freq_tests[principal.info['var_type']]
+                principal.info, self.freq_tests[principal.__class__.__name__]
             )
         ):
             return classifications
@@ -400,16 +397,18 @@ def run(
                 continue
 
             classifications.append(
-                ReportedVariant(
+                ReportVariant(
                     sample=sample_id,
                     family=self.pedigree[sample_id].family_id,
                     gene=principal.info.get('gene_id'),
-                    var_data=MinimalVariant(variant=principal, sample=sample_id),
+                    var_data=principal,
+                    categories=principal.category_values(sample_id),
                     reasons={self.applied_moi},
                     genotypes=self.get_family_genotypes(
                         variant=principal, sample_id=sample_id
                     ),
                     flags=principal.get_sample_flags(sample_id),
+                    independent=True,
                 )
             )
 
@@ -432,21 +431,21 @@ def __init__(
 
     def run(
         self,
-        principal: AbstractVariant,
+        principal: SmallVariant | StructuralVariant,
         comp_het: CompHetDict | None = None,
         partial_pen: bool = False,
-    ) -> list[ReportedVariant]:
+    ) -> list[ReportVariant]:
         """
         valid if present as compound het
         counts as being phased if a compound het is split between parents
 
         Args:
-            principal (AbstractVariant): main variant being evaluated
+            principal (SmallVariant | StructuralVariant): main variant being evaluated
             comp_het (dict): comp-het partners
             partial_pen (bool):
 
         Returns:
-            list[ReportedVariant]: data object if RecessiveAutosomal fits
+            list[ReportVariant]: data object if RecessiveAutosomal fits
         """
 
         if comp_het is None:
@@ -471,7 +470,7 @@ def run(
                 continue
 
             for partner_variant in check_for_second_hit(
-                first_variant=principal.coords.string_format,
+                first_variant=principal.coordinates.string_format,
                 comp_hets=comp_het,
                 sample=sample_id,
             ):
@@ -500,18 +499,20 @@ def run(
                     continue
 
                 classifications.append(
-                    ReportedVariant(
+                    ReportVariant(
                         sample=sample_id,
                         family=self.pedigree[sample_id].family_id,
                         gene=principal.info.get('gene_id'),
-                        var_data=MinimalVariant(principal, sample_id),
+                        var_data=principal,
+                        categories=principal.category_values(sample_id),
                         reasons={self.applied_moi},
                         genotypes=self.get_family_genotypes(
                             variant=principal, sample_id=sample_id
                         ),
-                        support_vars={partner_variant.coords.string_format},
+                        support_vars={partner_variant.coordinates.string_format},
                         flags=principal.get_sample_flags(sample_id)
                         + partner_variant.get_sample_flags(sample_id),
+                        independent=False,
                     ),
                 )
 
@@ -532,27 +533,27 @@ def __init__(
         """ """
         self.hom_threshold = get_config()['moi_tests'][GNOMAD_REC_HOM_THRESHOLD]
         self.freq_tests = {
-            VariantType.SMALL.value: {key: self.hom_threshold for key in INFO_HOMS},
-            VariantType.SV.value: {key: self.hom_threshold for key in SV_HOMS},
+            SmallVariant.__name__: {key: self.hom_threshold for key in INFO_HOMS},
+            StructuralVariant.__name__: {key: self.hom_threshold for key in SV_HOMS},
         }
         super().__init__(pedigree=pedigree, applied_moi=applied_moi)
 
     def run(
         self,
-        principal: AbstractVariant,
+        principal: SmallVariant | StructuralVariant,
         comp_het: CompHetDict | None = None,
         partial_pen: bool = False,
-    ) -> list[ReportedVariant]:
+    ) -> list[ReportVariant]:
         """
         explicitly tests HOMs
 
         Args:
-            principal (AbstractVariant): main variant being evaluated
+            principal (SmallVariant | StructuralVariant): main variant being evaluated
             comp_het (dict): comp-het partners
             partial_pen (bool):
 
         Returns:
-            list[ReportedVariant]: data object if RecessiveAutosomal fits
+            list[ReportVariant]: data object if RecessiveAutosomal fits
         """
 
         classifications = []
@@ -560,7 +561,7 @@ def run(
         # remove if too many homs are present in population databases
         if principal.support_only or not (
             self.check_frequency_passes(
-                principal.info, self.freq_tests[principal.var_type]
+                principal.info, self.freq_tests[principal.__class__.__name__]
             )
             or principal.info.get('categoryboolean1')
         ):
@@ -590,18 +591,19 @@ def run(
             ):
                 continue
 
-            # todo make this a pydantic model
             classifications.append(
-                ReportedVariant(
+                ReportVariant(
                     sample=sample_id,
                     family=self.pedigree[sample_id].family_id,
                     gene=principal.info.get('gene_id'),
-                    var_data=MinimalVariant(principal, sample_id),
+                    var_data=principal,
+                    categories=principal.category_values(sample_id),
                     genotypes=self.get_family_genotypes(
                         variant=principal, sample_id=sample_id
                     ),
                     reasons={self.applied_moi},
                     flags=principal.get_sample_flags(sample_id),
+                    independent=True,
                 )
             )
 
@@ -630,13 +632,13 @@ def __init__(self, pedigree: Ped, applied_moi: str = 'X_Dominant'):
         self.hemi_threshold = get_config()['moi_tests'][GNOMAD_HEMI_THRESHOLD]
 
         self.freq_tests = {
-            VariantType.SMALL: {key: self.hom_threshold for key in INFO_HOMS}
+            SmallVariant.__name__: {key: self.hom_threshold for key in INFO_HOMS}
             | {key: self.hemi_threshold for key in INFO_HEMI}
             | {
-                'gnomad_ad': self.ad_threshold,
                 'gnomad_ac': self.ac_threshold,
+                'gnomad_af': self.ad_threshold,
             },
-            VariantType.SV: {key: self.hom_threshold for key in SV_HOMS}
+            StructuralVariant.__name__: {key: self.hom_threshold for key in SV_HOMS}
             | {key: self.hemi_threshold for key in SV_HEMI},
         }
 
@@ -644,10 +646,10 @@ def __init__(self, pedigree: Ped, applied_moi: str = 'X_Dominant'):
 
     def run(
         self,
-        principal: AbstractVariant,
+        principal: SmallVariant | StructuralVariant,
         comp_het: CompHetDict | None = None,
         partial_pen: bool = False,
-    ) -> list[ReportedVariant]:
+    ) -> list[ReportVariant]:
         """
         if variant is present and sufficiently rare, we take it
         discarded if support
@@ -666,7 +668,7 @@ def run(
         # never apply dominant MOI to support variants
         # more stringent Pop.Freq checks for dominant - hemi restriction
         if not self.check_frequency_passes(
-            principal.info, self.freq_tests[principal.info['var_type']]
+            principal.info, self.freq_tests[principal.__class__.__name__]
         ):
             return classifications
 
@@ -698,16 +700,18 @@ def run(
                 continue
 
             classifications.append(
-                ReportedVariant(
+                ReportVariant(
                     sample=sample_id,
                     family=self.pedigree[sample_id].family_id,
                     gene=principal.info.get('gene_id'),
-                    var_data=MinimalVariant(principal, sample_id),
+                    var_data=principal,
+                    categories=principal.category_values(sample_id),
                     reasons={self.applied_moi},
                     genotypes=self.get_family_genotypes(
                         variant=principal, sample_id=sample_id
                     ),
                     flags=principal.get_sample_flags(sample_id),
+                    independent=True,
                 )
             )
         return classifications
@@ -736,9 +740,9 @@ def __init__(
         self.hemi_threshold = get_config()['moi_tests'][GNOMAD_HEMI_THRESHOLD]
 
         self.freq_tests = {
-            VariantType.SMALL: {key: self.hom_dom_threshold for key in INFO_HOMS}
+            SmallVariant.__name__: {key: self.hom_dom_threshold for key in INFO_HOMS}
             | {key: self.hemi_threshold for key in INFO_HEMI},
-            VariantType.SV: {key: self.hom_dom_threshold for key in SV_HOMS}
+            StructuralVariant.__name__: {key: self.hom_dom_threshold for key in SV_HOMS}
             | {key: self.hemi_threshold for key in SV_HEMI},
         }
 
@@ -746,10 +750,10 @@ def __init__(
 
     def run(
         self,
-        principal: AbstractVariant,
+        principal: SmallVariant | StructuralVariant,
         comp_het: CompHetDict | None = None,
         partial_pen: bool = False,
-    ) -> list[ReportedVariant]:
+    ) -> list[ReportVariant]:
         """
         Args:
             principal ():
@@ -761,7 +765,7 @@ def run(
 
         # remove from analysis if too many homs are present in population databases
         if not self.check_frequency_passes(
-            principal.info, self.freq_tests[principal.info['var_type']]
+            principal.info, self.freq_tests[principal.__class__.__name__]
         ):
             return classifications
 
@@ -797,16 +801,18 @@ def run(
                 continue
 
             classifications.append(
-                ReportedVariant(
+                ReportVariant(
                     sample=sample_id,
                     family=self.pedigree[sample_id].family_id,
                     gene=principal.info.get('gene_id'),
-                    var_data=MinimalVariant(principal, sample_id),
+                    var_data=principal,
+                    categories=principal.category_values(sample_id),
                     genotypes=self.get_family_genotypes(
                         variant=principal, sample_id=sample_id
                     ),
                     reasons={self.applied_moi},
                     flags=principal.get_sample_flags(sample_id),
+                    independent=True,
                 )
             )
         return classifications
@@ -832,17 +838,19 @@ def __init__(
 
         self.hom_rec_threshold = get_config()['moi_tests'][GNOMAD_REC_HOM_THRESHOLD]
         self.freq_tests = {
-            VariantType.SMALL: {key: self.hom_rec_threshold for key in INFO_HOMS},
-            VariantType.SV: {key: self.hom_rec_threshold for key in SV_HOMS},
+            SmallVariant.__name__: {key: self.hom_rec_threshold for key in INFO_HOMS},
+            StructuralVariant.__name__: {
+                key: self.hom_rec_threshold for key in SV_HOMS
+            },
         }
         super().__init__(pedigree=pedigree, applied_moi=applied_moi)
 
     def run(
         self,
-        principal: AbstractVariant,
+        principal: SmallVariant | StructuralVariant,
         comp_het: CompHetDict | None = None,
         partial_pen: bool = False,
-    ) -> list[ReportedVariant]:
+    ) -> list[ReportVariant]:
         """
 
         Args:
@@ -856,7 +864,7 @@ def run(
         # remove from analysis if too many homs are present in population databases
         if principal.support_only or not (
             self.check_frequency_passes(
-                principal.info, self.freq_tests[principal.info['var_type']]
+                principal.info, self.freq_tests[principal.__class__.__name__]
             )
             or principal.info.get('categoryboolean1')
         ):
@@ -890,16 +898,18 @@ def run(
                 continue
 
             classifications.append(
-                ReportedVariant(
+                ReportVariant(
                     sample=sample_id,
                     family=self.pedigree[sample_id].family_id,
                     gene=principal.info.get('gene_id'),
-                    var_data=MinimalVariant(principal, sample_id),
+                    var_data=principal,
+                    categories=principal.category_values(sample_id),
                     genotypes=self.get_family_genotypes(
                         variant=principal, sample_id=sample_id
                     ),
                     reasons={self.applied_moi},
                     flags=principal.get_sample_flags(sample_id),
+                    independent=True,
                 )
             )
         return classifications
@@ -925,17 +935,19 @@ def __init__(
 
         self.hom_rec_threshold = get_config()['moi_tests'][GNOMAD_REC_HOM_THRESHOLD]
         self.freq_tests = {
-            VariantType.SMALL: {key: self.hom_rec_threshold for key in INFO_HOMS},
-            VariantType.SV: {key: self.hom_rec_threshold for key in SV_HOMS},
+            SmallVariant.__name__: {key: self.hom_rec_threshold for key in INFO_HOMS},
+            StructuralVariant.__name__: {
+                key: self.hom_rec_threshold for key in SV_HOMS
+            },
         }
         super().__init__(pedigree=pedigree, applied_moi=applied_moi)
 
     def run(
         self,
-        principal: AbstractVariant,
+        principal: SmallVariant | StructuralVariant,
         comp_het: CompHetDict | None = None,
         partial_pen: bool = False,
-    ) -> list[ReportedVariant]:
+    ) -> list[ReportVariant]:
         """
 
         Args:
@@ -946,18 +958,16 @@ def run(
 
         if comp_het is None:
             comp_het = {}
-
         classifications = []
 
         # remove from analysis if too many homs are present in population databases
         if not (
             self.check_frequency_passes(
-                principal.info, self.freq_tests[principal.info['var_type']]
+                principal.info, self.freq_tests[principal.__class__.__name__]
             )
             or principal.info.get('categoryboolean1')
         ):
             return classifications
-
         het_females = {
             sam for sam in principal.het_samples if self.pedigree[sam].sex == 'female'
         }
@@ -979,7 +989,7 @@ def run(
                 continue
 
             for partner in check_for_second_hit(
-                first_variant=principal.coords.string_format,
+                first_variant=principal.coordinates.string_format,
                 comp_hets=comp_het,
                 sample=sample_id,
             ):
@@ -989,7 +999,7 @@ def run(
                     not partner.sample_category_check(sample_id, allow_support=True)
                     or not (
                         self.check_frequency_passes(
-                            partner.info, self.freq_tests[partner.info['var_type']]
+                            partner.info, self.freq_tests[partner.__class__.__name__]
                         )
                         or partner.info.get('categoryboolean1')
                     )
@@ -1011,18 +1021,20 @@ def run(
                     continue
 
                 classifications.append(
-                    ReportedVariant(
+                    ReportVariant(
                         sample=sample_id,
                         family=self.pedigree[sample_id].family_id,
                         gene=principal.info.get('gene_id'),
-                        var_data=MinimalVariant(principal, sample_id),
+                        var_data=principal,
+                        categories=principal.category_values(sample_id),
                         reasons={self.applied_moi},
                         genotypes=self.get_family_genotypes(
                             variant=principal, sample_id=sample_id
                         ),
-                        support_vars={partner.coords.string_format},
+                        support_vars={partner.coordinates.string_format},
                         flags=principal.get_sample_flags(sample_id)
                         + partner.get_sample_flags(sample_id),
+                        independent=False,
                     )
                 )
 
diff --git a/reanalysis/static_values.py b/reanalysis/static_values.py
new file mode 100644
index 00000000..d70f025a
--- /dev/null
+++ b/reanalysis/static_values.py
@@ -0,0 +1,68 @@
+"""
+A base module (no reanalysis imports) for shared helpers, to prevent circular imports
+"""
+import logging
+import sys
+
+from datetime import datetime
+
+from cpg_utils.config import get_config
+
+
+_GRANULAR_DATE: str | None = None
+LOGGER = None
+
+
+def get_granular_date():
+    """
+    cached getter: config 'fake_date' override if set, otherwise today as YYYY-MM-DD
+    """
+    global _GRANULAR_DATE
+    if _GRANULAR_DATE is None:
+        # allow an override here - synthetic historic runs
+        try:
+            if fake_date := get_config().get('workflow', {}).get('fake_date'):
+                _GRANULAR_DATE = fake_date
+        except AssertionError:
+            get_logger().warning('No date set in config, falling back to real Date')
+        if _GRANULAR_DATE is None:
+            _GRANULAR_DATE = datetime.now().strftime('%Y-%m-%d')
+    return _GRANULAR_DATE
+
+
+def get_logger(
+    logger_name: str = 'AIP-logger', log_level: int = logging.INFO
+) -> logging.Logger:
+    """
+    creates a logger instance (so as not to use the root logger)
+
+    Args:
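+        logger_name (str): name for the logger, only used on first creation
+        log_level (int): level for logger and handler, only used on first creation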
+
+    Returns:
+        a logger instance, or the global logger if already defined
+    """
+    global LOGGER
+
+    if LOGGER is None:
+        # this verbose logger setup is to ensure that the log level requested (INFO)
+        # doesn't cause the unintentional logging of every Metamist query
+        # create a named logger
+        LOGGER = logging.getLogger(logger_name)
+        LOGGER.setLevel(log_level)
+
+        # create a stream handler to write output
+        stream_handler = logging.StreamHandler(sys.stdout)
+        stream_handler.setLevel(log_level)
+
+        # create format string for messages
+        formatter = logging.Formatter(
+            '%(asctime)s - %(name)s %(lineno)d - %(levelname)s - %(message)s'
+        )
+        stream_handler.setFormatter(formatter)
+
+        # set the logger to use this handler
+        LOGGER.addHandler(stream_handler)
+
+    return LOGGER
diff --git a/reanalysis/utils.py b/reanalysis/utils.py
index eccb5058..b5794901 100644
--- a/reanalysis/utils.py
+++ b/reanalysis/utils.py
@@ -2,11 +2,9 @@
 classes and methods shared across reanalysis components
 """
 
-import logging
-import sys
 import time
 from collections import defaultdict
-from dataclasses import dataclass, is_dataclass, field
+from dataclasses import dataclass, is_dataclass
 from datetime import datetime
 from enum import Enum
 from itertools import chain, combinations_with_replacement, islice
@@ -16,11 +14,22 @@
 
 import json
 import re
+
+import cyvcf2
 import requests
 
 from cpg_utils import to_path, Path as CPGPathType
 from cpg_utils.config import get_config
 
+from reanalysis.models import (
+    Coordinates,
+    ReportVariant,
+    SmallVariant,
+    StructuralVariant,
+    VariantType,
+    FileTypes,
+)
+from reanalysis.static_values import get_granular_date, get_logger
 
 HOMREF: int = 0
 HETALT: int = 1
@@ -35,8 +44,6 @@
 X_CHROMOSOME = {'X'}
 TODAY = datetime.now().strftime('%Y-%m-%d_%H:%M')
 
-_GRANULAR_DATE: str | None = None
-
 # most lenient to most conservative
 # usage = if we have two MOIs for the same gene, take the broadest
 ORDERED_MOIS = [
@@ -56,84 +63,6 @@
 # CONFIG_FIELDS = ['workflow']  # , 'filter', 'panels', 'categories']
 # assert all(field in get_config(False).keys() for field in CONFIG_FIELDS)
 
-LOGGER = None
-
-
-def get_logger(
-    logger_name: str = 'AIP-logger', log_level: int = logging.INFO
-) -> logging.Logger:
-    """
-    creates a logger instance (so as not to use the root logger)
-
-    Args:
-        logger_name (str):
-        log_level ():
-
-    Returns:
-        a logger instance, or the global logger if already defined
-    """
-    global LOGGER
-
-    if LOGGER is None:
-        # this very verbose logging is to ensure that the log level requested (INFO)
-        # doesn't cause the unintentional logging of every Metamist query
-        # create a named logger
-        LOGGER = logging.getLogger(logger_name)
-        LOGGER.setLevel(log_level)
-
-        # create a stream handler to write output
-        stream_handler = logging.StreamHandler(sys.stdout)
-        stream_handler.setLevel(log_level)
-
-        # create format string for messages
-        formatter = logging.Formatter(
-            '%(asctime)s - %(name)s %(lineno)d - %(levelname)s - %(message)s'
-        )
-        stream_handler.setFormatter(formatter)
-
-        # set the logger to use this handler
-        LOGGER.addHandler(stream_handler)
-
-    return LOGGER
-
-
-def get_granular_date():
-    """
-    cached getter/setter
-    """
-    global _GRANULAR_DATE
-    if _GRANULAR_DATE is None:
-        # allow an override here - synthetic historic runs
-        try:
-            if fake_date := get_config().get('workflow', {}).get('fake_date'):
-                _GRANULAR_DATE = fake_date
-        except AssertionError:
-            get_logger().info(f'No config loaded, falling back to {_GRANULAR_DATE}')
-        if _GRANULAR_DATE is None:
-            _GRANULAR_DATE = datetime.now().strftime('%Y-%m-%d')
-    return _GRANULAR_DATE
-
-
-class VariantType(Enum):
-    """
-    enumeration of permitted variant types
-    """
-
-    SMALL = 'SMALL'
-    SV = 'SV'
-
-
-class FileTypes(Enum):
-    """
-    enumeration of permitted input file types
-    """
-
-    HAIL_TABLE = '.ht'
-    MATRIX_TABLE = '.mt'
-    VCF = '.vcf'
-    VCF_GZ = '.vcf.gz'
-    VCF_BGZ = '.vcf.bgz'
-
 
 def chunks(iterable, chunk_size):
     """
@@ -200,55 +129,56 @@ def identify_file_type(file_path: str) -> FileTypes | Exception:
     raise TypeError(f'File cannot be definitively typed: {str(extensions)}')
 
 
-@dataclass
-class Coordinates:
-    """
-    a home for the positional variant attributes
-    """
-
-    chrom: str
-    pos: int
-    ref: str
-    alt: str
-
-    @property
-    def string_format(self) -> str:
-        """
-        forms a string representation: chr-pos-ref-alt
-        """
-        return f'{self.chrom}-{self.pos}-{self.ref}-{self.alt}'
-
-    def __lt__(self, other) -> bool:
-        """
-        enables positional sorting
-        """
-        # this will return False for same chrom and position
-        if self.chrom == other.chrom:
-            return self.pos < other.pos
-        # otherwise take the relative index from sorted chromosomes list
-        if self.chrom in CHROM_ORDER and other.chrom in CHROM_ORDER:
-            return CHROM_ORDER.index(self.chrom) < CHROM_ORDER.index(other.chrom)
-        # if self is on a canonical chromosome, sort before HLA/Decoy etc.
-        if self.chrom in CHROM_ORDER:
-            return True
-        return False
-
-    def __eq__(self, other) -> bool:
-        """
-        equivalence check
-        Args:
-            other (Coordinates):
-
-        Returns:
-            true if self == other
-
-        """
-        return (
-            self.chrom == other.chrom
-            and self.pos == other.pos
-            and self.ref == other.ref
-            and self.alt == other.alt
-        )
+#
+# @dataclass
+# class Coordinates:
+#     """
+#     a home for the positional variant attributes
+#     """
+#
+#     chrom: str
+#     pos: int
+#     ref: str
+#     alt: str
+#
+#     @property
+#     def string_format(self) -> str:
+#         """
+#         forms a string representation: chr-pos-ref-alt
+#         """
+#         return f'{self.chrom}-{self.pos}-{self.ref}-{self.alt}'
+#
+#     def __lt__(self, other) -> bool:
+#         """
+#         enables positional sorting
+#         """
+#         # this will return False for same chrom and position
+#         if self.chrom == other.chrom:
+#             return self.pos < other.pos
+#         # otherwise take the relative index from sorted chromosomes list
+#         if self.chrom in CHROM_ORDER and other.chrom in CHROM_ORDER:
+#             return CHROM_ORDER.index(self.chrom) < CHROM_ORDER.index(other.chrom)
+#         # if self is on a canonical chromosome, sort before HLA/Decoy etc.
+#         if self.chrom in CHROM_ORDER:
+#             return True
+#         return False
+#
+#     def __eq__(self, other) -> bool:
+#         """
+#         equivalence check
+#         Args:
+#             other (Coordinates):
+#
+#         Returns:
+#             true if self == other
+#
+#         """
+#         return (
+#             self.chrom == other.chrom
+#             and self.pos == other.pos
+#             and self.ref == other.ref
+#             and self.alt == other.alt
+#         )
 
 
 def get_json_response(url, max_retries=4, base_delay=1, max_delay=32):
@@ -426,6 +356,188 @@ def get_phase_data(samples, var) -> dict[str, dict[int, str]]:
     return dict(phased_dict)
 
 
+def organise_pm5(info_dict: dict[str, Any]) -> dict[str, Any]:
+    """
+    Unpack 'categorydetailspm5' into 'categorybooleanpm5' and 'pm5_data'.
+
+    e.g. categorydetailsPM5=27037::Pathogenic::1+27048::Pathogenic::1;
+    NOTE(review): code unpacks 2 '::' fields per entry, example has 3 - confirm
+
+    Returns:
+        the same info_dict, mutated in place and handed back
+    """
+
+    if 'categorydetailspm5' not in info_dict:
+        return info_dict
+
+    pm5_content = info_dict.pop('categorydetailspm5')
+
+    # 'missing' placeholder: nothing to unpack, record a false flag
+    if pm5_content == 'missing':
+        info_dict['categorybooleanpm5'] = 0
+        return info_dict
+
+    # value stored under 'clinvar_allele' as a string; sentinel if absent
+    current_clinvar = str(info_dict.get('clinvar_allele', 'not_this'))
+
+    # allele_id -> stars for every entry that is not the current allele
+    pm5_data = {}
+
+    # split on '+' and de-duplicate the entries
+    pm5_strings = set(pm5_content.split('+'))
+    for clinvar_entry in pm5_strings:
+
+        # unpack as exactly two '::'-separated fields (ValueError otherwise)
+        allele_id, stars = clinvar_entry.split('::')
+
+        # never consider the exact match, pm5 is always separate
+        if allele_id == current_clinvar:
+            continue
+
+        # if non-self, add to the dict
+        pm5_data[allele_id] = stars
+
+    # flag the variant only if at least one non-self allele remained,
+    # otherwise record an explicit 0 for the boolean category
+    if pm5_data:
+        # set boolean category and specific data
+        info_dict['categorybooleanpm5'] = 1
+        info_dict['pm5_data'] = pm5_data
+    else:
+        info_dict['categorybooleanpm5'] = 0
+
+    return info_dict
+
+
+def create_small_variant(
+    var: cyvcf2.Variant,
+    samples: list[str],
+    as_singletons=False,
+    new_genes: dict[str, str] | None = None,
+):
+    """
+    Build a SmallVariant model from a single cyvcf2 small-variant record.
+
+    Args:
+        var (): the cyvcf2 record; only its first ALT allele is used
+        samples (): sample IDs, zipped in order against per-sample arrays
+        as_singletons (): if True, blank the REMOVE_IN_SINGLETONS categories
+        new_genes (): gene ID -> 'all' or comma-joined samples; None == {}
+    """
+    coordinates = Coordinates(
+        chrom=var.CHROM.replace('chr', ''), pos=var.POS, ref=var.REF, alt=var.ALT[0]
+    )
+    depths = dict(zip(samples, map(float, var.gt_depths)))  # type: ignore
+    info: dict[str, Any] = {x.lower(): y for x, y in var.INFO} | {
+        'seqr_link': coordinates.string_format
+    }
+    het_samples, hom_samples = get_non_ref_samples(variant=var, samples=samples)
+
+    # hot-swap cat 2 from a boolean to a sample list (new_genes=None acts as {})
+    if info.get('categoryboolean2', 0):
+        new_gene_samples = (new_genes or {}).get(info.get('gene_id'), '')
+
+        # if 'all', keep cohort-wide boolean flag
+        if new_gene_samples == 'all':
+            get_logger().debug('New applies to all samples')
+
+        # otherwise assign only a specific sample list
+        elif new_gene_samples:
+            info.pop('categoryboolean2')
+            info['categorysample2'] = new_gene_samples
+
+        # else just remove it - shouldn't happen in prod
+        else:
+            info.pop('categoryboolean2')
+
+    # collect the category flag names present in INFO (after the cat 2 swap)
+    boolean_categories = [
+        key for key in info.keys() if key.startswith('categoryboolean')
+    ]
+    sample_categories = [key for key in info.keys() if key.startswith('categorysample')]
+    sample_support = [key for key in info.keys() if key.startswith('categorysupport')]
+
+    # overwrite with true booleans
+    for cat in sample_support + boolean_categories:
+        info[cat] = info.get(cat, 0) == 1
+
+    # sample categories are a list of strings or 'missing'
+    # if cohort runs as singletons, remove possibility of de novo
+    # if not singletons, split each into a list of sample IDs
+    for sam_cat in sample_categories:
+        if as_singletons and sam_cat in REMOVE_IN_SINGLETONS:
+            info[sam_cat] = []
+        else:
+            info[sam_cat] = (
+                info[sam_cat].split(',') if info[sam_cat] != 'missing' else []
+            )
+
+    # organise PM5
+    info = organise_pm5(info)
+    phased = get_phase_data(samples, var)
+    ab_ratios = dict(zip(samples, map(float, var.gt_alt_freqs)))
+    transcript_consequences = extract_csq(csq_contents=info.pop('csq', []))
+
+    return SmallVariant(
+        coordinates=coordinates,
+        info=info,
+        het_samples=het_samples,
+        hom_samples=hom_samples,
+        boolean_categories=boolean_categories,
+        sample_categories=sample_categories,
+        sample_support=sample_support,
+        phased=phased,
+        depths=depths,
+        ab_ratios=ab_ratios,
+        transcript_consequences=transcript_consequences,
+    )
+
+
+def create_structural_variant(var: cyvcf2.Variant, samples: list[str]):
+    """
+    Build a StructuralVariant model from a single cyvcf2 SV record.
+    Far less complicated than the SmallVariant equivalent.
+
+    Args:
+        var (): the cyvcf2 record; INFO must hold 'variantid' and 'svlen'
+        samples (): sample IDs, passed to the genotype and phase helpers
+    """
+
+    info: dict[str, Any] = {x.lower(): y for x, y in var.INFO}
+
+    # this is the right ID for Seqr
+    info['seqr_link'] = info['variantid']
+    # NOTE(review): alt is the raw INFO svlen, not str()-wrapped - confirm type
+    coordinates = Coordinates(
+        chrom=var.CHROM.replace('chr', ''),
+        pos=var.POS,
+        ref=var.ALT[0],
+        alt=info['svlen'],
+    )
+
+    het_samples, hom_samples = get_non_ref_samples(variant=var, samples=samples)
+
+    # collect the names of the boolean category flags present in INFO
+    boolean_categories = [
+        key for key in info.keys() if key.startswith('categoryboolean')
+    ]
+
+    # replace each flag value with a real bool (True only when equal to 1)
+    for cat in boolean_categories:
+        info[cat] = info.get(cat, 0) == 1
+
+    phased = get_phase_data(samples, var)
+
+    return StructuralVariant(
+        coordinates=coordinates,
+        info=info,
+        het_samples=het_samples,
+        hom_samples=hom_samples,
+        boolean_categories=boolean_categories,
+        phased=phased,
+    )
+
+
 @dataclass
 class AbstractVariant:
     """
@@ -438,7 +550,6 @@ def __init__(
         samples: list[str],
         as_singletons=False,
         new_genes: dict[str, str] | None = None,
-        var_type: VariantType = VariantType.SMALL,
     ):
         """
         Intention - this works for both small and structural variants
@@ -454,10 +565,6 @@ def __init__(
         # overwrite the non-standard cyvcf2 representation
         self.info: dict[str, Any] = {x.lower(): y for x, y in var.INFO}
 
-        # presumption of small variant/indel unless otherwise specified
-        # we could bulk this out as index, snv, etc...
-        self.info['var_type'] = var_type
-
         # extract the coordinates into a separate object
         # bump depths for SV calls
         if 'svtype' in self.info:
@@ -467,7 +574,6 @@ def __init__(
             # artificial depths used to trick logic
             self.depths = {sam: 999 for sam in samples}
             self.info['seqr_link'] = self.info['variantid']
-            self.info['var_type'] = VariantType.SV
 
         else:
             self.coords = Coordinates(
@@ -782,82 +888,10 @@ def check_ab_ratio(self, sample: str) -> list[str]:
         return []
 
 
-class MinimalVariant:
-    """
-    subset of the AbstractVariant data type
-    todo this is redundant with a model_dump exclude
-    https://docs.pydantic.dev/latest/concepts/serialization/#advanced-include-and-exclude
-    """
-
-    def __init__(self, variant: AbstractVariant, sample: str):
-        self.coords: Coordinates = variant.coords
-        self.categories: list[str] = variant.category_values(sample)
-        # no need to carry these though to the report
-        avoid_flags = (
-            variant.sample_categories
-            + variant.boolean_categories
-            + variant.sample_support
-        )
-        self.info: dict[str, Any] = {
-            key: value for key, value in variant.info.items() if key not in avoid_flags
-        }
-        self.transcript_consequences = variant.transcript_consequences
-        self.phased = variant.phased
-
-
 # CompHetDict structure: {sample: {variant_string: [variant, ...]}}
 # sample: string, e,g, CGP12345
-CompHetDict = dict[str, dict[str, list[AbstractVariant]]]
-GeneDict = dict[str, list[AbstractVariant]]
-
-
-@dataclass
-class ReportedVariant:
-    """
-    minimal model representing variant categorisation event
-    the initial variant (minimised)
-    the MOI applicable
-    the support ing variant(s), if any
-    allows for the presence of flags e.g. Borderline AB ratio
-
-    todo should self.categories (vardata.categories)
-    todo actually be populated here instead of in the variant?
-    """
-
-    sample: str
-    family: str
-    gene: str
-    var_data: MinimalVariant
-    reasons: set[str]
-    genotypes: dict[str, str]
-    support_vars: set[str] = field(default_factory=set)
-    flags: list[str] = field(default_factory=list)
-    panels: dict[str, str | list[int]] = field(default_factory=dict)
-    phenotypes: list[str] = field(default_factory=list)
-    labels: list[str] = field(default_factory=list)
-    first_seen: str = get_granular_date()
-    independent: bool = False
-
-    @property
-    def is_independent(self):
-        """
-        check if this variant acts independently
-        """
-        return len(self.support_vars) == 0
-
-    def __eq__(self, other):
-        """
-        makes reported variants comparable
-        """
-        # self_supvar = set(self.support_vars)
-        # other_supvar = set(other.support_vars)
-        return (
-            self.sample == other.sample
-            and self.var_data.coords == other.var_data.coords
-        )
-
-    def __lt__(self, other):
-        return self.var_data.coords < other.var_data.coords
+CompHetDict = dict[str, dict[str, list[SmallVariant | StructuralVariant]]]
+GeneDict = dict[str, list[SmallVariant | StructuralVariant]]
 
 
 def canonical_contigs_from_vcf(reader) -> set[str]:
@@ -921,16 +955,16 @@ def gather_gene_dict_from_contig(
     # if contig has no variants, prints an error and returns []
     for variant in variant_source(contig):
 
-        abs_var = AbstractVariant(
+        abs_var = create_small_variant(
             var=variant,
             samples=variant_source.samples,
             as_singletons=singletons,
             new_genes=new_gene_map,
         )
 
-        if abs_var.coords.string_format in blacklist:
+        if abs_var.coordinates.string_format in blacklist:
             get_logger().info(
-                f'Skipping blacklisted variant: {abs_var.coords.string_format}'
+                f'Skipping blacklisted variant: {abs_var.coordinates.string_format}'
             )
             continue
 
@@ -948,8 +982,8 @@ def gather_gene_dict_from_contig(
         second_source_variants = 0
         for variant in second_source(contig):
             # create an abstract SV variant
-            abs_var = AbstractVariant(
-                var=variant, samples=second_source.samples, as_singletons=singletons
+            abs_var = create_structural_variant(
+                var=variant, samples=second_source.samples
             )
             # update the variant count
             second_source_variants += 1
@@ -1109,6 +1143,7 @@ def extract_csq(csq_contents) -> list[dict]:
     return txc_dict
 
 
+# todo remove completely, use pydantic
 class CustomEncoder(json.JSONEncoder):
     """
     to be used as a JSON encoding class
@@ -1126,7 +1161,7 @@ def default(self, o):
             o (): python object being JSON encoded
         """
 
-        if is_dataclass(o) or isinstance(o, MinimalVariant):
+        if is_dataclass(o) or isinstance(o, (SmallVariant, StructuralVariant)):
             return o.__dict__
         if isinstance(o, set):
             return list(o)
@@ -1135,7 +1170,9 @@ def default(self, o):
         return json.JSONEncoder.default(self, o)
 
 
-def find_comp_hets(var_list: list[AbstractVariant], pedigree) -> CompHetDict:
+def find_comp_hets(
+    var_list: list[SmallVariant | StructuralVariant], pedigree
+) -> CompHetDict:
     """
     manual implementation to find compound hets
     variants provided in the format
@@ -1150,7 +1187,7 @@ def find_comp_hets(var_list: list[AbstractVariant], pedigree) -> CompHetDict:
     }
 
     Args:
-        var_list (list[AbstractVariant]): all variants in this gene
+        var_list (list[SmallVariant | StructuralVariant]): all variants in this gene
         pedigree (): Peddy.ped
     """
 
@@ -1299,7 +1336,7 @@ def find_latest_file(
 
 
 def date_annotate_results(
-    current: dict[str, dict | list[ReportedVariant]], historic: dict | None = None
+    current: dict[str, dict | list[ReportVariant]], historic: dict | None = None
 ) -> tuple[dict, dict]:
     """
     takes the current data, and annotates with previous dates if found
diff --git a/reanalysis/validate_categories.py b/reanalysis/validate_categories.py
index 4d0b1ec2..55d6a153 100644
--- a/reanalysis/validate_categories.py
+++ b/reanalysis/validate_categories.py
@@ -24,6 +24,7 @@
 from cpg_utils import to_path
 from cpg_utils.config import get_config
 
+from reanalysis.models import ReportVariant
 from reanalysis.moi_tests import MOIRunner, PEDDY_AFFECTED
 from reanalysis.utils import (
     canonical_contigs_from_vcf,
@@ -31,14 +32,12 @@
     find_comp_hets,
     gather_gene_dict_from_contig,
     get_cohort_config,
-    get_granular_date,
-    get_logger,
     get_new_gene_map,
     read_json_from_path,
     CustomEncoder,
     GeneDict,
-    ReportedVariant,
 )
+from reanalysis.static_values import get_granular_date, get_logger
 
 AMBIGUOUS_FLAG = 'Ambiguous Cat.1 MOI'
 MALE_FEMALE = {'male', 'female'}
@@ -98,7 +97,7 @@ def apply_moi_to_variants(
     moi_lookup: dict[str, MOIRunner],
     panelapp_data: dict[str, dict[str, str | bool]],
     pedigree: Ped,
-) -> list[ReportedVariant]:
+) -> list[ReportVariant]:
     """
     take all variants on a given contig & MOI filters
     find all variants/compound hets which fit the PanelApp MOI
@@ -145,7 +144,7 @@ def apply_moi_to_variants(
             variant_results = runner.run(
                 principal_var=variant,
                 comp_het=comp_het_dict,
-                partial_pen=variant.info.get('categoryboolean1', False),
+                partial_pen=bool(variant.info.get('categoryboolean1', False)),
             )
 
             # Flag! If this is a Category 1 (ClinVar) variant, and we are
@@ -172,11 +171,11 @@ def apply_moi_to_variants(
 
 def clean_and_filter(
     results_holder: dict,
-    result_list: list[ReportedVariant],
+    result_list: list[ReportVariant],
     panelapp_data: dict,
     dataset: str,
     participant_panels: dict | None = None,
-) -> dict[str, list[ReportedVariant]]:
+) -> dict[str, list[ReportVariant]]:
     """
     It's possible 1 variant can be classified multiple ways
     e.g. different MOIs (dominant and comp het)
@@ -190,7 +189,7 @@ def clean_and_filter(
 
     Args:
         results_holder (): container for all results data
-        result_list (): list of all ReportedVariant events
+        result_list (): list of all ReportVariant events
         panelapp_data ():
         dataset (str): dataset to use for getting the config portion
         participant_panels ():
@@ -215,8 +214,6 @@ def clean_and_filter(
 
     for each_event in result_list:
 
-        each_event.independent = each_event.is_independent
-
         # grab some attributes from the event
         sample = each_event.sample
         gene = each_event.gene
diff --git a/test/conftest.py b/test/conftest.py
index 7289537e..db0b59af 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -12,6 +12,7 @@
 
 from cpg_utils.config import set_config_paths
 
+
 # force this to come first
 PWD = Path(__file__).parent
 INPUT = PWD / 'input'
@@ -27,9 +28,9 @@
     SneakyTable,
 )
 
-from reanalysis.utils import AbstractVariant, read_json_from_path  # noqa: E402
+from reanalysis.utils import read_json_from_path, create_small_variant  # noqa: E402
 
-LABELLED = INPUT / '1_labelled_variant.vcf.bgz'
+LABELLED = str(INPUT / '1_labelled_variant.vcf.bgz')
 AIP_OUTPUT = INPUT / 'aip_output_example.json'
 DE_NOVO_PED = INPUT / 'de_novo_ped.fam'
 FAKE_OBO = INPUT / 'hpo_test.obo'
@@ -150,7 +151,7 @@ def fixture_phased_trio_variants():
     """path to the phased trio VCF"""
 
     vcf_reader = VCFReader(PHASED_TRIO)
-    two_variants = [AbstractVariant(var, vcf_reader.samples) for var in vcf_reader]
+    two_variants = [create_small_variant(var, vcf_reader.samples) for var in vcf_reader]
     return two_variants
 
 
@@ -177,7 +178,17 @@ def fixture_trio_abs_variant():
     vcf_reader = VCFReader(LABELLED)
     cyvcf_var = next(vcf_reader)
 
-    return AbstractVariant(cyvcf_var, vcf_reader.samples)
+    return create_small_variant(cyvcf_var, vcf_reader.samples)
+
+
+@pytest.fixture(name='cyvcf_example_variant')
+def fixture_cyvcf_variant():
+    """
+    returns the first raw record read from the LABELLED VCF
+    (straight from the reader, not wrapped by create_small_variant)
+    """
+    vcf_reader = VCFReader(LABELLED)
+    return next(vcf_reader)
 
 
 @pytest.fixture(name='two_trio_abs_variants')
@@ -188,7 +199,7 @@ def fixture_two_trio_abs_variants():
     2) Cat. 1 + 3, and Cat. 4 for PROBAND only
     """
     vcf_reader = VCFReader(LABELLED)
-    two_variants = [AbstractVariant(var, vcf_reader.samples) for var in vcf_reader]
+    two_variants = [create_small_variant(var, vcf_reader.samples) for var in vcf_reader]
     return two_variants
 
 
diff --git a/test/test_moi_tests.py b/test/test_moi_tests.py
index e0a2009c..5df782e0 100644
--- a/test/test_moi_tests.py
+++ b/test/test_moi_tests.py
@@ -1,9 +1,6 @@
 """
 tests relating to the MOI filters
 """
-# mypy: ignore-errors
-from dataclasses import dataclass, field
-from typing import Any, Dict, List
 
 from unittest import mock
 
@@ -17,7 +14,6 @@
     MOIRunner,
     RecessiveAutosomalCH,
     RecessiveAutosomalHomo,
-    VariantType,
     XDominant,
     XRecessiveMale,
     XRecessiveFemaleCH,
@@ -31,130 +27,6 @@
 TEST_COORDS_X_2 = Coordinates(chrom='X', pos=2, ref='G', alt='T')
 
 
-@dataclass
-class SimpleVariant:
-    """
-    a fake version of AbstractVariant
-    """
-
-    info: Dict[str, Any]
-    coords: Coordinates
-    het_samples: set[str] = field(default_factory=set)
-    hom_samples: set[str] = field(default_factory=set)
-    categoryboolean1: bool = True
-    categorysample4: list[str] = field(default_factory=list)
-    ab_ratios = {'nobody': 1.0}
-    depths = {'female': 11, 'male': 11}
-    sample_categories = ['categorysample4']
-    boolean_categories = ['categoryboolean1']
-    sample_support = []
-    transcript_consequences = []
-    phased = {}
-    var_type = VariantType.SMALL.value
-
-    def sample_category_check(self, sample, allow_support=True):
-        """
-        :param sample:
-        :param allow_support:
-        """
-        _phony = allow_support
-        return self.categoryboolean1 or sample in self.categorysample4
-
-    def get_sample_flags(self, *args, **kwargs):
-        """
-        dummy method
-        """
-        if args and kwargs and self:
-            pass
-        return []
-
-    @staticmethod
-    def category_values(sample):
-        """
-        quick mock method
-        """
-        return [sample]
-
-    @property
-    def support_only(self):
-        """pass"""
-        return False
-
-
-@dataclass
-class RecessiveSimpleVariant:
-    """
-    a fake version of AbstractVariant
-    """
-
-    coords: Coordinates
-    ab_ratios: dict[str, float]
-    info: dict[str, Any] = field(default_factory=dict)
-    depths = {'female': 11, 'male': 11}
-    het_samples: set[str] = field(default_factory=set)
-    hom_samples: set[str] = field(default_factory=set)
-    categorysample4: list[str] = field(default_factory=list)
-    categoryboolean1: bool = True
-    boolean_categories = ['categoryboolean1']
-    sample_categories = ['categorysample4']
-    sample_support: list = field(default_factory=list)
-    transcript_consequences: list = field(default_factory=list)
-    phased: dict = field(default_factory=dict)
-    var_type = VariantType.SMALL
-
-    def sample_de_novo(self, sample):
-        """
-        :param sample:
-        """
-        return sample in self.categorysample4
-
-    def sample_category_check(self, sample, allow_support: bool = False):
-        """
-        Args:
-            sample ():
-            allow_support (bool): just for the consistent API
-        """
-        _phony = allow_support
-        return (sample in self.categorysample4) or self.categoryboolean1
-
-    def check_ab_ratio(self, sample) -> list[str]:
-        """
-        pass
-        """
-
-        het = sample in self.het_samples
-        hom = sample in self.hom_samples
-        variant_ab = self.ab_ratios.get(sample, 0.0)
-        if (
-            (variant_ab <= 0.15)
-            or (het and not 0.25 <= variant_ab <= 0.75)
-            or (hom and variant_ab <= 0.85)
-        ):
-            return ['AB Ratio']
-        return []
-
-    def get_sample_flags(self, sample: str):
-        """
-        gets all report flags for this sample
-        """
-        return self.check_ab_ratio(sample)
-
-    def category_values(self, sample):
-        """
-        quick mock method
-        """
-        return [sample]
-
-    @property
-    def support_only(self):
-        """pass"""
-        return False
-
-    def sample_support_only(self, sample_id: str) -> bool:
-        """dummy method - this will cause issues"""
-        return sample_id == 'dumdum'
-
-
 @pytest.mark.parametrize(
     'first,comp_hets,sample,values',
     (
@@ -205,7 +77,7 @@ def test_check_second_hit(first, comp_hets, sample, values):
         ('Hemi_Bi_In_Female', ['XRecessive']),
     ),
 )
-def test_moi_runner(moi_string: str, filters: List[str], peddy_ped):
+def test_moi_runner(moi_string: str, filters: list[str], peddy_ped):
     """
 
     :param moi_string:
@@ -231,19 +103,18 @@ def test_dominant_autosomal_fails_on_depth(peddy_ped):
         'gnomad_af': 0.0001,
         'gnomad_ac': 0,
         'gnomad_hom': 0,
-        'var_type': VariantType.SMALL,
+        'gene_id': 'TEST1',
     }
 
     dom = DominantAutosomal(pedigree=peddy_ped)
 
     # passes with heterozygous
-    shallow_variant = SimpleVariant(
+    shallow_variant = SmallVariant(
         info=info_dict,
         het_samples={'male'},
-        hom_samples=set(),
-        coords=TEST_COORDS,
+        coordinates=TEST_COORDS,
+        depths={'male': 1},
     )
-    shallow_variant.depths = {'male': 1}
     results = dom.run(principal=shallow_variant)  # noqa
     assert len(results) == 0
 
@@ -258,30 +129,44 @@ def test_dominant_autosomal_passes(peddy_ped):
         'gnomad_af': 0.0001,
         'gnomad_ac': 0,
         'gnomad_hom': 0,
-        'var_type': VariantType.SMALL,
+        'cat1': True,
+        'gene_id': 'TEST1',
     }
 
+    # attributes relating to categorisation
+    boolean_categories = ['cat1']
+
     dom = DominantAutosomal(pedigree=peddy_ped)
 
     # passes with heterozygous
-    passing_variant = SimpleVariant(
-        info=info_dict, het_samples={'male'}, hom_samples=set(), coords=TEST_COORDS
+    passing_variant = SmallVariant(
+        info=info_dict,
+        het_samples={'male'},
+        coordinates=TEST_COORDS,
+        boolean_categories=boolean_categories,
+        depths={'male': 999},
     )
     results = dom.run(principal=passing_variant)
     assert len(results) == 1
     assert results[0].reasons == {'Autosomal Dominant'}
 
     # also passes with homozygous
-    passing_variant = SimpleVariant(
-        info=info_dict, het_samples=set(), hom_samples={'male'}, coords=TEST_COORDS
+    passing_variant = SmallVariant(
+        info=info_dict,
+        hom_samples={'male'},
+        coordinates=TEST_COORDS,
+        boolean_categories=boolean_categories,
+        depths={'male': 999},
     )
     results = dom.run(principal=passing_variant)
     assert len(results) == 1
     assert results[0].reasons == {'Autosomal Dominant'}
 
     # no results if no samples
-    passing_variant = SimpleVariant(
-        info=info_dict, het_samples=set(), hom_samples=set(), coords=TEST_COORDS
+    passing_variant = SmallVariant(
+        info=info_dict,
+        coordinates=TEST_COORDS,
+        boolean_categories=boolean_categories,
     )
     assert len(dom.run(principal=passing_variant)) == 0
 
@@ -289,8 +174,8 @@ def test_dominant_autosomal_passes(peddy_ped):
 @pytest.mark.parametrize(
     'info',
     [
-        {'gnomad_af': 0.1, 'var_type': VariantType.SMALL},
-        {'gnomad_hom': 2, 'var_type': VariantType.SMALL},
+        {'gnomad_af': 0.1},
+        {'gnomad_hom': 2},
     ],
 )
 def test_dominant_autosomal_fails(info, peddy_ped):
@@ -303,8 +188,8 @@ def test_dominant_autosomal_fails(info, peddy_ped):
     dom = DominantAutosomal(pedigree=peddy_ped)
 
     # fails due to high af
-    failing_variant = SimpleVariant(
-        info=info, het_samples={'male'}, hom_samples=set(), coords=TEST_COORDS
+    failing_variant = SmallVariant(
+        info=info, het_samples={'male'}, coordinates=TEST_COORDS
     )
     assert not dom.run(principal=failing_variant)
 
@@ -316,11 +201,11 @@ def test_recessive_autosomal_hom_passes(peddy_ped):
     """
     passing_variant = SmallVariant(
         hom_samples={'male'},
-        info={'categoryboolean1': True},
         coordinates=TEST_COORDS,
         ab_ratios={'male': 1.0},
         depths={'male': 15},
         boolean_categories=['categoryboolean1'],
+        info={'categoryboolean1': True, 'gene_id': 'TEST1'},
     )
     rec = RecessiveAutosomalHomo(pedigree=peddy_ped)
     results = rec.run(passing_variant)
@@ -334,11 +219,13 @@ def test_recessive_autosomal_hom_passes_with_ab_flag(peddy_ped):
     we accept a homozygous variant as a Recessive
     """
 
-    passing_variant = RecessiveSimpleVariant(
+    passing_variant = SmallVariant(
         hom_samples={'male'},
-        coords=TEST_COORDS,
+        coordinates=TEST_COORDS,
         ab_ratios={'male': 0.4},
-        info={'var_type': VariantType.SMALL.value},
+        depths={'male': 40},
+        boolean_categories=['categoryboolean1'],
+        info={'categoryboolean1': True, 'gene_id': 'TEST1'},
     )
     rec = RecessiveAutosomalHomo(pedigree=peddy_ped)
     results = rec.run(passing_variant)
@@ -354,17 +241,21 @@ def test_recessive_autosomal_comp_het_male_passes(peddy_ped):
     we accept a heterozygous variant as a Comp-Het
     """
 
-    passing_variant = RecessiveSimpleVariant(
+    passing_variant = SmallVariant(
         het_samples={'male'},
-        coords=TEST_COORDS,
+        coordinates=TEST_COORDS,
         ab_ratios={'male': 0.5},
-        info={'var_type': VariantType.SMALL},
+        depths={'male': 50},
+        boolean_categories=['categoryboolean1'],
+        info={'gene_id': 'TEST1', 'categoryboolean1': True},
     )
-    passing_variant2 = RecessiveSimpleVariant(
+    passing_variant2 = SmallVariant(
         het_samples={'male'},
-        coords=TEST_COORDS2,
+        coordinates=TEST_COORDS2,
         ab_ratios={'male': 0.5},
-        info={'var_type': VariantType.SMALL},
+        depths={'male': 50},
+        boolean_categories=['categoryboolean1'],
+        info={'gene_id': 'TEST1', 'categoryboolean1': True},
     )
     comp_hets = {'male': {TEST_COORDS.string_format: [passing_variant2]}}
     rec = RecessiveAutosomalCH(pedigree=peddy_ped)
@@ -380,17 +271,21 @@ def test_recessive_autosomal_comp_het_male_passes_partner_flag(peddy_ped):
     we accept a heterozygous variant as a Comp-Het
     """
 
-    passing_variant = RecessiveSimpleVariant(
+    passing_variant = SmallVariant(
         het_samples={'male'},
-        coords=TEST_COORDS,
+        coordinates=TEST_COORDS,
         ab_ratios={'male': 0.5},
-        info={'var_type': VariantType.SMALL},
+        depths={'male': 50},
+        boolean_categories=['categoryboolean1'],
+        info={'gene_id': 'TEST1', 'categoryboolean1': True},
     )
-    passing_variant2 = RecessiveSimpleVariant(
+    passing_variant2 = SmallVariant(
         het_samples={'male'},
-        coords=TEST_COORDS2,
+        coordinates=TEST_COORDS2,
         ab_ratios={'male': 1.0},
-        info={'var_type': VariantType.SMALL},
+        depths={'male': 50},
+        boolean_categories=['categoryboolean1'],
+        info={'gene_id': 'TEST1', 'categoryboolean1': True},
     )
     comp_hets = {'male': {TEST_COORDS.string_format: [passing_variant2]}}
     rec = RecessiveAutosomalCH(pedigree=peddy_ped)
@@ -408,17 +303,21 @@ def test_recessive_autosomal_comp_het_female_passes(peddy_ped):
     :return:
     """
 
-    passing_variant = RecessiveSimpleVariant(
+    passing_variant = SmallVariant(
         het_samples={'female'},
-        coords=TEST_COORDS,
+        coordinates=TEST_COORDS,
         ab_ratios={'female': 0.5},
-        info={'var_type': VariantType.SMALL},
+        depths={'female': 50},
+        boolean_categories=['categoryboolean1'],
+        info={'gene_id': 'TEST1', 'categoryboolean1': True},
     )
-    passing_variant2 = RecessiveSimpleVariant(
+    passing_variant2 = SmallVariant(
         het_samples={'female'},
-        coords=TEST_COORDS2,
+        coordinates=TEST_COORDS2,
         ab_ratios={'female': 0.5},
-        info={'var_type': VariantType.SMALL},
+        depths={'female': 50},
+        boolean_categories=['categoryboolean1'],
+        info={'gene_id': 'TEST1', 'categoryboolean1': True},
     )
     comp_hets = {'female': {TEST_COORDS.string_format: [passing_variant2]}}
     rec = RecessiveAutosomalCH(pedigree=peddy_ped)
@@ -437,11 +336,11 @@ def test_recessive_autosomal_comp_het_fails_no_ch_return(peddy_ped):
     :return:
     """
 
-    failing_variant = SimpleVariant(
-        info={'var_type': VariantType.SMALL},
+    failing_variant = SmallVariant(
+        info={'gene_id': 'TEST1'},
         het_samples={'male'},
-        hom_samples=set(),
-        coords=TEST_COORDS,
+        depths={'male': 50},
+        coordinates=TEST_COORDS,
     )
     rec = RecessiveAutosomalCH(pedigree=peddy_ped)
     assert not rec.run(failing_variant)
@@ -456,17 +355,19 @@ def test_recessive_autosomal_comp_het_fails_no_paired_call(peddy_ped):
     :return:
     """
 
-    failing_variant = RecessiveSimpleVariant(
+    failing_variant = SmallVariant(
         het_samples={'male'},
-        coords=TEST_COORDS,
+        coordinates=TEST_COORDS,
         ab_ratios={'male': 0.5},
-        info={'var_type': VariantType.SMALL},
+        depths={'male': 50},
+        info={'gene_id': 'TEST1'},
     )
-    failing_variant2 = RecessiveSimpleVariant(
+    failing_variant2 = SmallVariant(
         het_samples={'female'},
-        coords=TEST_COORDS2,
+        coordinates=TEST_COORDS2,
         ab_ratios={'female': 0.5},
-        info={'var_type': VariantType.SMALL},
+        depths={'female': 50},
+        info={'gene_id': 'TEST1'},
     )
 
     rec = RecessiveAutosomalCH(pedigree=peddy_ped)
@@ -477,7 +378,7 @@ def test_recessive_autosomal_comp_het_fails_no_paired_call(peddy_ped):
 
 
 @pytest.mark.parametrize(
-    'info', [{'gnomad_hom': 3, 'var_type': VariantType.SMALL}]
+    'info', [{'gnomad_hom': 3, 'gene_id': 'TEST1'}]
 )  # threshold is 2
 def test_recessive_autosomal_hom_fails(info, peddy_ped):
     """
@@ -485,8 +386,8 @@ def test_recessive_autosomal_hom_fails(info, peddy_ped):
     we have no confirmed MOI
     """
 
-    failing_variant = SimpleVariant(
-        info=info, het_samples={'male'}, hom_samples={'male'}, coords=TEST_COORDS
+    failing_variant = SmallVariant(
+        info=info, het_samples={'male'}, hom_samples={'male'}, coordinates=TEST_COORDS
     )
     rec = RecessiveAutosomalHomo(pedigree=peddy_ped)
     assert not rec.run(failing_variant)
@@ -497,10 +398,12 @@ def test_x_dominant_female_and_male_het_passes(peddy_ped):
     check that a male is accepted as a het
     :return:
     """
-    passing_variant = SimpleVariant(
-        info={'gnomad_hemi': 0, 'var_type': VariantType.SMALL},
+    passing_variant = SmallVariant(
+        boolean_categories=['categoryboolean1'],
+        info={'gnomad_hemi': 0, 'gene_id': 'TEST1', 'categoryboolean1': True},
         het_samples={'female', 'male'},
-        coords=TEST_COORDS_X_1,
+        depths={'female': 50, 'male': 50},
+        coordinates=TEST_COORDS_X_1,
     )
     x_dom = XDominant(pedigree=peddy_ped)
     results = x_dom.run(passing_variant)
@@ -515,10 +418,13 @@ def test_x_dominant_female_hom_passes(peddy_ped):
     check that a male is accepted as a het
     :return:
     """
-    passing_variant = SimpleVariant(
-        info={'gnomad_hemi': 0, 'var_type': VariantType.SMALL},
+    passing_variant = SmallVariant(
+        boolean_categories=['categoryboolean1'],
+        info={'gene_id': 'TEST1', 'categoryboolean1': True},
         hom_samples={'female'},
-        coords=TEST_COORDS_X_1,
+        depths={'female': 100},
+        ab_ratios={'female': 0.5},
+        coordinates=TEST_COORDS_X_1,
     )
     x_dom = XDominant(pedigree=peddy_ped)
     results = x_dom.run(passing_variant)
@@ -531,10 +437,12 @@ def test_x_dominant_male_hom_passes(peddy_ped):
     check that a male is accepted as a het
     :return:
     """
-    passing_variant = SimpleVariant(
-        info={'gnomad_hemi': 0, 'var_type': VariantType.SMALL},
+    passing_variant = SmallVariant(
+        boolean_categories=['categoryboolean1'],
+        info={'gene_id': 'TEST1', 'categoryboolean1': True},
         hom_samples={'male'},
-        coords=TEST_COORDS_X_1,
+        depths={'male': 100},
+        coordinates=TEST_COORDS_X_1,
     )
     x_dom = XDominant(pedigree=peddy_ped)
     results = x_dom.run(passing_variant)
@@ -545,23 +453,20 @@ def test_x_dominant_male_hom_passes(peddy_ped):
 @pytest.mark.parametrize(
     'info',
     [
-        {'gnomad_af': 0.1, 'var_type': VariantType.SMALL},
-        {'gnomad_hom': 2, 'var_type': VariantType.SMALL},
-        {'gnomad_hemi': 3, 'var_type': VariantType.SMALL},
+        {'gnomad_af': 0.1, 'gene_id': 'TEST1', 'categoryboolean1': True},
+        {'gnomad_hom': 2, 'gene_id': 'TEST1', 'categoryboolean1': True},
+        {'gnomad_hemi': 3, 'gene_id': 'TEST1', 'categoryboolean1': True},
     ],
 )
 def test_x_dominant_info_fails(info, peddy_ped):
     """
     check for info dict exclusions
-    :param info:
-    :return:
     """
-    passing_variant = SimpleVariant(
+    passing_variant = SmallVariant(
         info=info,
         hom_samples={'male'},
-        het_samples=set(),
-        coords=TEST_COORDS_X_1,
-        categoryboolean1=False,
+        coordinates=TEST_COORDS_X_1,
+        boolean_categories=['categoryboolean1'],
     )
     x_dom = XDominant(pedigree=peddy_ped)
     assert len(x_dom.run(passing_variant)) == 0
@@ -573,11 +478,13 @@ def test_x_recessive_male_hom_passes(peddy_ped):
     :return:
     """
 
-    passing_variant = RecessiveSimpleVariant(
+    passing_variant = SmallVariant(
         hom_samples={'female', 'male'},
-        coords=TEST_COORDS_X_1,
+        coordinates=TEST_COORDS_X_1,
         ab_ratios={'female': 1.0, 'male': 1.0},
-        info={'var_type': VariantType.SMALL},
+        depths={'female': 100, 'male': 100},
+        boolean_categories=['categoryboolean1'],
+        info={'gene_id': 'TEST1', 'categoryboolean1': True},
     )
     x_rec = XRecessiveMale(pedigree=peddy_ped)
     results = x_rec.run(passing_variant, comp_het={})
@@ -590,11 +497,13 @@ def test_x_recessive_female_hom_passes(peddy_ped):
     :return:
     """
 
-    passing_variant = RecessiveSimpleVariant(
+    passing_variant = SmallVariant(
         hom_samples={'female', 'male'},
-        coords=TEST_COORDS_X_1,
+        coordinates=TEST_COORDS_X_1,
         ab_ratios={'female': 1.0, 'male': 1.0},
-        info={'var_type': VariantType.SMALL},
+        depths={'female': 100, 'male': 100},
+        boolean_categories=['categoryboolean1'],
+        info={'gene_id': 'TEST1', 'categoryboolean1': True},
     )
     x_rec = XRecessiveFemaleHom(pedigree=peddy_ped)
     results = x_rec.run(passing_variant, comp_het={})
@@ -607,11 +516,13 @@ def test_x_recessive_male_het_passes(peddy_ped):
 
     :return:
     """
-    passing_variant = RecessiveSimpleVariant(
+    passing_variant = SmallVariant(
         het_samples={'male'},
-        coords=TEST_COORDS_X_1,
+        coordinates=TEST_COORDS_X_1,
         ab_ratios={'male': 0.5},
-        info={'var_type': VariantType.SMALL},
+        depths={'male': 50},
+        boolean_categories=['categoryboolean1'],
+        info={'gene_id': 'TEST1', 'categoryboolean1': True},
     )
     x_rec = XRecessiveMale(pedigree=peddy_ped)
     results = x_rec.run(passing_variant)
@@ -625,19 +536,27 @@ def test_x_recessive_female_het_passes(peddy_ped):
     :return:
     """
 
-    passing_variant = RecessiveSimpleVariant(
+    passing_variant = SmallVariant(
         het_samples={'female'},
-        coords=TEST_COORDS_X_1,
-        categorysample4=['female'],
+        coordinates=TEST_COORDS_X_1,
         ab_ratios={'female': 0.5},
-        info={'var_type': VariantType.SMALL},
-    )
-    passing_variant_2 = RecessiveSimpleVariant(
+        depths={'female': 50},
+        sample_categories=['categorysample4'],
+        info={
+            'gene_id': 'TEST1',
+            'categorysample4': ['female'],
+        },
+    )
+    passing_variant_2 = SmallVariant(
         het_samples={'female'},
-        coords=TEST_COORDS_X_2,
-        categorysample4=['female'],
+        coordinates=TEST_COORDS_X_2,
         ab_ratios={'female': 0.5},
-        info={'var_type': VariantType.SMALL},
+        depths={'female': 50},
+        sample_categories=['categorysample4'],
+        info={
+            'gene_id': 'TEST1',
+            'categorysample4': ['female'],
+        },
     )
     comp_hets = {'female': {'X-1-G-T': [passing_variant_2]}}
     x_rec = XRecessiveFemaleCH(pedigree=peddy_ped)
@@ -646,18 +565,19 @@ def test_x_recessive_female_het_passes(peddy_ped):
     assert results[0].reasons == {'X_RecessiveFemaleCompHet'}
 
 
-def test_het_de_novo_het_passes(peddy_ped):
+def test_het_de_novo_passes(peddy_ped):
     """
 
     :return:
     """
 
-    passing_variant = RecessiveSimpleVariant(
+    passing_variant = SmallVariant(
         het_samples={'female'},
-        coords=TEST_COORDS_X_1,
-        categorysample4=['female'],
+        coordinates=TEST_COORDS_X_1,
+        sample_categories=['categorysample4'],
         ab_ratios={'female': 0.5},
-        info={'var_type': VariantType.SMALL},
+        depths={'female': 99},
+        info={'gene_id': 'TEST1', 'categorysample4': ['female']},
     )
     dom_a = DominantAutosomal(pedigree=peddy_ped)
     results = dom_a.run(passing_variant)
@@ -672,12 +592,13 @@ def test_het_de_novo_het_passes_flagged(peddy_ped):
     :return:
     """
 
-    passing_variant = RecessiveSimpleVariant(
+    passing_variant = SmallVariant(
         het_samples={'female'},
-        coords=TEST_COORDS_X_1,
-        categorysample4=['female'],
+        coordinates=TEST_COORDS_X_1,
+        sample_categories=['categorysample4'],
         ab_ratios={'female': 0.5},
-        info={'var_type': VariantType.SMALL},
+        depths={'female': 99},
+        info={'gene_id': 'TEST1', 'categorysample4': ['female']},
     )
     dom_a = DominantAutosomal(pedigree=peddy_ped)
     results = dom_a.run(passing_variant)
@@ -690,19 +611,27 @@ def test_x_recessive_female_het_fails(peddy_ped):
     :return:
     """
 
-    passing_variant = RecessiveSimpleVariant(
+    passing_variant = SmallVariant(
         het_samples={'female'},
-        coords=TEST_COORDS_X_1,
-        categorysample4=['male'],
+        coordinates=TEST_COORDS_X_1,
         ab_ratios={'female': 0.5},
-        info={'var_type': VariantType.SMALL},
-    )
-    passing_variant_2 = RecessiveSimpleVariant(
+        depths={'female': 50},
+        sample_categories=['categorysample4'],
+        info={
+            'gene_id': 'TEST1',
+            'categorysample4': ['male'],
+        },
+    )
+    passing_variant_2 = SmallVariant(
         het_samples={'male'},
-        coords=TEST_COORDS_X_2,
-        categorysample4=['male'],
+        coordinates=TEST_COORDS_X_2,
         ab_ratios={'male': 0.5},
-        info={'var_type': VariantType.SMALL},
+        depths={'male': 50},
+        sample_categories=['categorysample4'],
+        info={
+            'gene_id': 'TEST1',
+            'categorysample4': ['male'],
+        },
     )
     comp_hets = {'female': {'x-2-A-C': [passing_variant_2]}}
     x_rec = XRecessiveFemaleCH(pedigree=peddy_ped)
@@ -711,18 +640,21 @@ def test_x_recessive_female_het_fails(peddy_ped):
 
 
 @mock.patch('reanalysis.moi_tests.check_for_second_hit')
-def test_x_recessive_female_het_no_pair_fails(second_hit: mock.patch, peddy_ped):
-    """
-    :return:
-    """
+def test_x_recessive_female_het_no_pair_fails(second_hit: mock.Mock, peddy_ped):
+    """a categorised female het with no comp-het partner is not reported"""
 
-    second_hit.return_value = []
-    passing_variant = RecessiveSimpleVariant(
+    passing_variant = SmallVariant(
         het_samples={'female'},
-        coords=TEST_COORDS_X_1,
+        coordinates=TEST_COORDS_X_1,
         ab_ratios={'female': 0.5},
-        info={'var_type': VariantType.SMALL},
+        depths={'female': 50},
+        sample_categories=['categorysample4'],
+        info={
+            'gene_id': 'TEST1',
+            'categorysample4': ['female'],
+        },
     )
+    second_hit.return_value = []
     x_rec = XRecessiveFemaleCH(pedigree=peddy_ped)
     assert not x_rec.run(passing_variant)
 
@@ -850,10 +782,13 @@ def test_genotype_calls(peddy_ped):
         'gnomad_af': 0.0001,
         'gnomad_ac': 0,
         'gnomad_hom': 0,
-        'var_type': VariantType.SMALL,
+        'gene_id': 'TEST1',
     }
-    variant = SimpleVariant(
-        info=info_dict, het_samples={'male'}, hom_samples={'female'}, coords=TEST_COORDS
+    variant = SmallVariant(
+        info=info_dict,
+        het_samples={'male'},
+        hom_samples={'female'},
+        coordinates=TEST_COORDS,
     )
     assert base_moi.get_family_genotypes(variant, 'male') == {
         'father_1': 'WT',
@@ -865,11 +800,10 @@ def test_genotype_calls(peddy_ped):
         'female': 'Hom',
         'mother_2': 'WT',
     }
-    x_variant = SimpleVariant(
+    x_variant = SmallVariant(
         info=info_dict,
         het_samples={'male', 'female'},
-        hom_samples=set(),
-        coords=TEST_COORDS_X_1,
+        coordinates=TEST_COORDS_X_1,
     )
     assert base_moi.get_family_genotypes(x_variant, 'male') == {
         'father_1': 'WT',
@@ -882,11 +816,10 @@ def test_genotype_calls(peddy_ped):
         'mother_2': 'WT',
     }
 
-    x_variant_2 = SimpleVariant(
+    x_variant_2 = SmallVariant(
         info=info_dict,
-        het_samples=set(),
         hom_samples={'male', 'female'},
-        coords=TEST_COORDS_X_1,
+        coordinates=TEST_COORDS_X_1,
     )
     assert base_moi.get_family_genotypes(x_variant_2, 'male') == {
         'father_1': 'WT',
@@ -899,8 +832,8 @@ def test_genotype_calls(peddy_ped):
         'mother_2': 'WT',
     }
 
-    variant_missing = SimpleVariant(
-        info=info_dict, het_samples=set(), hom_samples=set(), coords=TEST_COORDS
+    variant_missing = SmallVariant(
+        info=info_dict, het_samples=set(), hom_samples=set(), coordinates=TEST_COORDS
     )
     assert base_moi.get_family_genotypes(variant_missing, 'male') == {
         'father_1': 'WT',
diff --git a/test/test_results_comparison.py b/test/test_results_comparison.py
index 4dd4e489..7b0f490f 100644
--- a/test/test_results_comparison.py
+++ b/test/test_results_comparison.py
@@ -10,12 +10,9 @@
 
 from cpg_utils.config import get_config
 
-from reanalysis.utils import (
-    date_annotate_results,
-    find_latest_file,
-    get_granular_date,
-    Coordinates,
-)
+from reanalysis.utils import date_annotate_results, find_latest_file
+from reanalysis.models import Coordinates
+from reanalysis.static_values import get_granular_date
 
 CATEGORY_META = get_config()['categories']
 
@@ -42,8 +39,8 @@ class MiniReport:
     independent: bool = False
 
 
-COORD_1 = Coordinates('1', 1, 'A', 'G')
-COORD_2 = Coordinates('2', 2, 'A', 'G')
+COORD_1 = Coordinates(chrom='1', pos=1, ref='A', alt='G')
+COORD_2 = Coordinates(chrom='2', pos=2, ref='A', alt='G')
 
 GENERIC_REPORT = MiniReport(MiniVariant(categories=['1'], coords=COORD_1))
 GENERIC_REPORT_12 = MiniReport(MiniVariant(categories=['1', '2'], coords=COORD_1))
diff --git a/test/test_utils.py b/test/test_utils.py
index 3e6d8975..c98f2f21 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -3,22 +3,22 @@
 """
 
 from copy import deepcopy
-from dataclasses import dataclass
-from typing import List
 import pytest
 from cyvcf2 import VCFReader
 from reanalysis.utils import (
-    AbstractVariant,
-    Coordinates,
     find_comp_hets,
     gather_gene_dict_from_contig,
     get_new_gene_map,
     get_non_ref_samples,
     get_simple_moi,
     identify_file_type,
+)
+from reanalysis.models import (
     FileTypes,
-    MinimalVariant,
-    ReportedVariant,
+    ReportVariant,
+    Coordinates,
+    SmallVariant,
+    StructuralVariant,
 )
 
 
@@ -26,16 +26,16 @@ def test_coord_sorting():
     """
     check that coord sorting methods work
     """
-    coord_1 = Coordinates('4', 20, 'A', 'C')
-    coord_1b = Coordinates('4', 21, 'A', 'C')
-    coord_1c = Coordinates('4', 21, 'A', 'C')
-    coord_2 = Coordinates('5', 20, 'A', 'C')
+    coord_1 = Coordinates(chrom='4', pos=20, ref='A', alt='C')
+    coord_1b = Coordinates(chrom='4', pos=21, ref='A', alt='C')
+    coord_1c = Coordinates(chrom='4', pos=21, ref='A', alt='C')
+    coord_2 = Coordinates(chrom='5', pos=20, ref='A', alt='C')
     assert coord_1 < coord_2
     assert coord_1 < coord_1b
     assert not coord_1b < coord_1c
 
 
-def test_abs_var_sorting(two_trio_abs_variants: list[AbstractVariant]):
+def test_abs_var_sorting(two_trio_abs_variants: list[SmallVariant]):
     """
     test sorting and equivalence at the AbsVar level
     """
@@ -53,7 +53,7 @@ def test_reported_variant_ordering(trio_abs_variant):
     """
     test that equivalence between Report objects works as exp.
     """
-    report_1 = ReportedVariant(
+    report_1 = ReportVariant(
         sample='1',
         family='1',
         gene='2',
@@ -61,7 +61,7 @@ def test_reported_variant_ordering(trio_abs_variant):
         reasons={'test'},
         genotypes={},
     )
-    report_2 = ReportedVariant(
+    report_2 = ReportVariant(
         sample='1',
         family='1',
         gene='2',
@@ -131,26 +131,19 @@ def test_get_simple_moi(string: str, expected: str, chrom: str):
     assert get_simple_moi(string, chrom) == expected
 
 
-def test_get_non_ref_samples():
+def test_get_non_ref_samples(cyvcf_example_variant):
     """
-    this simple test can be done without the use of a cyvcf2 object
+    check het/hom sample extraction using the cyvcf_example_variant fixture
     :return:
     """
 
-    @dataclass
-    class SuperSimple:
-        """test_fixture"""
-
-        gt_types: List[int]
-
-    samples = ['a', 'b', 'c', 'd', 'e']
-    variant = SuperSimple([0, 1, 2, 3, 1])
-    het, hom = get_non_ref_samples(variant=variant, samples=samples)
-    assert het == {'b', 'e'}
-    assert hom == {'d'}
+    samples = ['male', 'father', 'mother']
+    het, hom = get_non_ref_samples(variant=cyvcf_example_variant, samples=samples)
+    assert het == {'male'}
+    assert not hom
 
 
-def test_av_categories(trio_abs_variant: AbstractVariant):
+def test_av_categories(trio_abs_variant: SmallVariant | StructuralVariant):
     """
     Cat. 3, and Cat. 4 for PROBAND only:
     """
@@ -164,7 +157,7 @@ def test_av_categories(trio_abs_variant: AbstractVariant):
     assert not trio_abs_variant.sample_categorised_check('father_1')
 
 
-def test_av_phase(trio_abs_variant: AbstractVariant):
+def test_av_phase(trio_abs_variant: SmallVariant):
     """
     nothing here yet
     :param trio_abs_variant:
@@ -189,12 +182,12 @@ def test_gene_dict(two_trio_variants_vcf):
     assert len(var_dict['ENSG00000075043']) == 2
 
 
-def test_comp_hets(two_trio_abs_variants: list[AbstractVariant], peddy_ped):
+def test_comp_hets(two_trio_abs_variants: list[SmallVariant], peddy_ped):
     """
     {
         'male': {
-            '20-63406931-C-CGG': [AbstractVariant()],
-            '20-63406991-C-CGG': [AbstractVariant()]
+            '20-63406931-C-CGG': [Variant()],
+            '20-63406991-C-CGG': [Variant()]
         }
     }
     :param two_trio_abs_variants:
@@ -228,7 +221,7 @@ def test_phased_dict(phased_vcf_path):
         assert variant.phased['mother_1'] == {420: '0|1'}
 
 
-def test_phased_comp_hets(phased_variants: list[AbstractVariant], peddy_ped):
+def test_phased_comp_hets(phased_variants: list[SmallVariant], peddy_ped):
     """
     phased variants shouldn't form a comp-het
     'mother_1' is het for both variants, but phase-set is same for both
@@ -315,11 +308,3 @@ def test_new_gene_map_complex():
         'ENSG3': 'sam2',
         'ENSG4': 'sam,sam2',
     }
-
-
-def test_minimise(trio_abs_variant: AbstractVariant):
-    """
-    check the variant minimiser
-    """
-    minvar = MinimalVariant(trio_abs_variant, 'male')
-    assert sorted(minvar.categories) == ['3', '4']