diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..700707c
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,7 @@
+# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+version: 2
+updates:
+  - package-ecosystem: "github-actions"  # keep the actions referenced by the workflows up to date
+    directory: "/"  # for github-actions, "/" makes Dependabot look in .github/workflows
+    schedule:
+      interval: "weekly"  # look for new versions once a week
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
new file mode 100644
index 0000000..d1d83c2
--- /dev/null
+++ b/.github/workflows/CI.yml
@@ -0,0 +1,40 @@
+name: CI
+on:
+  push:
+    branches:
+      - main
+    tags: ['*']  # also run for every pushed tag
+  pull_request:
+concurrency:
+  # Skip intermediate builds: always.
+  # Cancel intermediate builds: only if it is a pull request build.
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
+jobs:
+  test:
+    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false  # let the remaining matrix jobs finish when one of them fails
+      matrix:
+        version:
+          - '1.6'  # no Base.get_extension here: exercises the include-based Clapeyron fallback
+          - '1.9'  # Base.get_extension is available: exercises the package-extension path
+          - 'nightly'
+        os:
+          - ubuntu-latest
+        arch:
+          - x64
+    steps:
+      - uses: actions/checkout@v3
+      - uses: julia-actions/setup-julia@v1
+        with:
+          version: ${{ matrix.version }}
+          arch: ${{ matrix.arch }}
+      - uses: julia-actions/cache@v1
+      - uses: julia-actions/julia-buildpkg@v1
+      - uses: julia-actions/julia-runtest@v1
+      - uses: julia-actions/julia-processcoverage@v1
+      - uses: codecov/codecov-action@v3
+        with:
+          files: lcov.info  # coverage file to upload; presumably written by the processcoverage step - confirm
diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml
new file mode 100644
index 0000000..cba9134
--- /dev/null
+++ b/.github/workflows/CompatHelper.yml
@@ -0,0 +1,16 @@
+name: CompatHelper
+on:
+  schedule:
+    - cron: 0 0 * * *  # once a day at 00:00 (GitHub evaluates schedules in UTC)
+  workflow_dispatch:  # also allow manual runs
+jobs:
+  CompatHelper:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Pkg.add("CompatHelper")
+        run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
+      - name: CompatHelper.main()
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}  # NOTE(review): presumably an SSH deploy key so CompatHelper PRs can trigger CI - confirm
+        run: julia -e 'using CompatHelper; CompatHelper.main()'
diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml
new file mode 100644
index 0000000..f49313b
--- /dev/null
+++ b/.github/workflows/TagBot.yml
@@ -0,0 +1,15 @@
+name: TagBot
+on:
+  issue_comment:
+    types:
+      - created
+  workflow_dispatch:  # also allow manual runs
+jobs:
+  TagBot:
+    if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'  # ignore comments from anyone but JuliaTagBot
+    runs-on: ubuntu-latest
+    steps:
+      - uses: JuliaRegistries/TagBot@v1
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          ssh: ${{ secrets.DOCUMENTER_KEY }}  # NOTE(review): reuses the DOCUMENTER_KEY secret as TagBot's SSH key - confirm it is a deploy key with write access
diff --git a/.github/workflows/register.yml b/.github/workflows/register.yml
new file mode 100644
index 0000000..5b7cd3b
--- /dev/null
+++ b/.github/workflows/register.yml
@@ -0,0 +1,16 @@
+name: Register Package
+on:
+  workflow_dispatch:  # manual trigger only
+    inputs:
+      version:
+        description: Version to register or component to bump
+        required: true
+jobs:
+  register:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write  # the job's GITHUB_TOKEN is allowed to write repository contents
+    steps:
+      - uses: julia-actions/RegisterAction@latest
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..51e904e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+Manifest.toml
+docs/build/
+docs/site/
+*.jl.cov
+*.jl.*.cov
+*.jl.mem
+lcov.info
+LocalPreferences.toml
\ No newline at end of file
diff --git a/Project.toml b/Project.toml
index 57332f8..d730d7a 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,4 +1,4 @@
-name = "ClapeyronGCIdentifier"
+name = "GCIdentifier"
 uuid = "b7ea765e-cbac-4e4a-9b0d-5427cc302506"
 authors = ["Hon Wa Yew <yewhonwa@gmail.com>", "Pierre Walker <pwalker@mit.edu>", "Andrés Riedemann <andres.riedemann@gmail.com>"]
 version = "0.1.0"
@@ -8,5 +8,18 @@ ChemicalIdentifiers = "fa4ea961-1416-484e-bda2-883ee1634ba5"
 Clapeyron = "7c7805af-46cc-48c9-995b-ed0ed2dc909a"
 RDKitMinimalLib = "44044271-7623-48dc-8250-42433c44e4b7"
 
+[weakdeps]
+Clapeyron = "7c7805af-46cc-48c9-995b-ed0ed2dc909a"
+
+[extensions]
+GCIdentifierClapeyronExt = "Clapeyron"
+
 [compat]
-Clapeyron = "0.4"
\ No newline at end of file
+Clapeyron = "0.4"
+
+[extras]
+Clapeyron = "7c7805af-46cc-48c9-995b-ed0ed2dc909a"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
\ No newline at end of file
diff --git a/README.md b/README.md
index 249fb89..c5533b2 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,5 @@
-# ClapeyronGCIdentifier
-identifies subgroups, given a SMILES
+# GCIdentifier
+
+Identifies subgroups, given a SMILES string.
+
+A l
diff --git a/ext/GCIdentifierClapeyronExt.jl b/ext/GCIdentifierClapeyronExt.jl
new file mode 100644
index 0000000..535aae0
--- /dev/null
+++ b/ext/GCIdentifierClapeyronExt.jl
@@ -0,0 +1,19 @@
+module GCIdentifierClapeyronExt
+# Clapeyron glue for GCIdentifier: maps Clapeyron models (or model types) to their group lists.
+if isdefined(Base,:get_extension)
+    using Clapeyron # loaded as a standalone package extension: plain `using` resolves the packages
+    using GCIdentifier
+else
+    using ..Clapeyron # no extension support: this file is `include`d inside the GCIdentifier module
+    using ..GCIdentifier
+end
+
+const GC = GCIdentifier
+# A model instance is resolved through its type; each supported model type returns its group list.
+GC.get_grouplist(m::Clapeyron.EoSModel) = GC.get_grouplist(typeof(m))
+GC.get_grouplist(::Type{T}) where T <: UNIFAC = GC.UNIFACGroups
+GC.get_grouplist(::Type{T}) where T <: SAFTgammaMie = GC.SAFTgammaMieGroups
+GC.get_grouplist(::Type{T}) where T <: Joback = GC.JobackGroups
+GC.get_grouplist(::Type{T}) where T <: gcPCSAFT = GC.gcPCSAFTGroups
+
+end #module
diff --git a/src/ClapeyronGCIdentifier.jl b/src/ClapeyronGCIdentifier.jl
deleted file mode 100644
index 13d6cf5..0000000
--- a/src/ClapeyronGCIdentifier.jl
+++ /dev/null
@@ -1,8 +0,0 @@
-module ClapeyronGCIdentifier
-using Clapeyron
-using RDKitMinimalLib, ChemicalIdentifiers
-
-include("group_search.jl")
-include("database/database.jl")
-
-end # module ClapeyronGCIdentifier
\ No newline at end of file
diff --git a/src/GCIdentifier.jl b/src/GCIdentifier.jl
new file mode 100644
index 0000000..427c6c5
--- /dev/null
+++ b/src/GCIdentifier.jl
@@ -0,0 +1,19 @@
+module GCIdentifier
+using RDKitMinimalLib, ChemicalIdentifiers
+# When Base has no `eachsplit`, define a module-local one that forwards to `split` with the same keywords.
+@static if !isdefined(Base,:eachsplit)
+    eachsplit(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true) = split(str,dlm;limit,keepempty)
+    eachsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false)  = split(str;limit,keepempty)
+end
+# Split `str` (optionally on `dlm`) with `limit=2` and convert the pieces to a 2-tuple.
+split_2(str) = NTuple{2}(eachsplit(str, limit=2))
+split_2(str,dlm) = NTuple{2}(eachsplit(str,dlm, limit=2))
+
+include("group_search.jl")
+include("database/database.jl")
+# Without `Base.get_extension`, load Clapeyron unconditionally and include the extension file directly.
+if !isdefined(Base,:get_extension)
+    using Clapeyron
+    include("../ext/GCIdentifierClapeyronExt.jl")
+end
+end # module
\ No newline at end of file
diff --git a/src/database/Joback.jl b/src/database/Joback.jl
index dd42e60..5e4c403 100644
--- a/src/database/Joback.jl
+++ b/src/database/Joback.jl
@@ -1,43 +1,44 @@
-JobackGroups = [raw"[CX4H3]" "-CH3";
-raw"[!R;CX4H2]" "-CH2-";
-raw"[!R;CX4H]" ">CH-";
-raw"[!R;CX4H0]" ">C<";
-raw"[CX3H2][CX3H1]" "CH2=CH-";
-raw"[CX3H1][CX3H1]" "-CH=CH-";
-raw"[$([!R;#6X3H0]);!$([!R;#6X3H0]=[#8])]" "=C<";
-raw"[$([CX2H0](=*)=*)]" "=C=";
-raw"[$([CX2H1]#[!#7])]" "CH";
-raw"[$([CX2H0]#[!#7])]" "C";
-raw"[R;CX4H2]" "ring-CH2-";
-raw"[R;CX4H]" "ring>CH-";
-raw"[R;CX4H0]" "ring>C<";
-raw"[R;CX3H1,cX3H1]" "ring=CH-";
-raw"[$([R;#6X3H0]);!$([R;#6X3H0]=[#8])]" "ring=C<";
-raw"[F]" "-F";
-raw"[Cl]" "-Cl";
-raw"[Br]" "-Br";
-raw"[I]" "-I";
-raw"[OX2H;!$([OX2H]-[#6]=[O]);!$([OX2H]-a)]" "-OH (alcohol)";
-raw"[O;H1;$(O-!@c)]" "-OH (phenol)";
-raw"[OX2H0;!R;!$([OX2H0]-[#6]=[#8])]" "-O- (non-ring)";
-raw"[#8X2H0;R;!$([#8X2H0]~[#6]=[#8])]" "-O- (ring)";
-raw"[$([CX3H0](=[OX1]));!$([CX3](=[OX1])-[OX2]);!R]=O" ">C=O (non-ring)";
-raw"[$([#6X3H0](=[OX1]));!$([#6X3](=[#8X1])~[#8X2]);R]=O" ">C=O (ring)";
-raw"[CH;D2](=O)" "O=CH- (aldehyde)";
-raw"[OX2H]-[C]=O" "-COOH (acid)";
-raw"[#6X3H0;!$([#6X3H0](~O)(~O)(~O))](=[#8X1])[#8X2H0]" "-COO- (ester)";
-raw"[OX1H0;!$([OX1H0]~[#6X3]);!$([OX1H0]~[#7X3]~[#8])]" "=O (other than above)";
-raw"[NX3H2]" "-NH2";
-raw"[NX3H1;!R]" ">NH (non-ring)";
-raw"[#7X3H1;R]" ">NH (ring)";
-raw"[#7X3H0;!$([#7](~O)~O)]" ">N- (non-ring)";
-raw"[#7X2H0;!R]" "-N= (non-ring)";
-raw"[#7X2H0;R]" "-N= (ring)";
-raw"[#7X2H1]" "=NH";
-raw"[#6X2]#[#7X1H0]" "-CN";
-raw"[$([#7X3,#7X3+][!#8])](=[O])~[O-]" "-NO2";
-raw"[SX2H]" "-SH";
-raw"[#16X2H0;!R]" "-S- (non-ring)";
-raw"[#16X2H0;R]" "-S- (ring)"]
+JobackGroups = [GCPair(raw"[CX4H3]","-CH3"), # each entry is GCPair(SMARTS pattern, group name)
+GCPair(raw"[!R;CX4H2]","-CH2-"),
+GCPair(raw"[!R;CX4H]",">CH-"),
+GCPair(raw"[!R;CX4H0]",">C<"),
+GCPair(raw"[CX3H2][CX3H1]","CH2=CH-"),
+GCPair(raw"[CX3H1][CX3H1]","-CH=CH-"),
+GCPair(raw"[$([!R;#6X3H0]);!$([!R;#6X3H0]=[#8])]","=C<"),
+GCPair(raw"[$([CX2H0](=*)=*)]","=C="),
+GCPair(raw"[$([CX2H1]#[!#7])]","CH"), # carbon with one H, triple-bonded to a non-nitrogen atom
+GCPair(raw"[$([CX2H0]#[!#7])]","C"), # carbon without H, triple-bonded to a non-nitrogen atom
+GCPair(raw"[R;CX4H2]","ring-CH2-"),
+GCPair(raw"[R;CX4H]","ring>CH-"),
+GCPair(raw"[R;CX4H0]","ring>C<"),
+GCPair(raw"[R;CX3H1,cX3H1]","ring=CH-"),
+GCPair(raw"[$([R;#6X3H0]);!$([R;#6X3H0]=[#8])]","ring=C<"),
+GCPair(raw"[F]","-F"),
+GCPair(raw"[Cl]","-Cl"),
+GCPair(raw"[Br]","-Br"),
+GCPair(raw"[I]","-I"),
+GCPair(raw"[OX2H;!$([OX2H]-[#6]=[O]);!$([OX2H]-a)]","-OH (alcohol)"),
+GCPair(raw"[O;H1;$(O-!@c)]","-OH (phenol)"),
+GCPair(raw"[OX2H0;!R;!$([OX2H0]-[#6]=[#8])]","-O- (non-ring)"),
+GCPair(raw"[#8X2H0;R;!$([#8X2H0]~[#6]=[#8])]","-O- (ring)"),
+GCPair(raw"[$([CX3H0](=[OX1]));!$([CX3](=[OX1])-[OX2]);!R]=O",">C=O (non-ring)"),
+GCPair(raw"[$([#6X3H0](=[OX1]));!$([#6X3](=[#8X1])~[#8X2]);R]=O",">C=O (ring)"),
+GCPair(raw"[CH;D2](=O)","O=CH- (aldehyde)"),
+GCPair(raw"[OX2H]-[C]=O","-COOH (acid)"),
+GCPair(raw"[#6X3H0;!$([#6X3H0](~O)(~O)(~O))](=[#8X1])[#8X2H0]","-COO- (ester)"),
+GCPair(raw"[OX1H0;!$([OX1H0]~[#6X3]);!$([OX1H0]~[#7X3]~[#8])]","=O (other than above)"),
+GCPair(raw"[NX3H2]","-NH2"),
+GCPair(raw"[NX3H1;!R]",">NH (non-ring)"),
+GCPair(raw"[#7X3H1;R]",">NH (ring)"),
+GCPair(raw"[#7X3H0;!$([#7](~O)~O)]",">N- (non-ring)"), # NOTE(review): pattern has no !R although the name says non-ring - confirm
+GCPair(raw"[#7X2H0;!R]","-N= (non-ring)"),
+GCPair(raw"[#7X2H0;R]","-N= (ring)"),
+GCPair(raw"[#7X2H1]","=NH"),
+GCPair(raw"[#6X2]#[#7X1H0]","-CN"),
+GCPair(raw"[$([#7X3,#7X3+][!#8])](=[O])~[O-]","-NO2"),
+GCPair(raw"[SX2H]","-SH"),
+GCPair(raw"[#16X2H0;!R]","-S- (non-ring)"),
+GCPair(raw"[#16X2H0;R]","-S- (ring)")
+]
 
 export JobackGroups
\ No newline at end of file
diff --git a/src/database/SAFTgammaMie.jl b/src/database/SAFTgammaMie.jl
index 81c696f..64e31b4 100644
--- a/src/database/SAFTgammaMie.jl
+++ b/src/database/SAFTgammaMie.jl
@@ -1,34 +1,35 @@
-SAFTgammaMieGroups = [raw"[CX4H3]" "CH3";
-raw"[!R;CX4H2]" "CH2";
-raw"[!R;CX4H]" "CH";
-raw"[!R;CX4H0]" "C";
-raw"[cX3;H1]" "aCH";
-raw"[cX3;H0][CX4;H2]" "aCCH2";
-raw"[cX3;H0][CX4;H1]" "aCCH";
-raw"[CX3H2]" "CH2=";
-raw"[!R;CX3H1;!$([CX3H1](=O))]" "CH=";
-raw"[CH2;R]" "cCH2";
-raw"[OX2H]-[C]=O" "COOH";
-raw"[#6X3H0;!$([#6X3H0](~O)(~O)(~O))](=[#8X1])[#8X2H0]" "COO";
-raw"[OX2H;!$([OX2H]-[#6]=[O]);!$([OX2H]-a)]" "OH";
-raw"[CX4;H2;!R][OH1]" "CH2OH";
-raw"[CX4;H1;!R][OH1]" "CHOH";
-raw"[NX3H2]" "NH2";
-raw"[NX3H1;!R]" "NH";
-raw"[#7X3H0;!$([#7](~O)~O)]" "N";
-raw"[#7X3H1;R]" "cNH";
-raw"[#7X3H0;R]" "cN";
-raw"[!R;CX3H0;!$([CX3H0](=O))]" "CH=";
-raw"[cX3;H0][CX4;H3]" "aCCH3";
-raw"[cX3;H0;R][OX2;H1]" "aCOH";
-raw"[CH1;R]" "cCH";
-raw"[CH1;R][NH1;!R]" "cCHNH";
-raw"[CH1;R][NH0;!R]" "cCHN";
-raw"[cH0][C;!R](=O)[cH0]" "aCCOaC";
-raw"[OX2H]-[C](=O)[cH0]" "aCCOOH";
-raw"[cH0][NH1;!R][cH0]" "aCNHaC";
-raw"[CH3][CX3](=O)" "CH3CO";
-raw"[OH0;!R;$([OH0;!R][CH3;!R]);$([OH0;!R][CH2;!R])]" "eO";
-raw"[OH0;!R;$([OH0;!R][CH2;!R])]" "cO"]
+SAFTgammaMieGroups = [GCPair(raw"[CX4H3]","CH3"), # each entry is GCPair(SMARTS pattern, group name)
+GCPair(raw"[!R;CX4H2]","CH2"),
+GCPair(raw"[!R;CX4H]","CH"),
+GCPair(raw"[!R;CX4H0]","C"),
+GCPair(raw"[cX3;H1]","aCH"),
+GCPair(raw"[cX3;H0][CX4;H2]","aCCH2"),
+GCPair(raw"[cX3;H0][CX4;H1]","aCCH"),
+GCPair(raw"[CX3H2]","CH2="),
+GCPair(raw"[!R;CX3H1;!$([CX3H1](=O))]","CH="),
+GCPair(raw"[CH2;R]","cCH2"),
+GCPair(raw"[OX2H]-[C]=O","COOH"),
+GCPair(raw"[#6X3H0;!$([#6X3H0](~O)(~O)(~O))](=[#8X1])[#8X2H0]","COO"),
+GCPair(raw"[OX2H;!$([OX2H]-[#6]=[O]);!$([OX2H]-a)]","OH"),
+GCPair(raw"[CX4;H2;!R][OH1]","CH2OH"),
+GCPair(raw"[CX4;H1;!R][OH1]","CHOH"),
+GCPair(raw"[NX3H2]","NH2"),
+GCPair(raw"[NX3H1;!R]","NH"),
+GCPair(raw"[#7X3H0;!$([#7](~O)~O)]","N"),
+GCPair(raw"[#7X3H1;R]","cNH"),
+GCPair(raw"[#7X3H0;R]","cN"),
+GCPair(raw"[!R;CX3H0;!$([CX3H0](=O))]","C="), # hydrogen-free sp2 carbon: previously reused the name "CH=" of the H1 pattern
+GCPair(raw"[cX3;H0][CX4;H3]","aCCH3"),
+GCPair(raw"[cX3;H0;R][OX2;H1]","aCOH"),
+GCPair(raw"[CH1;R]","cCH"),
+GCPair(raw"[CH1;R][NH1;!R]","cCHNH"),
+GCPair(raw"[CH1;R][NH0;!R]","cCHN"),
+GCPair(raw"[cH0][C;!R](=O)[cH0]","aCCOaC"),
+GCPair(raw"[OX2H]-[C](=O)[cH0]","aCCOOH"),
+GCPair(raw"[cH0][NH1;!R][cH0]","aCNHaC"),
+GCPair(raw"[CH3][CX3](=O)","CH3CO"),
+GCPair(raw"[OH0;!R;$([OH0;!R][CH3;!R]);$([OH0;!R][CH2;!R])]","eO"),
+GCPair(raw"[OH0;!R;$([OH0;!R][CH2;!R])]","cO") # NOTE(review): pattern requires a non-ring oxygen (!R) - confirm this is intended for "cO"
+]
 
 export SAFTgammaMieGroups
\ No newline at end of file
diff --git a/src/database/UNIFAC.jl b/src/database/UNIFAC.jl
index 1458d36..4661756 100644
--- a/src/database/UNIFAC.jl
+++ b/src/database/UNIFAC.jl
@@ -1,94 +1,94 @@
-UNIFACGroups = [raw"[CX4;H3;!R]" "CH3";
-raw"[CX4;H2;!R]" "CH2";
-raw"[CX4;H1;!R]" "CH";
-raw"[CX4;H0;!R]" "C";
-raw"[CX3;H2]=[CX3;H1]" "CH2=CH";
-raw"[CX3;H1]=[CX3;H1]" "CH=CH";
-raw"[CX3;H2]=[CX3;H0]" "CH2=C";
-raw"[CX3;H1]=[CX3;H0]" "CH=C";
-raw"[cX3;H1]" "ACH";
-raw"[cX3;H0]" "AC";
-raw"[cX3;H0][CX4;H3]" "ACCH3";
-raw"[cX3;H0][CX4;H2]" "ACCH2";
-raw"[cX3;H0][CX4;H1]" "ACCH";
-raw"[OH1;$([OH1][CX4H2])]" "OH(P)";
-raw"[CX4;H3][OX2;H1]" "CH3OH";
-raw"[OH2]" "H2O";
-raw"[cX3;H0;R][OX2;H1]" "ACOH";
-raw"[CX4;H3][CX3](=O)" "CH3CO";
-raw"[CX4;H2][CX3](=O)" "CH2CO";
-raw"[CX3H1](=O)" "CHO";
-raw"[CH3][CX3;H0](=[O])[O]" "CH3COO";
-raw"[CX4;H2][CX3](=[OX1])[OX2]" "CH2COO";
-raw"[CX3;H1](=[OX1])[OX2]" "HCOO";
-raw"[CH3;!R][OH0;!R]" "CH3O";
-raw"[CH2;!R][OH0;!R]" "CH2O";
-raw"[C;H1;!R][OH0;!R]" "CHO";
-raw"[CX4;H2;R][OX2;R][CX4;H2;R]" "THF";
-raw"[CX4;H3][NX3;H2]" "CH3NH2";
-raw"[CX4;H2][NX3;H2]" "CH2NH2";
-raw"[CX4;H1][NX3;H2]" "CHNH2";
-raw"[CX4;H3][NX3;H1]" "CH3NH";
-raw"[CX4;H2][NX3;H1]" "CH2NH";
-raw"[CX4;H1][NX3;H1]" "CHNH";
-raw"[CX4;H3][NX3;H0]" "CH3N";
-raw"[CX4;H2][NX3;H0]" "CH2N";
-raw"[c][NX3;H2]" "ACNH2";
-raw"[cX3H1][n][cX3H1]" "AC2H2N";
-raw"[cX3H0][n][cX3H1]" "AC2HN";
-raw"[cX3H0][n][cX3H0]" "AC2N";
-raw"[CX4;H3][CX2]#[NX1]" "CH3CN";
-raw"[CX4;H2][CX2]#[NX1]" "CH2CN";
-raw"[CX3,cX3](=[OX1])[OX2H0,oX2H0]" "COO";
-raw"[CX3](=[OX1])[O;H1]" "COOH";
-raw"[CX3;H1](=[OX1])[OX2;H1]" "HCOOH";
-raw"[CX4;H2;!$(C(Cl)(Cl))](Cl)" "CH2CL";
-raw"[CX4;H1;!$(C(Cl)(Cl))](Cl)" "CHCL";
-raw"[CX4;H0](Cl)" "CCL";
-raw"[CX4;H2;!$(C(Cl)(Cl)(Cl))](Cl)(Cl)" "CH2CL2";
-raw"[CX4;H1;!$(C(Cl)(Cl)(Cl))](Cl)(Cl)" "CHCL2";
-raw"[CX4;H0;!$(C(Cl)(Cl)(Cl))](Cl)(Cl)" "CCL2";
-raw"[CX4;H1;!$([CX4;H0](Cl)(Cl)(Cl)(Cl))](Cl)(Cl)(Cl)" "CHCL3";
-raw"[CX4;H0;!$([CX4;H0](Cl)(Cl)(Cl)(Cl))](Cl)(Cl)(Cl)" "CCL3";
-raw"[CX4;H0]([Cl])([Cl])([Cl])([Cl])" "CCL4";
-raw"[c][Cl]" "ACCL";
-raw"[CX4;H3][NX3](=[OX1])([OX1])" "CH3NO2";
-raw"[CX4;H2][NX3](=[OX1])([OX1])" "CH2NO2";
-raw"[CX4;H1][NX3](=[OX1])([OX1])" "CHNO2";
-raw"[cX3][NX3](=[OX1])([OX1])" "ACNO2";
-raw"C(=S)=S" "CS2";
-raw"[SX2H][CX4;H3]" "CH3SH";
-raw"[SX2H][CX4;H2]" "CH2SH";
-raw"c1cc(oc1)C=O" "FURFURAL";
-raw"[OX2;H1][CX4;H2][CX4;H2][OX2;H1]" "DOH";
-raw"[I]" "I";
-raw"[Br]" "BR";
-raw"[CX2;H1]#[CX2;H0]" "CH=-C";
-raw"[CX2;H0]#[CX2;H0]" "C=-C";
-raw"[SX3H0](=[OX1])([CX4;H3])[CX4;H3]" "DMSO";
-raw"[CX3;H2]=[CX3;H1][CX2;H0]#[NX1;H0]" "ACRY";
-raw"[$([Cl;H0]([C]=[C]))]" "CL-(C=C)";
-raw"[CX3;H0]=[CX3;H0]" "C=C";
-raw"[cX3][F]" "ACF";
-raw"[CX4;H3][N]([CX4;H3])[CX3;H1]=[O]" "DMF";
-raw"[NX3]([CX4;H2])([CX4;H2])[CX3;H1](=[OX1])" "HCON(CH2)2";
-raw"C(F)(F)F" "CF3";
-raw"C(F)F" "CF2";
-raw"C(F)" "CF";
-raw"[CH2;R]" "CY-CH2";
-raw"[CH1;R]" "CY-CH";
-raw"[CH0;R]" "CY-C";
-raw"[OH1;$([OH1][CX4H1])]" "OH(S)";
-raw"[OH1;$([OH1][CX4H0])]" "OH(T)";
-raw"[CX4H2;R][OX2;R]" "CY-CH2O";
-raw"[CX4H2;R][OX2;R]" "TRIOXAN";
-raw"[CX4H0][NH2]" "CNH2";
-raw"[OX1H0]=[C;R][NX3H0;R][CH3]" "NMP";
-raw"[OX1H0]=[CH0X3;R][H0;R][CH2]" "NEP";
-raw"[OX1H0;!R]=[CX3H0;R][NX3H0;R][C;!R]" "NIPP";
-raw"[OX1H0;!R]=[CH0X3;R][NX3H0;R][CH0;!R]" "NTBP";
-raw"[CX3H0](=[OX1H0])[NX3H2]" "CONH2";
-raw"[OX1H0;!R]=[CX3H0;!R][NH1X3;!R][CH3;!R]" "CONHCH3";
-raw"[CH2X4;!R][NH1X3;!R][CX3H0;!R]=[OX1H0;!R]" "CONHCH2"]
+UNIFACGroups = [GCPair(raw"[CX4;H3;!R]","CH3"), # each entry is GCPair(SMARTS pattern, group name)
+GCPair(raw"[CX4;H2;!R]","CH2"),
+GCPair(raw"[CX4;H1;!R]","CH"),
+GCPair(raw"[CX4;H0;!R]","C"),
+GCPair(raw"[CX3;H2]=[CX3;H1]","CH2=CH"),
+GCPair(raw"[CX3;H1]=[CX3;H1]","CH=CH"),
+GCPair(raw"[CX3;H2]=[CX3;H0]","CH2=C"),
+GCPair(raw"[CX3;H1]=[CX3;H0]","CH=C"),
+GCPair(raw"[cX3;H1]","ACH"),
+GCPair(raw"[cX3;H0]","AC"),
+GCPair(raw"[cX3;H0][CX4;H3]","ACCH3"),
+GCPair(raw"[cX3;H0][CX4;H2]","ACCH2"),
+GCPair(raw"[cX3;H0][CX4;H1]","ACCH"),
+GCPair(raw"[OH1;$([OH1][CX4H2])]","OH(P)"),
+GCPair(raw"[CX4;H3][OX2;H1]","CH3OH"),
+GCPair(raw"[OH2]","H2O"),
+GCPair(raw"[cX3;H0;R][OX2;H1]","ACOH"),
+GCPair(raw"[CX4;H3][CX3](=O)","CH3CO"),
+GCPair(raw"[CX4;H2][CX3](=O)","CH2CO"),
+GCPair(raw"[CX3H1](=O)","CHO"), # NOTE(review): the name "CHO" is also used by the [C;H1;!R][OH0;!R] entry below - confirm both are intended
+GCPair(raw"[CH3][CX3;H0](=[O])[O]","CH3COO"),
+GCPair(raw"[CX4;H2][CX3](=[OX1])[OX2]","CH2COO"),
+GCPair(raw"[CX3;H1](=[OX1])[OX2]","HCOO"),
+GCPair(raw"[CH3;!R][OH0;!R]","CH3O"),
+GCPair(raw"[CH2;!R][OH0;!R]","CH2O"),
+GCPair(raw"[C;H1;!R][OH0;!R]","CHO"), # NOTE(review): same name as the [CX3H1](=O) entry above - confirm
+GCPair(raw"[CX4;H2;R][OX2;R][CX4;H2;R]","THF"),
+GCPair(raw"[CX4;H3][NX3;H2]","CH3NH2"),
+GCPair(raw"[CX4;H2][NX3;H2]","CH2NH2"),
+GCPair(raw"[CX4;H1][NX3;H2]","CHNH2"),
+GCPair(raw"[CX4;H3][NX3;H1]","CH3NH"),
+GCPair(raw"[CX4;H2][NX3;H1]","CH2NH"),
+GCPair(raw"[CX4;H1][NX3;H1]","CHNH"),
+GCPair(raw"[CX4;H3][NX3;H0]","CH3N"),
+GCPair(raw"[CX4;H2][NX3;H0]","CH2N"),
+GCPair(raw"[c][NX3;H2]","ACNH2"),
+GCPair(raw"[cX3H1][n][cX3H1]","AC2H2N"),
+GCPair(raw"[cX3H0][n][cX3H1]","AC2HN"),
+GCPair(raw"[cX3H0][n][cX3H0]","AC2N"),
+GCPair(raw"[CX4;H3][CX2]#[NX1]","CH3CN"),
+GCPair(raw"[CX4;H2][CX2]#[NX1]","CH2CN"),
+GCPair(raw"[CX3,cX3](=[OX1])[OX2H0,oX2H0]","COO"),
+GCPair(raw"[CX3](=[OX1])[O;H1]","COOH"),
+GCPair(raw"[CX3;H1](=[OX1])[OX2;H1]","HCOOH"),
+GCPair(raw"[CX4;H2;!$(C(Cl)(Cl))](Cl)","CH2CL"),
+GCPair(raw"[CX4;H1;!$(C(Cl)(Cl))](Cl)","CHCL"),
+GCPair(raw"[CX4;H0](Cl)","CCL"),
+GCPair(raw"[CX4;H2;!$(C(Cl)(Cl)(Cl))](Cl)(Cl)","CH2CL2"),
+GCPair(raw"[CX4;H1;!$(C(Cl)(Cl)(Cl))](Cl)(Cl)","CHCL2"),
+GCPair(raw"[CX4;H0;!$(C(Cl)(Cl)(Cl))](Cl)(Cl)","CCL2"),
+GCPair(raw"[CX4;H1;!$([CX4;H0](Cl)(Cl)(Cl)(Cl))](Cl)(Cl)(Cl)","CHCL3"),
+GCPair(raw"[CX4;H0;!$([CX4;H0](Cl)(Cl)(Cl)(Cl))](Cl)(Cl)(Cl)","CCL3"),
+GCPair(raw"[CX4;H0]([Cl])([Cl])([Cl])([Cl])","CCL4"),
+GCPair(raw"[c][Cl]","ACCL"),
+GCPair(raw"[CX4;H3][NX3](=[OX1])([OX1])","CH3NO2"),
+GCPair(raw"[CX4;H2][NX3](=[OX1])([OX1])","CH2NO2"),
+GCPair(raw"[CX4;H1][NX3](=[OX1])([OX1])","CHNO2"),
+GCPair(raw"[cX3][NX3](=[OX1])([OX1])","ACNO2"),
+GCPair(raw"C(=S)=S","CS2"),
+GCPair(raw"[SX2H][CX4;H3]","CH3SH"),
+GCPair(raw"[SX2H][CX4;H2]","CH2SH"),
+GCPair(raw"c1cc(oc1)C=O","FURFURAL"),
+GCPair(raw"[OX2;H1][CX4;H2][CX4;H2][OX2;H1]","DOH"),
+GCPair(raw"[I]","I"),
+GCPair(raw"[Br]","BR"),
+GCPair(raw"[CX2;H1]#[CX2;H0]","CH=-C"),
+GCPair(raw"[CX2;H0]#[CX2;H0]","C=-C"),
+GCPair(raw"[SX3H0](=[OX1])([CX4;H3])[CX4;H3]","DMSO"),
+GCPair(raw"[CX3;H2]=[CX3;H1][CX2;H0]#[NX1;H0]","ACRY"),
+GCPair(raw"[$([Cl;H0]([C]=[C]))]","CL-(C=C)"),
+GCPair(raw"[CX3;H0]=[CX3;H0]","C=C"),
+GCPair(raw"[cX3][F]","ACF"),
+GCPair(raw"[CX4;H3][N]([CX4;H3])[CX3;H1]=[O]","DMF"),
+GCPair(raw"[NX3]([CX4;H2])([CX4;H2])[CX3;H1](=[OX1])","HCON(CH2)2"),
+GCPair(raw"C(F)(F)F","CF3"),
+GCPair(raw"C(F)F","CF2"),
+GCPair(raw"C(F)","CF"),
+GCPair(raw"[CH2;R]","CY-CH2"),
+GCPair(raw"[CH1;R]","CY-CH"),
+GCPair(raw"[CH0;R]","CY-C"),
+GCPair(raw"[OH1;$([OH1][CX4H1])]","OH(S)"),
+GCPair(raw"[OH1;$([OH1][CX4H0])]","OH(T)"),
+GCPair(raw"[CX4H2;R][OX2;R]","CY-CH2O"),
+GCPair(raw"[CX4H2;R][OX2;R]","TRIOXAN"), # NOTE(review): SMARTS is identical to the CY-CH2O entry above - confirm the intended TRIOXAN pattern
+GCPair(raw"[CX4H0][NH2]","CNH2"),
+GCPair(raw"[OX1H0]=[C;R][NX3H0;R][CH3]","NMP"),
+GCPair(raw"[OX1H0]=[CH0X3;R][H0;R][CH2]","NEP"), # NOTE(review): [H0;R] names no element, unlike [NX3H0;R] in NMP - confirm
+GCPair(raw"[OX1H0;!R]=[CX3H0;R][NX3H0;R][C;!R]","NIPP"),
+GCPair(raw"[OX1H0;!R]=[CH0X3;R][NX3H0;R][CH0;!R]","NTBP"),
+GCPair(raw"[CX3H0](=[OX1H0])[NX3H2]","CONH2"),
+GCPair(raw"[OX1H0;!R]=[CX3H0;!R][NH1X3;!R][CH3;!R]","CONHCH3"),
+GCPair(raw"[CH2X4;!R][NH1X3;!R][CX3H0;!R]=[OX1H0;!R]","CONHCH2")]
 
 export UNIFACGroups
\ No newline at end of file
diff --git a/src/database/gcPCSAFT.jl b/src/database/gcPCSAFT.jl
index fb543fa..e804745 100644
--- a/src/database/gcPCSAFT.jl
+++ b/src/database/gcPCSAFT.jl
@@ -1,18 +1,20 @@
-gcPCSAFTgroups = [raw"[CX4H3]" "CH3";
-raw"[!R;CX4H2]" "CH2";
-raw"[!R;CX4H]" "CH";
-raw"[!R;CX4H0]" "C";
-raw"[CX3H2]" "CH2=";
-raw"[!R;CX3H1;!$([CX3H1](=O))]" "CH=";
-raw"[$([!R;#6X3H0]);!$([!R;#6X3H0]=[#8])]" "=C<";
-raw"[CX2;H1]#[CX2;H0]" "C#CH";
-raw"[CH2;R1;$(C1CCCC1)]" "cCH2_pen";
-raw"[CH1;R1;$(C1CCCC1)]" "cCH_pen";
-raw"[CH2;R1;$(C1CCCCC1)]" "cCH2_hex";
-raw"[CH1;R1;$(C1CCCCC1)]" "cCH_hex";
-raw"[cX3;H1]" "aCH";
-raw"[cX3;H0]" "aCH";
-raw"[OX2H;!$([OX2H]-[#6]=[O]);!$([OX2H]-a)]" "OH";
-raw"[NX3H2]" "NH2"]
+gcPCSAFTGroups = [ # each entry is GCPair(SMARTS pattern, group name)
+    GCPair(raw"[CX4H3]", "CH3"),
+    GCPair(raw"[!R;CX4H2]", "CH2"),
+    GCPair(raw"[!R;CX4H]", "CH"),
+    GCPair(raw"[!R;CX4H0]", "C"),
+    GCPair(raw"[CX3H2]", "CH2="),
+    GCPair(raw"[!R;CX3H1;!$([CX3H1](=O))]", "CH="),
+    GCPair(raw"[$([!R;#6X3H0]);!$([!R;#6X3H0]=[#8])]", "=C<"),
+    GCPair(raw"[CX2;H1]#[CX2;H0]", "C#CH"),
+    GCPair(raw"[CH2;R1;$(C1CCCC1)]", "cCH2_pen"),
+    GCPair(raw"[CH1;R1;$(C1CCCC1)]", "cCH_pen"),
+    GCPair(raw"[CH2;R1;$(C1CCCCC1)]", "cCH2_hex"),
+    GCPair(raw"[CH1;R1;$(C1CCCCC1)]", "cCH_hex"),
+    GCPair(raw"[cX3;H1]", "aCH"),
+    GCPair(raw"[cX3;H0]", "aC"), # hydrogen-free aromatic carbon: previously reused the name "aCH" of the H1 pattern
+    GCPair(raw"[OX2H;!$([OX2H]-[#6]=[O]);!$([OX2H]-a)]", "OH"),
+    GCPair(raw"[NX3H2]", "NH2")
+    ]
 
-export gcPCSAFTgroups
\ No newline at end of file
+export gcPCSAFTGroups
\ No newline at end of file
diff --git a/src/group_search.jl b/src/group_search.jl
index 3d45f41..b0ff5bd 100644
--- a/src/group_search.jl
+++ b/src/group_search.jl
@@ -1,3 +1,18 @@
+
+"""
+    GCPair(smarts::String, name::String)
+
+Struct used to hold the description of a group. It contains the SMARTS string necessary to match the group within a SMILES query, and the name assigned to the group.
+
+"""
+struct GCPair
+    smarts::String # SMARTS pattern that matches the group
+    name::String # name assigned to the group
+end
+# Field accessors for `GCPair`.
+smarts(x::GCPair) = x.smarts
+name(x::GCPair) = x.name
+
 function get_groups_from_name(component::String,groups::Array{String};connectivity=false)
     res = search_chemical(component)
     if connectivity == true
@@ -9,139 +24,212 @@ function get_groups_from_name(component::String,groups::Array{String};connectivi
     end
 end
 
-function get_groups_from_smiles(smiles::String,groups::Array{String};connectivity=false)
+
+"""
+    get_grouplist(x)
+
+Should return a `Vector{GCPair}` containing the available groups for SMILES matching. A `Vector{GCPair}` is returned as-is; any other argument type needs its own method.
+
+"""
+function get_grouplist end
+get_grouplist(x::Vector{GCPair}) = x
+
+
+"""
+    get_groups_from_smiles(smiles::String,groups;connectivity = false)
+
+Matches a group list against a SMILES string. `groups` is first converted to a `Vector{GCPair}` through `get_grouplist`; the result holds the SMILES together with each matched group name and its number of occurrences.
+
+When `connectivity` is true, the result additionally contains a vector with the amount of bonds between each pair of groups.
+
+## Example
+
+```julia
+julia> get_groups_from_smiles("CCO",UNIFACGroups)
+("CCO", ["CH3" => 1, "CH2" => 1, "OH(P)" => 1])
+
+julia> get_groups_from_smiles("CCO",JobackGroups,connectivity = true)
+("CCO", ["-CH3" => 1, "-CH2-" => 1, "-OH (alcohol)" => 1], [("-CH3", "-CH2-") => 1, ("-CH2-", "-OH (alcohol)") => 1])
+```
+"""
+function get_groups_from_smiles(smiles::String,groups;connectivity = false)
+    grouplist = get_grouplist(groups) # resolve whatever was passed into a concrete group list
+    return get_groups_from_smiles(smiles,grouplist;connectivity)
+end
+
+function get_groups_from_smiles(smiles::String,groups::Vector{GCPair};connectivity=false)
     mol = get_mol(smiles)
     mol_list = get_substruct_matches(mol,mol)
-    
-    queries = get_qmol.(groups[:,1])
-    
+    #queries = get_qmol.(smarts.(groups))
     atoms = mol_list[1]["atoms"]
     group_list = []
-    group_id = []
-    group_occ_list = []
-    atoms_list = []
+    group_id = Int[]
+    group_occ_list = Int[]
+    atoms_list = Int[]
     coverage_atoms = []
     coverage_bonds = []
-    for i in 1:length(groups[:,1])
-        if !isempty(get_substruct_match(mol,queries[i]))
-            smatch = get_substruct_matches(mol,queries[i])
-            for j in 1:length(smatch)
-                if isempty(atoms_list)
-                    append!(group_list,[groups[i,1]])
-                    append!(group_id,i)
-                    append!(group_occ_list,1)
+    
+    smatches = []
+    smatches_idx = Int[]
+    possible_groups = GCPair[]
+    
+    #step 0.a, find all groups that could get a match
+    for i in 1:length(groups)
+        query_i = get_qmol(smarts(groups[i]))
+        if !isempty(get_substruct_match(mol,query_i)) 
+            push!(smatches,get_substruct_matches(mol,query_i))
+            push!(smatches_idx,i)
+            push!(possible_groups,groups[i])
+        end
+    end
+
+    #step 0.b find all overlaps within the matched groups.
+
+
+    for (idx,smatch) in pairs(smatches)
+        i = smatches_idx[idx]
+        group_i = possible_groups[idx]
+        smarts_i = smarts(group_i)
+        for j in 1:length(smatch)
+            if isempty(atoms_list)
+                push!(group_list,smarts_i)
+                push!(group_id,i)
+                push!(group_occ_list,1)
+                append!(atoms_list,smatch[j]["atoms"])
+                append!(coverage_atoms,[smatch[j]["atoms"]])
+                append!(coverage_bonds,[smatch[j]["bonds"]])
+            else
+                # If no atoms covered by this group are already covered by other groups
+                if sum(smatch[j]["atoms"] .∈ [atoms_list])==0 
                     append!(atoms_list,smatch[j]["atoms"])
-                    append!(coverage_atoms,[smatch[j]["atoms"]])
-                    append!(coverage_bonds,[smatch[j]["bonds"]])
-                else
-                    # If no atoms covered by this group are already covered by other groups
-                    if sum(smatch[j]["atoms"] .∈ [atoms_list])==0 
-                        append!(atoms_list,smatch[j]["atoms"])
-                        if !(groups[i,1] in group_list)
-                            append!(group_list,[groups[i,1]])
-                            append!(group_id,i)
-                            append!(group_occ_list,1)
+                    if !(smarts_i in group_list)
+                        push!(group_list,smarts_i)
+                        push!(group_id,i)
+                        push!(group_occ_list,1)
+                        append!(coverage_atoms,[smatch[j]["atoms"]])
+                        append!(coverage_bonds,[smatch[j]["bonds"]])
+                    else
+                        group_occ_list[end] += 1
+                        append!(coverage_atoms[end],smatch[j]["atoms"])
+                        append!(coverage_bonds[end],smatch[j]["bonds"])
+                    end
+                else 
+                    # Check which groups group i has an overlap with
+                    id = 0
+                    ng_rm = 0
+                    for k in 1:length(group_id)
+                        id += 1
+                        # Does group 1 cover any atoms of group id
+                        if sum(smatch[j]["atoms"] .∈ [coverage_atoms[id]])>0
+                            # We only care if group i covers _more_ atoms than group k
+                            if ((length(smatch[j]["atoms"])>length(coverage_atoms[id])) & 
+                                # Also make sure that group i covers all the atoms of group k 
+                                (sum(smatch[j]["atoms"] .∈ [coverage_atoms[id]]).==length(coverage_atoms[id]))) |
+                                (length(smatch[j]["bonds"])>length(coverage_bonds[id]))
+                                # find out which atoms are covered
+                                overlap_atoms = coverage_atoms[id][coverage_atoms[id] .∈ [smatch[j]["atoms"]]]
+                                id_rm = group_id[id]
+                                name_rm = group_list[id]
+                                bond_rm =  coverage_bonds[id][coverage_bonds[id] .∈ [smatch[j]["bonds"]]]
+                                filter!(e->e ∉ overlap_atoms,atoms_list)
+                                filter!(e->e ∉ overlap_atoms,coverage_atoms[id])
+                                group_occ_list[id] -= 1
+
+                                # If group k no longer covers any atoms, remove it
+                                if group_occ_list[id] == 0
+                                    filter!(e->e ≠ 0,group_occ_list)
+                                    filter!(e->!isempty(e),coverage_atoms)
+                                    deleteat!(coverage_bonds,id)
+                                    filter!(e->e≠id_rm,group_id)
+                                    filter!(e->e≠name_rm,group_list)
+                                    id -= 1 
+                                end
+                                ng_rm +=1
+                            end
+                        end    
+                    end
+                    if ng_rm > 0
+                        if !(smarts_i in group_list)
+                            push!(group_list,smarts_i)
+                            push!(group_id,i)
+                            push!(group_occ_list,1)
                             append!(coverage_atoms,[smatch[j]["atoms"]])
                             append!(coverage_bonds,[smatch[j]["bonds"]])
+                            append!(atoms_list,smatch[j]["atoms"])
                         else
                             group_occ_list[end] += 1
+                            append!(atoms_list,smatch[j]["atoms"])
                             append!(coverage_atoms[end],smatch[j]["atoms"])
                             append!(coverage_bonds[end],smatch[j]["bonds"])
                         end
-                    else 
-                        # Check which groups group i has an overlap with
-                        id = 0
-                        ng_rm = 0
-                        for k in 1:length(group_id)
-                            id += 1
-                            # Does group 1 cover any atoms of group id
-                            if sum(smatch[j]["atoms"] .∈ [coverage_atoms[id]])>0
-                                # We only care if group i covers _more_ atoms than group k
-                                if ((length(smatch[j]["atoms"])>length(coverage_atoms[id])) & 
-                                    # Also make sure that group i covers all the atoms of group k 
-                                    (sum(smatch[j]["atoms"] .∈ [coverage_atoms[id]]).==length(coverage_atoms[id]))) |
-                                    (length(smatch[j]["bonds"])>length(coverage_bonds[id]))
-                                    # find out which atoms are covered
-                                    overlap_atoms = coverage_atoms[id][coverage_atoms[id] .∈ [smatch[j]["atoms"]]]
-                                    id_rm = group_id[id]
-                                    name_rm = group_list[id]
-                                    bond_rm =  coverage_bonds[id][coverage_bonds[id] .∈ [smatch[j]["bonds"]]]
-                                    filter!(e->e∉overlap_atoms,atoms_list)
-                                    filter!(e->e∉overlap_atoms,coverage_atoms[id])
-                                    group_occ_list[id] -= 1
-    
-                                    # If group k no longer covers any atoms, remove it
-                                    if group_occ_list[id] == 0
-                                        filter!(e->e≠0,group_occ_list)
-                                        filter!(e->!isempty(e),coverage_atoms)
-                                        deleteat!(coverage_bonds,id)
-                                        filter!(e->e≠id_rm,group_id)
-                                        filter!(e->e≠name_rm,group_list)
-                                        id -= 1 
-                                    end
-                                    ng_rm +=1
-                                end
-                            end    
-                        end
-                        if ng_rm > 0
-                            if !(groups[i,1] in group_list)
-                                append!(group_list,[groups[i,1]])
-                                append!(group_id,i)
-                                append!(group_occ_list,1)
-                                append!(coverage_atoms,[smatch[j]["atoms"]])
-                                append!(coverage_bonds,[smatch[j]["bonds"]])
-                                append!(atoms_list,smatch[j]["atoms"])
-                            else
-                                group_occ_list[end] += 1
-                                append!(atoms_list,smatch[j]["atoms"])
-                                append!(coverage_atoms[end],smatch[j]["atoms"])
-                                append!(coverage_bonds[end],smatch[j]["bonds"])
-                            end
-                        end
                     end
                 end
             end
         end
     end
     
-    if !(sum(atoms_list .∈ [atoms])==length(atoms))
-        error("Could not find all groups for "*smiles)
-    end
+    #if !(sum(atoms_list .∈ [atoms])==length(atoms))
+    #    error("Could not find all groups for "*smiles)
+    #end
 
-    if connectivity == true
-        connectivity = get_connectivity(mol,group_id,groups)
-        return (smiles,[groups[group_id[i],2] => group_occ_list[i] for i in 1:length(group_id)],connectivity)
+    gcpairs = [name(groups[group_id[i]]) => group_occ_list[i] for i in 1:length(group_id)]
+    if connectivity
+        return (smiles,gcpairs,get_connectivity(mol,group_id,groups))
     else
-        return (smiles,[groups[group_id[i],2] => group_occ_list[i] for i in 1:length(group_id)])
+        return (smiles,gcpairs)
     end
 end
 
-function get_connectivity(mol,group_id,groups)
+function get_connectivity(mol,group_id,groups,connectivity = false) # NOTE(review): the `connectivity` argument is never read; the name is rebound to the result vector below
+
     ngroups = length(group_id)
     A = zeros(ngroups,ngroups)
-    connectivity = []
+    connectivity = Pair{NTuple{2,String},Int}[] # typed result: (name_a, name_b) => match count
     for i in 1:ngroups
-        smart1 = groups[group_id[i],1]
-        smart2 = groups[group_id[i],1]
+        gci = groups[group_id[i]] # i-th selected group entry
+        smart1 = smarts(gci) # SMARTS of group i, reused for every pair (i, j) below
+        smart2 = smarts(gci) # same group on both sides: the first query pairs group i with itself
         querie = get_qmol(smart1*smart2)
         smatch = get_substruct_matches(mol,querie)
-        
+        name_i = name(gci) # label used as the first key element for all pairs of this row
         A[i,i] = length(smatch)
         if A[i,i]!=0
-            append!(connectivity,[(groups[group_id[i],2],groups[group_id[i],2])=>A[i,i]])
+            append!(connectivity,[(name_i,name_i)=>Int(A[i,i])]) # Int(...) because A is a Float64 matrix (built with zeros)
         end
         
         for j in i+1:ngroups
-            smart2 = groups[group_id[j],1]
+            gcj = groups[group_id[j]] # only j > i is visited, so each unordered pair is queried once
+            smart2 = smarts(gcj) # smart2 now refers to group j
             querie = get_qmol(smart1*smart2)
             smatch = get_substruct_matches(mol,querie)
             A[i,j] = length(smatch)
+            name_j = name(gcj) # label used as the second key element
             if A[i,j]!=0
-                append!(connectivity,[(groups[group_id[i],2],groups[group_id[j],2])=>A[i,j]])
+                append!(connectivity,[(name_i,name_j)=>Int(A[i,j])]) # pairs with zero matches are omitted from the result
             end
         end
     end
     return connectivity
 end
 
+
+
+#TODO: move this to Clapeyron?
+"""
+    @gcstring_str(str)
+
+    Given a string of the form "Group1:n1;Group2:n2", returns ["Group1" => n1,"Group2" => n2]
+
+"""
+macro gcstring_str(str) # the `_str` suffix enables the gcstring"..." literal syntax
+    gcpairs = split(str,';') # one entry per ';'-separated item
+    res = Pair{String,Int}[] # accumulates name => count pairs in input order
+    for gci in gcpairs
+        gc,_ni = split_2(gci,':') # split_2 is defined outside this view; presumably yields the parts before and after ':' (confirm)
+        ni = parse(Int,_ni) # throws if the count part is not a valid integer
+        push!(res,gc => ni) # gc is converted to String by the vector's element type
+    end
+    res # the vector built here is the macro's return value
+end
+
 export get_groups_from_name, get_groups_from_smiles
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
new file mode 100644
index 0000000..4699b6d
--- /dev/null
+++ b/test/runtests.jl
@@ -0,0 +1,20 @@
+using GCIdentifier
+using Test
+using GCIdentifier: @gcstring_str
+
+function test_gcmatch(groups,smiles,result) # checks that the groups found for `smiles` equal `result`
+    obtained = Set(get_groups_from_smiles(smiles,groups)[2]) # second element of the returned value; presumably the group => count pairs (confirm)
+    evaluated = Set(result)
+    @test isequal(obtained,evaluated) # both sides are Sets, so the order of the pairs is ignored
+end
+
+test_gcmatch(groups) = (smiles,result) -> test_gcmatch(groups,smiles,result) # one-argument method: fixes `groups` and returns a (smiles,result) checker
+
+@testset "UNIFAC" begin
+    #http://www.aim.env.uea.ac.uk/aim/info/UNIFACgroups.html
+    unifac = test_gcmatch(UNIFACGroups) # checker closure with the group set fixed to UNIFACGroups
+    unifac("CC",gcstring"CH3:2") # each call: SMILES input, then the expected group => count pairs
+    unifac("CCCC",gcstring"CH3:2;CH2:2")
+    unifac("CC(C)C",gcstring"CH3:3;CH:1")
+    unifac("CC(C)(C)C",gcstring"CH3:4;C:1")
+end
\ No newline at end of file