diff --git a/R/Cluster.R b/R/Cluster.R
index 1b11203..1e528e2 100644
--- a/R/Cluster.R
+++ b/R/Cluster.R
@@ -176,6 +176,7 @@ example_path <- function(file = NULL) {
   return(path)
 }
 
+
 #' Read Count
 #'
 #' @export
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 2ed5e48..48433bc 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -9,6 +9,10 @@ WriteColumnFile <- function(xPosition, yPosition, data, cutoff, countTable, save
     invisible(.Call('_clustur_WriteColumnFile', PACKAGE = 'clustur', xPosition, yPosition, data, cutoff, countTable, saveLocation))
 }
 
+DetermineIfPhylipOrColumnFile <- function(filePath) {
+    .Call('_clustur_DetermineIfPhylipOrColumnFile', PACKAGE = 'clustur', filePath)
+}
+
 ProcessDistanceFiles <- function(filePath, countTable, cutoff, isSim) {
     .Call('_clustur_ProcessDistanceFiles', PACKAGE = 'clustur', filePath, countTable, cutoff, isSim)
 }
diff --git a/src/Adapters/CountTableAdapter.h b/src/Adapters/CountTableAdapter.h
index f8bcfdc..58ae96a 100644
--- a/src/Adapters/CountTableAdapter.h
+++ b/src/Adapters/CountTableAdapter.h
@@ -27,11 +27,13 @@ class CountTableAdapter {
     Rcpp::DataFrame GetCountTable() const {return countTable;}
     Rcpp::DataFrame ReCreateDataFrame() const;
 private:
+    void CreateNameToIndex();
     struct IndexAbundancePair {
         int groupIndex;
         int sequenceIndex;
         double abundance;
     };
+    std::unordered_map<std::string, size_t> nameToRowIndex;
     std::vector<std::string> sampleNames;
     std::unordered_map<std::string, std::vector<double>> dataFrameMap;
     std::vector<std::string> groups;
diff --git a/src/CountTableAdapter.cpp b/src/CountTableAdapter.cpp
index e48ee2c..9a2968e 100644
--- a/src/CountTableAdapter.cpp
+++ b/src/CountTableAdapter.cpp
@@ -29,6 +29,7 @@ bool CountTableAdapter::CreateDataFrameMap(const Rcpp::DataFrame &countTable) {
     // We only want the actual group names. so everything after
     groups.insert(groups.end(), columnNames.begin() + 2, columnNames.end());
     this->countTable = countTable;
+    CreateNameToIndex();
     return true;
 }
 
@@ -86,29 +87,27 @@ bool CountTableAdapter::CreateDataFrameMapFromSparseCountTable(const Rcpp::DataF
     dataFrameMap = data;
     // In a count table, the first to columns are the sequence and the total abundance.
     // We only want the actual group names. so everything after
-
     this->countTable = countTable;
+    CreateNameToIndex();
     return true;
 
 }
 
 double CountTableAdapter::FindAbundanceBasedOnGroup(const std::string &group, const std::string &sampleName) const {
-    if (std::find(groups.begin(), groups.end(), group) == groups.end())
-        return -1; //Not Found, may need to throw and execption...
-    if (std::find(sampleNames.begin(), sampleNames.end(), sampleName) == sampleNames.end())
-        return -1; //Not Found, may need to throw and execption...
-    const std::vector<double> groupCol = GetColumnByName(group);
-    const long index = std::distance(sampleNames.begin(), std::find(sampleNames.begin(),
-        sampleNames.end(), sampleName));
-    return dataFrameMap.at(group)[index];
+    // Returns -1 when the sequence or the group is unknown; one hash lookup each, no column copy.
+    const auto row = nameToRowIndex.find(sampleName);
+    const auto column = dataFrameMap.find(group);
+    if (row == nameToRowIndex.end() || column == dataFrameMap.end())
+        return -1;
+    return column->second[row->second];
 }
 
 double CountTableAdapter::FindTotalAbundance(const std::string &sampleName) const {
-    if(std::find(sampleNames.begin(), sampleNames.end(), sampleName) == sampleNames.end())
-        return -1; // Not found
-    const long index = std::distance(sampleNames.begin(), std::find(sampleNames.begin(),
-      sampleNames.end(), sampleName));
-    return dataFrameMap.at("total")[index];
+    // Returns the "total" column entry for sampleName, or -1 when the name has no row.
+    const auto row = nameToRowIndex.find(sampleName);
+    if (row == nameToRowIndex.end())
+        return -1;
+    return dataFrameMap.at("total")[row->second];
 }
 
 std::string CountTableAdapter::GetNameByIndex(const int index) const {
@@ -148,6 +147,13 @@ Rcpp::DataFrame CountTableAdapter::ReCreateDataFrame() const {
     return countTable;
 }
 
+// Rebuilds nameToRowIndex from sampleNames; a repeated name keeps its first row.
+void CountTableAdapter::CreateNameToIndex() {
+    nameToRowIndex.clear();
+    for (size_t row = 0; row < sampleNames.size(); ++row)
+        nameToRowIndex.emplace(sampleNames[row], row);
+}
+
 // Gets every column but the first column (the sequence names)
 std::vector<double> CountTableAdapter::GetColumnByName(const std::string &name) const {
     if (dataFrameMap.find(name) != dataFrameMap.end())
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 2115726..c7a340b 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -40,16 +40,27 @@ BEGIN_RCPP
     return R_NilValue;
 END_RCPP
 }
+// DetermineIfPhylipOrColumnFile
+bool DetermineIfPhylipOrColumnFile(const std::string& filePath);
+RcppExport SEXP _clustur_DetermineIfPhylipOrColumnFile(SEXP filePathSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const std::string& >::type filePath(filePathSEXP);
+    rcpp_result_gen = Rcpp::wrap(DetermineIfPhylipOrColumnFile(filePath));
+    return rcpp_result_gen;
+END_RCPP
+}
 // ProcessDistanceFiles
-SEXP ProcessDistanceFiles(const std::string& filePath, const Rcpp::DataFrame& countTable, double cutoff, bool isSim);
+SEXP ProcessDistanceFiles(const std::string& filePath, const Rcpp::DataFrame& countTable, const double cutoff, const bool isSim);
 RcppExport SEXP _clustur_ProcessDistanceFiles(SEXP filePathSEXP, SEXP countTableSEXP, SEXP cutoffSEXP, SEXP isSimSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
     Rcpp::traits::input_parameter< const std::string& >::type filePath(filePathSEXP);
     Rcpp::traits::input_parameter< const Rcpp::DataFrame& >::type countTable(countTableSEXP);
-    Rcpp::traits::input_parameter< double >::type cutoff(cutoffSEXP);
-    Rcpp::traits::input_parameter< bool >::type isSim(isSimSEXP);
+    Rcpp::traits::input_parameter< const double >::type cutoff(cutoffSEXP);
+    Rcpp::traits::input_parameter< const bool >::type isSim(isSimSEXP);
     rcpp_result_gen = Rcpp::wrap(ProcessDistanceFiles(filePath, countTable, cutoff, isSim));
     return rcpp_result_gen;
 END_RCPP
@@ -132,6 +143,7 @@ RcppExport SEXP run_testthat_tests(SEXP);
 static const R_CallMethodDef CallEntries[] = {
     {"_clustur_WritePhylipFile", (DL_FUNC) &_clustur_WritePhylipFile, 6},
     {"_clustur_WriteColumnFile", (DL_FUNC) &_clustur_WriteColumnFile, 6},
+    {"_clustur_DetermineIfPhylipOrColumnFile", (DL_FUNC) &_clustur_DetermineIfPhylipOrColumnFile, 1},
     {"_clustur_ProcessDistanceFiles", (DL_FUNC) &_clustur_ProcessDistanceFiles, 4},
     {"_clustur_ProcessSparseMatrix", (DL_FUNC) &_clustur_ProcessSparseMatrix, 6},
     {"_clustur_GetDistanceDataFrame", (DL_FUNC) &_clustur_GetDistanceDataFrame, 1},
diff --git a/src/main.cpp b/src/main.cpp
index d940c91..9fe75c1 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -13,7 +13,6 @@
 #include "MothurDependencies/ColumnDistanceMatrixReader.h"
 #include "MothurDependencies/SharedFileBuilder.h"
 #include "Adapters/DistanceFileReader.h"
-#include "Tests/OptimatrixAdapterTestFixture.h"
 #if DEBUG_RCPP
 #include <Rcpp.h>
 #include <cctype>
@@ -53,9 +52,8 @@ Rcpp::DataFrame CreateSharedDataFrame(const CountTableAdapter& countTable, const
 }
 
 
-
 //[[Rcpp::export]]
-SEXP ProcessDistanceFiles(const std::string& filePath, const Rcpp::DataFrame& countTable, double cutoff, bool isSim) {
+bool DetermineIfPhylipOrColumnFile(const std::string& filePath) {
     std::fstream data(filePath);
     std::unordered_map<bool, std::string> map;
     map[true] = "This is a phylip file. Processing now...";
@@ -77,19 +75,26 @@ SEXP ProcessDistanceFiles(const std::string& filePath, const Rcpp::DataFrame& co
         isPhylip = false;
     Rcpp::Rcout << map[isPhylip] << "\n";
     data.close();
+    return isPhylip;
+}
+
+//[[Rcpp::export]]
+SEXP ProcessDistanceFiles(const std::string& filePath, const Rcpp::DataFrame& countTable, const double cutoff,
+    const bool isSim) {
+    const bool isPhylip = DetermineIfPhylipOrColumnFile(filePath);
 
     CountTableAdapter adapter;
     adapter.CreateDataFrameMap(countTable);
     if(isPhylip) {
         DistanceFileReader* read = new ReadPhylipMatrix(cutoff, isSim);
-        std::vector<RowData> rowDataMatrix = read->ReadToRowData(filePath);
+        const std::vector<RowData> rowDataMatrix = read->ReadToRowData(filePath);
         read->SetCountTable(adapter);
         read->SetRowDataMatrix(rowDataMatrix);
         read->ReadRowDataMatrix(rowDataMatrix);
         return Rcpp::XPtr<DistanceFileReader>(read);
     }
     DistanceFileReader* read = new ColumnDistanceMatrixReader(cutoff, isSim);
-    std::vector<RowData> rowDataMatrix = read->ReadToRowData(adapter, filePath);
+    const std::vector<RowData> rowDataMatrix = read->ReadToRowData(adapter, filePath);
     read->SetCountTable(adapter);
     read->SetRowDataMatrix(rowDataMatrix);
     read->ReadRowDataMatrix(rowDataMatrix);
diff --git a/tests/testthat/extdata/sparse_matrix_data.RDS b/tests/testthat/extdata/sparse_matrix_data.RDS
deleted file mode 100644
index 572b262..0000000
Binary files a/tests/testthat/extdata/sparse_matrix_data.RDS and /dev/null differ
diff --git a/tests/testthat/test-test-opticluster.R b/tests/testthat/test-test-opticluster.R
index c89b93a..dfc9582 100644
--- a/tests/testthat/test-test-opticluster.R
+++ b/tests/testthat/test-test-opticluster.R
@@ -140,6 +140,16 @@ test_that("Read dist can read column and phylip files", {
   expect_true(nrow(get_distance_data_frame(distance_data_phylip)) == 9604)
 })
 
+
+test_that("We can determine if a file is phylip or not", {
+  column_file <- test_path("extdata", "amazon_column.dist")
+  column_result <- DetermineIfPhylipOrColumnFile(column_file)
+  phylip_file <- test_path("extdata", "amazon_phylip.dist")
+  phylip_result <- DetermineIfPhylipOrColumnFile(phylip_file)
+  expect_true(phylip_result)
+  expect_false(column_result)
+})
+
 test_that("Validate Count Table returns a valid count table", {
   count_table <- read.delim(test_path("extdata", "amazon.count_table"))
   validated_count_table <- validate_count_table(count_table)