CogStack · mart-r · Jan 7, 2025 · Dec 11, 2024 · Dec 11, 2024 · Dec 11, 2024
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -12,7 +12,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [ '3.9', '3.10', '3.11' ]
+        python-version: [ '3.9', '3.10', '3.11', '3.12' ]
       max-parallel: 4
 
     steps:

diff --git a/install_requires.txt b/install_requires.txt
@@ -1,11 +1,11 @@
-'numpy>=1.22.0,<1.26.0'  # 1.22.0 is first to support python 3.11; post 1.26.0 there's issues with scipy
+'numpy>=1.26.0,<2.0.0'  # 1.26 is first to support 3.12; cannot support numpy2 due to spacy
 'pandas>=1.4.2' # first to support 3.11
 'gensim>=4.3.0,<5.0.0'  # 5.3.0 is first to support 3.11; avoid major version bump
-'spacy>=3.6.0,<3.8.0'  # 3.8 only supports numpy2 which we can't use due to other dependencies
-'scipy~=1.9.2'  # 1.9.2 is first to support 3.11
+'spacy>=3.6.0,<4.0.0'  # avoid major bump
+'scipy>=1.9.2,<1.14.0'  # 1.9.2 is first to support 3.11; 1.14.0 does not support 3.9
 'transformers>=4.34.0,<5.0.0'  # avoid major version bump
 'accelerate>=0.23.0' # required by Trainer class in de-id
-'torch>=1.13.0,<3.0.0' # 1.13 is first to support 3.11; 2.1.2 has been compatible, but avoid major 3.0.0 for now
+'torch>=2.4.0,<3.0.0' # 2.4.0 is first to support 3.12; avoid major 3.0.0 for now
 'tqdm>=4.27'
 'scikit-learn>=1.1.3,<2.0.0'  # 1.1.3 is first to supporrt 3.11; avoid major version bump
 'dill>=0.3.6,<1.0.0' # stuff saved in 0.3.6/0.3.7 is not always compatible with 0.3.4/0.3.5; avoid major bump

diff --git a/medcat/cdb.py b/medcat/cdb.py
@@ -818,15 +818,17 @@ def most_similar(self,
             sim_data['sim_vectors_cuis'] = np.array(sim_vectors_cuis)
 
         # Select appropriate concepts
-        type_id_inds = np.arange(0, len(sim_data['sim_vectors_type_ids']))
+        type_id_inds = np.arange(0, len(sim_data['sim_vectors_type_ids']), dtype=np.int32)
         if len(type_id_filter) > 0:
-            type_id_inds = np.array([], dtype=np.int32)
+            # NOTE: change in numpy 2
+            type_id_inds = np.array([], dtype=np.int32)  # type: ignore
             for type_id in type_id_filter:
                 type_id_inds = np.union1d(np.array([ind for ind, type_ids in enumerate(sim_data['sim_vectors_type_ids']) if type_id in type_ids]),
                         type_id_inds)
         cnt_inds = np.arange(0, len(sim_data['sim_vectors_counts']))
         if min_cnt > 0:
-            cnt_inds = np.where(sim_data['sim_vectors_counts'] >= min_cnt)[0]
+            # NOTE: change in numpy 2
+            cnt_inds = np.where(sim_data['sim_vectors_counts'] >= min_cnt)[0]  # type: ignore
         # Intersect cnt and type_id
         inds = np.intersect1d(type_id_inds, cnt_inds)
 

diff --git a/medcat/vocab.py b/medcat/vocab.py
@@ -1,6 +1,6 @@
 import numpy as np
 import pickle
-from typing import Optional, List, Dict
+from typing import Optional, List, Dict, cast
 import logging
 
 
@@ -216,7 +216,8 @@ def get_negative_samples(self, n: int = 6, ignore_punct_and_num: bool = False) -
         if len(self.cum_probs) == 0:
             self.make_unigram_table()
         random_vals = np.random.rand(n)
-        inds = np.searchsorted(self.cum_probs, random_vals).tolist()
+        # NOTE: there's a change in numpy
+        inds = cast(List[int], np.searchsorted(self.cum_probs, random_vals).tolist())
 
         if ignore_punct_and_num:
             # Do not return anything that does not have letters in it

diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -1,5 +1,5 @@
 .
-https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.6.0/en_core_web_md-3.6.0-py3-none-any.whl
+https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1-py3-none-any.whl
 flake8~=7.0.0
 darglint~=1.8.1
 mypy>=1.7.0,<1.12.0

diff --git a/requirements.txt b/requirements.txt
@@ -1,2 +1,2 @@
 .
-https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.6.0/en_core_web_md-3.6.0-py3-none-any.whl
+https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.2/en_core_web_md-3.7.2-py3-none-any.whl
diff --git a/setup.py b/setup.py
@@ -36,6 +36,7 @@
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
         "License :: OSI Approved :: MIT License",
         "Operating System :: OS Independent",
     ],

diff --git a/tests/utils/test_memory_optimiser.py b/tests/utils/test_memory_optimiser.py
@@ -254,8 +254,8 @@ def test_optimisation_round_trip_cuis(self):
             with self.subTest(f'{name}'):
                 self.assertIsInstance(before, dict)
                 self.assertIsInstance(after, dict)
-                self.assertEquals(len(before), len(after))
-                self.assertEquals(before, after)
+                self.assertEqual(len(before), len(after))
+                self.assertEqual(before, after)
 
     def test_optimisation_round_trip_snames(self):
         snames_before = self.cdb.snames
@@ -264,8 +264,8 @@ def test_optimisation_round_trip_snames(self):
         snames_after = self.cdb.snames
         self.assertIsInstance(snames_before, set)
         self.assertIsInstance(snames_after, set)
-        self.assertEquals(len(snames_before), len(snames_after))
-        self.assertEquals(snames_before, snames_after)
+        self.assertEqual(len(snames_before), len(snames_after))
+        self.assertEqual(snames_before, snames_after)
 
     def test_optimisation_round_trip_dirty(self):
         memory_optimiser.perform_optimisation(self.cdb)