diff --git a/setup.py b/setup.py index c19390b..ac4208c 100755 --- a/setup.py +++ b/setup.py @@ -66,7 +66,7 @@ setup(name='pytorch_cgx', packages=['cgx_utils'], - version='0.1.0', + version='0.1.1', description='pytorch extension adding a backend ' 'supporting allreduce of quantized buffers.', long_description=long_description, @@ -74,7 +74,7 @@ author='Ilia Markov', author_email='ilia.markov@ist.ac.at', url='https://github.com/IST-DASLab/torch_cgx/', - download_url="https://github.com/IST-DASLab/torch_cgx/archive/refs/tags/v0.1.0.tar.gz", + download_url="https://github.com/IST-DASLab/torch_cgx/archive/refs/tags/v0.1.1.tar.gz", ext_modules=[cpp_extension.CUDAExtension('torch_cgx', sources=src, include_dirs=include_dirs, extra_compile_args={'cxx': cxx_compile_args, 'nvcc': nvcc_compile_args}, diff --git a/src/common/compression/cuda_compression_operations.cu b/src/common/compression/cuda_compression_operations.cu index 00d19ae..2694d62 100755 --- a/src/common/compression/cuda_compression_operations.cu +++ b/src/common/compression/cuda_compression_operations.cu @@ -794,8 +794,8 @@ void CUDA_dequantize_maxmin(const unsigned char *input_data, int num_threads = THREADS_PER_BLOCK_DECOMPRESS; int num_blocks = BLOCKS_PER_GRID((num_elems + PACK_SIZE - 1) / PACK_SIZE, num_threads); -// DEQUANTIZE1(input, meta_info, output, num_elems, bucket_size, bits, -// stream, num_blocks, num_threads); + DEQUANTIZE1(input, meta_info, output, num_elems, bucket_size, bits, + stream, num_blocks, num_threads); } /* Functions declarations */ diff --git a/test/test_qmpi.py b/test/test_cgx.py similarity index 100% rename from test/test_qmpi.py rename to test/test_cgx.py