Prerequisite
Environment
mmdetection 3.3.0, CUDA 12.4, PyTorch 2.5.0, mmengine 0.10.6. During training, the following error is reported:
Traceback (most recent call last):
  File "D:\SOFT\miniconda\envs\open-mmlab\Lib\multiprocessing\queues.py", line 246, in _feed
    send_bytes(obj)
  File "D:\SOFT\miniconda\envs\open-mmlab\Lib\multiprocessing\connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "D:\SOFT\miniconda\envs\open-mmlab\Lib\multiprocessing\connection.py", line 289, in _send_bytes
    ov, err = _winapi.WriteFile(self._handle, buf, overlapped=True)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Traceback (most recent call last):
  File "D:\python\mmdetection-main\tools\train.py", line 121, in <module>
    main()
  File "D:\python\mmdetection-main\tools\train.py", line 117, in main
OSError: [WinError 87] The parameter is incorrect.
Why does this happen? The error is raised from runner.run(). My system configuration is below.
System environment:
sys.platform: win32
Python: 3.11.11 | packaged by Anaconda, Inc. | (main, Dec 11 2024, 16:34:19) [MSC v.1929 64 bit (AMD64)]
CUDA available: True
MUSA available: False
numpy_random_seed: 400595520
GPU 0: NVIDIA GeForce RTX 3090
CUDA_HOME: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4
NVCC: Cuda compilation tools, release 12.4, V12.4.131
MSVC: Microsoft (R) C/C++ Optimizing Compiler Version 19.42.34436 for x64
GCC: n/a
PyTorch: 2.5.0
PyTorch compiling details: PyTorch built with:
C++ Version: 201703
MSVC 192930154
Intel(R) oneAPI Math Kernel Library Version 2023.1-Product Build 20230303 for Intel(R) 64 architecture applications
Intel(R) MKL-DNN v3.5.3 (Git Hash 66f0cb9eb66affd2da3bf5f8d897376f04aae6af)
OpenMP 2019
LAPACK is enabled (usually provided by MKL)
CPU capability usage: AVX2
CUDA Runtime 12.4
NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90
CuDNN 90.1
Magma 2.5.4
Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=12.4, CUDNN_VERSION=9.1.0, CXX_COMPILER=C:/cb/pytorch_1000000000000/work/tmp_bin/sccache-cl.exe, CXX_FLAGS=/DWIN32 /D_WINDOWS /GR /EHsc /Zc:__cplusplus /bigobj /FS /utf-8 -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE /wd4624 /wd4068 /wd4067 /wd4267 /wd4661 /wd4717 /wd4244 /wd4804 /wd4273, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.5.0, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=OFF, USE_NNPACK=OFF, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF,
TorchVision: 0.20.0
OpenCV: 4.11.0
MMEngine: 0.10.6
Runtime environment:
cudnn_benchmark: False
mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0}
dist_cfg: {'backend': 'nccl'}
seed: 400595520
Distributed launcher: none
Distributed training: False
GPU number: 1
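Side note on the runtime environment above: mp_start_method is 'fork' and the distributed backend is 'nccl', neither of which is available on win32. Whether this is actually related to the WinError 87 is not confirmed; the snippet below is only a sketch of a config override that forces Windows-compatible values, under that assumption.

# Assumption, not a confirmed fix: Windows supports neither the 'fork' start
# method nor the NCCL backend, so switch both to Windows-compatible values.
env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(mp_start_method='spawn', opencv_num_threads=0),
    dist_cfg=dict(backend='gloo'),
)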
Reproduces the problem - code sample
train.py from mmdetection 3.3:
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp

from mmengine.config import Config, DictAction
from mmengine.registry import RUNNERS
from mmengine.runner import Runner
from mmdet.utils import setup_cache_size_limit_of_dynamo


def parse_args():
    parser = argparse.ArgumentParser(description='Train a detector')
    parser.add_argument('config', help='train config file path')
    parser.add_argument('--work-dir', help='the dir to save logs and models')
    parser.add_argument(
        '--amp',
        action='store_true',
        default=False,
        help='enable automatic-mixed-precision training')
    parser.add_argument(
        '--auto-scale-lr',
        action='store_true',
        help='enable automatically scaling LR.')
    parser.add_argument(
        '--resume',
        nargs='?',
        type=str,
        const='auto',
        help='If specify checkpoint path, resume from it, while if not '
        'specify, try to auto resume from the latest checkpoint '
        'in the work directory.')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    # When using PyTorch version >= 2.0.0, torch.distributed.launch
    # will pass the --local-rank parameter to tools/train.py instead
    # of --local_rank.
    parser.add_argument('--local_rank', '--local-rank', type=int, default=0)
    args = parser.parse_args()
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(args.local_rank)
    return args


def main():
    args = parse_args()

    # Reduce the number of repeated compilations and improve
    # training speed.
    setup_cache_size_limit_of_dynamo()

    # load config
    cfg = Config.fromfile(args.config)
    cfg.launcher = args.launcher
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])

    # enable automatic-mixed-precision training
    if args.amp is True:
        cfg.optim_wrapper.type = 'AmpOptimWrapper'
        cfg.optim_wrapper.loss_scale = 'dynamic'

    # enable automatically scaling LR
    if args.auto_scale_lr:
        if 'auto_scale_lr' in cfg and \
                'enable' in cfg.auto_scale_lr and \
                'base_batch_size' in cfg.auto_scale_lr:
            cfg.auto_scale_lr.enable = True
        else:
            raise RuntimeError('Can not find "auto_scale_lr" or '
                               '"auto_scale_lr.enable" or '
                               '"auto_scale_lr.base_batch_size" in your'
                               ' configuration file.')

    # resume is determined in this priority: resume from > auto_resume
    if args.resume == 'auto':
        cfg.resume = True
        cfg.load_from = None
    elif args.resume is not None:
        cfg.resume = True
        cfg.load_from = args.resume

    # build the runner from config
    if 'runner_type' not in cfg:
        # build the default runner
        runner = Runner.from_cfg(cfg)
    else:
        # build customized runner from the registry
        # if 'runner_type' is set in the cfg
        runner = RUNNERS.build(cfg)

    # start training
    runner.train()


if __name__ == '__main__':
    main()
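For reference, the dotted key=value pairs accepted by --cfg-options (described in the help string above) are merged into the config via Config.merge_from_dict. The sketch below is illustrative only; the keys and values are placeholders, not taken from this report.

from mmengine.config import Config

# Illustrative only: equivalent to passing
# --cfg-options train_dataloader.num_workers=0 on the command line.
cfg = Config(dict(train_dataloader=dict(batch_size=8, num_workers=2)))
cfg.merge_from_dict({'train_dataloader.num_workers': 0})
print(cfg.train_dataloader.num_workers)  # -> 0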
Reproduces the problem - command or script
# The new config inherits the base config and makes the necessary changes.
_base_ = '../cascade-mask-rcnn_x101-64x4d_fpn_20e_coco.py'

# We also need to change num_classes in the heads to match the number of
# classes in the dataset.
model = dict(
    roi_head=dict(
        bbox_head=[
            dict(type='Shared2FCBBoxHead', num_classes=1),
            dict(type='Shared2FCBBoxHead', num_classes=1),
            dict(type='Shared2FCBBoxHead', num_classes=1)
        ],
        mask_head=dict(type='FCNMaskHead', num_classes=1)),
    backbone=dict(init_cfg=None),
)
# Modify the dataset-related settings.
data_root = 'data/buildings/tiff/'
metainfo = {
'classes': ('building', ),
'palette': [
(220, 20, 60),
]
}
train_dataloader = dict(
batch_size=8,
dataset=dict(
data_root=data_root,
metainfo=metainfo,
ann_file='train/coco_annotations.json',
data_prefix=dict(img='train/')),
num_workers=2)
train_pipeline = [
    dict(backend_args=None, type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    # Resize the images; keep_ratio=True keeps the aspect ratio and
    # scale is the target size.
    dict(keep_ratio=True, scale=(1333, 800), type='Resize'),  # or (512, 512)
    dict(prob=0.5, type='RandomFlip'),
    dict(type='PackDetInputs'),
]
val_dataloader = dict(
    batch_size=8,  # the default is 1
dataset=dict(
data_root=data_root,
metainfo=metainfo,
ann_file='val/coco_annotations.json',
data_prefix=dict(img='val/')),
num_workers=2)
val_evaluator = dict(
    dataset=dict(
        ann_file='data/balloon/val/val.json',
    ),
    type='CocoMetric',
    metric=['bbox', 'segm'],
    format_only=False)
test_dataloader = dict(
    batch_size=8,  # the default is 1
dataset=dict(
data_root=data_root,
metainfo=metainfo,
ann_file='test/coco_annotations.json',
data_prefix=dict(img='test/')),
num_workers=2)
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001))
# Modify the evaluator-related settings.
val_evaluator = dict(ann_file=data_root + 'val/coco_annotations.json')
test_evaluator = dict(ann_file=data_root + 'test/coco_annotations.json')
log_config = dict(
    interval=10,  # log every 10 iterations
    hooks=[
        dict(type='TextLoggerHook'),  # text log
        dict(type='TensorboardLoggerHook'),  # TensorBoard log
    ])

# Use pretrained Cascade Mask R-CNN weights for initialization, which can
# improve model performance.
load_from = 'cascade_mask_rcnn_x101_64x4d_fpn_20e_coco_20200512_161033-bdb5126a.pth'
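One possible way to narrow down where the failure occurs (an assumed check, not something attempted in the original report; the config path below is a placeholder) is to build only the train dataloader from this config and pull a single batch, which exercises the dataset and worker setup without starting the full training loop:

from mmengine.config import Config
from mmengine.runner import Runner
from mmdet.utils import register_all_modules

register_all_modules()  # register mmdet datasets and transforms

cfg = Config.fromfile('my_buildings_config.py')  # placeholder path
cfg.train_dataloader.num_workers = 0  # 0 disables worker subprocesses
loader = Runner.build_dataloader(cfg.train_dataloader)
batch = next(iter(loader))
print(type(batch))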
Reproduces the problem - error message
Traceback (most recent call last):
  File "D:\SOFT\miniconda\envs\open-mmlab\Lib\multiprocessing\queues.py", line 246, in _feed
    send_bytes(obj)
  File "D:\SOFT\miniconda\envs\open-mmlab\Lib\multiprocessing\connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "D:\SOFT\miniconda\envs\open-mmlab\Lib\multiprocessing\connection.py", line 289, in _send_bytes
    ov, err = _winapi.WriteFile(self._handle, buf, overlapped=True)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Traceback (most recent call last):
  File "D:\python\mmdetection-main\tools\train.py", line 121, in <module>
    main()
  File "D:\python\mmdetection-main\tools\train.py", line 117, in main
OSError: [WinError 87] The parameter is incorrect.
Additional information
The same OSError: [WinError 87] traceback shown above is raised. Could this be a version problem with some component? I have already checked the CUDA and PyTorch versions and they are fine.