I used the browse_dataset.py script to check that the training data looks good (no excessive distortion, and the bboxes are located and sized properly). As far as I can see, the same resize is used for train, validation and test: no padding and keep_ratio=False.
# --- Runtime environment -------------------------------------------------
dist_params = dict(backend='nccl')
log_level = 'INFO'
gpu_ids = [0]
workflow = [('train', 1)]

# --- Checkpoint loading: nothing is loaded, resumed or auto-resumed -------
load_from = None
resume_from = None
auto_resume = False

# --- Schedule / input-size constants -------------------------------------
# NOTE(review): (960, 800) is presumably (width, height), the order the
# Resize transform's img_scale uses -- confirm against the dataset.
img_scale = (960, 800)
max_epochs = 50
num_last_epochs = 10
interval = 1

# Machine-specific location of the YOLOX config directory.
conf_dir = '/ssd/Arik/PycharmProjects/mmdetection/configs/yolox/'
# Optimizer: SGD with Nesterov momentum.
optimizer = dict(
    type='SGD',
    lr=0.001,
    momentum=0.9,
    weight_decay=0.0005,
    nesterov=True,
    # Decay multiplier 0.0 for both norm-layer and bias parameters.
    paramwise_cfg=dict(
        norm_decay_mult=0.0,
        bias_decay_mult=0.0,
    ),
)

# Gradient clipping is disabled.
optimizer_config = dict(grad_clip=None)
# Learning-rate schedule using the 'YOLOX' policy.
lr_config = dict(
    policy='YOLOX',
    # Warmup settings: exponential, measured in epochs (1 epoch).
    warmup='exp',
    warmup_by_epoch=True,
    warmup_iters=1,
    warmup_ratio=1,
    by_epoch=False,
    # Same value as the num_last_epochs passed to the custom hooks.
    num_last_epochs=10,
    min_lr_ratio=0.05,
)

# Epoch-based training for 50 epochs, saving a checkpoint every epoch.
runner = dict(
    type='EpochBasedRunner',
    max_epochs=50,
)
checkpoint_config = dict(interval=1)
# Logging: emit every 50 iterations to both the text log and TensorBoard.
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook'),
    ],
)

# YOLOX-specific training hooks. The first two share num_last_epochs=10 and
# priority 48; the EMA hook is registered with priority 49.
custom_hooks = [
    dict(
        type='YOLOXModeSwitchHook',
        num_last_epochs=10,
        priority=48,
    ),
    dict(
        type='SyncNormHook',
        num_last_epochs=10,
        interval=1,
        priority=48,
    ),
    dict(
        type='ExpMomentumEMAHook',
        resume_from=None,
        momentum=0.0001,
        priority=49,
    ),
]
# YOLOX detector: CSPDarknet backbone, YOLOXPAFPN neck and a 2-class
# YOLOXHead.
model = dict(
    type='YOLOX',
    # FIX: the MMDetection YOLOX detector documents input_size as
    # (height, width), while the Resize transform's img_scale is
    # (width, height). For a 960-wide, 800-high input this must therefore be
    # (800, 960), not a copy of img_scale. Square sizes are unaffected, which
    # is why the mismatch only shows up for non-square resolutions.
    # NOTE(review): verify the (height, width) convention against the
    # installed mmdet version.
    input_size=(800, 960),
    # Multi-scale training settings, left unchanged.
    random_size_range=(15, 25),
    random_size_interval=10,
    backbone=dict(
        type='CSPDarknet',
        deepen_factor=1.0,
        widen_factor=1.0,
    ),
    neck=dict(
        type='YOLOXPAFPN',
        in_channels=[256, 512, 1024],
        out_channels=256,
        num_csp_blocks=3,
    ),
    bbox_head=dict(
        type='YOLOXHead',
        num_classes=2,
        in_channels=256,
        feat_channels=256,
    ),
    train_cfg=dict(
        assigner=dict(type='SimOTAAssigner', center_radius=2.5),
    ),
    test_cfg=dict(
        score_thr=0.001,
        nms=dict(type='nms', iou_threshold=0.65),
    ),
)
dataset_type = 'CocoDataset'

# Dataset and dataloader settings. Train wraps a CocoDataset in
# MultiImageMixDataset; val and test use the same annotation file and an
# identical single-scale, no-flip pipeline.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=2,
    persistent_workers=True,
    train=dict(
        type='MultiImageMixDataset',
        dataset=dict(
            type='CocoDataset',
            ann_file=
            '/ssd/Arik/tel_aviv/data/coco/2022_02_07_containers_filtered/coco_labels_train.json',
            img_prefix='/ssd/Arik/tel_aviv/data/labeled/',
            classes=['opentrashcan', 'trashcontainer'],
            # Inner pipeline only loads the image and its bbox annotations.
            pipeline=[
                dict(type='LoadImageFromFile'),
                dict(type='LoadAnnotations', with_bbox=True),
            ],
            filter_empty_gt=False,
        ),
        # Outer pipeline: flip, fixed-size resize (aspect ratio not kept),
        # annotation filtering, then formatting/collection.
        pipeline=[
            dict(type='RandomFlip', flip_ratio=0.5),
            dict(type='Resize', img_scale=(960, 800), keep_ratio=False),
            dict(
                type='FilterAnnotations',
                min_gt_bbox_wh=(1, 1),
                keep_empty=False,
            ),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
        ],
    ),
    val=dict(
        type='CocoDataset',
        ann_file=
        '/ssd/Arik/tel_aviv/data/coco/2022_02_07_containers_filtered/coco_labels_test.json',
        img_prefix='/ssd/Arik/tel_aviv/data/labeled/',
        classes=['opentrashcan', 'trashcontainer'],
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(960, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=False),
                    dict(type='RandomFlip'),
                    dict(type='DefaultFormatBundle'),
                    dict(type='Collect', keys=['img']),
                ],
            ),
        ],
    ),
    test=dict(
        type='CocoDataset',
        # FIX: removed the doubled slash before the file name so this path
        # is spelled exactly like the val annotation path.
        ann_file=
        '/ssd/Arik/tel_aviv/data/coco/2022_02_07_containers_filtered/coco_labels_test.json',
        img_prefix='/ssd/Arik/tel_aviv/data/labeled/',
        classes=['opentrashcan', 'trashcontainer'],
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(960, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=False),
                    dict(type='RandomFlip'),
                    dict(type='DefaultFormatBundle'),
                    dict(type='Collect', keys=['img']),
                ],
            ),
        ],
    ),
)
# Evaluate the 'bbox' metric every epoch; save_best is set to 'auto'.
evaluation = dict(
    save_best='auto',
    interval=1,
    dynamic_intervals=[(40, 1)],
    metric='bbox',
)

# Machine-specific output directory for this run.
work_dir = '/ssd/Arik/tel_aviv/training/mmlab/train/yolox_960x800'
Hello,
I'm training a YOLOX-L model on a custom COCO-format dataset.
This works well with a square input resolution: the default is (640, 640), but I have also trained successfully on (800, 800) and (1120, 1120).
Using the exact same config but changing the input resolution to anything non-square, e.g. (960, 800), training consistently results in 0 validation mAP.
I used the browse_dataset.py script to check that the training data looks good (no excessive distortion, and the bboxes are located and sized properly). As far as I can see, the same resize is used for train, validation and test: no padding and keep_ratio=False.
Example results for (960,800):
config: