Description
System Information
OpenCV python version: 4.8.1.78
Operating System / Platform: Ubuntu 20.04
Python version: 3.8.10 with ROS Noetic
Camera model: Intel Realsense D435
Detailed description
Hi, I'm using an Intel RealSense camera and YOLOv5 segmentation.
When I run this code on Windows, it works normally:
import argparse
import os
import platform
import sys
from pathlib import Path
from PIL import ImageDraw, ImageFont
import torch
import numpy as np
import time
import pyrealsense2 as rs
import math
import threading
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from models.common import DetectMultiBackend
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
increment_path, non_max_suppression, print_args, scale_boxes, scale_segments,
strip_optimizer)
from utils.plots import Annotator, colors, save_one_box
from utils.segment.general import masks2segments, process_mask, process_mask_native
from utils.torch_utils import select_device, smart_inference_mode
from math import atan2, cos, sin, sqrt, pi, asin, acos
cfg = rs.config() #ok
cfg.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30) #ok
cfg.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30) #ok
pipe = rs.pipeline()
profile = pipe.start(cfg)
align_to = rs.stream.color
align = rs.align(align_to)
intr = profile.get_stream(rs.stream.color).as_video_stream_profile().get_intrinsics() #ok
def find_plane(points):
c = np.mean(points, axis=0) # centroid of the points
r0 = points - c # center the points on the centroid
u, s, v = np.linalg.svd(r0) # perform SVD on the centered points
nv = v[-1, :] # normal vector = last row of v (smallest singular value)
ds = np.dot(points, nv) # signed distance of each point along the normal
param = np.r_[nv, -np.mean(ds)]
return param
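# Quick sanity check for find_plane (my sketch, not part of the original script):
# four points on the plane z = 0 should give a normal of roughly (0, 0, +/-1)
# and an offset of ~0, i.e. param ~ [0, 0, 1, 0] up to sign:
#   pts = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [1, 1, 0]], dtype=np.float64)
#   print(find_plane(pts))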
def drawAxis(img, p_, q_, colour, scale):
p = list(p_)
q = list(q_)
angle = atan2(p[1] - q[1], p[0] - q[0]) # angle in radians
hypotenuse = sqrt((p[1] - q[1]) * (p[1] - q[1]) + (p[0] - q[0]) * (p[0] - q[0]))
# Here we lengthen the arrow by a factor of scale
q[0] = p[0] - scale * hypotenuse * cos(angle)
q[1] = p[1] - scale * hypotenuse * sin(angle)
cv2.line(img, (int(p[0]), int(p[1])), (int(q[0]), int(q[1])), colour, 1, cv2.LINE_AA)
# create the arrow hooks
p[0] = q[0] + 9 * cos(angle + pi / 4)
p[1] = q[1] + 9 * sin(angle + pi / 4)
cv2.line(img, (int(p[0]), int(p[1])), (int(q[0]), int(q[1])), colour, 1, cv2.LINE_AA)
p[0] = q[0] + 9 * cos(angle - pi / 4)
p[1] = q[1] + 9 * sin(angle - pi / 4)
cv2.line(img, (int(p[0]), int(p[1])), (int(q[0]), int(q[1])), colour, 1, cv2.LINE_AA)
def get_frame_stream():
# Wait for a coherent pair of frames: depth and color
frames = pipe.wait_for_frames()
aligned_frames = align.process(frames)
depth_frame = aligned_frames.get_depth_frame()
color_frame = aligned_frames.get_color_frame()
if not depth_frame or not color_frame:
# If either frame is missing, the camera is probably not connected
print(
"Error, impossible to get the frame, make sure that the Intel Realsense camera is correctly connected")
return (None,) * 7 # keep the same arity as the successful return below
# Apply filter to fill the Holes in the depth image
color_intrin = color_frame.profile.as_video_stream_profile().intrinsics
spatial = rs.spatial_filter()
spatial.set_option(rs.option.holes_fill, 3)
filtered_depth = spatial.process(depth_frame)
hole_filling = rs.hole_filling_filter()
filled_depth = hole_filling.process(filtered_depth)
# Create colormap to show the depth of the Objects
colorizer = rs.colorizer()
depth_colormap = np.asanyarray(colorizer.colorize(filled_depth).get_data())
# Convert images to numpy arrays
# distance = depth_frame.get_distance(int(50),int(50))
# print("distance", distance)
depth_image = np.asanyarray(filled_depth.get_data())
color_image = np.asanyarray(color_frame.get_data())
# cv2.imshow("Colormap", depth_colormap)
# cv2.imshow("depth img", depth_image)
return True, frames, color_image, depth_image, color_frame, depth_frame, color_intrin
@smart_inference_mode()
@torch.no_grad()
def run(
weights=ROOT / 'yolov5s-seg.pt', # model.pt path(s)
source=ROOT / 'data/images', # file/dir/URL/glob/screen/0(webcam)
data=ROOT / 'data/coco128-seg.yaml', # dataset.yaml path
imgsz=(640, 640), # inference size (height, width) #OG@(640, 640)
conf_thres=0.79, # confidence threshold
iou_thres=0.66, # NMS IOU threshold #OG@0.45
max_det=10, # maximum detections per image
device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
view_img=False, # show results
save_txt=False, # save results to *.txt
save_conf=False, # save confidences in --save-txt labels
save_crop=False, # save cropped prediction boxes
nosave=False, # do not save images/videos
classes=None, # filter by class: --class 0, or --class 0 2 3
agnostic_nms=True, # class-agnostic NMS #overlap 2 or more labels on 1 object
augment=False, # augmented inference
visualize=False, # visualize features
update=False, # update all models
project=ROOT / 'runs/predict-seg', # save results to project/name
name='exp', # save results to project/name
exist_ok=False, # existing project/name ok, do not increment
line_thickness=1, # bounding box thickness (pixels)
hide_labels=False, # hide labels
hide_conf=False, # hide confidences
half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
vid_stride=1, # video frame-rate stride
retina_masks=False,
WHITE = (225, 255, 255), # color for displaying #BGR
RED = (0, 0, 255), # color for displaying #BGR
GREEN = (0, 255, 0), # color for displaying #BGR
TEAL = (255, 255, 0), # color for displaying #BGR
PINK = (255, 0, 255) # color for displaying #BGR
):
source = str(source)
is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
webcam = source.isnumeric() or source.endswith('.streams') or (is_url and not is_file)
if is_url and is_file:
source = check_file(source) # download
# Directories
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Load model
device = select_device(device)
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(imgsz, s=stride) # check image size
# Dataloader
bs = 1 # batch_size
view_img = check_imshow(warn=True)
#cap = cv2.VideoCapture(1)
#frame_img = cap.read()
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
bs = len(dataset)
vid_path, vid_writer = [None] * bs, [None] * bs
get_frame_stream()
# # Dataloader
# # bs = 1 # batch_size
# if webcam:
# view_img = check_imshow(warn=True)
# dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
# bs = len(dataset)
# # elif screenshot:
# # dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
# else:
# dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
# bs = 1 # batch_size
# vid_path, vid_writer = [None] * bs, [None] * bs
# Run inference
model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup
seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
for path, im, im0s, vid_cap, s in dataset:
num_threads = threading.active_count()
with dt[0]:
frame_img = vid_cap
im = torch.from_numpy(im).to(model.device)
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
imnew = im.half() if model.fp16 else im.int() # fp16 copy, else int copy (only its shape is used below)
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
# Inference
with dt[1]:
visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
pred, proto = model(im, augment=augment, visualize=visualize)[:2]
# NMS
with dt[2]:
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, nm=32)
# Second-stage classifier (optional)
# pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
###############################################################################
########################## INITIALIZING PREDICTIONS. ##########################
###############################################################################
# Process predictions
for i, det in enumerate(pred): # per image
# # Detect number of threads
# if num_threads == 2:
# print("Thread: Multi")
# elif num_threads == 1:
# print("Thread: Single")
seen += 1
p, im0, frame = path[i], im0s[i].copy(), dataset.count
s += f'{i}: '
p = Path(p) # to Path
# save_path = str(save_dir / p.name) # im.jpg
# txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # im.txt
s += '%gx%g ' % im.shape[2:] # print string
annotator = Annotator(im0, line_width=line_thickness, example=str(names))
if len(det):
masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True) # HWC
det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round() # rescale boxes to im0 size
masks2 = process_mask(proto[i], det[:, 6:], det[:, :4], imnew.shape[2:], upsample=True) # HWC
det[:, :4] = scale_boxes(imnew.shape[2:], det[:, :4], im0.shape).round() # rescale boxes to im0 size
segments2 = [
scale_segments(im0.shape if retina_masks else imnew.shape[2:], x, im0.shape, normalize=False)
for x in reversed(masks2segments(masks))] ## UN-NORMALIZED
#NumberOfElements = sizeof(arr) / sizeof(arr[0]);
#segments2_conver = np.array(segments2, dtype=np.int32)
#print(segments2_conver)
segments = [
scale_segments(im0.shape if retina_masks else im.shape[2:], x, im0.shape, normalize=True)
for x in reversed(masks2segments(masks))] ## NORMALIZED
# Print results
for c in det[:, 5].unique():
n = (det[:, 5] == c).sum() # detections per class
s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
colorclass = str(names[int(c)])
# g = f"{names[int(c)]}"
# cs = str(names[int(c)])
# print("Class_3: ", cs)
# print(n)
# Mask plotting
annotator.masks(
masks,
colors=[colors(x, True) for x in det[:, 5]],
im_gpu=torch.as_tensor(im0, dtype=torch.float16).to(device).permute(2, 0, 1).flip(0).contiguous() /
255 if retina_masks else im[i])
ret, new_frame, bgr_image, depth_image, bgr_frame, depth_frame, color_intrin = get_frame_stream()
# Write results
for j, (*xyxy, conf, cls) in enumerate(reversed(det[:, :6])):
seg = segments[j].reshape(-1) # (n,2) to (n*2)
#print(segments[j])
line = (cls, *seg, conf) if save_conf else (cls, *seg) # label format
#with open(f'{txt_path}.txt', 'a') as f:
#f.write(('%g ' * len(line)).rstrip() % line + '\n')
c = int(cls) # integer class
label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
annotator.box_label(xyxy, label, color=colors(c, True))
box = xyxy
centerx, centery = int((int(box[0]) + int(box[2])) / 2), int((int(box[1]) + int(box[3])) / 2)
center_point = centerx, centery
#distance_mm = depth_image[centery, centerx]
#print(center_point)
#ImageDraw.ImageDraw.polygon(, segments2[j], outline=colors(c, True), width=3)
#cv2.circle(vid_cap,(segments2[j]), radius=1, color=(0,0,255), thickness=1)
im0 = annotator.result()
seg_point = np.array(segments2[j], dtype=np.int32)
#cv2.putText(im0, "{} mm".format(distance_mm), (centerx, centery - 10), 0, 0.5, (0, 0, 255), 2)
#center_point_XYZ = rs.rs2_deproject_pixel_to_point(color_intrin, center_point, distance_mm)
#print(np.array(center_point_XYZ).astype(int))
#print("Center point: ", center_point)
cv2.polylines(im0, [np.array(segments2[j], dtype=np.int32)], isClosed=True, color=(0,0,0), thickness=3)
##########################################################
#################### AREA ESTIMATION. ####################
##########################################################
area = cv2.contourArea(seg_point)
ctrl_area = area
if ctrl_area > 3000 and ctrl_area < 9000: #OG@5250
"So this is the list of segmentation point segments2[j] "
#print([np.array(segments2[j], dtype=np.int32)])
"So we will move on to do the PCA for detecting vector X,Y for and object"
""
###########################################################
##################### YAW ESTIMATION. #####################
###########################################################
## Performing PCA analysis
mean = np.empty((0))
mean, eigenvectors, eigenvalues = cv2.PCACompute2(np.array(segments2[j], dtype=np.float64), mean)
## Storing the center of the object
cntr = (int(mean[0, 0]), int(mean[0, 1]))
## Drawing the principal components
cv2.circle(im0, cntr, 3, PINK, 2)
p1 = (cntr[0] + 0.02 * eigenvectors[0, 0] * eigenvalues[0, 0],
cntr[1] + 0.02 * eigenvectors[0, 1] * eigenvalues[0, 0])
p2 = (cntr[0] - 0.02 * eigenvectors[1, 0] * eigenvalues[1, 0],
cntr[1] - 0.02 * eigenvectors[1, 1] * eigenvalues[1, 0])
drawAxis(im0, cntr, p1, GREEN, 1)
drawAxis(im0, cntr, p2, TEAL, 5)
yaw = atan2(eigenvectors[0, 1], eigenvectors[0, 0]) # orientation in radians
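# Note on the convention (my reading of the PCA step, not from the original
# comments): eigenvectors[0] is the contour's major axis, so an object lying
# along the image x-axis gives eigenvectors[0] ~ (1, 0) and yaw ~ atan2(0, 1)
# = 0 rad (up to the sign of the axis); it is converted to degrees further below.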
##########################################################
##################### SIZE ESTIMATION. ###################
##########################################################
# "So in This part we will try to extract the maximum, minimum of coordinates x,y"
# "Remember that the highest point Y have the lowest value"
# "Remember that the furthest point X have the highest value"
# arr = segments2[j]
# arr4 = np.max(arr, axis=0) # find max_y and max_x
# arr5 = np.min(arr, axis=0) # find min_y and min_x
# index = np.where(arr == arr4[0])[0]
# index1 = np.where(arr == arr4[1])[0]
# index2 = np.where(arr == arr5[0])[0]
# index3 = np.where(arr == arr5[1])[0]
# length = len(index)
# length1 = len(index1)
# length2 = len(index2)
# length3 = len(index3)
# # check for top right corner _ check for x
# x = (arr[index[0]])
# if length > 1:
# for i in range(length - 1):
# x = np.row_stack([x, arr[index[i + 1]]])
# topright_max = np.max(x, axis=0)
# index_xtopright = np.where(x == topright_max[1])[0][0]
# topright_pos = x[index_xtopright]
# else:
# topright_pos = x
# # check for bot right corner _ check for y
# y = (arr[index1[0]])
# if length1 > 1:
# for i in range(length1 - 1):
# y = np.row_stack([y, arr[index1[i + 1]]])
# botright_min = np.min(y, axis=0)
# index_ybotright = np.where(y == botright_min[0])[0][0]
# botright_pos = y[index_ybotright]
# else:
# botright_pos = y
# # check for top lef corner _ check for x
# z = (arr[index2[0]])
# if length2 > 1:
# for i in range(length2 - 1):
# z = np.row_stack([z, arr[index2[i + 1]]])
# topleft_max = np.max(z, axis=0)
# index_ztopleft = np.where(z == topleft_max[1])[0][0]
# topleft_pos = z[index_ztopleft]
# else:
# topleft_pos = z
# # check for bot lef corner _ check for y
# t = (arr[index3[0]])
# if length3 > 1:
# for i in range(length3 - 1):
# t = np.row_stack([t, arr[index3[i + 1]]])
# botlef_min = np.min(t, axis=0)
# index_zbotlef = np.where(t == botlef_min[0])[0][0]
# botlef_pos = t[index_zbotlef]
# else:
# botlef_pos = t
# cv2.circle(im0, center_point, 3, (255, 0, 0))
# distance_1 = depth_image[int(topright_pos[1]), int(topright_pos[0])]
# distance_2 = depth_image[int(botright_pos[1]), int(topleft_pos[0])]
# distance_3 = depth_image[int(topleft_pos[1]), int(topleft_pos[0])]
# distance_4 = depth_image[int(botlef_pos[1]), int(botlef_pos[0])]
# point1 = rs.rs2_deproject_pixel_to_point(color_intrin, [topright_pos[0], topright_pos[1]], distance_1)
# point2 = rs.rs2_deproject_pixel_to_point(color_intrin, [botright_pos[0], topleft_pos[1]], distance_2)
# point3 = rs.rs2_deproject_pixel_to_point(color_intrin, [topleft_pos[0], topleft_pos[1]], distance_3)
# point4 = rs.rs2_deproject_pixel_to_point(color_intrin, [botlef_pos[0], botlef_pos[1]], distance_4)
# cv2.circle(im0, (int(topright_pos[0]), int(topright_pos[1])), 2, RED)
# cv2.circle(im0, (int(botright_pos[0]), int(botright_pos[1])), 2, RED)
# cv2.circle(im0, (int(topleft_pos[0]), int(topleft_pos[1])), 2, RED)
# cv2.circle(im0, (int(botlef_pos[0]), int(botlef_pos[1])), 2, RED)
##########################################################
################ ROLL, PITCH ESTIMATION. #################
##########################################################
offset_x = int((xyxy[2] - xyxy[0])/10)
offset_y = int((xyxy[3] - xyxy[1])/10)
interval_x = int((xyxy[2] - xyxy[0] -2 * offset_x)/2)
interval_y = int((xyxy[3] - xyxy[1] -2 * offset_y)/2)
points = np.zeros([9,3]) #OG@[9,3]
for i in range(3): #OG@3
for j in range(3): #OG@3
x = int(xyxy[0]) + offset_x + interval_x*i
y = int(xyxy[1]) + offset_y + interval_y*j
dist = depth_frame.get_distance(x, y)*1000 #OG@*1000
Xtemp = dist*(x - intr.ppx)/intr.fx
Ytemp = dist*(y - intr.ppy)/intr.fy
Ztemp = dist
points[j+i*3][0] = Xtemp
points[j+i*3][1] = Ytemp
points[j+i*3][2] = Ztemp
param = find_plane(points)
roll = math.atan(param[2]/param[1])*180/math.pi
if(roll < 0):
roll = roll + 90
else:
roll = roll - 90
pitch = math.atan(param[2]/param[0])*180/math.pi
if(pitch < 0):
pitch = pitch + 90
else:
pitch = pitch - 90
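# Worked example of the angle convention (my sketch): a plane facing the
# camera head-on has normal ~ (0, 0, 1), so param[2]/param[1] is very large
# in magnitude, atan(...) approaches +/-90 deg, and the +/-90 offset above
# brings roll to ~0. Pitch follows the same reasoning with param[0].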
##########################################################
################## X, Y, Z ESTIMATION. ###################
##########################################################
object_coordinates = []
cx = int((xyxy[0] + xyxy[2])/2)
cy = int((xyxy[1] + xyxy[3])/2)
dist = depth_frame.get_distance(cx + 0, cy + 0)*1000
Xtarget = dist*(cx + 0 - intr.ppx)/intr.fx - 35 # 35 mm: offset from the RGB sensor to the RealSense housing center
Ytarget = dist*(cy + 0 - intr.ppy)/intr.fy
Ztarget = dist
object_coordinates.append([label, Xtarget, Ztarget])
##########################################################
#################### AREA ESTIMATION. ####################
##########################################################
# area = cv2.contourArea(seg_point)
#########################################################
############## DISPLAYING PARAMETER SECTION. ############
#########################################################
#print("Totally seen: ", str(len(det)))
#print("Class: ", str(names))
# class_label = c in det[:, 5].unique()
# print("Class_1: ", str(c))
# print("Class_2: ", str(class_label))
## Displaying Class parameter ##
print("Class: ", colorclass)
## Displaying X,Y,Z parameters ##
label_coordinate = "(" + str(round(Xtarget)) + ", " + str(round(Ytarget)) + ", " + str(round(Ztarget)) + ")"
cv2.putText(im0, label_coordinate, (int((xyxy[0] + xyxy[2])/2) - 40, int((xyxy[1] + xyxy[3])/2) + 60), cv2.FONT_HERSHEY_PLAIN, 1, WHITE, thickness=1, lineType=cv2.LINE_AA)
print("(" + str(round(Xtarget)) + ", " + str(round(Ytarget)) + ", " + str(round(Ztarget)) + ")")
## Displaying Roll, Pitch, Yaw angles and Area ##
cvtR2D = np.rad2deg(yaw)
label_roll = "Roll: " + str(round(roll, 2))
label_pitch = "Pitch: " + str(round(pitch, 2))
label_yaw = "Yaw: " + str(round((-int(cvtR2D) - 90), 2))
label_area = "Area: " + str(int(area))
cv2.putText(im0, label_roll, (int((xyxy[0] + xyxy[2])/2) - 40, int((xyxy[1] + xyxy[3])/2)), cv2.FONT_HERSHEY_PLAIN, 1, WHITE, thickness=1, lineType=cv2.LINE_AA)
cv2.putText(im0, label_pitch, (int((xyxy[0] + xyxy[2])/2) - 40, int((xyxy[1] + xyxy[3])/2) + 20), cv2.FONT_HERSHEY_PLAIN, 1, WHITE, thickness=1, lineType=cv2.LINE_AA)
cv2.putText(im0, label_yaw, (int((xyxy[0] + xyxy[2])/2) - 40, int((xyxy[1] + xyxy[3])/2) + 40), cv2.FONT_HERSHEY_PLAIN, 1, WHITE, thickness=1, lineType=cv2.LINE_AA)
cv2.putText(im0, label_area, (int((xyxy[0] + xyxy[2])/2) - 40, int((xyxy[1] + xyxy[3])/2) + 80), cv2.FONT_HERSHEY_PLAIN, 1, WHITE, thickness=1, lineType=cv2.LINE_AA)
print("Roll: " + str(round(roll, 2)) + ", Pitch: " + str(round(pitch, 2)) + ", Yaw: " + str(round((-int(cvtR2D) - 90), 2)))
print("Area: ", area)
print("End One detection phase")
print()
# Stream results
if view_img:
if platform.system() == 'Linux' and p not in windows:
windows.append(p)
cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux)
cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
cv2.imshow(str(p), im0)
if cv2.waitKey(1) == ord('q'): # 1 millisecond
exit()
# Print time (inference-only)
LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")
# LOGGER.info(f"'Class_4: '{g}")
# Print results
# t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image
# LOGGER.info(f'Seg:%.1fms seg, Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
# def parse_opt():
# parser = argparse.ArgumentParser()
# parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-seg.pt', help='model path(s)')
# parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob/screen/0(webcam)')
# parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
# parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
# parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
# parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
# parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
# parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
# parser.add_argument('--view-img', action='store_true', help='show results')
# parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
# parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
# parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
# parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
# parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
# parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
# parser.add_argument('--augment', action='store_true', help='augmented inference')
# parser.add_argument('--visualize', action='store_true', help='visualize features')
# parser.add_argument('--update', action='store_true', help='update all models')
# parser.add_argument('--project', default=ROOT / 'runs/predict-seg', help='save results to project/name')
# parser.add_argument('--name', default='exp', help='save results to project/name')
# parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
# parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
# parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
# parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
# parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
# parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
# parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
# parser.add_argument('--retina-masks', action='store_true', help='whether to plot masks in native resolution')
# opt = parser.parse_args()
# opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
# print_args(vars(opt))
# return opt
# def main(opt):
# check_requirements(ROOT / 'requirements.txt', exclude=('tensorboard', 'thop'))
# run(**vars(opt))
if __name__ == '__main__':
# opt = parse_opt()
# main(opt)
run()
But when I execute the code above on Ubuntu 20.04, I get this error:
[ WARN:0@4.385] global cap_v4l.cpp:982 open VIDEOIO(V4L2:/dev/video4): can't open camera by index
[ERROR:0@4.501] global obsensor_uvc_stream_channel.cpp:156 getStreamChannelGroup Camera index out of range
Traceback (most recent call last):
File "train_seg.py", line 9, in <module>
run(
File "/home/anthonia/.local/lib/python3.8/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/anthonia/.local/lib/python3.8/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/anthonia/Documents/yolov5-master/Ultimate_2.py", line 175, in run
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
File "/home/anthonia/Documents/yolov5-master/utils/dataloaders.py", line 367, in __init__
assert cap.isOpened(), f'{st}Failed to open {s}'
AssertionError: 1/1: 4... Failed to open 4
Index 4 is the RGB channel of the RealSense camera.
I have tried everything, including checking the /dev/video devices, but I don't know why the program does not work. Here is the /dev/video listing:
crw-rw-rw-+ 1 root plugdev 81, 0 Nov 16 10:55 /dev/video0
crw-rw-rw-+ 1 root plugdev 81, 1 Nov 16 10:55 /dev/video1
crw-rw-rw-+ 1 root plugdev 81, 2 Nov 16 10:55 /dev/video2
crw-rw-rw-+ 1 root plugdev 81, 3 Nov 16 10:55 /dev/video3
crw-rw-rw-+ 1 root plugdev 81, 4 Nov 16 10:55 /dev/video4
crw-rw-rw-+ 1 root plugdev 81, 5 Nov 16 10:55 /dev/video5
crw-rw----+ 1 root video 81, 6 Nov 16 10:55 /dev/video6
crw-rw----+ 1 root video 81, 7 Nov 16 10:55 /dev/video7
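To see which of these nodes OpenCV itself can open, here is a minimal V4L2 probe (my own sketch; run it while no other process holds the camera):

import cv2

# Try each /dev/videoN from the listing above with the V4L2 backend.
for idx in range(8):
    cap = cv2.VideoCapture(idx, cv2.CAP_V4L2)
    if cap.isOpened():
        ok, _ = cap.read()
        print(f"/dev/video{idx}: opened, frame read = {ok}")
    else:
        print(f"/dev/video{idx}: failed to open")
    cap.release()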
I have also built librealsense from source and installed pyrealsense via pip. Please help me.
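To confirm that the SDK-only path works independently of OpenCV, here is a minimal standalone sketch (my own, using the same stream settings as the script above; run it while nothing else holds the camera):

import pyrealsense2 as rs
import numpy as np

# Open the same color/depth streams the script requests and grab one frameset.
cfg = rs.config()
cfg.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
cfg.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
pipe = rs.pipeline()
pipe.start(cfg)
try:
    frames = pipe.wait_for_frames()
    color = np.asanyarray(frames.get_color_frame().get_data())
    print("color frame:", color.shape)  # expect (480, 640, 3)
finally:
    pipe.stop()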
Steps to reproduce
I have researched all the related issues, but none of their solutions work for me. I cannot find a related post about RealSense behaving differently on Ubuntu and Windows, and I do not know why the code only works on Windows. A possible workaround sketch follows below.
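As a possible workaround rather than a fix (my own sketch, assuming model, imgsz, stride, pt, and get_frame_stream from the script above are already set up): since get_frame_stream() already pulls color frames through the RealSense SDK, the LoadStreams dataloader could be bypassed and those frames fed to the model directly. letterbox here is YOLOv5's resize/pad helper from utils.augmentations:

from utils.augmentations import letterbox  # YOLOv5's resize/pad helper

while True:
    ret, _, color_image, depth_image, _, depth_frame, color_intrin = get_frame_stream()
    if not ret:
        break
    # Resize/pad the BGR frame to the network input size, as LoadStreams does
    im = letterbox(color_image, imgsz, stride=stride, auto=pt)[0]
    im = im.transpose((2, 0, 1))[::-1]  # HWC BGR -> CHW RGB
    im = np.ascontiguousarray(im)
    im = torch.from_numpy(im).to(model.device).float() / 255  # uint8 -> 0.0-1.0
    if im.ndim == 3:
        im = im[None]  # add batch dimension
    pred, proto = model(im)[:2]
    # ... continue with non_max_suppression / process_mask as in the loop above ...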
Issue submission checklist
- I report the issue, it's not a question
- I checked the problem with documentation, FAQ, open issues, forum.opencv.org, Stack Overflow, etc and have not found any solution
- I updated to the latest OpenCV version and the issue is still there
- There is reproducer code and related data files (videos, images, onnx, etc)