Line data Source code
1 : #include "../../includes/objectDetection/YOLOv5TRT.hpp"
2 :
3 : std::string YOLOv5TRT::lastClassName = "";
4 : std::chrono::steady_clock::time_point YOLOv5TRT::lastNotificationTime = std::chrono::steady_clock::now();
5 :
6 1 : YOLOv5TRT::YOLOv5TRT(const std::string& enginePath, const std::string& labelPath)
7 1 : : labelManager(labelPath) {
8 : // Configurar OpenCV para usar CUDA
9 1 : cv::cuda::setDevice(0);
10 1 : loadEngine(enginePath);
11 0 : allocateBuffers();
12 :
13 : // Pré-alocar buffers reutilizáveis
14 0 : channels.resize(3);
15 0 : hostDataBuffer = new float[3*640*640];
16 :
17 0 : num_classes = static_cast<int>(labelManager.getNumClasses());
18 :
19 0 : Publisher::instance(5557); //Initialize publisher
20 0 : }
21 :
22 0 : YOLOv5TRT::~YOLOv5TRT() {
23 0 : cudaStreamDestroy(stream);
24 0 : delete[] hostDataBuffer;
25 0 : delete[] outputHost;
26 0 : cudaFree(inputDevice);
27 0 : cudaFree(outputDevice);
28 0 : }
29 :
30 : /**
31 : * @brief Calcula o volume (número total de elementos) de um tensor dado suas dimensões.
32 : * @param dims Dimensões do tensor.
33 : * @return Volume total.
34 : */
35 0 : size_t YOLOv5TRT::calculateVolume(const nvinfer1::Dims& dims) {
36 0 : size_t volume = 1;
37 0 : for (int i = 0; i < dims.nbDims; ++i) {
38 0 : volume *= dims.d[i];
39 : }
40 0 : return volume;
41 : }
42 :
43 1 : void YOLOv5TRT::loadEngine(const std::string& enginePath) {
44 1 : std::ifstream file(enginePath, std::ios::binary);
45 1 : if (!file) {
46 1 : std::cerr << "[ERRO] Falha ao carregar o engine TensorRT!" << std::endl;
47 1 : exit(EXIT_FAILURE);
48 : }
49 :
50 0 : file.seekg(0, file.end);
51 0 : size_t size = file.tellg();
52 0 : file.seekg(0, file.beg);
53 0 : std::vector<char> engineData(size);
54 0 : file.read(engineData.data(), size);
55 :
56 0 : runtime = createInferRuntime(Logger::instance());
57 0 : engine = runtime->deserializeCudaEngine(engineData.data(), size);
58 0 : if (!engine) {
59 0 : std::cerr << "[ERRO] Falha ao desserializar o engine TensorRT!" << std::endl;
60 0 : exit(EXIT_FAILURE);
61 : }
62 :
63 0 : context = engine->createExecutionContext();
64 0 : }
65 :
66 0 : void YOLOv5TRT::allocateBuffers() {
67 0 : inputSize = calculateVolume(engine->getBindingDimensions(0)) * sizeof(float);
68 0 : outputSize = calculateVolume(engine->getBindingDimensions(1)) * sizeof(float);
69 :
70 0 : cudaMalloc(&inputDevice, inputSize);
71 0 : cudaMalloc(&outputDevice, outputSize);
72 :
73 0 : bindings.push_back(inputDevice);
74 0 : bindings.push_back(outputDevice);
75 :
76 0 : cudaStreamCreate(&stream);
77 0 : outputHost = new float[outputSize / sizeof(float)];
78 0 : }
79 :
80 : /**
81 : * @brief Executa inferência em uma imagem.
82 : * @param image Imagem de entrada (cv::Mat BGR).
83 : * @return Vetor de floats com a saída do modelo.
84 : */
85 0 : std::vector<float> YOLOv5TRT::infer(const cv::Mat& image) {
86 : // Usar GPU para processamento
87 0 : gpu_image.upload(image);
88 0 : cv::cuda::resize(gpu_image, gpu_resized, cv::Size(640, 640));
89 0 : gpu_resized.convertTo(gpu_float, CV_32FC3, 1.0/255.0);
90 :
91 : // Download otimizado
92 0 : gpu_float.download(blob);
93 0 : cv::split(blob, channels);
94 :
95 : // Cópia otimizada dos canais (HWC -> CHW)
96 0 : for (int c = 0; c < 3; c++) {
97 0 : memcpy(hostDataBuffer + c*640*640,
98 0 : channels[c].ptr<float>(),
99 : 640*640*sizeof(float));
100 : }
101 :
102 : // Copiar dados para GPU
103 0 : cudaMemcpyAsync(inputDevice, hostDataBuffer, 3*640*640*sizeof(float),
104 : cudaMemcpyHostToDevice, stream);
105 :
106 : // Executar inferência
107 : //context->enqueueV2(bindings.data(), stream, nullptr);
108 :
109 0 : context->executeV2(bindings.data());
110 :
111 : // Copiar resultados para o host
112 0 : cudaMemcpyAsync(outputHost, outputDevice, outputSize,
113 : cudaMemcpyDeviceToHost, stream);
114 0 : cudaStreamSynchronize(stream);
115 :
116 0 : return std::vector<float>(outputHost, outputHost + outputSize / sizeof(float));
117 : }
118 :
119 : /**
120 : * @brief Pós-processa a saída do modelo, aplicando threshold de confiança e NMS.
121 : * @param output Saída bruta do modelo.
122 : * @param num_classes Número de classes.
123 : * @param conf_thresh Threshold de confiança.
124 : * @param nms_thresh Threshold de NMS.
125 : * @return Vetor de detecções finais.
126 : */
127 0 : std::vector<Detection> YOLOv5TRT::postprocess(const std::vector<float>& output, int num_classes, float conf_thresh, float nms_thresh) {
128 0 : std::vector<Detection> dets;
129 0 : int num_preds = output.size() / (5 + num_classes);
130 :
131 0 : for (int i = 0; i < num_preds; ++i) {
132 0 : const float* pred = &output[i * (5 + num_classes)];
133 0 : float obj = pred[4];
134 0 : if (obj < conf_thresh) continue;
135 :
136 : // Encontrar a classe com maior probabilidade
137 0 : float max_cls = pred[5];
138 0 : int class_id = 0;
139 0 : for (int c = 1; c < num_classes; ++c) {
140 0 : if (pred[5 + c] > max_cls) {
141 0 : max_cls = pred[5 + c];
142 0 : class_id = c;
143 : }
144 : }
145 :
146 0 : float score = obj * max_cls;
147 0 : if (score < conf_thresh) continue;
148 :
149 0 : dets.push_back({pred[0], pred[1], pred[2], pred[3], score, class_id});
150 : }
151 :
152 : // NMS
153 0 : std::vector<Detection> result;
154 0 : std::sort(dets.begin(), dets.end(), [](const Detection& a, const Detection& b) {
155 0 : return a.conf > b.conf;
156 : });
157 :
158 0 : std::vector<bool> removed(dets.size(), false);
159 0 : for (size_t i = 0; i < dets.size(); ++i) {
160 0 : if (removed[i]) continue;
161 0 : result.push_back(dets[i]);
162 :
163 0 : for (size_t j = i + 1; j < dets.size(); ++j) {
164 0 : if (removed[j]) continue;
165 :
166 : // Calcular IoU
167 0 : float xx1 = std::max(dets[i].x - dets[i].w/2, dets[j].x - dets[j].w/2);
168 0 : float yy1 = std::max(dets[i].y - dets[i].h/2, dets[j].y - dets[j].h/2);
169 0 : float xx2 = std::min(dets[i].x + dets[i].w/2, dets[j].x + dets[j].w/2);
170 0 : float yy2 = std::min(dets[i].y + dets[i].h/2, dets[j].y + dets[j].h/2);
171 :
172 0 : float w = std::max(0.0f, xx2 - xx1);
173 0 : float h = std::max(0.0f, yy2 - yy1);
174 0 : float inter = w * h;
175 0 : float area1 = dets[i].w * dets[i].h;
176 0 : float area2 = dets[j].w * dets[j].h;
177 0 : float ovr = inter / (area1 + area2 - inter);
178 :
179 0 : if (ovr > nms_thresh) removed[j] = true;
180 : }
181 : }
182 0 : return result;
183 : }
184 :
185 : /**
186 : * @brief Função principal. Inicializa recursos, executa loop de inferência e exibe resultados.
187 : * @return 0 em caso de sucesso.
188 : */
189 0 : void YOLOv5TRT::process_image(const cv::Mat& frame) {
190 0 : auto output = infer(frame);
191 0 : std::vector<Detection> dets = postprocess(output, num_classes, conf_thresh, nms_thresh);
192 :
193 0 : for (const auto& det : dets) {
194 : // Converter coordenadas normalizadas para absolutas
195 : // Corrigir: tratar det.x, det.y, det.w, det.h como coordenadas absolutas (input 640x640)
196 0 : float x_center = (det.x / 640.0f) * frame.cols;
197 0 : float y_center = (det.y / 640.0f) * frame.rows;
198 0 : float width = (det.w / 640.0f) * frame.cols;
199 0 : float height = (det.h / 640.0f) * frame.rows;
200 :
201 : // Calcular coordenadas do retângulo
202 0 : int x1 = static_cast<int>(x_center - width / 2);
203 0 : int y1 = static_cast<int>(y_center - height / 2);
204 0 : int x2 = static_cast<int>(x_center + width / 2);
205 0 : int y2 = static_cast<int>(y_center + height / 2);
206 :
207 : // Garantir que as coordenadas estão dentro da imagem
208 0 : x1 = std::max(0, std::min(x1, frame.cols - 1));
209 0 : y1 = std::max(0, std::min(y1, frame.rows - 1));
210 0 : x2 = std::max(0, std::min(x2, frame.cols - 1));
211 0 : y2 = std::max(0, std::min(y2, frame.rows - 1));
212 :
213 : // Verificar se o retângulo é válido
214 0 : if (x2 > x1 && y2 > y1) {
215 0 : std::string className = labelManager.getLabel(det.class_id);
216 0 : std::cout << "Object found: " << className << " at (" << x1 << "," << y1 << ")-(" << x2 << "," << y2 << ")" << std::endl;
217 :
218 0 : auto now = std::chrono::steady_clock::now();
219 0 : auto elapsedMs = std::chrono::duration_cast<std::chrono::milliseconds>(now - lastNotificationTime).count();
220 :
221 0 : if (className != lastClassName || elapsedMs > 2000) { // Only notify again if different or 2s passed
222 0 : lastClassName = className;
223 0 : lastNotificationTime = now;
224 0 : Publisher::instance(5557)->publish("notification", className);
225 : }
226 :
227 : /* if (className != lastClassName)
228 : {
229 : lastClassName = className;
230 : std::lock_guard<std::mutex> lock(pubMutex);
231 : Publisher::instance(5557)->publish("notification", className);
232 : } */
233 : // Desenhar retângulo usando coordenadas Point
234 : /* cv::rectangle(frame, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 3);
235 :
236 : // Desenhar label
237 : std::string label = className + " (" + std::to_string(int(det.conf * 100)) + "%)";
238 :
239 : int baseline = 0;
240 : cv::Size textSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.6, 2, &baseline);
241 :
242 : // Fundo do texto
243 : cv::rectangle(frame,
244 : cv::Point(x1, y1 - textSize.height - 10),
245 : cv::Point(x1 + textSize.width, y1),
246 : cv::Scalar(0, 255, 0), -1);
247 :
248 : // Texto
249 : cv::putText(frame, label,
250 : cv::Point(x1, y1 - 5),
251 : cv::FONT_HERSHEY_SIMPLEX, 0.6,
252 : cv::Scalar(0, 0, 0), 2); */
253 : } /* else {
254 : std::cout << "Invalid rectangle coordinates for detection: "
255 : << "x1=" << x1 << ", y1=" << y1
256 : << ", x2=" << x2 << ", y2=" << y2
257 : << ", width=" << width << ", height=" << height
258 : << ", det.x=" << det.x << ", det.y=" << det.y
259 : << ", det.w=" << det.w << ", det.h=" << det.h
260 : << ", class_id=" << det.class_id << ", conf=" << det.conf << std::endl;
261 : } */
262 : }
263 0 : }
|