
# Converting the much-hyped YOLO to tflite and running it even faster on the Pi 5

airpocket, 2023-12-01 (first version)

Tags: AI, Bookworm, Pi5, YOLO, RaspberryPi

Article type: Setup and usage

License: (MIT) The MIT License

# Introduction

In this article, I take the model used in [Trying the much-hyped YOLO on a Raspberry Pi 5](https://elchika.com/article/9a31a595-d454-4186-bc85-38f13a5155c2/), [convert it from onnx to tflite](https://elchika.com/article/1d02ca02-0ed1-4450-b882-5570b2c8a3ae/), and run it even faster.

The original idea is this post by @PINTO03091:

@[twitter](https://twitter.com/PINTO03091/status/1729845948019327191)

# Environment

- Raspberry Pi 5
- Bookworm 64bit desktop
- python 3.11.2

# Setting up the environment

Install the tflite runtime into the venv created in [Trying the much-hyped YOLO on a Raspberry Pi 5](https://elchika.com/article/9a31a595-d454-4186-bc85-38f13a5155c2/).

First, activate that virtual environment. (I happened to name it "onnx"; bear with me.)

```
source onnx/bin/activate
```

## Installing the tflite runtime

Pick a suitable working folder and install [PINTO's hand-built tflite runtime](https://github.com/PINTO0309/TensorflowLite-bin):

```
$ sudo apt install -y \
    swig libjpeg-dev zlib1g-dev python3-dev \
    unzip wget python3-pip curl git cmake make
$ pip3 install numpy==1.24.3
$ TFVER=2.12.0
$ PYVER=311
$ ARCH=aarch64
$ pip3 install \
    --no-cache-dir \
    https://github.com/PINTO0309/TensorflowLite-bin/releases/download/v${TFVER}/tflite_runtime-${TFVER/-/}-cp${PYVER}-none-linux_${ARCH}.whl
```

That completes the tflite install.
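To make sure the wheel really went in before moving on, a quick check like the following should print the runtime version (a minimal sketch; run it inside the venv, nothing here is specific to this article):

```
# Sanity check: import the runtime installed from the PINTO wheel above.
import tflite_runtime
from tflite_runtime.interpreter import Interpreter  # the class the demo code uses

print(tflite_runtime.__version__)  # should match TFVER (2.12.0) from the install step
```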
## Getting the tflite model

I wanted to produce the tflite model on the Raspberry Pi as well, but that turned out to be difficult, so I converted from onnx to tflite on a Windows PC. See [this article](https://elchika.com/article/1d02ca02-0ed1-4450-b882-5570b2c8a3ae/) for the conversion procedure.

I converted several tflite variants, but the quantized model did not run correctly, so I verify operation with the following two models. Copy them into your working folder on the Raspberry Pi:

- `gold_yolo_n_body_head_hand_post_0461_0.4428_1x3x128x160_float16.tflite`
- `gold_yolo_n_body_head_hand_post_0461_0.4428_1x3x128x160_float32.tflite`

# Code

I modified the onnx demo code and use it as demo_goldyolo_tflite.py for tflite. Running it as follows with the float16 model and 4 threads, the best inference times dip below 5 msec. Blazing fast!

```
python demo_goldyolo_tflite.py -m gold_yolo_n_body_head_hand_post_0461_0.4428_1x3x128x160_float16.tflite -th 4
```

-m selects the model, and -th sets the number of threads (1-4).
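Before the full listing, here is the bare-bones interpreter flow the demo is built around, reduced to a dummy-input smoke test (a minimal sketch: the filename is the float32 model above, and random input stands in for a real frame, so it only verifies that the model loads and runs):

```
# Minimal tflite_runtime inference round-trip for the GoldYOLO model.
import numpy as np
from tflite_runtime.interpreter import Interpreter

interpreter = Interpreter(
    model_path='gold_yolo_n_body_head_hand_post_0461_0.4428_1x3x128x160_float32.tflite',
    num_threads=4,
)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print('input :', input_details[0]['shape'], input_details[0]['dtype'])
print('output:', output_details[0]['shape'], output_details[0]['dtype'])

# Feed a dummy tensor with exactly the shape/dtype the model reports.
dummy = np.random.rand(*input_details[0]['shape']).astype(input_details[0]['dtype'])
interpreter.set_tensor(input_details[0]['index'], dummy)
interpreter.invoke()
boxes = interpreter.get_tensor(output_details[0]['index'])
print('raw detections:', boxes.shape)  # float32[N, 7]: batchno, classid, x1, y1, x2, y2, score
```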
Here is the full demo_goldyolo_tflite.py:

```
#!/usr/bin/env python

import copy
import time
from argparse import ArgumentParser
from typing import Tuple, Optional

import cv2
import numpy as np
from tflite_runtime.interpreter import Interpreter


class GoldYOLOONNX(object):
    # Class name kept from the original onnx demo this script was adapted from.
    def __init__(
        self,
        model_path: Optional[str] = 'gold_yolo_n_body_head_hand_post_0461_0.4428_1x3x128x160_float16.tflite',
        class_score_th: Optional[float] = 0.35,
        num_threads: Optional[int] = 4,
    ):
        """GoldYOLO tflite wrapper

        Parameters
        ----------
        model_path: Optional[str]
            tflite file path for GoldYOLO
        class_score_th: Optional[float]
            Score threshold. Default: 0.35
        num_threads: Optional[int]
            Number of CPU threads for the tflite interpreter. Default: 4
        """
        self.interpreter = Interpreter(model_path=model_path, num_threads=num_threads)
        self.interpreter.allocate_tensors()

        # Threshold
        self.class_score_th = class_score_th

        # Model I/O details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.input_dtype = self.input_details[0]['dtype']
        self.input_shapes = self.input_details[0]['shape']

    def __call__(
        self,
        image: np.ndarray,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Run inference on an entire image.

        Parameters
        ----------
        image: np.ndarray
            Entire image

        Returns
        -------
        boxes: np.ndarray
            Predicted boxes: [N, x1, y1, x2, y2]
        scores: np.ndarray
            Predicted box scores: [N, score]
        """
        temp_image = copy.deepcopy(image)

        # PreProcess
        resized_image = self.__preprocess(temp_image)

        # Inference
        inference_image = np.asarray([resized_image], dtype=self.input_dtype)
        self.interpreter.set_tensor(self.input_details[0]['index'], inference_image)
        self.interpreter.invoke()
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])

        # PostProcess
        result_boxes, result_scores = self.__postprocess(
            image=temp_image,
            boxes=boxes,
        )
        return result_boxes, result_scores

    def __preprocess(
        self,
        image: np.ndarray,
        swap: Optional[Tuple[int, int, int]] = (0, 1, 2),
    ) -> np.ndarray:
        """Resize, normalize and reorder an image for the model.

        Parameters
        ----------
        image: np.ndarray
            Entire image
        swap: tuple
            HWC to CHW: (2,0,1)
            CHW to HWC: (1,2,0)
            HWC to HWC: (0,1,2)
            CHW to CHW: (0,1,2)

        Returns
        -------
        resized_image: np.ndarray
            Resized and normalized image.
        """
        # Resize to the model input size, normalize to [0,1], BGR -> RGB
        resized_image = cv2.resize(
            image,
            (
                int(self.input_shapes[2]),
                int(self.input_shapes[1]),
            )
        )
        resized_image = np.divide(resized_image, 255.0)
        resized_image = resized_image[..., ::-1]
        resized_image = resized_image.transpose(swap)
        resized_image = np.ascontiguousarray(resized_image, dtype=np.float32)
        return resized_image

    def __postprocess(
        self,
        image: np.ndarray,
        boxes: np.ndarray,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Filter detections by score and scale boxes back to the image size.

        Parameters
        ----------
        image: np.ndarray
            Entire image.
        boxes: np.ndarray
            float32[N, 7]

        Returns
        -------
        result_boxes: np.ndarray
            Predicted boxes: [N, x1, y1, x2, y2]
        result_scores: np.ndarray
            Predicted box confs: [N, score]
        """
        image_height = image.shape[0]
        image_width = image.shape[1]

        # Detector output:
        #   N -> number of boxes detected
        #   batchno -> always 0 (batch No.0)
        #   batchno_classid_x1y1x2y2_score: float32[N, 7]
        result_boxes = []
        result_scores = []
        if len(boxes) > 0:
            scores = boxes[:, 6:7]
            keep_idxs = scores[:, 0] > self.class_score_th
            scores_keep = scores[keep_idxs, :]
            boxes_keep = boxes[keep_idxs, :]
            if len(boxes_keep) > 0:
                for box, score in zip(boxes_keep, scores_keep):
                    class_id = int(box[1])
                    # Clip to the model input frame, then scale to the original image
                    x_min = int(max(box[2], 0) * image_width / self.input_shapes[2])
                    y_min = int(max(box[3], 0) * image_height / self.input_shapes[1])
                    x_max = int(min(box[4], self.input_shapes[2]) * image_width / self.input_shapes[2])
                    y_max = int(min(box[5], self.input_shapes[1]) * image_height / self.input_shapes[1])
                    result_boxes.append([x_min, y_min, x_max, y_max, class_id])
                    result_scores.append(score)
        return np.asarray(result_boxes), np.asarray(result_scores)


def is_parsable_to_int(s):
    try:
        int(s)
        return True
    except ValueError:
        return False


def main():
    parser = ArgumentParser()
    parser.add_argument(
        '-th',
        '--num_threads',
        type=int,
        default=4,
    )
    parser.add_argument(
        '-m',
        '--model',
        type=str,
        default='gold_yolo_n_body_head_hand_post_0461_0.4428_1x3x128x160_float16.tflite',
    )
    parser.add_argument(
        '-v',
        '--video',
        type=str,
        default="0",
    )
    args = parser.parse_args()

    model = GoldYOLOONNX(
        model_path=args.model,
        num_threads=args.num_threads,
    )

    cap = cv2.VideoCapture(
        int(args.video) if is_parsable_to_int(args.video) else args.video
    )
    cap_fps = cap.get(cv2.CAP_PROP_FPS)
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    video_writer = cv2.VideoWriter(
        filename='output.mp4',
        fourcc=fourcc,
        fps=cap_fps,
        frameSize=(w, h),
    )

    while cap.isOpened():
        res, image = cap.read()
        if not res:
            break

        debug_image = copy.deepcopy(image)

        # Inference, timed per frame
        start_time = time.perf_counter()
        boxes, scores = model(debug_image)
        elapsed_time = time.perf_counter() - start_time

        # Draw the inference time (white outline, red fill)
        cv2.putText(
            debug_image,
            f'{elapsed_time*1000:.2f} ms',
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (255, 255, 255),
            2,
            cv2.LINE_AA,
        )
        cv2.putText(
            debug_image,
            f'{elapsed_time*1000:.2f} ms',
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (0, 0, 255),
            1,
            cv2.LINE_AA,
        )

        # Draw each detection: white outline plus a class-dependent inner color
        for box, score in zip(boxes, scores):
            classid: int = box[4]
            color = (255, 255, 255)
            if classid == 0:
                color = (255, 0, 0)
            elif classid == 1:
                color = (0, 0, 255)
            elif classid == 2:
                color = (0, 255, 0)

            cv2.rectangle(
                debug_image,
                (box[0], box[1]),
                (box[2], box[3]),
                (255, 255, 255),
                2,
            )
            cv2.rectangle(
                debug_image,
                (box[0], box[1]),
                (box[2], box[3]),
                color,
                1,
            )
            # Score label, kept inside the frame near the top-left of the box
            cv2.putText(
                debug_image,
                f'{score[0]:.2f}',
                (
                    box[0] if box[0] + 50 < debug_image.shape[1] else debug_image.shape[1] - 50,
                    box[1] - 10 if box[1] - 25 > 0 else 20,
                ),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                (255, 255, 255),
                2,
                cv2.LINE_AA,
            )
            cv2.putText(
                debug_image,
                f'{score[0]:.2f}',
                (
                    box[0] if box[0] + 50 < debug_image.shape[1] else debug_image.shape[1] - 50,
                    box[1] - 10 if box[1] - 25 > 0 else 20,
                ),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                color,
                1,
                cv2.LINE_AA,
            )

        cv2.imshow("test", debug_image)
        video_writer.write(debug_image)

        key = cv2.waitKey(1)
        if key == 27:  # ESC
            break

    if video_writer:
        video_writer.release()
    if cap:
        cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
```
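To see how much the thread count matters on your own Pi, a rough timing loop like this can compare 1 to 4 threads (a hedged sketch: it imports the class from demo_goldyolo_tflite.py above and uses a random frame in place of camera input, so absolute numbers will differ from live video):

```
# Rough per-thread-count timing, reusing the class from demo_goldyolo_tflite.py.
import time
import numpy as np
from demo_goldyolo_tflite import GoldYOLOONNX

# A random 480x640 BGR frame stands in for camera input.
frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)

for n in range(1, 5):
    model = GoldYOLOONNX(
        model_path='gold_yolo_n_body_head_hand_post_0461_0.4428_1x3x128x160_float32.tflite',
        num_threads=n,
    )
    model(frame)  # warm-up run, excluded from timing
    times = []
    for _ in range(50):
        t0 = time.perf_counter()
        model(frame)
        times.append(time.perf_counter() - t0)
    # Median of repeated calls smooths out scheduler noise.
    print(f'{n} thread(s): {np.median(times) * 1000:.2f} ms (median of 50 runs)')
```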