【SPRESENSE】Spresenseで矩形検出する【画像処理】

Spresenseのカメラ画像をAIで処理する事例は豊富にあるが、枯れた技術を使ってマーカー検出している事例が見られなかったのでやってみた。

画像取得
　グレースケール化が容易なCAM_IMAGE_PIX_FMT_YUV422フォーマットがおすすめ。
グレースケール化
　YUV422は1pixel当たり2byteの情報量を持っており、その2byte中のビッグエンディアンで左から3bit~11bit目にかけてを取り出すと8bit分解能の輝度データが抽出できる。
二値化
　輝度データが設定した閾値以下であれば0、閾値より大きければ1に変換する。
ラベリング
　ラベリングは、まとまった領域を数える的な作業のこと。輝度が1のとあるpixelの上下左右に、同じく輝度が1のpixelがあるか調べていき、隣り合うpixelの輝度が1のグループを作っていく感じの処理を行う。「C言語による画像処理プログラミング入門」という緑と黒の表紙の怪しい書籍を参考にした。サンプルプログラムを使用して解説しているので非常に分かりやすかった。
頂点検出
　頂点検出は各領域の水平垂直の最大最小値の４点と画像を45度回転させたときの４点を比較し、対角位置の頂点距離が大きいほうを選択するようにした。
矩形判定
　矩形判別には縦横比、充填率、面積を閾値に設定し、範囲外の領域は除外して最終的に残った領域が矩形であるというような方針をとっている。

Spresense本体用ソースコードをSpresenseに書き込み、描画＆動画保存用ソースコードをpythonで実行すると下記のような動画が得られる。
　ホワイトボードに貼り付けた黒い矩形を映したが、背景がマスクされ矩形のみがグレーで描画されていることが確認できる。また矩形頂点が黒く描画されており頂点が正しく検出できていることも確認できる。

ここにPOSTが表示されます

　動画のフレームレートはシリアル通信の速度がボトルネックで、頑張っても10fps程度、安定して動かすには5fpsくらいが限界のようだった。映像伝送しなければ画像処理自体は速く動くのでロボット制御には十分使えると考えられる。


cam_test.ino
#include <SDHCI.h>
#include <stdio.h>
#include <Camera.h>

// Baud rate handed to Serial.begin() in setup().
#define BAUDRATE                (2000000)

// Frame geometry requested from the camera. image_h is the first index and
// image_v the second index of image[][], label[][] and image_rot45table[][][].
#define image_h 96
#define image_v 64

// Byte size of the most recent camera frame, as reported in CamCB().
uint16_t image_size = 0;

// Rejection thresholds used by filterling_using_shape():
double th_aspe = 3.0;       // reject when long side / short side of the bounding box exceeds this
int th_area = 10;           // reject when the region's pixel count is <= this
int th_lmin = 5;            // reject when the shortest vertex-to-vertex distance is below this (pixels)
double th_fill_err = 0.2;   // reject when |1 - pixel count / vertex-polygon area| exceeds this

// Coefficient applied to x and y in affin_rot45() (sin 45 deg == cos 45 deg).
float rangle45deg = 0.70710678;
// Per-pixel rotated coordinates filled in by affin_rot45(): [x][y][0] = x', [x][y][1] = y'.
float image_rot45table[image_h][image_v][2];

// Flat, row-major working frame; also the payload written to Serial in loop().
uint8_t image_data[image_h * image_v];
// 2-D copy of image_data indexed as image[x][y].
uint8_t image[image_h][image_v];
// Not referenced by the code visible in this sketch.
uint8_t hist[256];

// Pointer to the latest camera frame buffer, stored by CamCB().
uint8_t* image_buf;
// SD card object; only referenced from commented-out code in setup().
SDClass  theSD;

// Timer period in seconds; multiplied by 1e6 when passed to attachTimerInterrupt().
float dt = 0.2;
// Running time accumulator advanced by dt in timer_1() and wrapped at 100.
float c_time = 0.0;

// Flags raised from the timer handler / camera callback and polled by loop().
// They are written outside the main loop's flow of control, so they are
// declared volatile to force every access to go to memory.
volatile bool f_timer = 0;
volatile bool f_cam = 0;

// Vertices of the most recently accepted rectangle, ordered by filterling_using_shape().
int posc[4][2];

// Connected-component label of every pixel (0 = unlabeled), indexed as label[x][y].
int label[image_h][image_v];


/**
 * One-time initialisation: opens the serial port, starts camera streaming
 * with CamCB as the frame callback, fills the 45-degree rotation table,
 * arms the periodic timer and clears the stored rectangle vertices.
 */
void setup() {
  Serial.begin(BAUDRATE);
  while (!Serial) {
    // Wait for the serial port to become ready.
  }

  /* SD card initialisation is disabled in this sketch:
   *   while (!theSD.begin())
   *   {
   *     // wait until SD card is mounted.
   *     Serial.println("Insert SD card.");
   *   }
   */

  Serial.println("Prepare camera");
  theCamera.begin(1, CAM_VIDEO_FPS_120, image_h, image_v, CAM_IMAGE_PIX_FMT_YUV422);
  theCamera.startStreaming(true, CamCB);

  affin_rot45();

  attachTimerInterrupt(timer_1, uint64_t(dt * 1000000));

  // No rectangle has been detected yet: zero all four stored vertices.
  for (int corner = 0; corner < 4; ++corner) {
    posc[corner][0] = 0;
    posc[corner][1] = 0;
  }
}

/**
 * Streaming callback registered with theCamera.startStreaming() in setup().
 * When the frame is available, records its buffer pointer and byte size and
 * raises f_cam so that loop() processes it. Unavailable frames are ignored.
 *
 * NOTE(review): only the pointer is stored, not a copy of the pixels; whether
 * the buffer remains valid after this callback returns depends on the Camera
 * library - confirm.
 */
void CamCB(CamImage img)
{
  if (!img.isAvailable()) {
    return;
  }

  image_buf = img.getImgBuff();
  image_size = img.getImgSize();
  f_cam = 1;
}

/**
 * Timer handler registered with attachTimerInterrupt() in setup().
 * Advances c_time by dt (wrapping to 0 once it exceeds 100), raises f_timer
 * for loop(), and returns dt expressed as dt * 1e6.
 * (Presumably the return value is the next timer interval in microseconds -
 * confirm against the attachTimerInterrupt documentation.)
 */
uint64_t timer_1() {
  c_time += dt;
  if (c_time > 100.0) c_time = 0.0;

  f_timer = 1;

  return uint64_t(dt * 1000000);
}

void loop() {
  if (f_timer == 1 && f_cam == 1) {

    gray_scale(image_buf, image_size);
    binarize(80);
    image_copy();
    filterling_using_shape();
    image_copy2();

    int i, j, k;
    int adder = 0;
    byte s_uchMsg[6];
    s_uchMsg[adder] = 0xa5;
    adder = adder + 1;
    s_uchMsg[adder] = 0x5a;
    adder = adder + 1;

    s_uchMsg[adder] = (byte)(image_h >> 8 & 0x00ff);
    adder = adder + 1;
    s_uchMsg[adder] = (byte)(image_h & 0x00ff);
    adder = adder + 1;

    s_uchMsg[adder] = (byte)(image_v >> 8 & 0x00ff);
    adder = adder + 1;
    s_uchMsg[adder] = (byte)(image_v & 0x00ff);
    adder = adder + 1;

    Serial.write(s_uchMsg, sizeof(s_uchMsg));
    Serial.write(image_data, sizeof(image_data));

    adder = 0;
    byte e_uchMsg[2];
    e_uchMsg[adder] = 0x0d;
    adder = adder + 1;
    e_uchMsg[adder] = 0x0a;
    adder = adder + 1;
    Serial.write(e_uchMsg, sizeof(e_uchMsg));

    f_timer = 0;
    f_cam = 0;
  }

}

/**
 * Converts a 2-bytes-per-pixel camera frame into the 1-byte-per-pixel
 * image_data buffer.
 *
 * For pixel i the second byte of its pair (img[2*i + 1]) is taken and its
 * low three bits are cleared, i.e. the upper five bits are kept. This is
 * the same value the previous 16-bit shift/mask sequence produced.
 *
 * @param img       Frame buffer; must hold at least 2 * sizeof(image_data) bytes.
 * @param img_size  Size of img in bytes.
 *
 * If img is null or img_size is too small for a full frame, nothing is read
 * from img and image_data is filled with 255 (white), so the later stages
 * find no dark regions instead of reading past the end of the buffer.
 */
void gray_scale(uint8_t* img, uint16_t img_size)
{
  const size_t pixel_count = sizeof(image_data);

  if (!img || (size_t)img_size < pixel_count * 2) {
    for (size_t i = 0; i < pixel_count; i++) {
      image_data[i] = 255;
    }
    return;
  }

  for (size_t i = 0; i < pixel_count; i++) {
    image_data[i] = img[i * 2 + 1] & 0xF8;
  }
}

/**
 * Thresholds image_data in place: values strictly greater than t become 255,
 * all others become 0.
 *
 * @param t  Threshold.
 */
void binarize(uint8_t t)
{
  uint8_t* const last = image_data + sizeof(image_data);
  for (uint8_t* px = image_data; px != last; ++px) {
    *px = (*px > t) ? 255 : 0;
  }
}

/**
 * Unpacks the flat, row-major image_data buffer into the 2-D array image,
 * which is indexed as image[x][y].
 */
void image_copy()
{
  for (int row = 0; row < image_v; ++row) {
    const uint8_t* src = image_data + row * image_h;
    for (int col = 0; col < image_h; ++col) {
      image[col][row] = src[col];
    }
  }
}
/**
 * Packs the 2-D array image (indexed as image[x][y]) back into the flat,
 * row-major image_data buffer.
 */
void image_copy2()
{
  for (int row = 0; row < image_v; ++row) {
    uint8_t* dst = image_data + row * image_h;
    for (int col = 0; col < image_h; ++col) {
      dst[col] = image[col][row];
    }
  }
}
void filterling_using_shape()
{
  int i, j, k, x, y, number;
  int xmin, xmax, ymin, ymax, area;
  float xminr, xmaxr, yminr, ymaxr;
  int pos0[4][2], pos1[4][2], pos2[4][2];
  double w, h, ratio, fill_rate;
  double w2, h2;

  number = labeling();

  if ( number > 0 ) {
    for (i = 1; i <= number; i++) {
      xmin = image_h - 1;  ymin = image_v - 1;
      xmax = 0;           ymax = 0;

      xminr = image_rot45table[image_h - 1][0][0];  yminr = image_rot45table[image_h - 1][image_v - 1][1];
      xmaxr = image_rot45table[0][image_v - 1][0];  ymaxr = image_rot45table[0][0][1];

      for (j = 0; j < 4; j++) {
        for (k = 0; k < 2; k++) {
          pos0[j][k] = 0;
          pos1[j][k] = 0;
          pos2[j][k] = 0;
        }
      }

      for (y = 0; y < image_v; y++)
        for (x = 0; x < image_h; x++)
          if (label[x][y] == i) {
            if (x < xmin) {
              xmin = x;
              pos1[0][0] = x;
              pos1[0][1] = y;
            }
            if (x > xmax) {
              xmax = x;
              pos1[1][0] = x;
              pos1[1][1] = y;
            }
            if (y < ymin) {
              ymin = y;
              pos1[2][0] = x;
              pos1[2][1] = y;
            }
            if (y > ymax) {
              ymax = y;
              pos1[3][0] = x;
              pos1[3][1] = y;
            }

            if (image_rot45table[x][y][0] < xminr) {
              xminr = image_rot45table[x][y][0];
              pos2[0][0] = x;
              pos2[0][1] = y;
            }
            if (image_rot45table[x][y][0] > xmaxr) {
              xmaxr = image_rot45table[x][y][0];
              pos2[1][0] = x;
              pos2[1][1] = y;
            }
            if (image_rot45table[x][y][1] < yminr) {
              yminr = image_rot45table[x][y][1];
              pos2[2][0] = x;
              pos2[2][1] = y;
            }
            if (image_rot45table[x][y][1] > ymaxr) {
              ymaxr = image_rot45table[x][y][1];
              pos2[3][0] = x;
              pos2[3][1] = y;
            }
          }
      w = xmax - xmin + 1.0;
      h = ymax - ymin + 1.0;
      w2 = xmaxr - xminr + 1.0;
      h2 = ymaxr - yminr + 1.0;
      if ( w < h ) {
        ratio = h / w;
        if ( w2 < h2 ) {
          if ( h2 > h ) {
            for (j = 0; j < 4; j++) {
              for (k = 0; k < 2; k++) {
                pos0[j][k] = pos2[j][k];
              }
            }
          } else {
            for (j = 0; j < 4; j++) {
              for (k = 0; k < 2; k++) {
                pos0[j][k] = pos1[j][k];
              }
            }
          }
        } else {
          if ( w2 > h ) {
            for (j = 0; j < 4; j++) {
              for (k = 0; k < 2; k++) {
                pos0[j][k] = pos2[j][k];
              }
            }
          } else {
            for (j = 0; j < 4; j++) {
              for (k = 0; k < 2; k++) {
                pos0[j][k] = pos1[j][k];
              }
            }
          }
        }
      } else {
        ratio = w / h;
        if ( w2 < h2 ) {
          if ( h2 > w ) {
            for (j = 0; j < 4; j++) {
              for (k = 0; k < 2; k++) {
                pos0[j][k] = pos2[j][k];
              }
            }
          } else {
            for (j = 0; j < 4; j++) {
              for (k = 0; k < 2; k++) {
                pos0[j][k] = pos1[j][k];
              }
            }
          }
        } else {
          if ( w2 > w ) {
            for (j = 0; j < 4; j++) {
              for (k = 0; k < 2; k++) {
                pos0[j][k] = pos2[j][k];
              }
            }
          } else {
            for (j = 0; j < 4; j++) {
              for (k = 0; k < 2; k++) {
                pos0[j][k] = pos1[j][k];
              }
            }
          }
        }
      }
      area = 0;
      for (y = 0; y < image_v; y++) {
        for (x = 0; x < image_h; x++) {
          if (label[x][y] == i) {
            area++;
          }
        }
      }
      float area_p = 0.0;
      area_p = 0.5 * (pos0[0][0] - pos0[2][0]) * (pos0[0][1] + pos0[2][1]);
      area_p = area_p + 0.5 * (pos0[2][0] - pos0[1][0]) * (pos0[2][1] + pos0[1][1]);
      area_p = area_p + 0.5 * (pos0[1][0] - pos0[3][0]) * (pos0[1][1] + pos0[3][1]);
      area_p = area_p + 0.5 * (pos0[3][0] - pos0[0][0]) * (pos0[3][1] + pos0[0][1]);
      area_p = abs(area_p);

      fill_rate = area / area_p;

      int lmin = image_h * image_h;
      for (j = 0; j < 4; j++) {
        for (k = 0; k < 4; k++) {
          int lsqrt = (pos0[j][0] - pos0[k][0]) * (pos0[j][0] - pos0[k][0]) + (pos0[j][1] - pos0[k][1]) * (pos0[j][1] - pos0[k][1]);
          if (j != k) {
            if (lsqrt < lmin) {
              lmin = lsqrt;
            }
          }
        }
      }
      lmin = sqrt(lmin);

      if ( ratio > th_aspe || area <= th_area || abs(1.0 - fill_rate) > th_fill_err || lmin < th_lmin) {
        for (y = 0; y < image_v; y++) {
          for (x = 0; x < image_h; x++) {
            if (label[x][y] == i ) {
              image[x][y] = 255;//白く描画
            }
          }
        }
      } else {
        for (y = 0; y < image_v; y++) {
          for (x = 0; x < image_h; x++) {
            if (label[x][y] == i ) {
              image[x][y] = 100;//グレーで描画
            }
          }
        }
        //矩形頂点は黒く描画
        image[pos0[0][0]][pos0[0][1]] = 0;
        image[pos0[1][0]][pos0[1][1]] = 0;
        image[pos0[2][0]][pos0[2][1]] = 0;
        image[pos0[3][0]][pos0[3][1]] = 0;

        posc[0][0] = pos0[0][0];
        posc[0][1] = pos0[0][1];

        posc[1][0] = pos0[1][0];
        posc[1][1] = pos0[1][1];

        posc[2][0] = pos0[2][0];
        posc[2][1] = pos0[2][1];

        posc[3][0] = pos0[3][0];
        posc[3][1] = pos0[3][1];

        int tmpx;
        int tmpy;
        for (j = 0; j < 4; ++j) {
          for (k = j + 1; k < 4; ++k) {
            if (posc[j][0] > posc[k][0]) {
              tmpx =  posc[j][0];
              tmpy =  posc[j][1];
              posc[j][0] = posc[k][0];
              posc[j][1] = posc[k][1];
              posc[k][0] = tmpx;
              posc[k][1] = tmpy;
            }
          }
        }

        if (posc[0][1] > posc[1][1]) {
          tmpx =  posc[1][0];
          tmpy =  posc[1][1];
          posc[1][0] = posc[0][0];
          posc[1][1] = posc[0][1];
          posc[0][0] = tmpx;
          posc[0][1] = tmpy;
        }
        if (posc[3][1] > posc[2][1]) {
          tmpx =  posc[2][0];
          tmpy =  posc[2][1];
          posc[2][0] = posc[3][0];
          posc[2][1] = posc[3][1];
          posc[3][0] = tmpx;
          posc[3][1] = tmpy;
        }
      }
    }
  }
}

/**
 * Fills image_rot45table with the rotated coordinates of every pixel:
 *   [x][y][0] = x * rangle45deg - y * rangle45deg
 *   [x][y][1] = x * rangle45deg + y * rangle45deg
 */
void affin_rot45() {
  for (int row = 0; row < image_v; ++row) {
    for (int col = 0; col < image_h; ++col) {
      float (&rotated)[2] = image_rot45table[col][row];
      rotated[0] = (float)col * rangle45deg - (float)row * rangle45deg;
      rotated[1] = (float)col * rangle45deg + (float)row * rangle45deg;
    }
  }
}

/**
 * Replaces every occurrence of label value num1 in the label array by num2.
 *
 * @param num1  Label value to replace.
 * @param num2  Replacement label value.
 */
void modify_label( int num1, int num2)
{
  int* cell = &label[0][0];
  int* const last = cell + image_h * image_v;
  for (; cell != last; ++cell) {
    if (*cell == num1) {
      *cell = num2;
    }
  }
}

/**
 * Returns the largest label among the up, left, down and right neighbours of
 * pixel (x, y), skipping neighbours that fall outside the image. Returns 0
 * when no neighbour carries a positive label.
 *
 * @param x  Pixel column (first index of label).
 * @param y  Pixel row (second index of label).
 */
int search_4neighbors( int x, int y )
{
  int best = 0;

  if (y > 0) {
    best = (label[x][y - 1] > best) ? label[x][y - 1] : best;
  }
  if (x > 0) {
    best = (label[x - 1][y] > best) ? label[x - 1][y] : best;
  }
  if (y < image_v - 1) {
    best = (label[x][y + 1] > best) ? label[x][y + 1] : best;
  }
  if (x < image_h - 1) {
    best = (label[x + 1][y] > best) ? label[x + 1][y] : best;
  }

  return best;
}

/**
 * Labels the pixels of image whose value is 0 using 4-neighbour adjacency.
 *
 * Pass 1 gives each such pixel the largest label found among its neighbours,
 * or a fresh label when none of them is labeled yet.
 * Pass 2 visits every labeled pixel and, when a neighbour carries a larger
 * label, rewrites that larger label to the pixel's own label.
 * Pass 3 renumbers the labels that remain to 1..N in scan order.
 *
 * @return N, the number of labels after renumbering (0 when nothing was labeled).
 */
int labeling()
{
  // Start from a clean label map.
  for (int row = 0; row < image_v; ++row) {
    for (int col = 0; col < image_h; ++col) {
      label[col][row] = 0;
    }
  }

  // Pass 1: provisional labels.
  int count = 0;
  for (int row = 0; row < image_v; ++row) {
    for (int col = 0; col < image_h; ++col) {
      if (image[col][row] != 0 || label[col][row] != 0) {
        continue;
      }
      const int neighbour = search_4neighbors(col, row);
      label[col][row] = (neighbour == 0) ? ++count : neighbour;
    }
  }

  if (count <= 0) {
    return 0;
  }

  // Pass 2: fold larger neighbouring labels into the current pixel's label.
  for (int row = 0; row < image_v; ++row) {
    for (int col = 0; col < image_h; ++col) {
      const int own = label[col][row];
      if (own == 0) {
        continue;
      }
      const int neighbour = search_4neighbors(col, row);
      if (neighbour > own) {
        modify_label(neighbour, own);
      }
    }
  }

  // Pass 3: renumber the remaining labels consecutively.
  int new_count = 0;
  for (int row = 0; row < image_v; ++row) {
    for (int col = 0; col < image_h; ++col) {
      if (label[col][row] > new_count) {
        new_count++;
        modify_label(label[col][row], new_count);
      }
    }
  }
  return new_count;
}


main.py
import serial
import datetime
import numpy as np
import time
import cv2
from PIL import Image
from matplotlib import pyplot as plt
# Serial connection to the board; the baud rate matches BAUDRATE in the sketch.
con = serial.Serial('COM6', 2000000)

print(con.portstr)

# Bytes received so far that have not yet been consumed as a frame.
data_buf = []
img_h, img_v = 96, 64
# Full frame length: 6 header bytes + pixel payload + 2 trailer bytes.
readlen = img_h * img_v + 8
data_img = np.zeros((img_v, img_h, 3), dtype=np.uint8)

frame_rate = 5  # frame rate of the recorded video
size = (img_h, img_v)  # frame size of the recorded video
fmt = cv2.VideoWriter_fourcc(*'mp4v')
codec = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
fourcc = cv2.VideoWriter_fourcc(*'MJPG')
writer = cv2.VideoWriter('output.mp4', fmt, frame_rate, size)
# Number of frames written so far; main() stops once it exceeds 100.
counter = 0
fig_num = 0
def main():
    """Receive frames until more than 100 have been recorded, then clean up.

    The serial port and the video writer are released in a ``finally`` block
    so that they are also closed when ``serial_read`` raises or the user
    interrupts the program; otherwise the port stays open and the video file
    is never finalised.
    """
    try:
        while True:
            serial_read()
            if counter > 100:
                break
    finally:
        con.close()
        writer.release()

def serial_read():
    """Read one line from the serial port and, if a full frame is buffered,
    display and record it.

    Frame layout (as sent by the sketch's ``loop()``)::

        0xa5 0x5a | width (2 bytes, big-endian) | height (2 bytes, big-endian)
        | width*height gray bytes, row-major | 0x0d 0x0a

    Received bytes are appended to ``data_buf``. The buffer is scanned for the
    first header whose complete frame (including the trailer) is present; that
    frame is shown with ``cv2.imshow``, appended to ``writer``, and everything
    up to and including its trailer is removed from ``data_buf``. ``img_h``,
    ``img_v``, ``fig_num`` and ``counter`` are updated only for such a frame.

    Header candidates that are truncated, have a zero dimension, or lack the
    trailer are skipped instead of raising ``IndexError``.
    """
    global con
    global data_buf,img_h,img_v
    global writer,counter,fig_num

    header_len = 6   # 2 sync bytes + 2 width bytes + 2 height bytes
    trailer_len = 2  # 0x0d 0x0a

    data = con.readline()
    if len(data) == 0:
        return
    data_buf.extend(data)

    # Nothing to parse until at least one nominal frame has been buffered.
    if len(data_buf) < readlen:
        return

    for i in range(len(data_buf) - header_len + 1):
        if data_buf[i] != 0xa5 or data_buf[i + 1] != 0x5a:
            continue

        width = int.from_bytes(bytes(data_buf[i + 2:i + 4]), byteorder='big')
        height = int.from_bytes(bytes(data_buf[i + 4:i + 6]), byteorder='big')
        if width == 0 or height == 0:
            continue

        payload_start = i + header_len
        payload_end = payload_start + width * height
        frame_end = payload_end + trailer_len
        if frame_end > len(data_buf):
            # Frame not fully received yet (or a false header match).
            continue
        if data_buf[payload_end] != 0x0d or data_buf[payload_end + 1] != 0x0a:
            continue

        img_h, img_v = width, height
        print([img_h,img_v])

        # Build an (img_v, img_h, 3) image with the gray value in every channel.
        gray = np.array(data_buf[payload_start:payload_end], dtype=np.uint8)
        gray = gray.reshape(img_v, img_h)
        data_img = np.repeat(gray[:, :, np.newaxis], 3, axis=2)

        # Drop everything up to and including this frame's trailer.
        del data_buf[:frame_end]

        print(type(data_img))
        fig_num = fig_num + 1
        cv2.imshow('frame', data_img)
        writer.write(data_img)
        cv2.waitKey(1)
        counter = counter + 1
        break

# Run the receive/record loop only when this file is executed as a script.
if __name__ == '__main__':
    main()