Tensorflow版Faster RCNN源码解析（TFFRCNN）（2） test.py（不使用RPN时）（含ndarray数组复杂的切片操作等）...-白红宇

Tensorflow版Faster RCNN源码解析（TFFRCNN）（2） test.py（不使用RPN时）（含ndarray数组复杂的切片操作等）...

阅读量：5153 次

发布时间：2019-06-13

本文共 13921 字，大约阅读时间需要 46 分钟。

本blog为github上版源码解析系列代码笔记

---------------个人学习笔记---------------

----------------本文作者吴疆--------------

------------

1.当不通过RPN获得roi需要做如下修改：

demo.py中调用demo()时需传入boxes参数

调用im_detect()时需传入boxes参数，否则boxes默认为None

cfg.TEST.HAS_RPN改为False

_get_blobs(im, boxes)中_get_rois_blob(rois,cfg.TEST.SCALES_BASE)的cfg.TEST.SCALES_BASE改为im_scale_factors

修改VGGnet_test.py网络文件：self.im_info改为self.rois，其占位符shape改为[None, 5]，将self.layers字典中‘im_info’均改为‘rois’，删除网络与RPN相关的层。

import tensorflow as tf

from .network import Network
from ..fast_rcnn.config import cfg


class VGGnet_test(Network):
    """VGG16 test-time network that takes externally supplied RoIs (no RPN).

    Compared with the RPN variant, the ``im_info`` input is replaced by a
    ``rois`` placeholder of shape [None, 5] and every RPN-related layer
    (rpn_conv/3x3, rpn_cls_score, rpn_bbox_pred, rpn_cls_score_reshape,
    rpn_cls_prob, rpn_cls_prob_reshape and the proposal layer that used to
    produce 'rois') is removed.
    """

    def __init__(self, trainable=True):
        self.inputs = []
        # Image batch placeholder: [None, None, None, 3].
        self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3])
        # RoIs are fed directly instead of being produced by a proposal
        # layer; 5 values per row.
        self.rois = tf.placeholder(tf.float32, shape=[None, 5])
        self.keep_prob = tf.placeholder(tf.float32)
        self.layers = {'data': self.data, 'rois': self.rois}
        self.trainable = trainable
        self.setup()

    def setup(self):
        """Build the conv backbone and the Fast R-CNN head on top of 'rois'."""
        # Number of classes comes from the config (previously hard-coded 21).
        n_classes = cfg.NCLASSES

        # Backbone: conv1_x and conv2_x are created with trainable=False.
        (self.feed('data')
         .conv(3, 3, 64, 1, 1, name='conv1_1', trainable=False)
         .conv(3, 3, 64, 1, 1, name='conv1_2', trainable=False)
         .max_pool(2, 2, 2, 2, padding='VALID', name='pool1')
         .conv(3, 3, 128, 1, 1, name='conv2_1', trainable=False)
         .conv(3, 3, 128, 1, 1, name='conv2_2', trainable=False)
         .max_pool(2, 2, 2, 2, padding='VALID', name='pool2')
         .conv(3, 3, 256, 1, 1, name='conv3_1')
         .conv(3, 3, 256, 1, 1, name='conv3_2')
         .conv(3, 3, 256, 1, 1, name='conv3_3')
         .max_pool(2, 2, 2, 2, padding='VALID', name='pool3')
         .conv(3, 3, 512, 1, 1, name='conv4_1')
         .conv(3, 3, 512, 1, 1, name='conv4_2')
         .conv(3, 3, 512, 1, 1, name='conv4_3')
         .max_pool(2, 2, 2, 2, padding='VALID', name='pool4')
         .conv(3, 3, 512, 1, 1, name='conv5_1')
         .conv(3, 3, 512, 1, 1, name='conv5_2')
         .conv(3, 3, 512, 1, 1, name='conv5_3'))

        # Detection head: the 'rois' input is consumed here directly, in the
        # place where the RPN proposal layer output used to be fed.
        (self.feed('conv5_3', 'rois')
         .roi_pool(7, 7, 1.0 / 16, name='pool_5')
         .fc(4096, name='fc6')
         .fc(4096, name='fc7')
         .fc(n_classes, relu=False, name='cls_score')
         .softmax(name='cls_prob'))

        (self.feed('fc7')
         .fc(n_classes * 4, relu=False, name='bbox_pred'))

View Code

2.test.py中im_detect(sess, net, im, boxes=None)调用函数执行顺序（其中，形参im应为BGR顺序）

def im_detect(sess, net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        sess: session whose run() evaluates the network outputs
        net: Fast R-CNN network to use (the upstream docstring says
            caffe.Net; here it is the network object providing get_output())
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals; defaults to None

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """

_get_blobs(im, boxes)获取blobs数据字典（含data、rois字段，data与rois均经过缩放）与图像缩放因子数组im_scales（默认不使用图像金字塔，则数组仅含一个元素）。随后（仍在im_detect()中）还对所有的blobs['rois']进行去重（判断映射到主干网络产生的feature map相同位置的），更新blobs['rois']（按缩放因子缩放了的，去重后为R2*5，其中第1列为全0的level，R2 <= R）和boxes(原始传入的boxes,未被缩放，去重后为R2*4)

--->forward pass前向传播，构造feed_dict字典（含net.data即blobs['data']、net.rois即blobs['rois']、net.keep_prob为1.0，与VGGnet_test.py的输入对应）并通过运行sess传入

--->通过调用（network.py中）net.get_output()得到cls_score(cfg.TEST.SVM为True时的得分，即不经过softmax)、cls_prob(默认cfg.TEST.SVM为False，经过softmax处理的最终得分)、bbox_pred坐标补偿量、rois（维度分别为R2*n_classes、R2*n_classes、R2*(n_classes*4)、R2*5（R2 <= R），对应于VGGnet_test.py相应层输出）

--->默认cfg.TEST.BBOX_REG为True调用bbox_transform_inv(boxes,box_deltas)（bbox_transform.py中）回归boxes得到pred_boxes

--->调用_clip_boxes(pred_boxes,im.shape)将越界的（回归后的boxes）pred_boxes改为图像边界得到更新的pred_boxes

--->更新scores和pred_boxes并返回，此时score维度由R2*n_classes变回R*n_classes，pred_boxes维度也由R2*(n_classes*4)变为R*(n_classes*4)（R为最初的boxes数量）即说明对于映射到feature map相同位置的原始boxes，其最终回归为拥有相同score的同一个box。说明返回的scores和pred_boxes中有重复的元素（且仍然为最初boxes的排序，具体可参考np.unique()函数）（未明白为何这样设计，只统计unique的感觉更好）

# When mapping from image ROIs to feature map ROIs, there's some aliasing    # (some distinct image ROIs get mapped to the same feature ROI).    # Here, we identify duplicate feature ROIs, so we only compute features    # on the unique subset.    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:   #1/16          v = np.array([1, 1e3, 1e6, 1e9, 1e12])        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)        _, index, inv_index = np.unique(hashes, return_index=True,                                        return_inverse=True)        blobs['rois'] = blobs['rois'][index, :]        boxes = boxes[index, :]

cls_score, cls_prob, bbox_pred, rois = \        sess.run([net.get_output('cls_score'), net.get_output('cls_prob'), net.get_output('bbox_pred'),net.get_output('rois')],\                 feed_dict=feed_dict)

if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:        # Map scores and predictions back to the original set of boxes        scores = scores[inv_index, :]        pred_boxes = pred_boxes[inv_index, :]

# -*- coding:utf-8 -*-
# Author: WUJiang
# Demo: what np.unique returns with return_index / return_inverse.
import numpy as np

# 1-D on purpose: in im_detect `hashes` is the result of (R x 5).dot(5,),
# i.e. one hash per RoI. With a 1-D input the printed shapes below are the
# same on every NumPy version.
hashes = np.array([4, 1, 6, 7, 1])
_, index, inv_index = np.unique(hashes, return_index=True,
                                return_inverse=True)
print(inv_index)
# _          [1 4 6 7]      sorted unique values
# index      [1 0 2 3]      first position of each unique value in `hashes`
# inv_index  [1 0 2 3 0]    position in `_` of every element of `hashes`

View Code

_为去重并排序后的数组、index为_中元素在原数组中第一次出现时的索引构成的数组、inv_index为原数组元素在_中的对应索引构成的数组。

3._get_blobs(im, rois)函数的执行逻辑(rois即为传入的boxes)

调用_get_image_blob(im)获取blobs['data']（[None, None, None,3]，与VGGnet_test.py中data占位符对应）和im_scale_factors缩放因子数组（即im_scales）,调用_get_rois_blob(rois,im_scale_factors)得到blobs['rois']（R*5）返回blobs和im_scale_factors

def _get_image_blob(im):
    """Converts an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (ndarray): image scales (relative to im) used in
            the image pyramid (the upstream docstring says "list", but the
            value is returned as an array)
    """

im--->减去像素均值操作--->判断以长边（1000）/短边（600）缩放--->双线性插值对图像进行缩放im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,interpolation=cv2.INTER_LINEAR)--->

blob=im_list_to_blob(processed_ims)（blob.py中）将（图像金字塔处理后得到的）图像列表转化为blob数据（即blobs['data']）返回blob和np.array(im_scale_factors)缩放因子数组（不使用图像金字塔时processed_ims和im_scale_factors均只含一个元素）

def _get_rois_blob(im_rois, im_scale_factors):
    """Converts RoIs into network inputs.

    Arguments:
        im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates
        im_scale_factors (ndarray): scale factors as returned by
            _get_image_blob (the upstream docstring says "list")

    Returns:
        blob (ndarray): R x 5 matrix of RoIs in the image pyramid
    """

调用_project_im_rois(im_rois, im_scale_factors)得到rois和levels，拼接rois和levels构成rois_blob(即blobs['rois'],shape为R*5，5=1level+4坐标，默认不使用图像金字塔level均为0)

def _project_im_rois(im_rois, scales):
    """Project image RoIs into the image pyramid built by _get_image_blob.

    Arguments:
        im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates
        scales (ndarray): scale factors as returned by _get_image_blob
            (the upstream docstring says "list")

    Returns:
        rois (ndarray): R x 4 matrix of projected RoI coordinates
        levels (ndarray): image pyramid levels used by each projected RoI
            (the upstream docstring says "list")
    """

rois = im_rois * scales[levels] rois按缩放因子进行缩放返回rois和levels

# -*- coding:utf-8 -*-
# Author: WUJiang
# Demo: indexing a scale array with a (R, 1) `levels` array and broadcasting.
import numpy as np

im_rois = np.array([
    [1, 2, 4, 5],
    [2, 3, 7, 8],
])
scale = np.array([0.6])
levels = np.array([
    [0],
    [0],
])

# Indexing with an integer array of shape (2, 1) yields shape (2, 1):
# [[0.6]
#  [0.6]]
print(scale[levels])

# The (2, 1) scales broadcast across the 4 coordinates of every RoI:
# [[0.6 1.2 2.4 3. ]
#  [1.2 1.8 4.2 4.8]]
rois = im_rois * scale[levels]
print(rois)

View Code

4.test_net(sess, net, imdb, weights_filename, max_per_image = 300即top-300个proposal, thresh = 0.05即得分阈值, vis = False)的执行逻辑（被test_net.py调用）

def test_net(sess, net, imdb, weights_filename , max_per_image=300, thresh=0.05, vis=False):
    # Called by test_net.py.
    # weights_filename: model file name without its extension,
    # e.g. something like vggnet...70000.
    """Test a Fast R-CNN network on an image database."""

构造all_boxes列表，其中all_boxes[cls_ind类别索引][im_id图像索引]为该类的R*5的检测结果det，即R*（x1,y1,x2,y2,score） ( 其中图像共num_images张，num_images=len(imdb.image_index) )

--->调用get_output_dir(imdb,weights_filename)(config.py中)得到输出对应数据集测试结果pkl的部分绝对路径（需+detection.pkl）

--->构造计时器字典_t(含im_detect和misc字段，分别创建了一个Timer对象)

--->(不使用RPN时)roidb=imdb.roidb(维度和内容不明，imdb类)--->得到测试结果pkl文件全路径det_file（如output/faster_rcnn_voc_vgg/voc_2007_test/VGGnet_fast_rcnn_iter_6020/detections.pkl）

--->i循环遍历每一张图像 box_proposal = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0](维度和内容不明,使得含义不明)只保留gt_classes为0的boxes，即剔除为gt的rois，只评估非gt的proposal的效果（因为roidb有可能来源于training or val split，其中会含有gt的rois）

--->读取图像im--->调用im_detect(sess, net, im, box_proposal)返回scores和boxes,前后时间戳相减得到耗时detect_time

--->对于每张图像，从1~imdb.num_classes j循环遍历各个类别（0为背景），取出该类score得分超过thresh阈值的boxes拼接成cls_det（None,5）,类内执行NMS(nms是C编译的.so),更新cls_det，将cls_det填入 all_boxes对应位置all_boxes[j][i]

--->每张图像最多保留包含所有类别的max_per_image(默认为300，即top-300)个得分最高的boxes，更新all_boxes[j][i]，im_detect()后的处理均纳入NMS耗时nms_time--->打印相关测试信息

--->两层循环结束后，利用cPickle向det_file中写入all_boxes检测结果--->调用imdb.evaluate_detection(all_boxes, output_dir)(lib.datasets.imdb.pascal_voc.py中)评估算法精度（如计算AP值）、按类存储相关检测结果

# -*- coding:utf-8 -*-
# Author: WUJiang
# Demo: layout of the nested all_boxes list used by test_net.
# all_boxes[cls][image] = N x 5 array of detections in (x1, y1, x2, y2, score)
all_boxes = [[[] for _ in range(5)]  # num_images
             for _ in range(4)]  # num_classes

# Four classes, each holding five independent empty per-image slots:
# [[[], [], [], [], []],
#  [[], [], [], [], []],
#  [[], [], [], [], []],
#  [[], [], [], [], []]]
print(all_boxes)

View Code

5.其他函数

_clip_boxes(boxes, im_shape)

def _clip_boxes(boxes, im_shape):    """Clip boxes to image boundaries."""    # x1 >= 0    boxes[:, 0::4] = np.maximum(boxes[:, 0::4], 0)     # y1 >= 0    boxes[:, 1::4] = np.maximum(boxes[:, 1::4], 0)    # x2 < im_shape[1]    boxes[:, 2::4] = np.minimum(boxes[:, 2::4], im_shape[1] - 1)    # y2 < im_shape[0]    boxes[:, 3::4] = np.minimum(boxes[:, 3::4], im_shape[0] - 1)    return boxes

# -*- coding:utf-8 -*-
# Author: WUJiang
# Demo: a strided column slice keeps the array 2-D, a plain index does not.
import numpy as np

boxes = np.array([
    [1, 2, 5, 7],
    [2, 4, 8, 9],
])

# Slice 0::4 selects column 0 only here, but the result stays 2-D:
# [[1]
#  [2]]
print(boxes[:, 0::4])

# Integer index drops the axis: [1 2]
print(boxes[:, 0])

View Code

# -*- coding:utf-8 -*-
# Author: WUJiang
# Demo: taking every 4th column of a 2-D array.
import numpy as np

a = np.array([
    [0, 1, 2, 3, 4, 5, 6, 7],
    [1, 2, 3, 4, 5, 6, 7, 8],
    [2, 3, 4, 5, 6, 7, 8, 9],
])

# Step of 4 along the second axis picks columns 0 and 4:
# [[0 4]
#  [1 5]
#  [2 6]]
b = a[:, 0::4]
print(b)

View Code

_rescale_boxes(boxes, inds, scales) 未见调用

def _rescale_boxes(boxes, inds, scales):    """Rescale boxes according to image rescaling."""    for i in xrange(boxes.shape[0]):  # 并不是生成序列，而是作为一个生成器，生成一个取出一个        boxes[i,:] = boxes[i,:] / scales[int(inds[i])]    return boxes

vis_detections(im, class_name, dets, thresh=0.8) 可视化检测结果，绘制boxes矩形框（demo.py中将其重构，不太理解的是这里为何要将10与dets.shape[0]进行比较；从代码看，np.minimum(10, dets.shape[0])取二者较小值作为循环次数，即最多只处理dets的前10行，且dets不足10行时不会越界）

被test_net(sess, net, imdb, weights_filename, max_per_image = 300, thresh = 0.05, vis = False)调用

def vis_detections(im, class_name, dets, thresh=0.8):
    """Visual debugging of detections.

    Adds a rectangle and a text label to the current matplotlib axes for
    each of the first (at most 10) rows of ``dets`` whose score exceeds
    ``thresh``. The function itself neither shows the image nor calls
    ``plt.show()``.

    Arguments:
        im: image being visualized (not used inside this function).
        class_name (str): label drawn next to each box.
        dets (ndarray): one detection per row; the first four values are
            the box (x1, y1, x2, y2) and the last value is the score.
        thresh (float): only detections scoring above this are drawn.
    """
    import matplotlib.pyplot as plt

    # Look at no more than the first 10 rows; min() also guards against
    # indexing past the end when there are fewer than 10 detections.
    for i in range(min(10, dets.shape[0])):
        bbox = dets[i, :4]
        score = dets[i, -1]
        if score > thresh:
            plt.gca().add_patch(
                plt.Rectangle((bbox[0], bbox[1]),
                              bbox[2] - bbox[0],
                              bbox[3] - bbox[1], fill=False,
                              edgecolor='g', linewidth=3)
                )
            plt.gca().text(bbox[0], bbox[1] - 2,
                           '{:s} {:.3f}'.format(class_name, score),
                           bbox=dict(facecolor='blue', alpha=0.5),
                           fontsize=14, color='white')
            plt.title('{}  {:.3f}'.format(class_name, score))

apply_nms(all_boxes, thresh) 未见调用

def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to all predicted boxes output by the
    test_net method.

    Arguments:
        all_boxes: nested list indexed as all_boxes[cls][image]; each entry
            is either an empty list or a 2-D array whose columns 0-4 are
            (x1, y1, x2, y2, score).
        thresh: threshold forwarded unchanged to ``nms``.

    Returns:
        A nested list with the same [cls][image] layout. Entries are a copy
        of the detections kept by ``nms``, or an empty list when nothing
        was available or kept.
    """
    num_classes = len(all_boxes)
    # Guard against an empty outer list instead of raising IndexError.
    num_images = len(all_boxes[0]) if num_classes else 0
    nms_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            # len() works for both the [] placeholder and ndarrays;
            # `dets == []` is not a reliable emptiness test on an ndarray.
            if len(dets) == 0:
                continue

            x1 = dets[:, 0]
            y1 = dets[:, 1]
            x2 = dets[:, 2]
            y2 = dets[:, 3]
            scores = dets[:, 4]
            # Keep only boxes with positive width and height whose score
            # is above the configured detection threshold.
            inds = np.where((x2 > x1) & (y2 > y1) &
                            (scores > cfg.TEST.DET_THRESHOLD))[0]
            dets = dets[inds, :]
            if len(dets) == 0:
                continue

            # Core step: `nms` (defined elsewhere) returns what is used
            # below as row indices of the detections to keep.
            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes

传入参数all_boxes可见test_net(sess, net, imdb, weights_filename, max_per_image, thresh, vis)函数

转载于:https://www.cnblogs.com/deeplearning1314/p/11097754.html

你可能感兴趣的文章