博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
Tensorflow版Faster RCNN源码解析(TFFRCNN) (2) test.py(不使用RPN时)(含ndarray数组复杂的切片操作等)...
阅读量:5153 次
发布时间:2019-06-13

本文共 13921 字,大约阅读时间需要 46 分钟。

本blog为github上版源码解析系列代码笔记

---------------个人学习笔记---------------

----------------本文作者吴疆--------------

------------

 

1.当不通过RPN获得roi需要做如下修改:

demo.py中调用demo()时需传入boxes参数

调用im_detect()时需传入boxes参数,否则boxes默认为None

cfg.TEST.HAS_RPN改为False

_get_blobs(im, boxes)中_get_rois_blob(rois,cfg.TEST.SCALES_BASE)的cfg.TEST.SCALES_BASE改为im_scale_factors

修改VGGnet_test.py网络文件:self.im_info改为self.rois,其占位符shape改为[None, 5],将self.layers字典中‘im_info’均改为‘rois’,删除网络与RPN相关的层。      

import tensorflow as tffrom .network import Networkfrom ..fast_rcnn.config import cfgclass VGGnet_test(Network):    def __init__(self, trainable=True):        self.inputs = []        self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3])        self.rois = tf.placeholder(tf.float32, shape=[None, 5])        self.keep_prob = tf.placeholder(tf.float32)        self.layers = dict({
'data': self.data, 'rois': self.rois}) self.trainable = trainable self.setup() def setup(self): # n_classes = 21 n_classes = cfg.NCLASSES # anchor_scales = [8, 16, 32] anchor_scales = cfg.ANCHOR_SCALES _feat_stride = [16, ] (self.feed('data') .conv(3, 3, 64, 1, 1, name='conv1_1', trainable=False) .conv(3, 3, 64, 1, 1, name='conv1_2', trainable=False) .max_pool(2, 2, 2, 2, padding='VALID', name='pool1') .conv(3, 3, 128, 1, 1, name='conv2_1', trainable=False) .conv(3, 3, 128, 1, 1, name='conv2_2', trainable=False) .max_pool(2, 2, 2, 2, padding='VALID', name='pool2') .conv(3, 3, 256, 1, 1, name='conv3_1') .conv(3, 3, 256, 1, 1, name='conv3_2') .conv(3, 3, 256, 1, 1, name='conv3_3') .max_pool(2, 2, 2, 2, padding='VALID', name='pool3') .conv(3, 3, 512, 1, 1, name='conv4_1') .conv(3, 3, 512, 1, 1, name='conv4_2') .conv(3, 3, 512, 1, 1, name='conv4_3') .max_pool(2, 2, 2, 2, padding='VALID', name='pool4') .conv(3, 3, 512, 1, 1, name='conv5_1') .conv(3, 3, 512, 1, 1, name='conv5_2') .conv(3, 3, 512, 1, 1, name='conv5_3')) # (self.feed('conv5_3') # .conv(3, 3, 512, 1, 1, name='rpn_conv/3x3') # .conv(1, 1, len(anchor_scales) * 3 * 2, 1, 1, padding='VALID', relu=False, name='rpn_cls_score')) # # (self.feed('rpn_conv/3x3') # .conv(1, 1, len(anchor_scales) * 3 * 4, 1, 1, padding='VALID', relu=False, name='rpn_bbox_pred')) # # # shape is (1, H, W, Ax2) -> (1, H, WxA, 2) # (self.feed('rpn_cls_score') # .spatial_reshape_layer(2, name='rpn_cls_score_reshape') # .spatial_softmax(name='rpn_cls_prob')) # # # shape is (1, H, WxA, 2) -> (1, H, W, Ax2) # (self.feed('rpn_cls_prob') # .spatial_reshape_layer(len(anchor_scales) * 3 * 2, name='rpn_cls_prob_reshape')) # # (self.feed('rpn_cls_prob_reshape', 'rpn_bbox_pred', 'im_info') # .proposal_layer(_feat_stride, anchor_scales, 'TEST', name='rois')) (self.feed('conv5_3', 'rois') .roi_pool(7, 7, 1.0 / 16, name='pool_5') .fc(4096, name='fc6') .fc(4096, name='fc7') .fc(n_classes, relu=False, name='cls_score') .softmax(name='cls_prob')) (self.feed('fc7') .fc(n_classes * 4, relu=False, name='bbox_pred'))
View Code

2.test.py中im_detect(sess, net, im, boxes=None)调用函数执行顺序(其中,形参im应为BGR顺序)

def im_detect(sess, net, im, boxes=None):    """Detect object classes in an image given object proposals.    Arguments:        net (caffe.Net): Fast R-CNN network to use        im (ndarray): color image to test (in BGR order)        boxes (ndarray): R x 4 array of object proposals    Returns:        scores (ndarray): R x K array of object class scores (K includes            background as object category 0)        boxes (ndarray): R x (4*K) array of predicted bounding boxes    """

_get_blobs(im, boxes)获取blobs数据字典(含data、rois字段,data与rois均经过缩放)与图像缩放因子数组im_scales(默认不使用图像金字塔,则数组仅含一个元素),其中,还对所有的blobs['rois']进行去重(判断映射到主干网络产生的feature map相同位置的),更新blobs['rois']按缩放因子缩放了的,R*5含第1维全0 level)和boxes(原始传入的boxes,未被缩放,R*4)

--->forward pass前向传播,构造feed_dict字典(含net.data即blobs['data']、net.rois即blobs['rois']、net.keep_prob为1.0,与VGGnet_test.py的输入对应)并通过运行sess传入

--->通过调用(network.py中)net.get_output()得到cls_score(cfg.TEST.SVM为True时的得分,即不经过softmax)、cls_prob(默认cfg.TEST.SVM为False,经过softmax处理的最终得分)、bbox_pred坐标补偿量、rois(维度分别为R2*n_classes、R2*n_classes、R2*(n_classes*4)、R2*5(R2 <= R)对应于VGGnet_test.py相应层输出

--->默认cfg.TEST.BBOX_REG为True调用bbox_transform_inv(boxes,box_deltas)(bbox_transform.py)回归boxes得到pred_boxes

--->调用_clip_boxes(pred_boxes,im.shape)将越界的(回归后的boxes)pred_boxes改为图像边界得到更新的pred_boxes

--->更新scores和pred_boxes并返回,此时score维度由R2*n_classes变回R*n_classes,pred_boxes维度也由R2*(n_classes*4)变为R*(n_classes*4)(R为最初的boxes数量即说明对于映射到feature map相同位置的原始boxes,其最终回归为拥有相同score的同一个box。说明返回的scores和pred_boxes中有重复的元素(且仍然为最初boxes的排序,具体可参考np.unique()函数)(未明白为何这样设计,只统计unique的感觉更好

# When mapping from image ROIs to feature map ROIs, there's some aliasing    # (some distinct image ROIs get mapped to the same feature ROI).    # Here, we identify duplicate feature ROIs, so we only compute features    # on the unique subset.    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:   #1/16          v = np.array([1, 1e3, 1e6, 1e9, 1e12])        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)        _, index, inv_index = np.unique(hashes, return_index=True,                                        return_inverse=True)        blobs['rois'] = blobs['rois'][index, :]        boxes = boxes[index, :]
cls_score, cls_prob, bbox_pred, rois = \        sess.run([net.get_output('cls_score'), net.get_output('cls_prob'), net.get_output('bbox_pred'),net.get_output('rois')],\                 feed_dict=feed_dict)
if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:        # Map scores and predictions back to the original set of boxes        scores = scores[inv_index, :]        pred_boxes = pred_boxes[inv_index, :]
# -*- coding:utf-8 -*-# Author: WUJiang# 测试功能import numpy as nphashes = np.array([     [4],     [1],     [6],     [7],     [1]    ])_, index, inv_index = np.unique(hashes, return_index=True,                                return_inverse=True)print(inv_index)# _          [1 4 6 7]     
# index [1 0 2 3]
# inv_index [1 0 2 3 0]
View Code

_为去重并排序后的数组、index为_中元素在原数组中第一次出现时的索引构成的数组、inv_index为原数组元素在_中的对应索引构成的数组。

3._get_blobs(im, rois)函数的执行逻辑(rois即为传入的boxes)

调用_get_image_blob(im)获取blobs['data']([None, None, None,3],与VGGnet_test.py中data占位符对应)和im_scale_factors缩放因子数组(即im_scales),调用_get_rois_blob(rois,im_scale_factors)得到blobs['rois'](R*5)返回blobs和im_scale_factors

def _get_image_blob(im):    """Converts an image into a network input.    Arguments:        im (ndarray): a color image in BGR order    Returns:        blob (ndarray): a data blob holding an image pyramid        im_scale_factors (list): list of image scales (relative to im) used            in the image pyramid  # 此处注释有误,应为ndarray而非list    """

im--->减去像素均值操作--->判断以长边(1000)/短边(600)缩放--->双线性差值对图像进行缩放im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,interpolation=cv2.INTER_LINEAR)--->

blob=im_list_to_blob(processed_ims)(blob.py中)将(图像金字塔处理后得到的)图像列表转化为blob数据(即blobs['data'])返回blob和np.array(im_scale_factors)缩放因子数组(不使用图像金字塔时processed_ims和im_scale_factors均只含一个元素)

def _get_rois_blob(im_rois, im_scale_factors):    """Converts RoIs into network inputs.    Arguments:        im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates        im_scale_factors (list): scale factors as returned by _get_image_blob   # 此处注释有误,应为ndarray而非list    Returns:        blob (ndarray): R x 5 matrix of RoIs in the image pyramid    """

 调用_project_im_rois(im_rois, im_scale_factors)得到rois和levels,拼接rois和levels构成rois_blob(即data['rois'],shape为R*5,5=1level+4坐标,默认不使用图像金字塔level均为0)

def _project_im_rois(im_rois, scales):    """Project image RoIs into the image pyramid built by _get_image_blob.    Arguments:        im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates        scales (list): scale factors as returned by _get_image_blob  # 此处注释有误,应为ndarray而非list    Returns:        rois (ndarray): R x 4 matrix of projected RoI coordinates        levels (list): image pyramid levels used by each projected RoI  # 此处注释有误,应为ndarray而非list    """

rois = im_rois * scales[levels] rois按缩放因子进行缩放返回rois和levels

# -*- coding:utf-8 -*-# Author: WUJiang# 测试功能import numpy as npim_rois = np.array([    [1, 2, 4, 5],    [2, 3, 7, 8]])scale = np.array([0.6])levels = np.array([    [0],    [0]])"""[[0.6] [0.6]]"""print(scale[levels])rois = im_rois * scale[levels]"""[[0.6 1.2 2.4 3. ] [1.2 1.8 4.2 4.8]]"""print(rois)
View Code

4.test_net(sess, net, imdb, weight_filename, max_per_image = 300即top-300个proposal, thresh = 0.05即得分阈值, vis = False)的执行逻辑(被test_net.py调用)

def test_net(sess, net, imdb, weights_filename , max_per_image=300, thresh=0.05, vis=False): #被test_net.py调用    #weights_filename不包含后缀的模型文件名,类似于vggnet...70000    """Test a Fast R-CNN network on an image database."""

构造all_boxes列表,其中all_boxes[cls_ind类别索引][im_id图像索引]为该类的R*5的检测结果det,即R*(x1,y1,x2,y2,score)  (   其中图像共num_images张,num_images=len(imdb.image_index) )

--->调用get_output_dir(imdb,weights_filename)(config.py中)得到输出对应数据集测试结果pkl的部分绝对路径(需+detection.pkl)

--->构造计时器字典_t(含im_detect和misc字段,分别创建了一个Timer对象)

--->(不使用RPN时)roidb=imdb.roidb(维度和内容不明,imdb类)--->得到测试结果pkl文件全路径det_file(如output/faster_rcnn_voc_vgg/voc_2007_test/VGGnet_fast_rcnn_iter_6020/detections.pkl)

--->i循环遍历每一张图像  bbox_proposal = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0](维度和内容不明,使得含义不明)剔除为gt的rois,只评估非roi的proposal的效果(因为roi有可能来源于training or val split

--->读取图像im--->调用im_detect(sess, net, im, box_proposal)返回scores和boxes,前后时间戳相减得到耗时detect_time

--->对于每张图像,从1~imdb.num_classes j循环遍历各个类别(0为背景),取出该类score得分超过thresh阈值的boxes拼接成cls_det(None,5),类内执行NMS(nms是C编译的.so),更新cls_det,将cls_det填入  all_boxes对应位置all_boxes[j][i]

--->每张图像最多保留包含所有类别的max_per_image(默认为300,即top-300)个得分最高的boxes,更新all_boxes[j][i],im_detect()后的处理均纳入NMS耗时nms_time--->打印相关测试信息

--->两层循环结束后,利用cPickle向det_file中写入all_boxes检测结果--->调用imdb.evaluate_detection(all_boxes, output_dir)(lib.datasets.imdb.pascal_voc.py中)评估算法精度(如计算AP值)、按类存储相关检测结果

# -*- coding:utf-8 -*-# Author: WUJiang# 测试功能# all_boxes[cls][image] = N x 5 array of detections in (x1, y1, x2, y2, score)all_boxes = [[[] for _ in range(5)]  # num_images             for _ in range(4)]  # num_classes"""[ [[], [], [], [], []], [[], [], [], [], []], [[], [], [], [], []], [[], [], [], [], []]]"""print(all_boxes)
View Code

5.其他函数

_clip_boxes(boxes, im_shape)

def _clip_boxes(boxes, im_shape):    """Clip boxes to image boundaries."""    # x1 >= 0    boxes[:, 0::4] = np.maximum(boxes[:, 0::4], 0)     # y1 >= 0    boxes[:, 1::4] = np.maximum(boxes[:, 1::4], 0)    # x2 < im_shape[1]    boxes[:, 2::4] = np.minimum(boxes[:, 2::4], im_shape[1] - 1)    # y2 < im_shape[0]    boxes[:, 3::4] = np.minimum(boxes[:, 3::4], im_shape[0] - 1)    return boxes
# -*- coding:utf-8 -*-# Author: WUJiang# 测试功能import numpy as npboxes = np.array([    [1, 2, 5, 7],    [2, 4, 8, 9]                 ])"""[[1] [2]]"""print(boxes[:, 0::4])# [1 2]print(boxes[:, 0])
View Code
# -*- coding:utf-8 -*-# Author: WUJiang# 测试功能import numpy as npa = np.array([    [0, 1, 2, 3, 4, 5, 6, 7],    [1, 2, 3, 4, 5, 6, 7, 8],    [2, 3, 4, 5, 6, 7, 8, 9]])"""[[0 4] [1 5] [2 6]]"""b = a[:, 0::4]   # 第二维以4为步长print(b)
View Code

_rescale_boxes(boxes, inds, scales)  未见调用

def _rescale_boxes(boxes, inds, scales):    """Rescale boxes according to image rescaling."""    for i in xrange(boxes.shape[0]):  # 并不是生成序列,而是作为一个生成器,生成一个取出一个        boxes[i,:] = boxes[i,:] / scales[int(inds[i])]    return boxes

vis_detection(im, class_name, dets, thresh=0.8) 可视化检测结果,绘制boxes矩形框 (demo.py中将其重构,不太理解的是这里为何要将10与dets.shape[0]进行比较

被test_net(sess, net, imdb, weight_filename, max_per_image = 300, thresh = 0.05, vis = False)调用

def vis_detections(im, class_name, dets, thresh=0.8):    """Visual debugging of detections."""    import matplotlib.pyplot as plt     #im = im[:, :, (2, 1, 0)]    for i in xrange(np.minimum(10, dets.shape[0])):   #dets由bbox坐标和score拼成        bbox = dets[i, :4]         score = dets[i, -1]         if score > thresh:            #plt.cla()            #plt.imshow(im)            plt.gca().add_patch(                plt.Rectangle((bbox[0], bbox[1]),                              bbox[2] - bbox[0],                              bbox[3] - bbox[1], fill=False,                              edgecolor='g', linewidth=3)                )            plt.gca().text(bbox[0], bbox[1] - 2,                 '{:s} {:.3f}'.format(class_name, score),                 bbox=dict(facecolor='blue', alpha=0.5),                 fontsize=14, color='white')            plt.title('{}  {:.3f}'.format(class_name, score))    #plt.show()

apply_nms(all_boxes, thresh)   未见调用

def apply_nms(all_boxes, thresh):    """Apply non-maximum suppression to all predicted boxes output by the    test_net method.    """    num_classes = len(all_boxes)    num_images = len(all_boxes[0])    nms_boxes = [[[] for _ in xrange(num_images)]                 for _ in xrange(num_classes)]    for cls_ind in xrange(num_classes):        for im_ind in xrange(num_images):            dets = all_boxes[cls_ind][im_ind]            if dets == []:                continue            x1 = dets[:, 0]            y1 = dets[:, 1]            x2 = dets[:, 2]            y2 = dets[:, 3]            scores = dets[:, 4]            inds = np.where((x2 > x1) & (y2 > y1) & (scores > cfg.TEST.DET_THRESHOLD))[0]            dets = dets[inds,:]            if dets == []:                continue            keep = nms(dets, thresh)  # 核心在这里            if len(keep) == 0:                continue            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()    return nms_boxes

传入参数all_boxes可见test_net(sess, net, imdb, weights_filename, max_per_image. thresh, vis)函数

转载于:https://www.cnblogs.com/deeplearning1314/p/11097754.html

你可能感兴趣的文章
js深度克隆对象、数组
查看>>
socket阻塞与非阻塞,同步与异步
查看>>
团队工作第二天
查看>>
System类
查看>>
tableView
查看>>
Happy Great BG-卡精度
查看>>
Xamarin Visual Studio不识别JDK路径
查看>>
菜鸟“抄程序”之道
查看>>
Ubuntu下关闭防火墙
查看>>
TCP/IP 邮件的原理
查看>>
w3m常用快捷键
查看>>
【Unity 3D】学习笔记四十一:关节
查看>>
原型设计工具
查看>>
windows下的C++ socket服务器(4)
查看>>
css3 2d转换3d转换以及动画的知识点汇总
查看>>
【Java】使用Eclipse进行远程调试,Linux下开启远程调试
查看>>
js对象属性方法
查看>>
对Vue为什么不支持IE8的解释之一
查看>>
Maven安装配置
查看>>
ORA-10635: Invalid segment or tablespace type
查看>>