（15-2-03）姿势预测器:Android姿势预测器（3）图像处理

15.3.5 图像处理

将用摄像机预览图像时，会实时预测图像中人物的姿势，并通过图像处理技术绘制出人物的四肢。

（1）编写程序文件ImageUtils.kt，实现用于操作图像的实用程序类。提取摄像机中的图像，并使用线条绘制四肢和和头部器官，将回执结果保存到缓存中。具体实现代码如下所示。

object ImageUtils {

    private const val TAG = "ImageUtils"

    @RequiresApi(VERSION_CODES.KITKAT)
    fun yuv420ThreePlanesToNV21(
        yuv420888planes: Array<Plane>, width: Int, height: Int
    ): ByteBuffer? {
        val imageSize = width * height
        val out = ByteArray(imageSize + 2 * (imageSize / 4))
        if (areUVPlanesNV21(yuv420888planes, width, height)) {
            //复制Y的值.
            yuv420888planes[0].buffer[out, 0, imageSize]
            val uBuffer = yuv420888planes[1].buffer
            val vBuffer = yuv420888planes[2].buffer
            //从V缓冲区获取第一个V值，因为U缓冲区不包含它。
            vBuffer[out, imageSize, 1]
            //从U缓冲区复制第一个U值和剩余的VU值.
            uBuffer[out, imageSize + 1, 2 * imageSize / 4 - 1]
        } else {
            // 回退处理并逐个复制UV值，虽然速度较慢，但也有效。
            unpackPlane(
                yuv420888planes[0],
                width,
                height,
                out,
                0,
                1
            )
            //拆包U.
            unpackPlane(
                yuv420888planes[1],
                width,
                height,
                out,
                imageSize + 1,
                2
            )
            //拆包V.
            unpackPlane(
                yuv420888planes[2],
                width,
                height,
                out,
                imageSize,
                2
            )
        }
        return ByteBuffer.wrap(out)
    }

    @TargetApi(VERSION_CODES.KITKAT)
    private fun unpackPlane(
        plane: Plane, width: Int, height: Int, out: ByteArray, offset: Int, pixelStride: Int
    ) {
        val buffer = plane.buffer
        buffer.rewind()

        // 计算当前平面的大小。
        //假设它具有与原始图像相同的纵横比.
        val numRow = (buffer.limit() + plane.rowStride - 1) / plane.rowStride
        if (numRow == 0) {
            return
        }
        val scaleFactor = height / numRow
        val numCol = width / scaleFactor

        //提取输出缓冲区中的数据
        var outputPos = offset
        var rowStart = 0
        for (row in 0 until numRow) {
            var inputPos = rowStart
            for (col in 0 until numCol) {
                out[outputPos] = buffer[inputPos]
                outputPos += pixelStride
                inputPos += plane.pixelStride
            }
            rowStart += plane.rowStride
        }
    }

    @RequiresApi(VERSION_CODES.KITKAT)
    private fun areUVPlanesNV21(planes: Array<Plane>, width: Int, height: Int): Boolean {
        val imageSize = width * height
        val uBuffer = planes[1].buffer
        val vBuffer = planes[2].buffer

        //备份缓冲区属性.
        val vBufferPosition = vBuffer.position()
        val uBufferLimit = uBuffer.limit()

        //将V缓冲区提前1字节，因为U缓冲区不包含第一个V值。
        vBuffer.position(vBufferPosition + 1)
        //切掉U缓冲区的最后一个字节，因为V缓冲区不包含最后一个U值。
        uBuffer.limit(uBufferLimit - 1)

        //检查缓冲区是否相等并具有预期的元素数
        val areNV21 =
            vBuffer.remaining() == 2 * imageSize / 4 - 2 && vBuffer.compareTo(uBuffer) == 0

        //将缓冲区恢复到其初始状态.
        vBuffer.position(vBufferPosition)
        uBuffer.limit(uBufferLimit)
        return areNV21
    }

    fun getBitmap(data: ByteBuffer, width: Int, height: Int): Bitmap? {
        data.rewind()
        val imageInBuffer = ByteArray(data.limit())
        data[imageInBuffer, 0, imageInBuffer.size]
        try {
            val image = YuvImage(
                imageInBuffer, ImageFormat.NV21, width, height, null
            )
            val stream = ByteArrayOutputStream()
            image.compressToJpeg(Rect(0, 0, width, height), 80, stream)
            val bmp = BitmapFactory.decodeByteArray(stream.toByteArray(), 0, stream.size())
            stream.close()
            return bmp
        } catch (e: Exception) {
            Log.e(TAG, "Error: " + e.message)
        }
        return null
    }
}

（2）编写文件MoveNet.kt实现移动处理，因为相机中的人物动作是动态的，所以需要适时绘制人物的四肢和头部器官的运动轨迹。文件MoveNet.kt的具体实现流程如下所示：

编写函数processInputImage()准备用于检测的输入图像，

    private fun processInputImage(bitmap: Bitmap, inputWidth: Int, inputHeight: Int): TensorImage? {
        val width: Int = bitmap.width
        val height: Int = bitmap.height

        val size = if (height > width) width else height
        val imageProcessor = ImageProcessor.Builder().apply {
            add(ResizeWithCropOrPadOp(size, size))
            add(ResizeOp(inputWidth, inputHeight, ResizeOp.ResizeMethod.BILINEAR))
        }.build()
        val tensorImage = TensorImage(DataType.FLOAT32)
        tensorImage.load(bitmap)
        return imageProcessor.process(tensorImage)
    }

编写函数initRectF()定义默认的裁剪区域，当算法无法从上一帧可靠地确定裁剪区域时，该函数提供初始裁剪区域（从两侧填充完整图像，使其成为方形图像）。

    private fun initRectF(imageWidth: Int, imageHeight: Int): RectF {
        val xMin: Float
        val yMin: Float
        val width: Float
        val height: Float
        if (imageWidth > imageHeight) {
            width = 1f
            height = imageWidth.toFloat() / imageHeight
            xMin = 0f
            yMin = (imageHeight / 2f - imageWidth / 2f) / imageHeight
        } else {
            height = 1f
            width = imageHeight.toFloat() / imageWidth
            yMin = 0f
            xMin = (imageWidth / 2f - imageHeight / 2) / imageWidth
        }
        return RectF(
            xMin,
            yMin,
            xMin + width,
            yMin + height
        )
    }

编写函数torsoVisible()检查是否有足够的躯干关键点，此函数检查模型是否有把握预测指定裁剪区域中的一个肩部/髋部。

    private fun torsoVisible(keyPoints: List<KeyPoint>): Boolean {
        return ((keyPoints[BodyPart.LEFT_HIP.position].score > MIN_CROP_KEYPOINT_SCORE).or(
            keyPoints[BodyPart.RIGHT_HIP.position].score > MIN_CROP_KEYPOINT_SCORE
        )).and(
            (keyPoints[BodyPart.LEFT_SHOULDER.position].score > MIN_CROP_KEYPOINT_SCORE).or(
                keyPoints[BodyPart.RIGHT_SHOULDER.position].score > MIN_CROP_KEYPOINT_SCORE
            )
        )
    }

确定要裁剪图像以供模型运行推断的区域，该算法使用前一帧检测到的关节来估计包围目标人全身并以两个髋关节中点为中心的正方形区域。裁剪尺寸由每个关节与中心点之间的距离确定。当模型对四个躯干关节预测不确定时，该函数将返回默认裁剪，即填充为方形的完整图像。

    private fun determineRectF(
        keyPoints: List<KeyPoint>,
        imageWidth: Int,
        imageHeight: Int
    ): RectF {
        val targetKeyPoints = mutableListOf<KeyPoint>()
        keyPoints.forEach {
            targetKeyPoints.add(
                KeyPoint(
                    it.bodyPart,
                    PointF(
                        it.coordinate.x * imageWidth,
                        it.coordinate.y * imageHeight
                    ),
                    it.score
                )
            )
        }
        if (torsoVisible(keyPoints)) {
            val centerX =
                (targetKeyPoints[BodyPart.LEFT_HIP.position].coordinate.x +
                        targetKeyPoints[BodyPart.RIGHT_HIP.position].coordinate.x) / 2f
            val centerY =
                (targetKeyPoints[BodyPart.LEFT_HIP.position].coordinate.y +
                        targetKeyPoints[BodyPart.RIGHT_HIP.position].coordinate.y) / 2f

            val torsoAndBodyDistances =
                determineTorsoAndBodyDistances(keyPoints, targetKeyPoints, centerX, centerY)

            val list = listOf(
                torsoAndBodyDistances.maxTorsoXDistance * TORSO_EXPANSION_RATIO,
                torsoAndBodyDistances.maxTorsoYDistance * TORSO_EXPANSION_RATIO,
                torsoAndBodyDistances.maxBodyXDistance * BODY_EXPANSION_RATIO,
                torsoAndBodyDistances.maxBodyYDistance * BODY_EXPANSION_RATIO
            )

            var cropLengthHalf = list.maxOrNull() ?: 0f
            val tmp = listOf(centerX, imageWidth - centerX, centerY, imageHeight - centerY)
            cropLengthHalf = min(cropLengthHalf, tmp.maxOrNull() ?: 0f)
            val cropCorner = Pair(centerY - cropLengthHalf, centerX - cropLengthHalf)

            return if (cropLengthHalf > max(imageWidth, imageHeight) / 2f) {
                initRectF(imageWidth, imageHeight)
            } else {
                val cropLength = cropLengthHalf * 2
                RectF(
                    cropCorner.second / imageWidth,
                    cropCorner.first / imageHeight,
                    (cropCorner.second + cropLength) / imageWidth,
                    (cropCorner.first + cropLength) / imageHeight,
                )
            }
        } else {
            return initRectF(imageWidth, imageHeight)
        }
    }

编写函数torsoVisible()计算每个关键点到中心位置的最大距离。该函数返回两组关键点之间的最大距离：完整的17个关键点和4个躯干关键点。返回的信息将用于确定作物大小。

    private fun determineTorsoAndBodyDistances(
        keyPoints: List<KeyPoint>,
        targetKeyPoints: List<KeyPoint>,
        centerX: Float,
        centerY: Float
    ): TorsoAndBodyDistance {
        val torsoJoints = listOf(
            BodyPart.LEFT_SHOULDER.position,
            BodyPart.RIGHT_SHOULDER.position,
            BodyPart.LEFT_HIP.position,
            BodyPart.RIGHT_HIP.position
        )

        var maxTorsoYRange = 0f
        var maxTorsoXRange = 0f
        torsoJoints.forEach { joint ->
            val distY = abs(centerY - targetKeyPoints[joint].coordinate.y)
            val distX = abs(centerX - targetKeyPoints[joint].coordinate.x)
            if (distY > maxTorsoYRange) maxTorsoYRange = distY
            if (distX > maxTorsoXRange) maxTorsoXRange = distX
        }

        var maxBodyYRange = 0f
        var maxBodyXRange = 0f
        for (joint in keyPoints.indices) {
            if (keyPoints[joint].score < MIN_CROP_KEYPOINT_SCORE) continue
            val distY = abs(centerY - keyPoints[joint].coordinate.y)
            val distX = abs(centerX - keyPoints[joint].coordinate.x)

            if (distY > maxBodyYRange) maxBodyYRange = distY
            if (distX > maxBodyXRange) maxBodyXRange = distX
        }
        return TorsoAndBodyDistance(
            maxTorsoYRange,
            maxTorsoXRange,
            maxBodyYRange,
            maxBodyXRange
        )
    }

（3）编写文件PoseNet.kt实现姿势处理，具体实现代码如下所示。

编写函数postProcessModelOuputs()将Posenet热图和偏移量输出转换为关键点列表，

    private fun postProcessModelOuputs(
        heatmaps: Array<Array<Array<FloatArray>>>,
        offsets: Array<Array<Array<FloatArray>>>
    ): Person {
        val height = heatmaps[0].size
        val width = heatmaps[0][0].size
        val numKeypoints = heatmaps[0][0][0].size

        //查找最可能存在关键点的位置（行、列）.
        val keypointPositions = Array(numKeypoints) { Pair(0, 0) }
        for (keypoint in 0 until numKeypoints) {
            var maxVal = heatmaps[0][0][0][keypoint]
            var maxRow = 0
            var maxCol = 0
            for (row in 0 until height) {
                for (col in 0 until width) {
                    if (heatmaps[0][row][col][keypoint] > maxVal) {
                        maxVal = heatmaps[0][row][col][keypoint]
                        maxRow = row
                        maxCol = col
                    }
                }
            }
            keypointPositions[keypoint] = Pair(maxRow, maxCol)
        }

        //通过偏移调整计算关键点的x和y坐标.
        val xCoords = IntArray(numKeypoints)
        val yCoords = IntArray(numKeypoints)
        val confidenceScores = FloatArray(numKeypoints)
        keypointPositions.forEachIndexed { idx, position ->
            val positionY = keypointPositions[idx].first
            val positionX = keypointPositions[idx].second
            yCoords[idx] = ((
                    position.first / (height - 1).toFloat() * inputHeight +
                            offsets[0][positionY][positionX][idx]
                    ) * (cropSize.toFloat() / inputHeight)).toInt() + (cropHeight / 2).toInt()
            xCoords[idx] = ((
                    position.second / (width - 1).toFloat() * inputWidth +
                            offsets[0][positionY]
                                    [positionX][idx + numKeypoints]
                    ) * (cropSize.toFloat() / inputWidth)).toInt() + (cropWidth / 2).toInt()
            confidenceScores[idx] = sigmoid(heatmaps[0][positionY][positionX][idx])
        }

        val keypointList = mutableListOf<KeyPoint>()
        var totalScore = 0.0f
        enumValues<BodyPart>().forEachIndexed { idx, it ->
            keypointList.add(
                KeyPoint(
                    it,
                    PointF(xCoords[idx].toFloat(), yCoords[idx].toFloat()),
                    confidenceScores[idx]
                )
            )
            totalScore += confidenceScores[idx]
        }
        return Person(keypointList.toList(), totalScore / numKeypoints)
    }

    override fun lastInferenceTimeNanos(): Long = lastInferenceTimeNanos

    override fun close() {
        interpreter.close()
    }

编写函数processInputImage()将输入图像缩放并裁剪为张量图像，

    private fun processInputImage(bitmap: Bitmap): TensorImage {
        //重置裁剪宽度和高度
        cropWidth = 0f
        cropHeight = 0f
        cropSize = if (bitmap.width > bitmap.height) {
            cropWidth = (bitmap.width - bitmap.height).toFloat()
            bitmap.height
        } else {
            cropHeight = (bitmap.height - bitmap.width).toFloat()
            bitmap.width
        }

        val imageProcessor = ImageProcessor.Builder().apply {
            add(ResizeWithCropOrPadOp(cropSize, cropSize))
            add(ResizeOp(inputWidth, inputHeight, ResizeOp.ResizeMethod.BILINEAR))
            add(NormalizeOp(MEAN, STD))
        }.build()
        val tensorImage = TensorImage(DataType.FLOAT32)
        tensorImage.load(bitmap)
        return imageProcessor.process(tensorImage)
    }

编写函数initOutputMap()，功能是为要填充的模型实现初始化处理，将输出保存为1*x*y*z格式的浮点型数组的outputMap。

    private fun initOutputMap(interpreter: Interpreter): HashMap<Int, Any> {
        val outputMap = HashMap<Int, Any>()

        // 包含热图1 * 9 * 9 * 17
        val heatmapsShape = interpreter.getOutputTensor(0).shape()
        outputMap[0] = Array(heatmapsShape[0]) {
            Array(heatmapsShape[1]) {
                Array(heatmapsShape[2]) { FloatArray(heatmapsShape[3]) }
            }
        }

        // 包含偏移量1 * 9 * 9 * 34
        val offsetsShape = interpreter.getOutputTensor(1).shape()
        outputMap[1] = Array(offsetsShape[0]) {
            Array(offsetsShape[1]) { Array(offsetsShape[2]) { FloatArray(offsetsShape[3]) } }
        }

        //包含向前位移1 * 9 * 9 * 32
        val displacementsFwdShape = interpreter.getOutputTensor(2).shape()
        outputMap[2] = Array(offsetsShape[0]) {
            Array(displacementsFwdShape[1]) {
                Array(displacementsFwdShape[2]) { FloatArray(displacementsFwdShape[3]) }
            }
        }

        //包含向后位移1 * 9 * 9 * 32
        val displacementsBwdShape = interpreter.getOutputTensor(3).shape()
        outputMap[3] = Array(displacementsBwdShape[0]) {
            Array(displacementsBwdShape[1]) {
                Array(displacementsBwdShape[2]) { FloatArray(displacementsBwdShape[3]) }
            }
        }

        return outputMap
    }

（15-2-03）姿势预测器:Android姿势预测器（3）图像处理

15.3.5 图像处理

悦读