// Detect the hand pose using the sample buffer captured from the camera
extension CameraViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
var thumbTip: CGPoint?
var indexTip: CGPoint?
defer {
DispatchQueue.main.sync {
self.processPoints(thumbTip: thumbTip, indexTip: indexTip)
}
}
let handler = VNImageRequestHandler(cmSampleBuffer: sampleBuffer, orientation: .up, options: [:])
do {
// Perform VNDetectHumanHandPoseRequest
try handler.perform([handPoseRequest])
// Continue only when a hand was detected in the frame.
// Since we set the maximumHandCount property of the request to 1, there will be at most one observation.
guard let observation = handPoseRequest.results?.first as? VNRecognizedPointsObservation else {
return
}
// Get points for thumb and index finger.
let thumbPoints = try observation.recognizedPoints(forGroupKey: .handLandmarkRegionKeyThumb)
let indexFingerPoints = try observation.recognizedPoints(forGroupKey: .handLandmarkRegionKeyIndexFinger)
// Look for tip points.
guard let thumbTipPoint = thumbPoints[.handLandmarkKeyThumbTIP], let indexTipPoint = indexFingerPoints[.handLandmarkKeyIndexTIP] else {
return
}
// Ignore low confidence points.
guard thumbTipPoint.confidence > 0.3 && indexTipPoint.confidence > 0.3 else {
return
}
// Convert points from Vision coordinates to AVFoundation coordinates.
thumbTip = CGPoint(x: thumbTipPoint.location.x, y: 1 - thumbTipPoint.location.y)
indexTip = CGPoint(x: indexTipPoint.location.x, y: 1 - indexTipPoint.location.y)
} catch {
cameraFeedSession?.stopRunning()
let error = AppError.visionError(error: error)
DispatchQueue.main.async {
error.displayInViewController(self)
}
}
}
}
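// processPoints(thumbTip:indexTip:) is called from the defer block above but is not part of this excerpt.
// A minimal sketch of what it could look like, assuming the controller exposes a preview layer
// (cameraFeedPreviewLayer), a gesture processor (gestureProcessor), and that PointsPair is a
// (thumbTip:, indexTip:) tuple -- these names are assumptions, not confirmed by the excerpt.
extension CameraViewController {
    func processPoints(thumbTip: CGPoint?, indexTip: CGPoint?) {
        // If either point is missing, reset the gesture state and bail out.
        guard let thumbPoint = thumbTip, let indexPoint = indexTip else {
            gestureProcessor.reset()
            return
        }
        // Convert the normalized AVFoundation coordinates into preview-layer coordinates.
        let thumbConverted = cameraFeedPreviewLayer.layerPointConverted(fromCaptureDevicePoint: thumbPoint)
        let indexConverted = cameraFeedPreviewLayer.layerPointConverted(fromCaptureDevicePoint: indexPoint)
        // Feed the converted points to the pinch gesture processor shown below.
        gestureProcessor.processPointsPair((thumbTip: thumbConverted, indexTip: indexConverted))
    }
}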
// Example of pinch handling using the detected hand pose
init(pinchMaxDistance: CGFloat = 40, evidenceCounterStateTrigger: Int = 3) {
self.pinchMaxDistance = pinchMaxDistance
self.evidenceCounterStateTrigger = evidenceCounterStateTrigger
}
func reset() {
state = .unknown
pinchEvidenceCounter = 0
apartEvidenceCounter = 0
}
func processPointsPair(_ pointsPair: PointsPair) {
lastProcessedPointsPair = pointsPair
let distance = pointsPair.indexTip.distance(from: pointsPair.thumbTip)
if distance < pinchMaxDistance {
// Keep accumulating evidence for pinch state.
pinchEvidenceCounter += 1
apartEvidenceCounter = 0
// Set new state based on evidence amount.
state = (pinchEvidenceCounter >= evidenceCounterStateTrigger) ? .pinched : .possiblePinch
} else {
// Keep accumulating evidence for apart state.
apartEvidenceCounter += 1
pinchEvidenceCounter = 0
// Set new state based on evidence amount.
state = (apartEvidenceCounter >= evidenceCounterStateTrigger) ? .apart : .possibleApart
}
}
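// processPointsPair(_:) above relies on a distance(from:) helper on CGPoint that is not shown in this
// excerpt. A minimal sketch of that helper (the real implementation may differ):
import CoreGraphics

extension CGPoint {
    // Euclidean distance between two points.
    func distance(from point: CGPoint) -> CGFloat {
        let dx = point.x - x
        let dy = point.y - y
        return (dx * dx + dy * dy).squareRoot()
    }
}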
// Sample from a game app that uses Vision to classify the player's action
extension GameViewController: CameraViewControllerOutputDelegate {
func cameraViewController(_ controller: CameraViewController, didReceiveBuffer buffer: CMSampleBuffer, orientation: CGImagePropertyOrientation) {
let visionHandler = VNImageRequestHandler(cmSampleBuffer: buffer, orientation: orientation, options: [:])
if self.gameManager.stateMachine.currentState is GameManager.TrackThrowsState {
DispatchQueue.main.async {
// Get the frame of the rendered view
let normalizedFrame = CGRect(x: 0, y: 0, width: 1, height: 1)
self.jointSegmentView.frame = controller.viewRectForVisionRect(normalizedFrame)
self.trajectoryView.frame = controller.viewRectForVisionRect(normalizedFrame)
}
// Perform the trajectory request in a separate dispatch queue
trajectoryQueue.async {
self.setUpDetectTrajectoriesRequest()
do {
if let trajectoryRequest = self.detectTrajectoryRequest {
try visionHandler.perform([trajectoryRequest])
}
} catch {
AppError.display(error, inViewController: self)
}
}
}
// Run the body pose request for up to GameConstants.maxTrajectoryInFlightPoseObservations additional frames after the first trajectory observation is detected
if !(self.trajectoryView.inFlight && self.trajectoryInFlightPoseObservations >= GameConstants.maxTrajectoryInFlightPoseObservations) {
do {
try visionHandler.perform([detectPlayerRequest])
if let result = detectPlayerRequest.results?.first as? VNRecognizedPointsObservation {
let box = humanBoundingBox(for: result)
let boxView = playerBoundingBox
DispatchQueue.main.async {
let horizontalInset = CGFloat(-20.0)
let verticalInset = CGFloat(-20.0)
let viewRect = controller.viewRectForVisionRect(box).insetBy(dx: horizontalInset, dy: verticalInset)
self.updateBoundingBox(boxView, withRect: viewRect)
if !self.playerDetected && !boxView.isHidden {
self.gameStatusLabel.alpha = 0
self.resetTrajectoryRegions()
self.gameManager.stateMachine.enter(GameManager.DetectedPlayerState.self)
}
}
}
} catch {
AppError.display(error, inViewController: self)
}
} else {
// Hide player bounding box
DispatchQueue.main.async {
if !self.playerBoundingBox.isHidden {
self.playerBoundingBox.isHidden = true
self.jointSegmentView.resetView()
}
}
}
}
}
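// setUpDetectTrajectoriesRequest() is called on trajectoryQueue above but is not shown in this excerpt.
// A minimal sketch, assuming the request is stored in detectTrajectoryRequest (as used above) and that a
// hypothetical processTrajectoryObservations(_:) handler draws the results; the spacing and trajectory
// length parameters are assumptions:
extension GameViewController {
    func setUpDetectTrajectoriesRequest() {
        // Create the request only once and reuse it for subsequent frames.
        guard detectTrajectoryRequest == nil else { return }
        detectTrajectoryRequest = VNDetectTrajectoriesRequest(frameAnalysisSpacing: .zero,
                                                              trajectoryLength: 15) { [weak self] request, _ in
            guard let observations = request.results as? [VNTrajectoryObservation] else { return }
            DispatchQueue.main.async {
                // Hypothetical handler that updates trajectoryView with the detected trajectories.
                self?.processTrajectoryObservations(observations)
            }
        }
    }
}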
func humanBoundingBox(for observation: VNRecognizedPointsObservation) -> CGRect {
var box = CGRect.zero
// Process body points only if the confidence is high
guard observation.confidence > 0.6 else {
return box
}
var normalizedBoundingBox = CGRect.null
guard let points = try? observation.recognizedPoints(forGroupKey: .all) else {
return box
}
for (_, point) in points {
// Only use the point if the human pose joint was detected reliably
guard point.confidence > 0.1 else { continue }
normalizedBoundingBox = normalizedBoundingBox.union(CGRect(origin: point.location, size: .zero))
}
if !normalizedBoundingBox.isNull {
box = normalizedBoundingBox
}
// Fetch body joints from the observation and overlay them on the player
DispatchQueue.main.async {
let joints = getBodyJointsFor(observation: observation)
self.jointSegmentView.joints = joints
}
// Store the body pose observation in playerStats when the game is in TrackThrowsState
// We will use these observations for action classification once the throw is complete
if gameManager.stateMachine.currentState is GameManager.TrackThrowsState {
playerStats.storeObservation(observation)
if trajectoryView.inFlight {
trajectoryInFlightPoseObservations += 1
}
}
return box
}
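// getBodyJointsFor(observation:) is used above to drive jointSegmentView but is not part of this excerpt.
// A minimal sketch that only uses APIs already shown here; the real helper may restrict itself to a fixed
// list of joints of interest:
func getBodyJointsFor(observation: VNRecognizedPointsObservation) -> [VNRecognizedPointKey: CGPoint] {
    var joints = [VNRecognizedPointKey: CGPoint]()
    guard let identifiedPoints = try? observation.recognizedPoints(forGroupKey: .all) else {
        return joints
    }
    // Keep only joints that were detected with reasonable confidence.
    for (key, point) in identifiedPoints where point.confidence > 0.1 {
        joints[key] = point.location
    }
    return joints
}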
func prepareInputWithObservations(_ observations: [VNRecognizedPointsObservation]) -> MLMultiArray? {
let numAvailableFrames = observations.count
let observationsNeeded = 60
var multiArrayBuffer = [MLMultiArray]()
// swiftlint:disable identifier_name
for f in 0 ..< min(numAvailableFrames, observationsNeeded) {
let pose = observations[f]
do {
let oneFrameMultiArray = try pose.keypointsMultiArray()
multiArrayBuffer.append(oneFrameMultiArray)
} catch {
continue
}
}
// If we don't have enough frames (60) yet, pad the remaining slots with 0s
if numAvailableFrames < observationsNeeded {
for _ in 0 ..< (observationsNeeded - numAvailableFrames) {
do {
let oneFrameMultiArray = try MLMultiArray(shape: [1, 3, 18], dataType: .double)
try resetMultiArray(oneFrameMultiArray)
multiArrayBuffer.append(oneFrameMultiArray)
} catch {
continue
}
}
}
return MLMultiArray(concatenating: multiArrayBuffer, axis: 0, dataType: .double)
}
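// resetMultiArray(_:) is used above to create zero-padded frames but is not shown in this excerpt.
// A minimal sketch that simply fills the array with a constant value (kept throwing to match the try at
// the call site):
import CoreML
import Foundation

func resetMultiArray(_ array: MLMultiArray, with value: Double = 0.0) throws {
    // Overwrite every element of the multi-array with the padding value.
    for index in 0 ..< array.count {
        array[index] = NSNumber(value: value)
    }
}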
mutating func getLastThrowType() -> ThrowType {
let actionClassifier = PlayerActionClassifier().model
guard let poseMultiArray = prepareInputWithObservations(poseObservations) else {
return ThrowType.none
}
let input = PlayerActionClassifierInput(input: poseMultiArray)
guard let predictions = try? actionClassifier.prediction(from: input),
let output = predictions.featureValue(for: "output")?.multiArrayValue,
let outputBuffer = try? UnsafeBufferPointer<Float32>(output) else {
return ThrowType.none
}
let probabilities = Array(outputBuffer)
guard let maxConfidence = probabilities.prefix(3).max(), let maxIndex = probabilities.firstIndex(of: maxConfidence) else {
return ThrowType.none
}
let throwTypes = ThrowType.allCases
return throwTypes[maxIndex]
}
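// getLastThrowType() above indexes ThrowType.allCases with the index of the most confident class. The enum
// itself is not part of this excerpt; a hypothetical definition that satisfies that usage (the case names
// are assumptions):
enum ThrowType: String, CaseIterable {
    case overhand
    case underleg
    case underhand
    case none
}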