Pykinect and OpenCV

Apr 18, 2012 at 7:08 PM

Hi,

I have created a little program combining Pykinect and OpenCV. It works fine to transfer the images from the Kinect camera to an IPL image from OpenCV:

(You need the file haarcascade_frontalface_alt.xml in the same directory as the program; it is available in the OpenCV package. I used OpenCV 2.1 for this example.)

"""provides a simple PyGame sample with video and face detection via OpenCV"""

import thread
# import itertools
import ctypes
import cv

import pykinect
from pykinect import nui
from time import sleep

import pygame
from pygame.locals import *

get_snapshot = False  # set by the 'f' key; flips show_face inside the video callback
KINECTEVENT = pygame.USEREVENT  # NOTE(review): defined but never posted/used in this script
VIDEO_WINSIZE = (640,480)  # Kinect color stream resolution
pygame.init()

# recipe to get address of surface: http://archives.seul.org/pygame/users/Apr-2008/msg00218.html
# Determine the width of Py_ssize_t for this interpreter: on Python 2,
# 32-bit builds export Py_InitModule4 while 64-bit builds export
# Py_InitModule4_64 (per the mailing-list recipe above).
if hasattr(ctypes.pythonapi, 'Py_InitModule4'):
   Py_ssize_t = ctypes.c_int
elif hasattr(ctypes.pythonapi, 'Py_InitModule4_64'):
   Py_ssize_t = ctypes.c_int64
else:
   raise TypeError("Cannot determine type of Py_ssize_t")

# PyObject_AsWriteBuffer(obj, &ptr, &len): CPython C-API call that yields a
# writable pointer to an object's raw buffer; used in surface_to_array() to
# alias the pygame screen surface's pixel memory.
_PyObject_AsWriteBuffer = ctypes.pythonapi.PyObject_AsWriteBuffer
_PyObject_AsWriteBuffer.restype = ctypes.c_int
_PyObject_AsWriteBuffer.argtypes = [ctypes.py_object,
                                  ctypes.POINTER(ctypes.c_void_p),
                                  ctypes.POINTER(Py_ssize_t)]

def detect_and_draw(img, cascade):
    """Run Haar-cascade face detection on *img*, drawing a red rectangle
    around each detected face (in place).

    Reads the module-level tuning globals ``image_scale``, ``haar_scale``,
    ``min_neighbors``, ``haar_flags`` and ``min_size``.

    Returns *img*.  Bug fix: the original only returned *img* when
    *cascade* was truthy and implicitly returned None otherwise.
    """
    # allocate temporary single-channel work images
    gray = cv.CreateImage((img.width, img.height), 8, 1)
    small_img = cv.CreateImage((cv.Round(img.width / image_scale),
                                cv.Round(img.height / image_scale)), 8, 1)
    # convert color input image to grayscale
    cv.CvtColor(img, gray, cv.CV_BGR2GRAY)
    # shrink the input image for faster detection
    cv.Resize(gray, small_img, cv.CV_INTER_LINEAR)
    cv.EqualizeHist(small_img, small_img)
    if cascade:
        faces = cv.HaarDetectObjects(small_img, cascade, cv.CreateMemStorage(0),
                                     haar_scale, min_neighbors, haar_flags,
                                     min_size)
        for ((x, y, w, h), n) in faces:
            # detection ran on the shrunken image, so scale each bounding
            # box back up before drawing on the full-size input
            pt1 = (int(x * image_scale), int(y * image_scale))
            pt2 = (int((x + w) * image_scale), int((y + h) * image_scale))
            cv.Rectangle(img, pt1, pt2, cv.RGB(255, 0, 0), 3, 8, 0)
    return img  # always return the (possibly annotated) image

def surface_to_array(surface):
    """Return a ctypes byte array aliasing *surface*'s raw pixel memory.

    This is a zero-copy view (1228800 bytes = 640*480*4 RGBA for the video
    window); writes through the returned array change the surface directly.
    """
    buf_obj = surface.get_buffer()
    data_ptr = ctypes.c_void_p()
    data_len = Py_ssize_t()
    # ask CPython for the writable buffer backing the pygame surface
    _PyObject_AsWriteBuffer(buf_obj, ctypes.byref(data_ptr),
                            ctypes.byref(data_len))
    pixels = (ctypes.c_byte * data_len.value).from_address(data_ptr.value)
    # keep the buffer object alive for as long as the alias exists
    pixels.object = buf_obj
    return pixels

def video_frame_ready(frame):
    """Kinect video-frame callback: copy the frame's pixels straight into
    the pygame screen surface and, when face detection is toggled on, run
    detection on a flipped copy shown in the OpenCV "Snapshot" window.

    Runs on the Kinect runtime's thread -- hence the screen_lock around all
    surface access.
    """
    global get_snapshot, frame_copy, ipl, show_face
    if not video_display:
        return
    with screen_lock:
        # ctypes byte array aliasing the pygame screen's pixel memory
        # (1228800 bytes = 640*480*4 RGBA)
        address = surface_to_array(screen)
        # write the Kinect frame (pykinect.nui.structs.PlanarImage)
        # directly into the screen surface's pixels
        frame.image.copy_bits(address)
        # 'f' was pressed since the last frame: toggle detection once
        if get_snapshot:
            get_snapshot = False
            show_face = not show_face
        if show_face:
            # reuse the same pixel bytes as the data of an IplImage
            cv.SetData(ipl, address)
            # normalize image origin to top-left before detection
            if ipl.origin == cv.IPL_ORIGIN_TL:
                cv.Copy(ipl, frame_copy)
            else:
                cv.Flip(ipl, frame_copy, 0)
            frame_copy = detect_and_draw(frame_copy, cascade)
            cv.ShowImage("Snapshot", frame_copy)
        del address # drop the alias before leaving the locked section
        pygame.display.update()

if __name__ == '__main__':
    video_display = True
    show_face = True
    cv.NamedWindow("Snapshot")
    # haarcascade_frontalface_alt.xml must sit next to this script
    cascade = cv.Load('haarcascade_frontalface_alt.xml')
    # face-detection tuning parameters (read as globals by detect_and_draw)
    min_size = (20, 20)
    image_scale = 3
    haar_scale = 1.2
    min_neighbors = 2
    haar_flags = 0
    frame_copy = cv.CreateImage(VIDEO_WINSIZE, cv.IPL_DEPTH_8U, 4)
    ipl = cv.CreateImage(VIDEO_WINSIZE, cv.IPL_DEPTH_8U, 4)
    screen_lock = thread.allocate()
    screen = pygame.display.set_mode(VIDEO_WINSIZE,0,32)
    pygame.display.set_caption('Python Kinect Camera')
    kinect = nui.Runtime()
    kinect.video_frame_ready += video_frame_ready
    kinect.video_stream.open(nui.ImageStreamType.Video, 2, nui.ImageResolution.Resolution640x480, nui.ImageType.Color)
    print('Controls: ')
    print('     q ESC - Quit the view')
    print('     u - Increase elevation angle')
    print('     j - Decrease elevation angle')
    # fix: the 'x' key was handled below but missing from the help text
    print('     x - set to angle == horizontal')
    print('     f - toggle facedetection')

    # main game loop
    done = False
    while not done:
        e = pygame.event.wait()
        if e.type == pygame.QUIT:
            done = True
            break
        elif e.type == KEYDOWN:
            if e.key == K_ESCAPE or e.key == K_q:
                done = True
                break
            elif e.key == K_u:
                kinect.camera.elevation_angle = kinect.camera.elevation_angle + 2
            elif e.key == K_j:
                kinect.camera.elevation_angle = kinect.camera.elevation_angle - 2
            elif e.key == K_x:
                kinect.camera.elevation_angle = 2
            elif e.key == K_f:
                get_snapshot = True
    # give the callback thread a moment to finish before teardown
    sleep(1)
Now I also want to transfer the IPL image back into the pygame window, but I have no idea how to do this. My attempts always produced errors. Any hints?

 

Apr 19, 2012 at 6:04 PM

OK, I finally managed to write the code myself. Here it is and does not need pygame for the display but just uses OpenCV:

 

"""provides a simple sample with video and face detection via OpenCV and the Kinect camera"""

import array
import thread
import ctypes
import cv
import time

import pykinect
from pykinect import nui

get_snapshot = False  # set by the 'f' key; flips show_face inside the video callback
VIDEO_WINSIZE = (640,480)  # Kinect color stream resolution

# Pointer-size detection recipe for Python 2: 32-bit builds export
# Py_InitModule4, 64-bit builds export Py_InitModule4_64.
if hasattr(ctypes.pythonapi, 'Py_InitModule4'):
   Py_ssize_t = ctypes.c_int
elif hasattr(ctypes.pythonapi, 'Py_InitModule4_64'):
   Py_ssize_t = ctypes.c_int64
else:
   raise TypeError("Cannot determine type of Py_ssize_t")
# NOTE(review): `size` (and Py_ssize_t) are never used in this OpenCV-only
# version -- leftovers from the pygame-based script above.
size = Py_ssize_t() 

def detect_and_draw(img, cascade):
    """Run Haar-cascade face detection on *img*, drawing a red rectangle
    around each detected face (in place).

    Reads the module-level tuning globals ``image_scale``, ``haar_scale``,
    ``min_neighbors``, ``haar_flags`` and ``min_size``.

    Returns *img*.  Bug fix: the original only returned *img* when
    *cascade* was truthy and implicitly returned None otherwise.
    """
    # allocate temporary single-channel work images
    gray = cv.CreateImage((img.width, img.height), 8, 1)
    small_img = cv.CreateImage((cv.Round(img.width / image_scale),
                                cv.Round(img.height / image_scale)), 8, 1)
    # convert color input image to grayscale
    cv.CvtColor(img, gray, cv.CV_BGR2GRAY)
    # shrink the input image for faster detection
    cv.Resize(gray, small_img, cv.CV_INTER_LINEAR)
    cv.EqualizeHist(small_img, small_img)
    if cascade:
        faces = cv.HaarDetectObjects(small_img, cascade, cv.CreateMemStorage(0),
                                     haar_scale, min_neighbors, haar_flags,
                                     min_size)
        for ((x, y, w, h), n) in faces:
            # detection ran on the shrunken image, so scale each bounding
            # box back up before drawing on the full-size input
            pt1 = (int(x * image_scale), int(y * image_scale))
            pt2 = (int((x + w) * image_scale), int((y + h) * image_scale))
            cv.Rectangle(img, pt1, pt2, cv.RGB(255, 0, 0), 3, 8, 0)
    return img  # always return the (possibly annotated) image

def video_frame_ready(frame):
    """Kinect video-frame callback (OpenCV-only version).

    Copies the raw frame bytes into the module-level `ipl_mat` array (via
    its buffer `address`), pushes them into the IplImage `ipl`, and shows
    the result -- optionally with face detection -- in the
    "Kinect FaceDetect" window.  Runs on the Kinect runtime's thread,
    hence screen_lock.
    """
    global address, get_snapshot, frame_copy, ipl, show_face
    with screen_lock:
        # `address` points at ipl_mat's buffer
        # (1228800 bytes = 640*480*4 RGBA); copy_bits writes the
        # pykinect.nui.structs.PlanarImage frame straight into it
        frame.image.copy_bits(address)
        # 'f' was pressed since the last frame: toggle detection once
        if get_snapshot:
            get_snapshot = False
            show_face = not show_face
        # move the freshly written bytes into the IplImage
        # (tostring() copies the array each frame)
        cv.SetData(ipl, ipl_mat.tostring())
        if show_face:
            # detect_and_draw draws on ipl and returns it, so frame_copy
            # aliases ipl here; the Copy/Flip below then acts in place
            frame_copy = detect_and_draw(ipl, cascade)
            if ipl.origin == cv.IPL_ORIGIN_TL:
                cv.Copy(ipl, frame_copy)
            else:
                cv.Flip(ipl, frame_copy, 0)
            cv.ShowImage("Kinect FaceDetect", frame_copy)
        else:
            cv.ShowImage("Kinect FaceDetect", ipl)
        
if __name__ == '__main__':
    show_face = True
    cv.NamedWindow("Kinect FaceDetect")
    cascade = cv.Load('haarcascade_frontalface_alt.xml')
    # detection tuning (read as globals by detect_and_draw)
    min_size = (20, 20)
    image_scale = 3
    haar_scale = 1.2
    min_neighbors = 2
    haar_flags = 0
    frame_copy = cv.CreateImage(VIDEO_WINSIZE, cv.IPL_DEPTH_8U, 4)
    ipl = cv.CreateImage(VIDEO_WINSIZE, cv.IPL_DEPTH_8U, 4)
    # byte array sized to one frame; its buffer address is handed to
    # PlanarImage.copy_bits inside the video callback
    ipl_mat = array.array('c')
    ipl_mat.fromstring(ipl.tostring())
    address = ipl_mat.buffer_info()[0]
    screen_lock = thread.allocate()
    kinect = nui.Runtime()
    kinect.video_frame_ready += video_frame_ready
    kinect.video_stream.open(nui.ImageStreamType.Video, 2, nui.ImageResolution.Resolution640x480, nui.ImageType.Color)
    for help_line in ('Controls: ',
                      '     q ESC - Quit the view',
                      '     u - Increase elevation angle',
                      '     j - Decrease elevation angle',
                      '     x - set to angle == horizontal',
                      '     f - toggle facedetection'):
        print(help_line)
    # main loop: cv.WaitKey both pumps the HighGUI window and reads keys
    KEY_ESC, KEY_F, KEY_J, KEY_Q, KEY_U, KEY_X = 27, 102, 106, 113, 117, 120
    while True:
        pressed = cv.WaitKey(0)
        if pressed in (KEY_ESC, KEY_Q):
            break
        elif pressed == KEY_U:
            kinect.camera.elevation_angle += 2
        elif pressed == KEY_J:
            kinect.camera.elevation_angle -= 2
        elif pressed == KEY_X:
            # level the camera
            kinect.camera.elevation_angle = 2
        elif pressed == KEY_F:
            get_snapshot = True
    cv.DestroyWindow("Kinect FaceDetect")

This of course opens a world of possibilites with OpenCV alongside the Kinect camera. Wow!

Coordinator
Apr 19, 2012 at 6:06 PM

Awesome!

Apr 20, 2012 at 4:18 PM

You can include the example into the already existing examples for your module, if you like.

I tried to get freenect to work but had no success at all. So I would rather go with the MS SDK for Kinect, which seems to be much more reliable, at least on Windows. Unfortunately it doesn't seem to work on WinXP, only on Win7.

Apr 20, 2012 at 4:22 PM
El día 19 de abril de 2012 20:04, bunkus <notifications@codeplex.com> escribió:
> From: bunkus
>
> OK, I finally managed to write the code myself. Here it is and does not need
> pygame for the display but just uses OpenCV:
>

Cool bunkus,

I'm going to test it now myself :)


Saludos,

-- luismiguel  (@lmorillas)

>
>
> """provides a simple sample with video and face detection via OpenCV and the
> Kinect camera"""
>
> import array
> import thread
> import ctypes
> import cv
> import time
>
> import pykinect
> from pykinect import nui
>
> get_snapshot = False
> VIDEO_WINSIZE = (640,480)
>
> if hasattr(ctypes.pythonapi, 'Py_InitModule4'):
> Py_ssize_t = ctypes.c_int
> elif hasattr(ctypes.pythonapi, 'Py_InitModule4_64'):
> Py_ssize_t = ctypes.c_int64
> else:
> raise TypeError("Cannot determine type of Py_ssize_t")
> size = Py_ssize_t()
>
> def detect_and_draw(img, cascade):
> # allocate temporary images
> gray = cv.CreateImage((img.width,img.height), 8, 1)
> small_img = cv.CreateImage((cv.Round(img.width / image_scale),
> cv.Round (img.height / image_scale)), 8, 1)
> # convert color input image to grayscale
> cv.CvtColor(img, gray, cv.CV_BGR2GRAY)
> # scale input image for faster processing
> cv.Resize(gray, small_img, cv.CV_INTER_LINEAR)
> cv.EqualizeHist(small_img, small_img)
> if(cascade):
> t = cv.GetTickCount()
> faces = cv.HaarDetectObjects(small_img, cascade,
> cv.CreateMemStorage(0),
> haar_scale, min_neighbors, haar_flags,
> min_size)
> t = cv.GetTickCount() - t
> # print "detection time = %gms" % (t/(cv.GetTickFrequency()*1000.)),
> if faces:
> # print "face found"
> for ((x, y, w, h), n) in faces:
> # the input to cv.HaarDetectObjects was resized, so scale
> the
> # bounding box of each face and convert it to two CvPoints
> pt1 = (int(x * image_scale), int(y * image_scale))
> pt2 = (int((x + w) * image_scale), int((y + h) *
> image_scale))
> cv.Rectangle(img, pt1, pt2, cv.RGB(255, 0, 0), 3, 8, 0)
> else:
> # print "face not found"
> pass
> return img
>
> def video_frame_ready(frame):
> global address, get_snapshot, frame_copy, ipl, show_face
> # print type(frame), #
> with screen_lock:
> # array pointer to array image
> # print type(address),
> # pointing to an array of size: 1228800 = 640*480*4 RGBA
> # take the pointer of the array image and put it to the
> # pykinect.nui.structs.PlanarImage
> frame.image.copy_bits(address)
> # print type(frame.image), #
> if get_snapshot:
> get_snapshot = False
> show_face = not show_face
> cv.SetData(ipl, ipl_mat.tostring())
> if show_face:
> frame_copy = detect_and_draw(ipl, cascade)
> if ipl.origin == cv.IPL_ORIGIN_TL:
> cv.Copy(ipl, frame_copy)
> else:
> cv.Flip(ipl, frame_copy, 0)
> cv.ShowImage("Kinect FaceDetect", frame_copy)
> else:
> cv.ShowImage("Kinect FaceDetect", ipl)
>
> if __name__ == '__main__':
> show_face = True
> cv.NamedWindow("Kinect FaceDetect")
> cascade = cv.Load('haarcascade_frontalface_alt.xml')
> min_size = (20, 20)
> image_scale = 3
> haar_scale = 1.2
> min_neighbors = 2
> haar_flags = 0
> frame_copy = cv.CreateImage(VIDEO_WINSIZE, cv.IPL_DEPTH_8U, 4)
> ipl = cv.CreateImage(VIDEO_WINSIZE, cv.IPL_DEPTH_8U, 4)
> ipl_mat = array.array('c')
> ipl_mat.fromstring(ipl.tostring())
> # print "ipl_mat.itemsize", ipl_mat.itemsize
> # print "ipl_mat.buffer_info()", ipl_mat.buffer_info()
> # print "ipl_mat.buffer_info()[1]*ipl_mat.itemsize",
> ipl_mat.buffer_info()[1]*ipl_mat.itemsize
> address = ipl_mat.buffer_info()[0]
> screen_lock = thread.allocate()
> kinect = nui.Runtime()
> kinect.video_frame_ready += video_frame_ready
> kinect.video_stream.open(nui.ImageStreamType.Video, 2,
> nui.ImageResolution.Resolution640x480, nui.ImageType.Color)
> print('Controls: ')
> print(' q ESC - Quit the view')
> print(' u - Increase elevation angle')
> print(' j - Decrease elevation angle')
> print(' x - set to angle == horizontal')
> print(' f - toggle facedetection')
> # main loop
> done = False
> while not done:
> k = cv.WaitKey(0)
> if k == 27: # ESC key
> done = True
> break
> elif k != -1:
> # print k
> if k == 117: # u
> kinect.camera.elevation_angle =
> kinect.camera.elevation_angle + 2
> elif k == 106: # j
> kinect.camera.elevation_angle =
> kinect.camera.elevation_angle - 2
> elif k == 120: # x
> kinect.camera.elevation_angle = 2
> elif k == 102: # f
> get_snapshot = True
> elif k == 113: # q
> done = True
> break
> cv.DestroyWindow("Kinect FaceDetect")
>
> This of course opens a world of possibilites with OpenCV alongside the
> Kinect camera. Wow!
>
> Read the full discussion online.
>
> To add a post to this discussion, reply to this email
> ([email removed])
>
> To start a new discussion for this project, email
> [email removed]
>
> You are receiving this email because you subscribed to this discussion on
> CodePlex. You can unsubscribe or change your settings on codePlex.com.
>
> Please note: Images and attachments will be removed from emails. Any posts
> to this discussion will also be available online at codeplex.com