Golang : Handling image beyond OpenCV video capture boundary

One problem that I was unable to solve in the previous tutorial on how to put UTF-8 characters on OpenCV image frames was the crash caused by OpenCV's SetROI and Copy functions whenever the UTF-8 image breaches the camera's dimensions or visual range.
To solve this problem, the rectangle of the image (let's call it the label) has to be dynamically resized so that it does not cross the boundary. The code example below has two solutions.
The first solution is to resize the label dynamically by calculating, on each iteration, the label size needed to keep the program from crashing. This solution is implemented for X and Y beyond the camera's width and height.
The second solution is simply not to display the label whenever it crosses the boundary. This second solution is implemented for X and Y less than 0, that is, when the label's position is beyond the top or left edge of the frame.
Use the method that you find easiest to implement.
To understand how these solutions work, run the code below and test it by adjusting the sliders.
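The PNG image used by the code below can be downloaded at https://d1ohg4ss876yi2.cloudfront.net/golang-convert-png-transparent-background-image-to-jpg-or-jpeg-image/PNG-file.png. Note that the code opens ./JPEG-file.jpg, so convert the PNG to a JPEG file with that name first (see the second link under References).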
Here you go!
 package main

 import (
  "fmt"
  "image"
  "image/color"
  "image/draw"
  "image/jpeg"
  "math"
  "os"
  "runtime"
  "strconv"
  "time"

  "github.com/lazywei/go-opencv/opencv"
  "github.com/mattn/go-gtk/glib"
  "github.com/mattn/go-gtk/gtk"
 )

 // GUI and capture handles. main replaces these placeholders with the real
 // window, camera capture and status bar.
 var (
     win       = new(opencv.Window)
     webCamera = new(opencv.Capture)
     statusbar = new(gtk.Statusbar)
 )

 // Label placement state shared between the GTK callbacks and the frame loop.
 var (
     snapshotFileName string

     // camera frame size, filled in by main from the capture properties
     cameraWidth, cameraHeight int

     // top-left corner of the label, driven by the two sliders
     sliderPosX, sliderPosY = 100, 100

     // how far the label sticks out beyond cameraWidth / cameraHeight
     penetrationWidth, penetrationHeight int

     distanceWidth, distanceHeight int

     // set by the Quit button so the frame loop stops touching the camera
     // (to prevent segmentation fault)
     stopCamera bool

     // label image size; from PNG file dimension, change to your own image
     // file dimension
     backgroundWidth  = 267
     backgroundHeight = 394

     // current (possibly shrunk) label size; starts at the full image size
     elasticWidth  = backgroundWidth
     elasticHeight = backgroundHeight
 )

 // Font used for the timestamp overlay.
 var (
     horizontalScale float32 = 1.0
     verticalScale   float32 = 1.0
     shear           float32 = 1.0

     thickness = 3
     lineType  = 8

     textFont = opencv.InitFont(opencv.CV_FONT_HERSHEY_SIMPLEX, horizontalScale, verticalScale, shear, thickness, lineType)
 )

 // Frames and colors.
 var (
     // current camera frame and the label converted to an IplImage
     IplImgFrame, utf8TextImg *opencv.IplImage

     // OpenCV scalars are ordered (blue, green, red, alpha)
     redColor  = opencv.NewScalar(0, 0, 255, 0)   // red
     cyanColor = opencv.NewScalar(255, 255, 0, 0) // cyan

     // more color at https://github.com/golang/image/blob/master/colornames/table.go
     red   = color.RGBA{255, 0, 0, 255}
     blue  = color.RGBA{0, 0, 255, 255}
     white = color.RGBA{255, 255, 255, 255}
     black = color.RGBA{0, 0, 0, 255}

     // label image drawn by main and pasted onto every frame
     background *image.RGBA
 )

 // opencvImageBGRToBGRA converts a 3-channel BGR image into a 4-channel BGRA
 // image of the same width and height and returns it by value.
 //
 // The camera delivers 3-channel frames, while the label that gets copied into
 // the region of interest has 4 channels, so the frame is upgraded first.
 // Going through ToImage() would also work but slows down the refresh rate;
 // CvtColor() gives the best result.
 func opencvImageBGRToBGRA(img *opencv.IplImage) opencv.IplImage {
     // destination: same size as the source, 8-bit unsigned, 4 channels
     bgra := opencv.CreateImage(img.Width(), img.Height(), opencv.IPL_DEPTH_8U, 4)

     // BGR -> BGRA (3 to 4 channels)
     opencv.CvtColor(img, bgra, opencv.CV_BGR2BGRA)

     return *bgra
 }

 // BGRAToBGR converts a 4-channel BGRA image into a 3-channel BGR image of the
 // same width and height and returns it by value.
 func BGRAToBGR(img *opencv.IplImage) opencv.IplImage {
     // Conversion code handed to CvtColor for BGRA -> BGR. It is passed as the
     // raw integer 1; for the list of codes see
     // http://docs.opencv.org/3.1.0/df/d4e/group__imgproc__c.html
     const bgraToBGR = 1

     // destination: same size as the source, 8-bit unsigned, 3 channels
     bgr := opencv.CreateImage(img.Width(), img.Height(), opencv.IPL_DEPTH_8U, 3)

     // BGRA -> BGR (4 to 3 channels)
     opencv.CvtColor(img, bgr, bgraToBGR)

     return *bgr
 }

 // processFrameAndUpdate is the camera loop. For every grabbed frame it
 // upgrades the frame to 4 channels, pastes the label image (utf8TextImg, built
 // once from the package-level background) into a region of interest whose
 // top-left corner is (sliderPosX, sliderPosY), outlines it, draws a timestamp
 // and shows the result in win.
 //
 // The label size lives in the package-level elasticWidth/elasticHeight, so it
 // persists from one frame to the next: it is shrunk when the label would cross
 // the right/bottom camera edge and grown again when room becomes available.
 //
 // The function never returns; main starts it as a goroutine. While stopCamera
 // is true the loop keeps spinning without touching the camera.
 // NOTE(review): sliderPosX, sliderPosY and stopCamera are written by the GTK
 // callbacks in main and read here with no synchronization visible in this
 // file.
 func processFrameAndUpdate() {

  // convert background from image.Image type to opencv.IplImage
  utf8TextImg = opencv.FromImage(background)
  // starts as the full label; replaced by a cropped version when the label
  // has to shrink
  var utf8TextImgCropped = utf8TextImg

  for {
 if !stopCamera {
 if webCamera.GrabFrame() {
 IplImgFrame = webCamera.RetrieveFrame(1)

 if IplImgFrame != nil {

 // overwrite the struct IplImgFrame points to with its 4-channel version
 *IplImgFrame = opencvImageBGRToBGRA(IplImgFrame)

 // NOTE(review): "+0800" does not look like a Go layout token ("-0700" is
 // the zone token), so it is presumably emitted literally - confirm
 currentTime := time.Now().Local().Format("2006-01-02 15:04:05 +0800")

 // set ROI(Region Of Interest) in IplImageFrame
 // and paste our UTF8 runes into ROI via Copy
 // need extra care here to check if our rectangle is beyond the boundary
 // the four corner strings below are only used for the debug output
 rectTopLeftPosition := strconv.Itoa(sliderPosX) + "," + strconv.Itoa(sliderPosY)
 rectTopRightPosition := strconv.Itoa(sliderPosX+elasticWidth) + "," + strconv.Itoa(sliderPosY)
 rectBottomLeftPosition := strconv.Itoa(sliderPosX) + "," + strconv.Itoa(sliderPosY+elasticHeight)
 rectBottomRightPosition := strconv.Itoa(sliderPosX+elasticWidth) + "," + strconv.Itoa(sliderPosY+elasticHeight)

 fmt.Println("Top left : ", rectTopLeftPosition)
 fmt.Println("Top right : ", rectTopRightPosition)
 fmt.Println("Bottom left : ", rectBottomLeftPosition)
 fmt.Println("Bottom right : ", rectBottomRightPosition)

 // initial ROI: current elastic width, but the full background height
 //rect := opencv.NewRect(sliderPosX, sliderPosY, elasticWidth, elasticHeight)
 rect := opencv.NewRect(sliderPosX, sliderPosY, elasticWidth, backgroundHeight)

 fmt.Println("Rect beginning : ", rect)

 // ---------------------------------------------------------------
 // handle the X-axis and Y-axis crossing the camera width and height
 // note: the test is an OR, so both axes are adjusted even when only one
 // of them crosses; for the axis that does not cross, the penetration is
 // negative or zero and the subtraction below does not shrink that side

 if ((sliderPosX + elasticWidth) > cameraWidth) || ((sliderPosY + elasticHeight) > cameraHeight) {

 // calculate the penetration distance of utf8TextImg beyond the camera Width
 penetrationWidth = (sliderPosX + elasticWidth) - cameraWidth
 elasticWidth = elasticWidth - penetrationWidth

 // calculate the penetration distance of utf8TextImg beyond the camera Height
 penetrationHeight = (sliderPosY + elasticHeight) - cameraHeight
 elasticHeight = elasticHeight - penetrationHeight

 fmt.Println("ElasticHeight : ", elasticHeight)
 fmt.Println("BackgroundHeight : ", backgroundHeight)

 // need to crop utf8TextImg as well, otherwise, the Copy() function below
 // will crash the program
 // NOTE(review): this is an OR, so the crop runs even if one of the two
 // sizes is zero or negative - confirm that is intended
 if (elasticWidth > 0) || (elasticHeight > 0) {
 // cap elasticWidth to backgroundWidth
 if elasticWidth > backgroundWidth {
 elasticWidth = backgroundWidth
 }

 // cap elasticHeight to backgroundHeight
 if elasticHeight > backgroundHeight {
 elasticHeight = backgroundHeight
 }

 rect = opencv.NewRect(sliderPosX, sliderPosY, elasticWidth, elasticHeight)
 utf8TextImgCropped = opencv.Crop(utf8TextImg, 0, 0, elasticWidth, elasticHeight)
 }

 // zero width: fall back to a 1-pixel-wide strip one pixel to the left
 if elasticWidth == 0 {
 rect = opencv.NewRect(sliderPosX-1, sliderPosY, 1, backgroundHeight)
 utf8TextImgCropped = opencv.Crop(utf8TextImg, 0, 0, 1, backgroundHeight)
 }

 // zero height: fall back to a 1-pixel-high strip one pixel up
 if elasticHeight == 0 {
 rect = opencv.NewRect(sliderPosX, sliderPosY-1, backgroundWidth, 1)
 utf8TextImgCropped = opencv.Crop(utf8TextImg, 0, 0, backgroundWidth, 1)
 }

 }

 // the label was shrunk horizontally earlier and no longer reaches the
 // right edge: grow the width back
 if ((sliderPosX + elasticWidth) < cameraWidth) && (elasticWidth < backgroundWidth) {

 // calculate the distance of sliderPosX to camera Width

 distanceWidth = sliderPosX - cameraWidth

 // make the distance positive
 if distanceWidth <= 0 {
 distanceWidth = int(math.Abs(float64(distanceWidth)))
 }

 // cap to maximum size of backgroundWidth
 if distanceWidth > backgroundWidth {
 distanceWidth = backgroundWidth
 }

 // amount to add so the width moves back towards the available distance
 compensate := math.Abs(float64(distanceWidth - elasticWidth))

 //fmt.Println("Distance from camera width : ", distanceWidth)
 //fmt.Println("Compensate back : ", compensate)
 //fmt.Println("Elastic width plus compensate  : ", elasticWidth+int(compensate))

 elasticWidth = elasticWidth + int(compensate)

 if elasticWidth > 0 {
 rect = opencv.NewRect(sliderPosX, sliderPosY, elasticWidth, backgroundHeight)
 utf8TextImgCropped = opencv.Crop(utf8TextImg, 0, 0, elasticWidth, backgroundHeight)
 }

 if elasticWidth == 0 {
 rect = opencv.NewRect(sliderPosX-1, sliderPosY, 1, backgroundHeight)
 utf8TextImgCropped = opencv.Crop(utf8TextImg, 0, 0, 1, backgroundHeight)
 }

 }

 // same recovery as above, for the vertical axis
 if ((sliderPosY + elasticHeight) < cameraHeight) && (elasticHeight < backgroundHeight) {

 // calculate the distance of sliderPosY to camera Height

 distanceHeight = sliderPosY - cameraHeight

 // make the distance positive
 if distanceHeight <= 0 {
 distanceHeight = int(math.Abs(float64(distanceHeight)))
 }

 // cap to maximum size of backgroundHeight
 if distanceHeight > backgroundHeight {
 distanceHeight = backgroundHeight
 }

 compensate := math.Abs(float64(distanceHeight - elasticHeight))

 //fmt.Println("Distance from camera height : ", distanceHeight)
 //fmt.Println("Compensate back : ", compensate)
 //fmt.Println("Elastic height plus compensate  : ", elasticHeight+int(compensate))

 elasticHeight = elasticHeight + int(compensate)

 if elasticHeight > 0 {
 rect = opencv.NewRect(sliderPosX, sliderPosY, backgroundWidth, elasticHeight)
 utf8TextImgCropped = opencv.Crop(utf8TextImg, 0, 0, backgroundWidth, elasticHeight)
 }

 if elasticHeight == 0 {
 rect = opencv.NewRect(sliderPosX, sliderPosY-1, backgroundWidth, 1)
 utf8TextImgCropped = opencv.Crop(utf8TextImg, 0, 0, backgroundWidth, 1)
 }

 }

 // paste and outline the label only when both sizes are positive;
 // otherwise only the timestamp below is drawn on this frame
 if (elasticWidth > 0) && (elasticHeight > 0) {

 if ((sliderPosX + backgroundWidth) > cameraWidth) && ((sliderPosY + backgroundHeight) > cameraHeight) {
 // we are at bottom right corner
 fmt.Println("TODO : Handle X and Y beyond boundary together")

 // calculate the distance of sliderPosY to camera Height

 distanceHeight = sliderPosY - cameraHeight

 if distanceHeight <= 0 {
 distanceHeight = int(math.Abs(float64(distanceHeight)))
 }

 // cap to maximum size of backgroundHeight
 if distanceHeight > backgroundHeight {
 distanceHeight = backgroundHeight
 }

 compensate := math.Abs(float64(distanceHeight - elasticHeight))

 elasticHeight = elasticHeight + int(compensate)

 // calculate the distance of sliderPosX to camera Width

 distanceWidth = sliderPosX - cameraWidth

 if distanceWidth <= 0 {
 distanceWidth = int(math.Abs(float64(distanceWidth)))
 }

 // cap to maximum size of backgroundWidth
 if distanceWidth > backgroundWidth {
 distanceWidth = backgroundWidth
 }

 compensate = math.Abs(float64(distanceWidth - elasticWidth))

 elasticWidth = elasticWidth + int(compensate)

 rect = opencv.NewRect(sliderPosX, sliderPosY, elasticWidth, elasticHeight)
 utf8TextImgCropped = opencv.Crop(utf8TextImg, 0, 0, elasticWidth, elasticHeight)

 IplImgFrame.SetROI(rect)

 opencv.Copy(utf8TextImgCropped, IplImgFrame, nil)
 IplImgFrame.ResetROI() // don't forget this!

 // outline: spans elasticWidth horizontally but the full backgroundHeight
 // vertically
 opencv.Rectangle(IplImgFrame,
 opencv.Point{sliderPosX + elasticWidth, sliderPosY},
 opencv.Point{sliderPosX, sliderPosY + backgroundHeight},
 opencv.ScalarAll(0.0), 2, 2, 0)

 } else {

 fmt.Println("Elastic width : ", elasticWidth)
 fmt.Println("Elastic height : ", elasticHeight)

 rect = opencv.NewRect(sliderPosX, sliderPosY, elasticWidth, elasticHeight)

 // the easiest solution is ... not to show if the rectangle is beyond
 // the camera boundary or visual range :P
 // such as entering the negative zones. X and Y less than 0

 if !((sliderPosX < 0) || (sliderPosY < 0)) {

 IplImgFrame.SetROI(rect)
 // debug output: size of the crop left over from the previous step
 fmt.Println(utf8TextImgCropped.Width())
 fmt.Println(utf8TextImgCropped.Height())
 utf8TextImgCropped = opencv.Crop(utf8TextImg, 0, 0, elasticWidth, elasticHeight)

 opencv.Copy(utf8TextImgCropped, IplImgFrame, nil)
 IplImgFrame.ResetROI() // don't forget this!
 }

 // the outline is drawn even when the paste above was skipped
 opencv.Rectangle(IplImgFrame,
 opencv.Point{sliderPosX + elasticWidth, sliderPosY},
 opencv.Point{sliderPosX, sliderPosY + backgroundHeight},
 opencv.ScalarAll(0.0), 2, 2, 0)

 }

 }

 // timestamp, placed int(verticalScale*200.0) pixels below the label's top edge
 textFont.PutText(IplImgFrame, currentTime, opencv.Point{sliderPosX, sliderPosY + int(verticalScale*200.0)}, cyanColor)

 win.ShowImage(IplImgFrame)

 }
 }
 }
  }

 }

 // main wires the demo together:
 //
 //  1. decodes ./JPEG-file.jpg and draws it onto the package-level background
 //     image (backgroundWidth x backgroundHeight), which becomes the label;
 //  2. opens the OpenCV preview window and the camera, and records the camera
 //     frame size in cameraWidth/cameraHeight;
 //  3. starts processFrameAndUpdate in a goroutine;
 //  4. builds a small GTK control panel with an X slider, a Y slider, a Quit
 //     button and a status bar, then blocks in gtk.Main().
 //
 // The window and the camera are released by deferred calls once gtk.Main()
 // returns. Both ways of leaving the GTK loop (the Quit button and closing the
 // control panel) set stopCamera first, so the frame goroutine stops using the
 // camera before it is released.
 func main() {

     cores := runtime.NumCPU()

     fmt.Printf("This machine has %d CPU cores. Using all cores. \n", cores)

     // maximize CPU usage for maximum performance
     runtime.GOMAXPROCS(cores)

     jpegImageFile, err := os.Open("./JPEG-file.jpg")

     if err != nil {
         fmt.Println("JPEG-file.jpg file not found!")
         os.Exit(1)
     }

     defer jpegImageFile.Close()

     // create image from JPEG file
     imgSource, err := jpeg.Decode(jpegImageFile)

     if err != nil {
         fmt.Println(err)
         os.Exit(1)
     }

     // To size the label after the decoded image instead, use:
     //background = image.NewRGBA(imgSource.Bounds())

     // for this example, we hardcode the width and height,
     // change the dimension to suit your image file

     background = image.NewRGBA(image.Rect(0, 0, backgroundWidth, backgroundHeight))
     // paste JPEG image OVER the new image
     draw.Draw(background, background.Bounds(), imgSource, imgSource.Bounds().Min, draw.Over)

     // a new OpenCV window
     win = opencv.NewWindow("Handle image beyond OpenCV video capture boundary")
     defer win.Destroy()

     // activate webCamera
     webCamera = opencv.NewCameraCapture(opencv.CV_CAP_ANY) // autodetect

     if webCamera == nil {
         panic("Unable to open camera")
     }

     defer webCamera.Release()

     // get some data from camera
     cameraWidth = int(webCamera.GetProperty(opencv.CV_CAP_PROP_FRAME_WIDTH))
     cameraHeight = int(webCamera.GetProperty(opencv.CV_CAP_PROP_FRAME_HEIGHT))

     fmt.Println("Camera width : ", cameraWidth)
     fmt.Println("Camera height : ", cameraHeight)

     // open up a new "pure" OpenCV window first
     go processFrameAndUpdate() // goroutine to update feed from camera

     // then our "floating" GTK toolbar
     gtk.Init(nil)
     window := gtk.NewWindow(gtk.WINDOW_TOPLEVEL)

     window.SetPosition(gtk.WIN_POS_CENTER)
     window.SetTitle("Example of writing UTF8 text on Go-OpenCV video capture!")
     window.SetIconName("gtk-dialog-info")
     window.Connect("destroy", func(ctx *glib.CallbackContext) {
         // Closing the control panel ends gtk.Main(), after which the deferred
         // webCamera.Release() runs. Stop the frame loop first, exactly as the
         // Quit button does, so the goroutine is not grabbing frames from a
         // released camera (segmentation fault).
         stopCamera = true
         println("got destroy!", ctx.Data().(string))
         gtk.MainQuit()
     }, "Happy coding!")

     vbox := gtk.NewVBox(false, 1)

     //--------------------------------------------------------
     // GtkVPaned
     //--------------------------------------------------------
     vpaned := gtk.NewVPaned()
     vbox.Add(vpaned)

     //--------------------------------------------------------
     // GtkFrame
     //--------------------------------------------------------

     frame1 := gtk.NewFrame("Adjust X & Y co-ordinates to place the text location :")
     framebox1 := gtk.NewVBox(false, 1)
     frame1.Add(framebox1)

     //--------------------------------------------------------
     // GtkScale
     //--------------------------------------------------------

     // X slider: from -100 (left of the frame) up to the camera width
     scaleXHBox := gtk.NewHBox(false, 1)

     scaleX := gtk.NewHScaleWithRange(-100, float64(cameraWidth), 1)
     scaleX.SetValue(float64(sliderPosX))
     scaleX.Connect("value-changed", func() {
         //println("scale:", int(scale.GetValue()))
         sliderPosX = int(scaleX.GetValue())
         statusbar.Push(statusbar.GetContextId("go-gtk"), "X : "+strconv.Itoa(sliderPosX)+" Y : "+strconv.Itoa(sliderPosY))
     })
     scaleXHBox.Add(scaleX)
     framebox1.PackStart(scaleXHBox, false, false, 0)

     // Y slider: from -100 (above the frame) up to the camera height
     scaleYHBox := gtk.NewHBox(false, 1)

     scaleY := gtk.NewHScaleWithRange(-100, float64(cameraHeight), 1)
     scaleY.SetValue(float64(sliderPosY))
     scaleY.Connect("value-changed", func() {
         //println("scale:", int(scale.GetValue()))
         sliderPosY = int(scaleY.GetValue())
         statusbar.Push(statusbar.GetContextId("go-gtk"), "X : "+strconv.Itoa(sliderPosX)+" Y : "+strconv.Itoa(sliderPosY))
     })
     scaleYHBox.Add(scaleY)
     framebox1.PackStart(scaleYHBox, false, false, 0)

     vpaned.Pack1(frame1, false, false)

     //--------------------------------------------------------
     // GtkHBox
     //--------------------------------------------------------
     buttons := gtk.NewHBox(false, 1)

     //--------------------------------------------------------
     // GtkButton
     //--------------------------------------------------------

     quitButton := gtk.NewButtonWithLabel("Quit")
     quitButton.Clicked(func() {
         stopCamera = true
         // if use defer above, don't release here
         // if release here, don't use defer
         // otherwise will cause segmentation fault

         // --- webCamera.Release() // don't forget to release !!
         gtk.MainQuit()
     })

     buttons.Add(quitButton)
     framebox1.PackStart(buttons, false, false, 0)

     //--------------------------------------------------------
     // GtkVSeparator
     //--------------------------------------------------------
     vsep := gtk.NewVSeparator()
     framebox1.PackStart(vsep, false, false, 0)

     // replace the placeholder status bar that the slider callbacks write to
     statusbar = gtk.NewStatusbar()
     //context_id := statusbar.GetContextId("go-gtk")

     //--------------------------------------------------------
     // GtkStatusbar
     //--------------------------------------------------------
     framebox1.PackStart(statusbar, false, false, 0)

     //--------------------------------------------------------
     // Event
     //--------------------------------------------------------
     window.Add(vbox)
     window.SetSizeRequest(600, 128)
     window.ShowAll()

     // blocks until gtk.MainQuit() is called
     gtk.Main()

 }
References:
https://socketloop.com/tutorials/golang-put-utf8-text-on-opencv-video-capture-image-frame
https://www.socketloop.com/tutorials/golang-convert-png-transparent-background-image-to-jpg-or-jpeg-image
Golang : Handling image beyond OpenCV video capture boundary

See also : Golang : Print UTF-8 fonts on image example

By Adam Ng

Advertisement

Tutorials