Monday, April 24, 2023

GPS sensor interface with ESP8266 using Blynk IoT cloud

 




 Circuit diagram:


Source Code:
#include <TinyGPS++.h>
#include <SoftwareSerial.h>
#define BLYNK_PRINT Serial
#include <ESP8266WiFi.h>
#include <BlynkSimpleEsp8266.h>

static const int RXPin = 4, TXPin = 5;   // GPIO 4 = D2 (connect to the GPS TX pin) and GPIO 5 = D1 (connect to the GPS RX pin)
static const uint32_t GPSBaud = 9600; // If 9600 baud does not work for your module, try 4800

TinyGPSPlus gps; // The TinyGPS++ object that parses the incoming GPS bytes
WidgetMap myMap(V0);  // Map widget bound to virtual pin V0

SoftwareSerial ss(RXPin, TXPin);  // The serial connection to the GPS device

BlynkTimer timer;  // Used in setup() to run checkGPS periodically

float spd;       // Speed written by displayInfo()
float sats;      // Number of satellites written by displayInfo()
String bearing;  // Direction of travel written by displayInfo()

char auth[] = "**************";              // Your project authentication key
char ssid[] = "*****";                                       // Name of your network (hotspot or router name)
char pass[] = "*******";                                      // Corresponding password

//unsigned int move_index;         // moving index, to be used later
unsigned int move_index = 1;       // Map point index passed to myMap.location(); fixed for now
  

void setup()
{
  Serial.begin(115200);
  Serial.println();
  ss.begin(GPSBaud);
  Blynk.begin(auth, ssid, pass, "blynk.cloud", 80);
  timer.setInterval(5000L, checkGPS); // every 5s check if GPS is connected, only really needs to be done once
}

void checkGPS(){
  if (gps.charsProcessed() < 10)
  {
    Serial.println(F("No GPS detected: check wiring."));
      Blynk.virtualWrite(V4, "GPS ERROR");  // Value Display widget  on V4 if GPS not detected
  }
}

void loop()
{
    while (ss.available() > 0) 
    {
      // sketch displays information every time a new sentence is correctly encoded.
      if (gps.encode(ss.read()))
        displayInfo();
  }
  Blynk.run();
  timer.run();
}

void displayInfo()
  if (gps.location.isValid() ) 
  {    
    float latitude = (gps.location.lat());     //Storing the Lat. and Lon. 
    float longitude = (gps.location.lng()); 
    
    Serial.print("LAT:  ");
    Serial.println(latitude, 6);  // float to x decimal places
    Serial.print("LONG: ");
    Serial.println(longitude, 6);
    Blynk.virtualWrite(V1, String(latitude, 6));   
    Blynk.virtualWrite(V2, String(longitude, 6));  
    myMap.location(move_index, latitude, longitude, "GPS_Location");
    spd = gps.speed.kmph();               //get speed
       Blynk.virtualWrite(V3, spd);
       Serial.print("Speed: ");
       Serial.println(spd);
       sats = gps.satellites.value();    //get number of satellites
       Blynk.virtualWrite(V4, sats);

       bearing = TinyGPSPlus::cardinal(gps.course.value()); // get the direction
       Blynk.virtualWrite(V5, bearing);                   
  }
  
 Serial.println();
}



Saturday, April 8, 2023

Virtual Mouse using hand gesture





The concept of a virtual mouse using hand gestures is to control the movement of the mouse cursor on a computer screen without using a physical mouse. Instead, the user can use hand gestures captured by a camera or sensor to move the cursor and perform actions like clicking and scrolling.

The basic idea is to detect the position of the hand in the camera frame and track the movement of the hand to move the mouse cursor accordingly. Hand gestures can be recognized by analyzing the movement and position of the fingers and the palm.

Various computer vision techniques and machine learning algorithms can be used to implement this concept. OpenCV and Mediapipe are two popular libraries that provide hand tracking and gesture recognition functionality. These libraries use deep learning models to detect and track the position of the hand and fingers in real-time video frames.

Once the position of the hand is tracked, the movement of the hand can be mapped to the movement of the mouse cursor on the computer screen. The position of the fingers can be used to simulate mouse clicks, right-clicks, and scrolling actions.

The virtual mouse using hand gestures is a convenient and intuitive way to control the computer without the need for a physical mouse. It can be particularly useful for people with disabilities or those who find it difficult to use a traditional mouse. Additionally, it can be used in situations where a physical mouse is not available, such as in a presentation or a remote desktop environment.

Moving cursor: index and middle fingers open, remaining fingers closed.

Double click : Joining middle and index finger. 

Right click: folding index finger alone and middle finger unfold. 

Left click: folding middle finger and index finger unfold. 

Dragging folder : closing all the fingers 


To create a virtual mouse using hand gesture recognition with OpenCV and 

Mediapipe, you can follow these steps:

Install OpenCV and Mediapipe libraries.


Source Code:
# Imports

import cv2
import mediapipe as mp
import pyautogui
import math
from enum import IntEnum
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
from google.protobuf.json_format import MessageToDict
import screen_brightness_control as sbcontrol

pyautogui.FAILSAFE = False
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Gesture Encodings 
class Gest(IntEnum):
    """Integer codes for the recognised hand gestures."""

    # Finger-state codes, written in binary to show which bits are set.
    FIST = 0b00000
    PINKY = 0b00001
    RING = 0b00010
    MID = 0b00100
    LAST3 = 0b00111
    INDEX = 0b01000
    FIRST2 = 0b01100
    LAST4 = 0b01111
    THUMB = 0b10000
    PALM = 0b11111

    # Extra codes that are not plain finger-state combinations.
    V_GEST = 33
    TWO_FINGER_CLOSED = 34
    PINCH_MAJOR = 35
    PINCH_MINOR = 36

# Multi-handedness Labels
class HLabel(IntEnum):
    """Labels that tell the two tracked hands apart."""
    MINOR = 0  # the secondary hand
    MAJOR = 1  # the primary hand

# Convert Mediapipe Landmarks to recognizable Gestures
class HandRecog:
    """Converts the landmarks of one hand into a gesture code.

    ``hand_result`` is expected to expose ``landmark[i].x/.y/.z``; the landmark
    indices used below (0, 4, 5, 8, 9, 12, 13, 16, 17, 20) are the ones this
    class reads.
    """

    def __init__(self, hand_label):
        """Create a recogniser for the hand identified by ``hand_label``."""
        self.finger = 0                  # finger-state bits from set_finger_state
        self.ori_gesture = Gest.PALM     # last gesture that stayed stable long enough
        self.prev_gesture = Gest.PALM    # gesture seen on the previous call
        self.frame_count = 0             # consecutive calls with the same gesture
        self.hand_result = None
        self.hand_label = hand_label

    def update_hand_result(self, hand_result):
        """Store the landmarks for the current frame (``None`` when the hand is absent)."""
        self.hand_result = hand_result

    def get_signed_dist(self, point):
        """Return the x/y distance between two landmarks, with a sign.

        The result is positive when ``point[0]`` has a smaller y than
        ``point[1]`` and negative otherwise.
        """
        first = self.hand_result.landmark[point[0]]
        second = self.hand_result.landmark[point[1]]
        sign = 1 if first.y < second.y else -1
        return self.get_dist(point) * sign

    def get_dist(self, point):
        """Return the Euclidean x/y distance between landmarks ``point[0]`` and ``point[1]``."""
        first = self.hand_result.landmark[point[0]]
        second = self.hand_result.landmark[point[1]]
        dist = (first.x - second.x)**2
        dist += (first.y - second.y)**2
        return math.sqrt(dist)

    def get_dz(self, point):
        """Return the absolute z difference between landmarks ``point[0]`` and ``point[1]``."""
        return abs(self.hand_result.landmark[point[0]].z - self.hand_result.landmark[point[1]].z)

    def set_finger_state(self):
        """Recompute ``self.finger`` from the current landmarks.

        One bit is produced per entry of ``points`` (first entry ends up in the
        highest bit); a bit is 1 when the finger is considered open. The thumb
        is not evaluated here. Does nothing when no hand is present.
        """
        if self.hand_result is None:
            return

        # Each triple is (landmark a, landmark b, landmark c): the a-b distance
        # is compared with the b-c distance.
        points = [[8, 5, 0], [12, 9, 0], [16, 13, 0], [20, 17, 0]]
        self.finger = 0
        for point in points:
            dist = self.get_signed_dist(point[:2])
            dist2 = self.get_signed_dist(point[1:])

            try:
                ratio = round(dist / dist2, 1)
            except ZeroDivisionError:
                # Degenerate case (b and c coincide): fall back to a small
                # fixed denominator. The original referenced an undefined
                # name here and raised NameError instead.
                ratio = round(dist / 0.01, 1)

            self.finger = self.finger << 1
            if ratio > 0.5:
                self.finger = self.finger | 1

    def get_gesture(self):
        """Return the current gesture, smoothed against frame-to-frame noise.

        A newly detected gesture only replaces the reported one after it has
        been seen on more than four consecutive repeat calls. Returns
        ``Gest.PALM`` when no hand is present.
        """
        if self.hand_result is None:
            return Gest.PALM

        current_gesture = Gest.PALM
        if self.finger in [Gest.LAST3, Gest.LAST4] and self.get_dist([8, 4]) < 0.05:
            # Landmarks 8 and 4 are close together: treat as a pinch.
            if self.hand_label == HLabel.MINOR:
                current_gesture = Gest.PINCH_MINOR
            else:
                current_gesture = Gest.PINCH_MAJOR

        elif Gest.FIRST2 == self.finger:
            # Compare the 8-12 distance with the 5-9 distance.
            point = [[8, 12], [5, 9]]
            dist1 = self.get_dist(point[0])
            dist2 = self.get_dist(point[1])
            ratio = dist1 / dist2
            if ratio > 1.7:
                current_gesture = Gest.V_GEST
            else:
                if self.get_dz([8, 12]) < 0.1:
                    current_gesture = Gest.TWO_FINGER_CLOSED
                else:
                    current_gesture = Gest.MID

        else:
            current_gesture = self.finger

        if current_gesture == self.prev_gesture:
            self.frame_count += 1
        else:
            self.frame_count = 0

        self.prev_gesture = current_gesture

        if self.frame_count > 4:
            self.ori_gesture = current_gesture
        return self.ori_gesture

# Executes commands according to detected gestures
class Controller:
    """Executes mouse, scroll, volume and brightness commands for detected gestures.

    All state is kept on the class itself and every method is a static
    helper called as ``Controller.method(...)``.
    """

    tx_old = 0
    ty_old = 0
    trial = True
    flag = False                # set by the V gesture; required before a click gesture fires
    grabflag = False            # True while the left button is held down for dragging
    pinchmajorflag = False      # True while a major-hand pinch is in progress
    pinchminorflag = False      # True while a minor-hand pinch is in progress
    pinchstartxcoord = None     # landmark 8 x at the start of the pinch
    pinchstartycoord = None     # landmark 8 y at the start of the pinch
    pinchdirectionflag = None   # True = horizontal control, False = vertical control
    prevpinchlv = 0             # candidate pinch level being held
    pinchlv = 0                 # pinch level applied by the control callbacks
    framecount = 0              # calls for which the candidate level stayed stable
    prev_hand = None            # previous hand position in screen pixels
    pinch_threshold = 0.3

    @staticmethod
    def getpinchylv(hand_result):
        """Return the vertical pinch level: start y minus current landmark 8 y, times 10."""
        dist = round((Controller.pinchstartycoord - hand_result.landmark[8].y)*10, 1)
        return dist

    @staticmethod
    def getpinchxlv(hand_result):
        """Return the horizontal pinch level: current landmark 8 x minus start x, times 10."""
        dist = round((hand_result.landmark[8].x - Controller.pinchstartxcoord)*10, 1)
        return dist

    @staticmethod
    def changesystembrightness():
        """Shift the screen brightness by ``pinchlv/50`` (as a 0..1 fraction), clamped to 0..1."""
        reading = sbcontrol.get_brightness()
        # Newer screen_brightness_control releases return one value per
        # display; older ones returned a bare number. Accept both.
        if isinstance(reading, (list, tuple)):
            reading = reading[0] if reading else None
        if reading is None:
            return  # no readable display, nothing to adjust

        currentBrightnessLv = reading/100.0
        currentBrightnessLv += Controller.pinchlv/50.0
        if currentBrightnessLv > 1.0:
            currentBrightnessLv = 1.0
        elif currentBrightnessLv < 0.0:
            currentBrightnessLv = 0.0
        sbcontrol.fade_brightness(int(100*currentBrightnessLv), start=reading)

    @staticmethod
    def changesystemvolume():
        """Shift the master volume scalar by ``pinchlv/50``, clamped to 0..1."""
        devices = AudioUtilities.GetSpeakers()
        interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
        volume = cast(interface, POINTER(IAudioEndpointVolume))
        currentVolumeLv = volume.GetMasterVolumeLevelScalar()
        currentVolumeLv += Controller.pinchlv/50.0
        if currentVolumeLv > 1.0:
            currentVolumeLv = 1.0
        elif currentVolumeLv < 0.0:
            currentVolumeLv = 0.0
        volume.SetMasterVolumeLevelScalar(currentVolumeLv, None)

    @staticmethod
    def scrollVertical():
        """Scroll by 120 when ``pinchlv`` is positive, otherwise by -120."""
        pyautogui.scroll(120 if Controller.pinchlv > 0.0 else -120)

    @staticmethod
    def scrollHorizontal():
        """Scroll with shift+ctrl held: -120 when ``pinchlv`` is positive, otherwise 120."""
        pyautogui.keyDown('shift')
        pyautogui.keyDown('ctrl')
        try:
            pyautogui.scroll(-120 if Controller.pinchlv > 0.0 else 120)
        finally:
            # Always release the modifiers so they are not left held down.
            pyautogui.keyUp('ctrl')
            pyautogui.keyUp('shift')

    @staticmethod
    def get_position(hand_result):
        """Return the new cursor position ``(x, y)`` derived from landmark 9.

        The hand movement since the previous call is scaled by a factor that
        depends on how far the hand moved, which damps small jitters, and is
        then added to the current cursor position.
        """
        point = 9
        position = [hand_result.landmark[point].x, hand_result.landmark[point].y]
        sx, sy = pyautogui.size()
        x_old, y_old = pyautogui.position()
        x = int(position[0]*sx)
        y = int(position[1]*sy)
        if Controller.prev_hand is None:
            Controller.prev_hand = x, y
        delta_x = x - Controller.prev_hand[0]
        delta_y = y - Controller.prev_hand[1]

        distsq = delta_x**2 + delta_y**2
        Controller.prev_hand = [x, y]

        if distsq <= 25:
            ratio = 0                           # ignore tiny movements
        elif distsq <= 900:
            ratio = 0.07 * (distsq ** (1/2))    # scale with the distance moved
        else:
            ratio = 2.1                         # fixed gain for large movements
        x, y = x_old + delta_x*ratio, y_old + delta_y*ratio
        return (x, y)

    @staticmethod
    def pinch_control_init(hand_result):
        """Record the pinch start point (landmark 8) and reset the pinch counters."""
        Controller.pinchstartxcoord = hand_result.landmark[8].x
        Controller.pinchstartycoord = hand_result.landmark[8].y
        Controller.pinchlv = 0
        Controller.prevpinchlv = 0
        Controller.framecount = 0

    @staticmethod
    def pinch_control(hand_result, controlHorizontal, controlVertical):
        """Track a pinch and fire one of the two callbacks once it is held steady.

        When the candidate level has stayed within ``pinch_threshold`` for five
        counted calls, ``pinchlv`` is updated and ``controlHorizontal`` or
        ``controlVertical`` is called according to ``pinchdirectionflag``.
        """
        if Controller.framecount == 5:
            Controller.framecount = 0
            Controller.pinchlv = Controller.prevpinchlv

            if Controller.pinchdirectionflag == True:
                controlHorizontal()  # x

            elif Controller.pinchdirectionflag == False:
                controlVertical()  # y

        lvx = Controller.getpinchxlv(hand_result)
        lvy = Controller.getpinchylv(hand_result)

        if abs(lvy) > abs(lvx) and abs(lvy) > Controller.pinch_threshold:
            Controller.pinchdirectionflag = False
            if abs(Controller.prevpinchlv - lvy) < Controller.pinch_threshold:
                Controller.framecount += 1
            else:
                Controller.prevpinchlv = lvy
                Controller.framecount = 0

        elif abs(lvx) > Controller.pinch_threshold:
            Controller.pinchdirectionflag = True
            if abs(Controller.prevpinchlv - lvx) < Controller.pinch_threshold:
                Controller.framecount += 1
            else:
                Controller.prevpinchlv = lvx
                Controller.framecount = 0

    @staticmethod
    def handle_controls(gesture, hand_result):
        """Perform the action mapped to ``gesture`` using the landmarks in ``hand_result``."""
        x, y = None, None
        if gesture != Gest.PALM:
            x, y = Controller.get_position(hand_result)

        # Reset flags for gestures that have ended.
        if gesture != Gest.FIST and Controller.grabflag:
            Controller.grabflag = False
            pyautogui.mouseUp(button="left")

        if gesture != Gest.PINCH_MAJOR and Controller.pinchmajorflag:
            Controller.pinchmajorflag = False

        if gesture != Gest.PINCH_MINOR and Controller.pinchminorflag:
            Controller.pinchminorflag = False

        # Gesture -> action mapping.
        if gesture == Gest.V_GEST:
            Controller.flag = True
            pyautogui.moveTo(x, y, duration=0.1)

        elif gesture == Gest.FIST:
            if not Controller.grabflag:
                Controller.grabflag = True
                pyautogui.mouseDown(button="left")
            pyautogui.moveTo(x, y, duration=0.1)

        elif gesture == Gest.MID and Controller.flag:
            pyautogui.click()
            Controller.flag = False

        elif gesture == Gest.INDEX and Controller.flag:
            pyautogui.click(button='right')
            Controller.flag = False

        elif gesture == Gest.TWO_FINGER_CLOSED and Controller.flag:
            pyautogui.doubleClick()
            Controller.flag = False

        elif gesture == Gest.PINCH_MINOR:
            if Controller.pinchminorflag == False:
                Controller.pinch_control_init(hand_result)
                Controller.pinchminorflag = True
            Controller.pinch_control(hand_result, Controller.scrollHorizontal, Controller.scrollVertical)

        elif gesture == Gest.PINCH_MAJOR:
            if Controller.pinchmajorflag == False:
                Controller.pinch_control_init(hand_result)
                Controller.pinchmajorflag = True
            Controller.pinch_control(hand_result, Controller.changesystembrightness, Controller.changesystemvolume)
        
'''
----------------------------------------  Main Class  ----------------------------------------
    Entry point of Gesture Controller
'''


class GestureController:
    """Captures webcam frames, recognises hand gestures and dispatches them to Controller."""

    gc_mode = 0          # the capture loop runs while this is truthy
    cap = None           # cv2.VideoCapture, created in __init__
    CAM_HEIGHT = None
    CAM_WIDTH = None
    hr_major = None      # landmarks of the major hand (right hand by default)
    hr_minor = None      # landmarks of the minor hand (left hand by default)
    dom_hand = True      # True: the right hand is the major hand

    def __init__(self):
        """Open camera 0 and record its frame size."""
        GestureController.gc_mode = 1
        GestureController.cap = cv2.VideoCapture(0)
        GestureController.CAM_HEIGHT = GestureController.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        GestureController.CAM_WIDTH = GestureController.cap.get(cv2.CAP_PROP_FRAME_WIDTH)

    @staticmethod
    def classify_hands(results):
        """Store the detected hands in ``hr_major`` / ``hr_minor``.

        Looks at up to the first two detected hands, pairs each handedness
        label with its landmarks, and assigns right/left to major/minor
        according to ``dom_hand``. A hand that was not detected is stored as
        ``None``.
        """
        left, right = None, None
        handedness_list = getattr(results, 'multi_handedness', None) or []
        landmarks_list = getattr(results, 'multi_hand_landmarks', None) or []

        # Explicit pairing replaces the former blanket try/except around
        # fixed indices, so genuine errors are no longer hidden.
        for handedness, landmarks in list(zip(handedness_list, landmarks_list))[:2]:
            if handedness.classification[0].label == 'Right':
                right = landmarks
            else:
                left = landmarks

        if GestureController.dom_hand:
            GestureController.hr_major = right
            GestureController.hr_minor = left
        else:
            GestureController.hr_major = left
            GestureController.hr_minor = right

    def start(self):
        """Run the capture loop until the camera closes, ``gc_mode`` is cleared or Enter is pressed."""
        handmajor = HandRecog(HLabel.MAJOR)
        handminor = HandRecog(HLabel.MINOR)

        try:
            with mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
                while GestureController.cap.isOpened() and GestureController.gc_mode:
                    success, image = GestureController.cap.read()

                    if not success:
                        print("Ignoring empty camera frame.")
                        continue

                    # Mirror the frame and convert it to RGB for processing.
                    image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
                    image.flags.writeable = False
                    results = hands.process(image)

                    image.flags.writeable = True
                    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                    if results.multi_hand_landmarks:
                        GestureController.classify_hands(results)
                        handmajor.update_hand_result(GestureController.hr_major)
                        handminor.update_hand_result(GestureController.hr_minor)

                        handmajor.set_finger_state()
                        handminor.set_finger_state()
                        gest_name = handminor.get_gesture()

                        # A minor-hand pinch takes priority; otherwise the
                        # major hand drives the controls.
                        if gest_name == Gest.PINCH_MINOR:
                            Controller.handle_controls(gest_name, handminor.hand_result)
                        else:
                            gest_name = handmajor.get_gesture()
                            Controller.handle_controls(gest_name, handmajor.hand_result)

                        for hand_landmarks in results.multi_hand_landmarks:
                            mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                    else:
                        Controller.prev_hand = None
                    cv2.imshow('Gesture Controller', image)
                    if cv2.waitKey(5) & 0xFF == 13:  # 13 = Enter key
                        break
        finally:
            # Release the camera and windows even if the loop raised.
            GestureController.cap.release()
            cv2.destroyAllWindows()

# Entry point: creates the controller and starts the capture loop when this file is run.
gc1 = GestureController()
gc1.start()

Virtual keyboard using OpenCV and Python

 


A virtual keyboard using OpenCV is a graphical interface that allows users to type without the need for a physical keyboard. It provides a visual representation of a keyboard on a screen, and users can select keys using a pointer or by using their fingers in front of a camera.

The concept of a virtual keyboard using OpenCV involves using a computer's webcam to capture live video of the user's hand gestures. The captured video is then processed using the OpenCV library to detect the user's hand landmarks and track their movement.

Once the hand landmarks are detected, the position of the user's fingers can be used to identify the key on the virtual keyboard that the user wants to select. The selected key can then be sent to the computer's operating system as if it was typed on a physical keyboard.

To create a virtual keyboard using OpenCV, the following steps are typically involved:

Capture video from the computer's webcam
Preprocess the video to remove noise and improve image quality
Detect hand landmarks using OpenCV and MediaPipe library
Track the movement of the user's fingers
Map the position of the fingers to the keys on the virtual keyboard
Send the selected key to the operating system as keyboard input
Overall, a virtual keyboard using OpenCV can provide a convenient and accessible way for users to type without the need for a physical keyboard. It can be useful in situations where a physical keyboard is not available or when users have difficulty using a physical keyboard due to disability or injury.


To create a virtual keyboard using OpenCV, Python, and MediaPipe, you can follow these general steps:

Install the necessary libraries:

OpenCV: pip install opencv-python
MediaPipe: pip install mediapipe
Import the necessary libraries:

Source Code:

virtual keyboard.py

import cv2
import numpy as np 
import time
from keys import *
from handTracker import *
from pynput.keyboard import Controller

def getMousPos(event , x, y, flags, param):
    """Mouse callback for the video window.

    Stores the pointer position in the module-level ``clickedX``/``clickedY``
    when the left button is released, and in ``mouseX``/``mouseY`` whenever
    the pointer moves. ``flags`` and ``param`` are accepted but unused.
    """
    global clickedX, clickedY, mouseX, mouseY

    if event == cv2.EVENT_LBUTTONUP:
        clickedX = x
        clickedY = y

    if event == cv2.EVENT_MOUSEMOVE:
        mouseX = x
        mouseY = y

def calculateIntDidtance(pt1, pt2):
    """Return the Euclidean distance between two (x, y) points, truncated to an int."""
    dx = pt1[0] - pt2[0]
    dy = pt1[1] - pt2[1]
    return int((dx**2 + dy**2)**0.5)

# Build the on-screen keyboard: three letter rows plus Space, clr and backspace.
w, h = 80, 60                # key width and height
startX, startY = 40, 200     # top-left corner of the first letter key
letters = list("QWERTYUIOPASDFGHJKLZXCVBNM")
keys = []
for i, l in enumerate(letters):
    # Pick the row and the column inside that row for letter number i.
    if i < 10:
        row, col = 0, i
    elif i < 19:
        row, col = 1, i - 10
    else:
        row, col = 2, i - 19
    # The horizontal gap term uses the overall index i, not the column.
    keys.append(Key(startX + col*w + i*5, startY + row*(h + 5), w, h, l))

keys.extend([
    Key(startX+25, startY+3*h+15, 5*w, h, "Space"),
    Key(startX+8*w + 50, startY+2*h+10, w, h, "clr"),
    Key(startX+5*w+30, startY+3*h+15, 5*w, h, "<--"),
])

# Control buttons and the box that shows the typed text.
showKey = Key(300, 5, 80, 50, 'Show')
exitKey = Key(300, 65, 80, 50, 'Exit')
textBox = Key(startX, startY-h-5, 10*w+9*5, h, '')

# Open the default camera; ptime holds the previous frame's timestamp for the FPS readout.
cap = cv2.VideoCapture(0)
ptime = 0

# initiating the hand tracker
tracker = HandTracker(detectionCon=0.8)

# Read one frame to learn the camera resolution; fail with a clear message
# when the camera delivers nothing.
firstOk, firstFrame = cap.read()
if not firstOk:
    cap.release()
    raise RuntimeError("Could not read a frame from the camera.")
frameHeight, frameWidth, _ = firstFrame.shape

# Place the Show/Exit buttons near the right edge of the enlarged (1.5x) frame.
showKey.x = int(frameWidth*1.5) - 85
exitKey.x = int(frameWidth*1.5) - 85

# Last click and last pointer position, updated by the getMousPos callback.
# The pointer pair must be named mouseX/mouseY: those are the names the
# callback writes and the main loop reads.
clickedX, clickedY = 0, 0
mouseX, mouseY = 0, 0

show = False                 # whether the keyboard is currently displayed
cv2.namedWindow('video')
counter = 0
previousClick = 0            # time of the last finger "click"

keyboard = Controller()
# Register the mouse handler once; the 'video' window is created before this point.
cv2.setMouseCallback('video', getMousPos)

# Main loop: grab a frame, track the hand, draw the keyboard and handle input.
while True:
    if counter > 0:
        counter -= 1

    # Fingertip positions default to (0, 0) when no hand is visible.
    signTipX = 0
    signTipY = 0

    thumbTipX = 0
    thumbTipY = 0

    ret, frame = cap.read()
    if not ret:
        break
    frame = cv2.resize(frame, (int(frameWidth*1.5), int(frameHeight*1.5)))
    frame = cv2.flip(frame, 1)

    # find hands
    frame = tracker.findHands(frame)
    lmList = tracker.getPostion(frame, draw=False)
    if lmList:
        # Entries 8 and 4 are used as the index ("sign") fingertip and the thumb tip.
        signTipX, signTipY = lmList[8][1], lmList[8][2]
        thumbTipX, thumbTipY = lmList[4][1], lmList[4][2]
        if calculateIntDidtance((signTipX, signTipY), (thumbTipX, thumbTipY)) < 50:
            # Fingertips are close together: draw a marker between them.
            centerX = int((signTipX+thumbTipX)/2)
            centerY = int((signTipY + thumbTipY)/2)
            cv2.line(frame, (signTipX, signTipY), (thumbTipX, thumbTipY), (0, 255, 0), 2)
            cv2.circle(frame, (centerX, centerY), 5, (0, 255, 0), cv2.FILLED)

    ctime = time.time()
    elapsed = ctime - ptime
    # Guard against two frames sharing the same timestamp.
    fps = int(1/elapsed) if elapsed > 0 else 0

    cv2.putText(frame, str(fps) + " FPS", (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)
    showKey.drawKey(frame, (255, 255, 255), (0, 0, 0), 0.1, fontScale=0.5)
    exitKey.drawKey(frame, (255, 255, 255), (0, 0, 0), 0.1, fontScale=0.5)

    if showKey.isOver(clickedX, clickedY):
        show = not show
        showKey.text = "Hide" if show else "Show"
        clickedX, clickedY = 0, 0

    if exitKey.isOver(clickedX, clickedY):
        # Leave the loop so the camera and windows are released below.
        break

    # checking if sign finger is over a key and if click happens
    alpha = 0.5
    if show:
        textBox.drawKey(frame, (255, 255, 255), (0, 0, 0), 0.3)
        for k in keys:
            if k.isOver(mouseX, mouseY) or k.isOver(signTipX, signTipY):
                alpha = 0.1  # highlight the key under the pointer or fingertip

                # writing using the mouse (left button released over the key)
                if k.isOver(clickedX, clickedY):
                    if k.text == '<--':
                        textBox.text = textBox.text[:-1]
                    elif k.text == 'clr':
                        textBox.text = ''
                    elif len(textBox.text) < 30:
                        if k.text == 'Space':
                            textBox.text += " "
                        else:
                            textBox.text += k.text

                # writing using fingers (thumb tip over the key)
                if k.isOver(thumbTipX, thumbTipY):
                    clickTime = time.time()
                    # Accept at most one finger click every 0.4 s.
                    if clickTime - previousClick > 0.4:
                        if k.text == '<--':
                            textBox.text = textBox.text[:-1]
                        elif k.text == 'clr':
                            textBox.text = ''
                        elif len(textBox.text) < 30:
                            if k.text == 'Space':
                                textBox.text += " "
                            else:
                                textBox.text += k.text
                                # Simulate the real key stroke: press and then
                                # release, otherwise the key stays held down.
                                keyboard.press(k.text)
                                keyboard.release(k.text)
                        previousClick = clickTime
            k.drawKey(frame, (255, 255, 255), (0, 0, 0), alpha=alpha)
            alpha = 0.5
        clickedX, clickedY = 0, 0
    ptime = ctime
    cv2.imshow('video', frame)

    ## stop the video when 'q' is pressed
    pressedKey = cv2.waitKey(1)
    if pressedKey == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


handTracker.py

import mediapipe as mp
import numpy as np
import cv2


class HandTracker():
    """Thin wrapper around MediaPipe Hands for detecting and locating hand landmarks."""

    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
        """Create the MediaPipe hands solution.

        Args:
            mode: passed as ``static_image_mode``.
            maxHands: passed as ``max_num_hands``.
            detectionCon: passed as ``min_detection_confidence``.
            trackCon: passed as ``min_tracking_confidence``.
        """
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        self.mpHands = mp.solutions.hands
        # Keyword arguments keep each value attached to the intended
        # parameter regardless of the positional order of Hands().
        self.hands = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxHands,
            min_detection_confidence=self.detectionCon,
            min_tracking_confidence=self.trackCon,
        )
        self.mpDraw = mp.solutions.drawing_utils
        self.results = None  # set by findHands

    def findHands(self, img, draw=True):
        """Run hand detection on a BGR image and return the image.

        When ``draw`` is true, the landmarks of every detected hand are drawn
        onto ``img``. The detection result is kept in ``self.results``.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)

        if draw and self.results.multi_hand_landmarks:
            for handLm in self.results.multi_hand_landmarks:
                self.mpDraw.draw_landmarks(img, handLm, self.mpHands.HAND_CONNECTIONS)
        return img

    def getPostion(self, img, handNo=0, draw=True):
        """Return ``[(id, x, y), ...]`` pixel positions for the landmarks of hand ``handNo``.

        Uses the result of the latest ``findHands`` call. Returns an empty
        list when nothing was detected or ``handNo`` is out of range. When
        ``draw`` is true, a circle is drawn on ``img`` at every landmark.
        """
        lmList = []
        detected = self.results.multi_hand_landmarks if self.results is not None else None
        if not detected or handNo >= len(detected):
            return lmList

        height, width = img.shape[0], img.shape[1]
        for idx, lm in enumerate(detected[handNo].landmark):
            # Landmark coordinates are scaled by the image size to get pixels.
            cx, cy = int(lm.x * width), int(lm.y * height)
            lmList.append((idx, cx, cy))
            if draw:
                cv2.circle(img, (cx, cy), 5, (255, 0, 255), cv2.FILLED)
        return lmList


keys.py

import cv2
import numpy as np

class Key():
    """A rectangular on-screen button with a text label."""

    def __init__(self,x,y,w,h,text):
        """Store the top-left corner (x, y), the size (w, h) and the label."""
        self.x, self.y = x, y
        self.w, self.h = w, h
        self.text = text

    def drawKey(self, img, text_color=(255,255,255), bg_color=(0,0,0),alpha=0.5, fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.8, thickness=2):
        """Draw the key onto ``img``: a blended background box with the label centred in it.

        ``alpha`` is the weight of the existing image content; the background
        colour gets weight ``1 - alpha``.
        """
        top, bottom = self.y, self.y + self.h
        left, right = self.x, self.x + self.w

        # Blend the region under the key with a solid block of bg_color.
        region = img[top:bottom, left:right]
        overlay = np.ones(region.shape, dtype=np.uint8)
        overlay[:] = bg_color
        blended = cv2.addWeighted(region, alpha, overlay, 1-alpha, 1.0)

        # Write the blended patch back into the image.
        img[top:bottom, left:right] = blended

        # Centre the label inside the box.
        label_size = cv2.getTextSize(self.text, fontFace, fontScale, thickness)
        label_w, label_h = label_size[0][0], label_size[0][1]
        origin = (int(self.x + self.w/2 - label_w/2), int(self.y + self.h/2 + label_h/2))
        cv2.putText(img, self.text, origin, fontFace, fontScale, text_color, thickness)

    def isOver(self,x,y):
        """Return True when the point (x, y) lies strictly inside the key."""
        inside_x = self.x < x < self.x + self.w
        inside_y = self.y < y < self.y + self.h
        return inside_x and inside_y


Image to Text to Speech conversion using python program

 



Tesseract is a cross-OS optical character recognition (OCR) engine developed by HP in the 1980s, and since 2006, maintained by Google as an open-source project with high marks for accuracy in reading raw image data into digital characters. The project has been continuously developed and now offers OCR supported by LSTM neural networks for highly improved results.  In this session, we’ll use the Python wrapper for Tesseract to first test drive OCR on images through code before connecting our solution to a live IP video feed from your smartphone processed through OpenCV, and then translating the resultant text stream into audible form with gTTS (Google Text-To-Speech), enabling our mashup program to automatically read out loud from any script it ‘sees’.  


Prerequisites: —Python IDE such as PyCharm / VScode

—The Tesseract engine (https://tesseract-ocr.github.io/tessdoc/Home.html) 

—A smartphone configured as an IP Webcam (https://www.makeuseof.com/tag/ip-webcam-android-phone-as-a-web-cam/)  


Program code :

# Imports

import cv2

import pytesseract

from gtts import gTTS

from playsound import playsound


# Connects pytesseract(wrapper) to the trained tesseract module

pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'


# # Video feed

# video = cv2.VideoCapture(0)

#

# # Setting width and height for video feed

# video.set(3, 640)

# video.set(4, 480)


# Image feeds: the three sample images, loaded from the current working directory

img1 = cv2.imread('Capture_1.JPG')

img2 = cv2.imread('Capture_2.JPG')

img3 = cv2.imread('Capture_3.JPG')

# Obtains only the string from images without visual feedback

# print(pytesseract.image_to_string(img1))

# print(pytesseract.image_to_string(img2))

# print(pytesseract.image_to_string(img3))


# Height and width of each image; the third value of .shape is not needed

# Used ONLY by the character-level functions (charone, chartwo, charthree)

h1Img, w1Img, none1 = img1.shape

h2Img, w2Img, none2 = img2.shape

h3Img, w3Img, none3 = img3.shape

# print(img1.shape)

# print(img2.shape)

# print(img3.shape)


# Character-level boxes: one text line per character. The char* functions below

# read field 0 as the character and fields 1-4 as two corner points (x, y) and (x2, y2).

box1 = pytesseract.image_to_boxes(img1)

box2 = pytesseract.image_to_boxes(img2)

box3 = pytesseract.image_to_boxes(img3)

# print(box1)

# print(box2)

# print(box3)


# Convert images into bound data values: level, page no, block no, paragraph no,

# line no, word no, x, y, width, height, conf, value

# Used ONLY by the word-level functions (wordone, wordtwo)

data1 = pytesseract.image_to_data(img1)

data2 = pytesseract.image_to_data(img2)

data3 = pytesseract.image_to_data(img3)

# print(data1)

# print(data2)

# print(data3)



def charone():
    """Draw a red box and label for every character entry in ``box1`` on ``img1``, then show it.

    Blocks until a key is pressed in the output window.
    """
    red = (0, 0, 255)
    for record in box1.splitlines():
        # Each record is "<char> <x> <y> <x2> <y2> ..." separated by spaces.
        fields = record.split()
        left, bottom = int(fields[1]), int(fields[2])
        right, top = int(fields[3]), int(fields[4])

        # y values are subtracted from the image height before drawing.
        cv2.rectangle(img1, (left, h1Img - bottom), (right, h1Img - top), red, 1)
        cv2.putText(img1, fields[0], (left, h1Img - bottom - 25), cv2.FONT_HERSHEY_PLAIN, 1.5, red, 2)

    cv2.imshow('Image Output', img1)
    cv2.waitKey(0)



def chartwo():
    """Draw a green box and label for every character entry in ``box2`` on ``img2``, then show it.

    Blocks until a key is pressed in the output window.
    """
    green = (0, 128, 0)
    for record in box2.splitlines():
        # Each record is "<char> <x> <y> <x2> <y2> ..." separated by spaces.
        fields = record.split()
        left, bottom = int(fields[1]), int(fields[2])
        right, top = int(fields[3]), int(fields[4])

        # y values are subtracted from the image height before drawing.
        cv2.rectangle(img2, (left, h2Img - bottom), (right, h2Img - top), green, 1)
        cv2.putText(img2, fields[0], (left, h2Img - bottom - 25), cv2.FONT_HERSHEY_PLAIN, 1, green, 1)

    cv2.imshow('Image Output', img2)
    cv2.waitKey(0)



def charthree():
    """Outline and label every character recognised in img3, then display it.

    Reads the module-level ``box3`` text (one record per character), draws
    directly onto ``img3`` and blocks until a key is pressed in the window.
    """
    blue = (255, 0, 0)  # OpenCV colours are in BGR order

    for record in box3.splitlines():
        fields = record.split()
        glyph = fields[0]

        # Fields 1-4 are two opposite corners of the character box
        left = int(fields[1])
        bottom = int(fields[2])
        right = int(fields[3])
        top = int(fields[4])

        # The y values are subtracted from the image height before drawing,
        # i.e. they are treated as measured from the bottom edge of the image
        corner_a = (left, h3Img - bottom)
        corner_b = (right, h3Img - top)
        cv2.rectangle(img3, corner_a, corner_b, blue, 1)

        # Label is anchored 50 px above the flipped bottom coordinate
        label_origin = (left, h3Img - bottom - 50)
        cv2.putText(img3, glyph, label_origin, cv2.FONT_HERSHEY_PLAIN, 2, blue, 1)

    # Show the annotated image and wait for any key
    cv2.imshow('Image Output', img3)
    cv2.waitKey(0)



def wordone():
    """Outline and label every word recognised in img1, then display it.

    Reads the module-level ``data1`` table, draws directly onto ``img1`` and
    blocks until a key is pressed in the window.
    """
    red = (0, 0, 255)  # OpenCV colours are in BGR order

    # The first line of the table is the column header, so skip it
    for row in data1.splitlines()[1:]:
        cols = row.split()

        # Only rows with all 12 columns carry a recognised word in the last one
        if len(cols) != 12:
            continue

        left, top = int(cols[6]), int(cols[7])
        width, height = int(cols[8]), int(cols[9])

        # Box around the word
        cv2.rectangle(img1, (left, top), (left + width, top + height), red, 1)

        # Word text, anchored at the box's top edge and shifted 15 px left
        cv2.putText(img1, cols[11], (left - 15, top), cv2.FONT_HERSHEY_PLAIN, 2, red, 1)

    # Show the annotated image and wait for any key
    cv2.imshow('Image output', img1)
    cv2.waitKey(0)



def wordtwo():
    """Outline and label every word recognised in img2, then display it.

    Reads the module-level ``data2`` table, draws directly onto ``img2`` and
    blocks until a key is pressed in the window.
    """
    green = (0, 255, 0)  # OpenCV colours are in BGR order

    # The first line of the table is the column header, so skip it
    for row in data2.splitlines()[1:]:
        cols = row.split()

        # Only rows with all 12 columns carry a recognised word in the last one
        if len(cols) != 12:
            continue

        left, top = int(cols[6]), int(cols[7])
        width, height = int(cols[8]), int(cols[9])

        # Box around the word
        cv2.rectangle(img2, (left, top), (left + width, top + height), green, 1)

        # Word text, anchored at the box's top edge and shifted 15 px left
        cv2.putText(img2, cols[11], (left - 15, top), cv2.FONT_HERSHEY_PLAIN, 2, green, 1)

    # Show the annotated image and wait for any key
    cv2.imshow('Image output', img2)
    cv2.waitKey(0)



def wordthree():
    """Outline and label every word recognised in img3, then display it.

    Reads the module-level ``data3`` table, draws directly onto ``img3`` and
    blocks until a key is pressed in the window.
    """
    blue = (255, 0, 0)  # OpenCV colours are in BGR order

    # The first line of the table is the column header, so skip it
    for row in data3.splitlines()[1:]:
        cols = row.split()

        # Only rows with all 12 columns carry a recognised word in the last one
        if len(cols) != 12:
            continue

        left, top = int(cols[6]), int(cols[7])
        width, height = int(cols[8]), int(cols[9])

        # Box around the word
        cv2.rectangle(img3, (left, top), (left + width, top + height), blue, 1)

        # Word text, anchored at the box's top edge and shifted 15 px left
        cv2.putText(img3, cols[11], (left - 15, top), cv2.FONT_HERSHEY_PLAIN, 2, blue, 1)

    # Show the annotated image and wait for any key
    cv2.imshow('Image output', img3)
    cv2.waitKey(0)



def startvideofeed(source="http://192.168.43.107/video"):
    """Run word-level OCR on a live video stream and display the annotated frames.

    Each frame is passed to pytesseract, every detected word is boxed and
    labelled in red, and the result is shown in the 'Video output' window.
    The loop ends when 'q' is pressed or when a frame can no longer be read.

    Args:
        source: Stream URL, file path or camera index accepted by
            cv2.VideoCapture. Defaults to the IP-camera URL used so far.
    """
    red = (0, 0, 255)  # OpenCV colours are in BGR order

    video = cv2.VideoCapture(source)

    try:
        # Request a 640x480 feed (same properties as the numeric ids 3 and 4)
        video.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        video.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

        while True:
            grabbed, frame = video.read()

            # read() reports failure when the stream is unreachable or has
            # ended; stop instead of handing an empty frame to the OCR engine
            if not grabbed or frame is None:
                print("Could not read a frame from the video feed")
                break

            table = pytesseract.image_to_data(frame)

            # The first line of the table is the column header, so skip it
            for row in table.splitlines()[1:]:
                cols = row.split()

                # Only rows with all 12 columns carry a recognised word
                if len(cols) != 12:
                    continue

                left, top = int(cols[6]), int(cols[7])
                width, height = int(cols[8]), int(cols[9])

                # Box around the word
                cv2.rectangle(frame, (left, top), (left + width, top + height), red, 2)

                # Word text, anchored at the box's top edge, shifted 15 px left
                cv2.putText(frame, cols[11], (left - 15, top), cv2.FONT_HERSHEY_PLAIN, 2, red, 1)

            cv2.imshow('Video output', frame)

            # Leave the loop when the user presses 'q'
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the stream and close the windows, even after an error
        video.release()
        cv2.destroyAllWindows()



def startimagefeed(source="http://192.168.43.107/video"):
    """Grab a single frame from the video stream, run word-level OCR and show it.

    Every detected word is boxed and labelled in red; the annotated frame is
    shown in the 'Image output' window until a key is pressed. If no frame can
    be read, a message is printed and nothing is displayed.

    Args:
        source: Stream URL, file path or camera index accepted by
            cv2.VideoCapture. Defaults to the IP-camera URL used so far.
    """
    red = (0, 0, 255)  # OpenCV colours are in BGR order

    video = cv2.VideoCapture(source)

    try:
        # Request a 640x480 feed (same properties as the numeric ids 3 and 4)
        video.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        video.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

        grabbed, frame = video.read()
    finally:
        # Only one frame is needed, so the stream can be freed straight away
        video.release()

    # read() reports failure when the stream is unreachable; stop instead of
    # handing an empty frame to the OCR engine
    if not grabbed or frame is None:
        print("Could not read a frame from the video feed")
        return

    table = pytesseract.image_to_data(frame)

    # The first line of the table is the column header, so skip it
    for row in table.splitlines()[1:]:
        cols = row.split()

        # Only rows with all 12 columns carry a recognised word
        if len(cols) != 12:
            continue

        left, top = int(cols[6]), int(cols[7])
        width, height = int(cols[8]), int(cols[9])

        # Box around the word
        cv2.rectangle(frame, (left, top), (left + width, top + height), red, 2)

        # Word text, anchored at the box's top edge, shifted 15 px left
        cv2.putText(frame, cols[11], (left - 15, top), cv2.FONT_HERSHEY_PLAIN, 2, red, 1)

    # Show the annotated frame and wait for any key
    cv2.imshow('Image output', frame)
    cv2.waitKey(0)


def texttospeech():
    """Read the words recognised in img1 aloud and show the annotated image.

    Every word in the module-level ``data1`` table is boxed and labelled on
    ``img1``. The words are written to "String.txt" (each followed by a space),
    converted to speech with gTTS, saved as "test.mp3" and played after the
    image window is dismissed with a key press.

    When no word was recognised, no audio is generated or played; a message is
    printed instead.
    """
    red = (0, 0, 255)  # OpenCV colours are in BGR order
    words = []

    # The first line of the table is the column header, so skip it
    for row in data1.splitlines()[1:]:
        cols = row.split()

        # Only rows with all 12 columns carry a recognised word
        if len(cols) != 12:
            continue

        left, top = int(cols[6]), int(cols[7])
        width, height = int(cols[8]), int(cols[9])

        # Box around the word
        cv2.rectangle(img1, (left, top), (left + width, top + height), red, 1)

        # Word text, anchored at the box's top edge, shifted 15 px left
        cv2.putText(img1, cols[11], (left - 15, top), cv2.FONT_HERSHEY_PLAIN, 2, red, 1)

        words.append(cols[11])

    # Same layout as before: every word is followed by a single space
    text = "".join(word + " " for word in words)

    # Keep the transcript on disk; UTF-8 so non-ASCII OCR output cannot fail
    # to encode under the platform's default code page, and the context
    # manager closes the file on every path
    with open("String.txt", "w", encoding="utf-8") as transcript:
        transcript.write(text)

    # gTTS rejects empty text, so only synthesise when something was recognised
    has_speech = bool(text.strip())
    if has_speech:
        speech = gTTS(text=text, lang='en', slow=False)
        speech.save("test.mp3")

    # Show the annotated image and wait for any key
    cv2.imshow('Image output', img1)
    cv2.waitKey(0)

    # Avoid playing a stale or missing file when nothing was synthesised
    if has_speech:
        playsound("test.mp3")
    else:
        print("No text was detected in the image, nothing to read aloud")



# Interactive menu: options 1-9 each run one demo; any other input exits
menu_actions = {
    '1': charone,
    '2': chartwo,
    '3': charthree,
    '4': wordone,
    '5': wordtwo,
    '6': wordthree,
    '7': startvideofeed,
    '8': startimagefeed,
    '9': texttospeech,
}

while True:
    option = input("Which option do you choose (1 - 9): ")
    print("\n")

    selected_action = menu_actions.get(option)

    # Unknown input ends the program
    if selected_action is None:
        print("Thank you for using the the OCR program")
        break

    selected_action()


Sample Picture:



Tesseract software for Windows 7


Go to the link below:

https://digi.bib.uni-mannheim.de/tesseract/


Choose this version:

tesseract-ocr-w64-setup-v5.0.1.20220118.exe

GPS sensor interface with ESP8266 using Blynk IoT cloud

   Circuit diagram: Source Code: #include <TinyGPS++.h> #include <SoftwareSerial.h> #define BLYNK_PRINT Serial #include <ESP8...