-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvector_camera_test.py
More file actions
63 lines (45 loc) · 2.61 KB
/
vector_camera_test.py
File metadata and controls
63 lines (45 loc) · 2.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import asyncio
from concurrent.futures import CancelledError
import os
import sys
import numpy as np
##from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, decode_predictions # First version uses MobileNet
from tensorflow.keras.preprocessing.image import img_to_array
import utils
import anki_vector
from PIL import Image, ImageDraw
# Width of images passed to the network
IMAGE_WIDTH: int = 416 # 416 for YoloV3 net, 224 for MobileNetV2
# Height of images passed to the network
IMAGE_HEIGHT: int = 416
screen_dimensions = anki_vector.screen.SCREEN_WIDTH, anki_vector.screen.SCREEN_HEIGHT
#model = MobileNetV2(input_shape=(IMAGE_WIDTH, IMAGE_HEIGHT, 3), alpha=1.0, include_top=True, weights='imagenet', pooling=None, classes=1000)
model = utils.getTinyYoloV3Model()
def make_prediction(robot):
while True:
camera_image = robot.camera.latest_image.raw_image
# Crop the image to reduce the complexity of the network
cropped_image = utils.crop_image(camera_image, IMAGE_WIDTH, IMAGE_HEIGHT)
# Convert image to an array with shape (image_width, image_height, 1)
image = img_to_array(cropped_image)
# Normalize the image data
image = image.astype("float") / 255.0
# Expand array shape to add an axis to denote the number of images fed as input
image = np.expand_dims(image, axis=0)
prediction = model.predict(image)[0]
predicted_classes = utils.tiny_yolo_decode_classes(prediction[0])
#predicted_classes = decode_predictions(prediction.reshape(1, 1000)) # mobilenet
#print('Classes:',predicted_classes)
#max_filtered_result = [x for x in predicted_classes[0] if x[2] > 0.6] # mobilenet
max_filtered_result = [x for x in predicted_classes]
if len(max_filtered_result) > 0:
#max_filtered_result = max_filtered_result[0] # Taking the best prediction
print("Main class: ",max_filtered_result)
# Display prediction on vector screen + captured image
screen_data = anki_vector.screen.convert_image_to_screen_data(utils.text_to_image(max_filtered_result, screen_dimensions[0], screen_dimensions[1], base_image=utils.crop_image(cropped_image, screen_dimensions[0], screen_dimensions[1])))
robot.screen.set_screen_with_image_data(screen_data, 1)
# Vector can say prediction, funny at first, annoying in a second time :p
#robot.behavior.say_text(max_filtered_result[1])
with anki_vector.Robot(None) as robot:
robot.camera.init_camera_feed()
make_prediction(robot)