1 of 179

2 of 179

The slides are meant as visual support for the lecture.

They are neither documentation nor a script.

Please do not print the slides.

Comments and feedback to n.meseth@hs-osnabrueck.de

3 of 179

ORGANIZATION

4 of 179

ILIAS

Microsoft Teams

5 of 179

sessions

6 of 179

group work

7 of 179

examination

8 of 179

working environment

9 of 179

visual studio code

python

tinkerforge

git

10 of 179

DIGITAL TECHNOLOGIES

11 of 179

solution

input

output

a model for solving problems

12 of 179

cyber physical systems

artificial intelligence

software prototyping

13 of 179

cyber physical systems

sensors: temperature, humidity, co2, uv light, ambient light, sound pressure, thermal image, camera, ...

actuators: led, speaker, display, motor, …

artificial intelligence

software prototyping

14 of 179

artificial intelligence

computer vision

generative ai

natural language processing

cyber physical systems

software prototyping

15 of 179

artificial intelligence

computer vision: image classification, image segmentation, object recognition, object tracking, face recognition, face identification, emotion recognition, pose estimation, text recognition

generative ai

natural language processing

cyber physical systems

software prototyping

16 of 179

artificial intelligence

computer vision

generative ai: text generation, text summary, text analysis, image generation, image description, video generation, music generation

natural language processing

cyber physical systems

software prototyping

17 of 179

artificial intelligence

computer vision

generative ai

natural language processing: speech-to-text, text-to-speech, translation

cyber physical systems

software prototyping

18 of 179

artificial intelligence

cyber physical systems

software prototyping: user interfaces, cloud services, databases

19 of 179

introductory example

20 of 179

visual studio code

programs

python

21 of 179

LEDs

22 of 179

large language models

23 of 179

speech-to-text

24 of 179

user interface

25 of 179

SENSORS

26 of 179

27 of 179

temperature / humidity

28 of 179

th = BrickletHumidityV2(UID, ipcon)…

29 of 179

th.get_humidity()

th.get_temperature()

30 of 179

th.register_callback(th.CALLBACK_HUMIDITY, cb_humidity)

th.register_callback(th.CALLBACK_TEMPERATURE, …)

31 of 179

th.set_humidity_callback_configuration(250, False, "x", 0, 0)

th.set_temperature_callback_configuration(...)
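
Put together, a minimal callback sketch might look like this. This is a sketch, not taken from the slides: the host and port assume a local Brick Daemon, and UID is a placeholder for your bricklet's UID.

from tinkerforge.ip_connection import IPConnection
from tinkerforge.bricklet_humidity_v2 import BrickletHumidityV2

UID = "XYZ"  # placeholder: the UID of your bricklet

def cb_humidity(humidity):
    # the value is delivered as %RH * 100
    print(f"Humidity: {humidity / 100} %RH")

ipcon = IPConnection()
th = BrickletHumidityV2(UID, ipcon)
ipcon.connect("localhost", 4223)  # assumes a local Brick Daemon

th.register_callback(th.CALLBACK_HUMIDITY, cb_humidity)
# report every 250 ms, even if the value has not changed
th.set_humidity_callback_configuration(250, False, "x", 0, 0)

input("Press Enter to exit...\n")
ipcon.disconnect()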

32 of 179

rgb led button

33 of 179

btn = BrickletRGBLEDButton(UID, ipcon)…

34 of 179

btn.set_color(255, 0, 0)

35 of 179

btn.get_button_state()

36 of 179

btn.register_callback(...)
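
As with the humidity bricklet, a small end-to-end sketch (placeholder UID, local Brick Daemon assumed; the callback name and state constants come from the Tinkerforge Python bindings):

from tinkerforge.ip_connection import IPConnection
from tinkerforge.bricklet_rgb_led_button import BrickletRGBLEDButton

UID = "XYZ"  # placeholder: the UID of your bricklet

def cb_button_state_changed(state):
    if state == BrickletRGBLEDButton.BUTTON_STATE_PRESSED:
        print("Button pressed")

ipcon = IPConnection()
btn = BrickletRGBLEDButton(UID, ipcon)
ipcon.connect("localhost", 4223)

btn.set_color(0, 255, 0)  # green
btn.register_callback(btn.CALLBACK_BUTTON_STATE_CHANGED, cb_button_state_changed)

input("Press Enter to exit...\n")
ipcon.disconnect()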

37 of 179

camera

38 of 179

OpenCV

import cv2

39 of 179

# Get video capture device (webcam)

webcam = cv2.VideoCapture(0)

40 of 179

# Read a frame

success, frame = webcam.read()

41 of 179

# Show the image from the frame

cv2.imshow("Webcam", frame)

42 of 179

# Save the frame as .png

cv2.imwrite("screenshot.png", frame)
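
These four calls are usually combined into a loop that shows a live image until a key is pressed — a minimal sketch:

import cv2

webcam = cv2.VideoCapture(0)

while True:
    success, frame = webcam.read()
    if not success:
        break
    cv2.imshow("Webcam", frame)
    # stop when 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

webcam.release()
cv2.destroyAllWindows()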

43 of 179

thermal imaging camera

44 of 179

OpenCV

Tinkerforge

45 of 179

ti = BrickletThermalImaging(UID, ipcon)

ti.set_image_transfer_config(...)

img = ti.get_high_contrast_image()
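
A sketch of how the pieces could fit together (assumptions: local Brick Daemon and a placeholder UID; the bricklet delivers an 80x60 high-contrast image as a flat list of 8-bit values, which OpenCV can display once reshaped):

import cv2
import numpy as np
from tinkerforge.ip_connection import IPConnection
from tinkerforge.bricklet_thermal_imaging import BrickletThermalImaging

UID = "XYZ"  # placeholder: the UID of your bricklet

ipcon = IPConnection()
ti = BrickletThermalImaging(UID, ipcon)
ipcon.connect("localhost", 4223)

# request high-contrast (8-bit grayscale) images on demand
ti.set_image_transfer_config(ti.IMAGE_TRANSFER_MANUAL_HIGH_CONTRAST_IMAGE)

# 80x60 pixels, one grayscale value per pixel
img = ti.get_high_contrast_image()
frame = np.array(img, dtype=np.uint8).reshape(60, 80)

# upscale and show with OpenCV
frame = cv2.resize(frame, (640, 480), interpolation=cv2.INTER_NEAREST)
cv2.imshow("Thermal", frame)
cv2.waitKey(0)

ipcon.disconnect()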

46 of 179

ti.register_callback(...)

47 of 179

microphone

48 of 179

import pyaudio

49 of 179

# Define recording parameters
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
CHUNK = 1024

50 of 179

# Get access to the microphone

audio = pyaudio.PyAudio()

51 of 179

# Start listening

stream = audio.open(...)

52 of 179

# Read a chunk of frames

stream.read(CHUNK)

53 of 179

# Stop and close stream
stream.stop_stream()
stream.close()

54 of 179

# Terminate access to microphone

audio.terminate()
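
Put together, a minimal record-and-save sketch. The 3-second duration is an arbitrary choice, and saving to .wav uses the standard-library wave module, which the slides do not cover:

import wave
import pyaudio

FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
CHUNK = 1024
SECONDS = 3

audio = pyaudio.PyAudio()
stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                    input=True, frames_per_buffer=CHUNK)

frames = []
for _ in range(int(RATE / CHUNK * SECONDS)):
    frames.append(stream.read(CHUNK))

stream.stop_stream()
stream.close()
sample_width = audio.get_sample_size(FORMAT)
audio.terminate()

# save the recording as a .wav file
with wave.open("recording.wav", "wb") as f:
    f.setnchannels(CHANNELS)
    f.setsampwidth(sample_width)
    f.setframerate(RATE)
    f.writeframes(b"".join(frames))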

55 of 179

keyboard

56 of 179

import keyboard

57 of 179

# Define a callback function for a key
def record_audio():
    print("Recording audio…")

58 of 179

# Add key listener

keyboard.add_hotkey("r", record_audio)

59 of 179

# Wait until a specific key was pressed

keyboard.wait("esc")
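
The three pieces combine into a small, complete script (the hotkey callback here only prints; in a real prototype it would start a recording):

import keyboard

# callback for the 'r' key
def record_audio():
    print("Recording audio…")

keyboard.add_hotkey("r", record_audio)

# block until ESC is pressed
keyboard.wait("esc")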

60 of 179

ACTUATORS

61 of 179

62 of 179

rgb led

63 of 179

led = BrickletRGBLEDV2(UID, ipcon)

led.set_rgb_value(255, 0, 0)

64 of 179

OLED display

65 of 179

oled = BrickletOLED128x64V2(UID, ipcon)

oled.clear_display()

oled.write_line(0, 0, "Welcome!")
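
Both actuators follow the same pattern as the sensors: create the object, connect, call its functions. A combined sketch with hypothetical UIDs and a local Brick Daemon assumed:

from tinkerforge.ip_connection import IPConnection
from tinkerforge.bricklet_rgb_led_v2 import BrickletRGBLEDV2
from tinkerforge.bricklet_oled_128x64_v2 import BrickletOLED128x64V2

LED_UID = "AB1"   # hypothetical UIDs: replace with your own
OLED_UID = "CD2"

ipcon = IPConnection()
led = BrickletRGBLEDV2(LED_UID, ipcon)
oled = BrickletOLED128x64V2(OLED_UID, ipcon)
ipcon.connect("localhost", 4223)

led.set_rgb_value(255, 0, 0)       # red
oled.clear_display()
oled.write_line(0, 0, "Welcome!")  # line 0, column 0

ipcon.disconnect()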

66 of 179

speaker

67 of 179

import simpleaudio as sa

68 of 179

# Create a wave object from a .wav file and play it
wav = sa.WaveObject.from_wave_file("sound.wav")
wav.play().wait_done()

69 of 179

COMPUTER VISION

70 of 179

finding oranges in images

? → output

71 of 179

Image source: Wikimedia

72 of 179

Image source: Wikimedia

Image source: Wikimedia

73 of 179

what set of rules can solve this?

74 of 179

machine learning algorithms

75 of 179

rule-based program: rules + data → answer

76 of 179

machine learning: data + answers → rules

77 of 179

images in a computer

78 of 179

(image-only slides: zooming in on an image until single pixels become visible)

84 of 179

each pixel has three color values: R, G, B

85 of 179

example: R = 172, G = 137, B = 9

86 of 179

image classification

87 of 179

Q: Which class, from a fixed set of classes, does an image belong to?

88 of 179

Cat or Dog?

image → model → "cat"

image → model → "dog"

90 of 179

Google's Teachable Machine

https://teachablemachine.withgoogle.com

91 of 179

pip install keras

pip install tensorflow==2.12.0

92 of 179

# Load the classifier and class names
model = load_model("my_model.h5")
class_names = open("labels.txt", "r").readlines()

93 of 179

# Resize the image to 224 x 224
image = cv2.resize(image, (224, 224), interpolation=cv2.INTER_AREA)

# Turn it into an array of pixels
image = np.asarray(image, dtype=np.float32).reshape(1, 224, 224, 3)

# Normalize each pixel's color values to the range -1..1
image = (image / 127.5) - 1

94 of 179

# Make a prediction for the class
prediction = model.predict(image)

# Get the class with the highest confidence value
index = np.argmax(prediction)
class_name = class_names[index]

# Get the confidence score for the predicted class
confidence_score = prediction[0][index]
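
Assembled into one runnable script — a sketch: cat.jpg is a placeholder input, and my_model.h5 / labels.txt are the files exported from Teachable Machine:

import cv2
import numpy as np
from keras.models import load_model

model = load_model("my_model.h5")
class_names = open("labels.txt", "r").readlines()

# read and prepare a test image
image = cv2.imread("cat.jpg")
image = cv2.resize(image, (224, 224), interpolation=cv2.INTER_AREA)
image = np.asarray(image, dtype=np.float32).reshape(1, 224, 224, 3)
image = (image / 127.5) - 1

prediction = model.predict(image)
index = np.argmax(prediction)
print(class_names[index].strip(), prediction[0][index])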

95 of 179

96 of 179

YOLO v8 Image Classification

https://docs.ultralytics.com/

97 of 179

pip install ultralytics

98 of 179

# Load the classifier
from ultralytics import YOLO

model = YOLO("yolov8n-cls.pt")

99 of 179

# Make a prediction

results = model('cat.jpg')

100 of 179

# Show result

results[0].show()

101 of 179

# Get the top result
top = results[0].probs.top1
class_name = results[0].names[top]
print(class_name)

102 of 179

zero-shot image classification

103 of 179

Q: Which classes do you train your model on?

104 of 179

GPT-4 Vision

105 of 179

pip install openai

106 of 179

# Import the OpenAI API and set the API key
from openai import OpenAI
import os

os.environ["OPENAI_API_KEY"] = "..."
client = OpenAI()

107 of 179

# define a suitable prompt for the task

prompt = "Classify the image into 'dog' or 'cat'. Return only the word for the class of the image."

108 of 179

# This function is needed to encode an image to base64 for OpenAI's API
import base64

def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

image_path = "cat.webp"
image = encode_image(image_path)

109 of 179

response = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[
        { "role": "user", "content": [
            { "type": "text", "text": prompt },
            { "type": "image_url", "image_url": { "url": f"data:image/jpeg;base64,{image}" } }
        ] }
    ],
    max_tokens=300,
)

110 of 179

# Show the answer of the classification

print(response.choices[0].message.content)

111 of 179

object detection

112 of 179

Q: Which objects are in the image and where?

113 of 179

image → AI → "dog", "bee"

114 of 179

image → AI → "cat", "frog"

115 of 179

YOLO v8 Object Detection

https://docs.ultralytics.com/

116 of 179

# Load the detector
from ultralytics import YOLO

model = YOLO("yolov8n.pt")

117 of 179

# Make a prediction on each frame
results = model(frame)

# Annotate the frame
annotated_frame = results[0].plot()
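
On the slides, frame comes from the webcam. A minimal live-detection loop combining this with the OpenCV code from earlier might look like this (a sketch; press 'q' to quit):

import cv2
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
webcam = cv2.VideoCapture(0)

while True:
    success, frame = webcam.read()
    if not success:
        break
    results = model(frame)
    cv2.imshow("Detection", results[0].plot())
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

webcam.release()
cv2.destroyAllWindows()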

118 of 179

119 of 179

Q: Which objects do you teach your model to recognize?

120 of 179

zero-shot object detection

121 of 179

"Simple Open-Vocabulary Object Detection with Vision Transformers"
https://arxiv.org/abs/2205.06230

122 of 179

# Load the open world detector
from ultralytics import YOLO

model = YOLO("yolov8s-world.pt")

123 of 179

# Define custom objects to look for

model.set_classes(["person with glasses"])

124 of 179

# Make a prediction on each frame
results = model(frame)

# Annotate the frame
annotated_frame = results[0].plot()

125 of 179

optical character recognition (OCR)

126 of 179

tesseract

127 of 179

GPT-4 Vision

128 of 179

# define a suitable prompt for the task

prompt = "Extract all food and beverage items with their quantity and price from this receipt into a JSON list. The receipt is in German."

129 of 179

response = client.chat.completions.create(
    model="gpt-4o",
    response_format={ "type": "json_object" },
    messages=[
        { "role": "user", "content": [
            { "type": "text", "text": prompt },
            { "type": "image_url", "image_url": { "url": f"data:image/jpeg;base64,{image}" } }
        ] }
    ],
    max_tokens=300,
)
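
Because response_format forces a JSON object, the answer can be parsed directly. The exact structure of the parsed data depends on the prompt and the model's answer, so treat this as a sketch:

import json

data = json.loads(response.choices[0].message.content)
print(data)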

130 of 179

GENERATIVE AI

131 of 179

LARGE LANGUAGE MODELS

132 of 179

what has been said so far? (history + prompt)

→ prediction of the next token based on a learnt probability distribution (+ randomness)

→ filter (discriminating, insulting content)

→ next word (token)
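
A toy illustration of the prediction step (not how a real LLM is implemented — the vocabulary and probabilities here are invented): the model assigns probabilities to candidate next tokens, and sampling from them instead of always taking the most likely one is where the randomness comes from.

import random

# invented next-token distribution for "the cat sat on the"
probs = { "mat": 0.6, "sofa": 0.25, "roof": 0.1, "piano": 0.05 }

tokens = list(probs.keys())
weights = list(probs.values())
print(random.choices(tokens, weights=weights)[0])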

137 of 179

PROMPTING

138 of 179

Prompt → Language Model → Answer

139 of 179

elements of a prompt

<instruction>

<context>

<input data>

<output indicator>

140 of 179

example prompt (start simple)

Explain the binary number system.

142 of 179

example prompt (with context)

You are a friendly tutor and your task is to explain complex concepts as simply as possible.

Your answers are never longer than 10 sentences.

Explain the binary number system.

144 of 179

ZERO-SHOT PROMPTING

145 of 179

example prompt

Classify the text into neutral, negative or positive.

Text: "What a great dinner!"   ← this will be replaced with data later…

Sentiment:
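
Replacing the input data is plain string templating. A sketch of how that looks in code, assuming the OpenAI client that is set up a few slides later:

def sentiment_prompt(text):
    return f"""Classify the text into neutral, negative or positive.

Text: "{text}"

Sentiment:"""

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{ "role": "user", "content": sentiment_prompt("What a great dinner!") }],
)
print(response.choices[0].message.content)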

147 of 179

FEW-SHOT PROMPTING

IN-CONTEXT LEARNING

148 of 179

examples in the context to learn from

Extract all references to countries and their continent in the following text using the format from the examples below.

Example 1: "They played the team called 'Die Mannschaft' in the world cup final"

Correct answer: Germany, Europe

Example 2: "The Three Lions once again lost to Germany in a semi final"

Correct answer: England, Europe, Germany, Europe

Text: "The Selecao was destroyed 1:7 by the DFB selection in their home stadium."

Answer:


150 of 179

more prompting strategies

chain-of-thought (CoT)

self-consistency

generated knowledge prompting

prompt chaining (subtasks)

tree-of-thoughts (ToT)

retrieval-augmented generation (RAG)

…

151 of 179

OpenAI

152 of 179

pip install openai

153 of 179

from openai import OpenAI
import os

os.environ["OPENAI_API_KEY"] = "<YOUR_API_KEY>"
client = OpenAI()

154 of 179

# define a system message
system_message = """You are a world-famous 5-star chef. Based on ingredients the user has at home,
you suggest easy-to-cook recipes."""

155 of 179

# define a prompt for the task
prompt = """Suggest a recipe for lunch.

List of ingredients:
- butter
- eggs
- flour
- salt
- milk

Recipe: """

156 of 179

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        { "role": "system", "content": system_message },
        { "role": "user", "content": prompt },
    ],
    max_tokens=2000
)
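
The slides end with the request; reading the suggested recipe from the response works the same way as on the earlier vision slides:

print(response.choices[0].message.content)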

157 of 179

USER INTERFACES

158 of 179

streamlit

https://docs.streamlit.io/ # official documentation

https://streamlit.io/components # third-party extensions

159 of 179

pip install streamlit

160 of 179

- Home.py              ← entry point to our UI
- pages/               ← more pages in our app
  - 1_Speech.py
  - 2_Webcam.py
  - 3_Microphone.py
- lib/                 ← our custom functions we'd like to use from our UI
  - speech_to_text.py
  - text_to_speech.py
  - vision.py
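
With this layout, the app is started from the terminal. Streamlit serves the entry point and automatically lists the files in pages/ as separate pages:

streamlit run Home.py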

164 of 179

Home.py

import streamlit as st

st.title("My first UI")
st.write("This is a simple UI for prototyping our application.")

name = st.text_input("Enter your name")
if st.button("Greet me"):
    st.write(f"Hello {name} 🤞")

166 of 179

1_Speech.py

import streamlit as st
from lib.text_to_speech import text_to_speech   # from lib/text_to_speech.py

st.title("Speech demo")
st.write("Enter a text and it will be converted to speech.")

text = st.text_input("Enter some text")
voice = st.selectbox("Select a voice", ["alloy", … "shimmer"])

if st.button("Turn to speech"):
    audio_file = text_to_speech(text, voice=voice)
    st.audio(audio_file.as_posix(), format="audio/mpeg")

168 of 179

lib/text_to_speech.py

from pathlib import Path
import os
from openai import OpenAI

# setup OpenAI API
os.environ["OPENAI_API_KEY"] = "..."
client = OpenAI()

# define custom function
def text_to_speech(text, voice="alloy"):
    speech_file_path = Path(__file__).parent / "speech.mp3"
    response = client.audio.speech.create(
        model="tts-1",
        voice=voice,
        input=text
    )
    response.write_to_file(speech_file_path)
    return speech_file_path

171 of 179

2_Webcam.py

import streamlit as st
from lib.vision import ask_gpt4o   # from lib/vision.py

st.title("Video camera test")

picture = st.camera_input("Take a picture")
if picture:
    st.image(picture)
    answer = ask_gpt4o("What is in this picture?", picture)
    st.write(answer)

173 of 179

lib/vision.py

import base64
import os
from openai import OpenAI

# setup OpenAI API
os.environ["OPENAI_API_KEY"] = "..."
client = OpenAI()

# define custom functions
def encode_image(image_buffer):
    # one way to fill in the body left open on the slide:
    # the camera input behaves like a file object
    return base64.b64encode(image_buffer.getvalue()).decode("utf-8")

def ask_gpt4o(prompt, image_buffer):
    image = encode_image(image_buffer)
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            { "role": "user", "content": [
                { "type": "text", "text": prompt },
                { "type": "image_url", "image_url": { "url": f"data:image/jpeg;base64,{image}" } }
            ] }
        ]
    )
    return response.choices[0].message.content

175 of 179

3_Microphone.py

pip install streamlit-mic-recorder

import streamlit as st
from streamlit_mic_recorder import mic_recorder
from lib.speech_to_text import speech_to_text   # from lib/speech_to_text.py

st.title("Microphone test")

def callback():
    if st.session_state.my_recorder_output:
        audio = st.session_state.my_recorder_output
        text = speech_to_text(audio)
        st.success(text)

audio = mic_recorder(key='my_recorder', callback=callback)

178 of 179

lib/speech_to_text.py

import io
import os
from openai import OpenAI

# setup OpenAI API
os.environ["OPENAI_API_KEY"] = "..."
client = OpenAI()

# define custom function
def speech_to_text(audio):
    # mic_recorder hands over a dict; the raw recording is under 'bytes'
    audio_bio = io.BytesIO(audio['bytes'])
    audio_bio.name = 'audio.mp3'
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_bio
    )
    return transcription.text