1 of 179

2 of 179

The slides are meant as visual support for the lecture.

They are neither documentation nor a script.

Please do not print the slides.

Comments and feedback to n.meseth@hs-osnabrueck.de

3 of 179

ORGANIZATION

4 of 179

ILIAS

Microsoft Teams

5 of 179

sessions

6 of 179

group work

7 of 179

examination

8 of 179

working environment

9 of 179

visual studio code

python

tinkerforge

git

10 of 179

DIGITAL TECHNOLOGIES

11 of 179

solution

input

output

a model for solving problems

12 of 179

cyber physical systems

artificial intelligence

software prototyping

13 of 179

cyber physical systems

sensors: temperature, humidity, co2, uv light, ambient light, sound pressure, thermal image, camera, ...

actuators: led, speaker, display, motor, …

artificial intelligence

software prototyping

14 of 179

artificial intelligence

computer vision

generative ai

natural language processing

cyber physical systems

software prototyping

15 of 179

artificial intelligence

computer vision: image classification, image segmentation, object recognition, object tracking, face recognition, face identification, emotion recognition, pose estimation, text recognition

generative ai

natural language processing

cyber physical systems

software prototyping

16 of 179

artificial intelligence

computer vision

generative ai: text generation, text summary, text analysis, image generation, image description, video generation, music generation

natural language processing

cyber physical systems

software prototyping

17 of 179

artificial intelligence

computer vision

generative ai

natural language processing: speech-to-text, text-to-speech, translation

cyber physical systems

software prototyping

18 of 179

artificial intelligence

cyber physical systems

software prototyping: user interfaces, cloud services, databases

19 of 179

introductory example

20 of 179

visual studio code

programs

python

21 of 179

LEDs

22 of 179

large language models

23 of 179

speech-to-text

24 of 179

user interface

25 of 179

SENSORS

26 of 179

27 of 179

temperature / humidity

28 of 179

th = BrickletHumidityV2(UID, ipcon)…

29 of 179

th.get_humidity()

th.get_temperature()

30 of 179

th.register_callback(th.CALLBACK_HUMIDITY, cb_humidity)

th.register_callback(th.CALLBACK_TEMPERATURE, …)

31 of 179

th.set_humidity_callback_configuration(250, False, "x", 0, 0)

th.set_temperature_callback_configuration(...)
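
Put together, a minimal callback sketch might look like this. This is a sketch, not taken from the slides: the host and port assume a local Brick Daemon, and UID is a placeholder for your bricklet's UID.

from tinkerforge.ip_connection import IPConnection
from tinkerforge.bricklet_humidity_v2 import BrickletHumidityV2

UID = "XYZ"  # placeholder: the UID of your bricklet

def cb_humidity(humidity):
    # the value is delivered as %RH * 100
    print(f"Humidity: {humidity / 100} %RH")

ipcon = IPConnection()
th = BrickletHumidityV2(UID, ipcon)
ipcon.connect("localhost", 4223)  # assumes a local Brick Daemon

th.register_callback(th.CALLBACK_HUMIDITY, cb_humidity)
# report every 250 ms, even if the value has not changed
th.set_humidity_callback_configuration(250, False, "x", 0, 0)

input("Press Enter to exit...\n")
ipcon.disconnect()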

32 of 179

rgb led button

33 of 179

btn = BrickletRGBLEDButton(UID, ipcon)…

34 of 179

btn.set_color(255, 0, 0)

35 of 179

btn.get_button_state()

36 of 179

btn.register_callback(...)
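
As with the humidity bricklet, a small end-to-end sketch (placeholder UID, local Brick Daemon assumed; the callback name and state constants come from the Tinkerforge Python bindings):

from tinkerforge.ip_connection import IPConnection
from tinkerforge.bricklet_rgb_led_button import BrickletRGBLEDButton

UID = "XYZ"  # placeholder: the UID of your bricklet

def cb_button_state_changed(state):
    if state == BrickletRGBLEDButton.BUTTON_STATE_PRESSED:
        print("Button pressed")

ipcon = IPConnection()
btn = BrickletRGBLEDButton(UID, ipcon)
ipcon.connect("localhost", 4223)

btn.set_color(0, 255, 0)  # green
btn.register_callback(btn.CALLBACK_BUTTON_STATE_CHANGED, cb_button_state_changed)

input("Press Enter to exit...\n")
ipcon.disconnect()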

37 of 179

camera

38 of 179

OpenCV

import cv2

39 of 179

# Get video capture device (webcam)

webcam = cv2.VideoCapture(0)

40 of 179

# Read a frame

success, frame = webcam.read()

41 of 179

# Show the image from the frame

cv2.imshow("Webcam", frame)

42 of 179

# Save the frame as .png

cv2.imwrite("screenshot.png", frame)
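
These four calls are usually combined into a loop that shows a live image until a key is pressed — a minimal sketch:

import cv2

webcam = cv2.VideoCapture(0)

while True:
    success, frame = webcam.read()
    if not success:
        break
    cv2.imshow("Webcam", frame)
    # stop when 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

webcam.release()
cv2.destroyAllWindows()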

43 of 179

thermal imaging camera

44 of 179

OpenCV

Tinkerforge

45 of 179

ti = BrickletThermalImaging(UID, ipcon)

ti.set_image_transfer_config(...)

img = ti.get_high_contrast_image()
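
A sketch of how the pieces could fit together (assumptions: local Brick Daemon and a placeholder UID; the bricklet delivers an 80x60 high-contrast image as a flat list of 8-bit values, which OpenCV can display once reshaped):

import cv2
import numpy as np
from tinkerforge.ip_connection import IPConnection
from tinkerforge.bricklet_thermal_imaging import BrickletThermalImaging

UID = "XYZ"  # placeholder: the UID of your bricklet

ipcon = IPConnection()
ti = BrickletThermalImaging(UID, ipcon)
ipcon.connect("localhost", 4223)

# request high-contrast (8-bit grayscale) images on demand
ti.set_image_transfer_config(ti.IMAGE_TRANSFER_MANUAL_HIGH_CONTRAST_IMAGE)

# 80x60 pixels, one grayscale value per pixel
img = ti.get_high_contrast_image()
frame = np.array(img, dtype=np.uint8).reshape(60, 80)

# upscale and show with OpenCV
frame = cv2.resize(frame, (640, 480), interpolation=cv2.INTER_NEAREST)
cv2.imshow("Thermal", frame)
cv2.waitKey(0)

ipcon.disconnect()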

46 of 179

ti.register_callback(...)

47 of 179

microphone

48 of 179

import pyaudio

49 of 179

# Define recording parameters
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
CHUNK = 1024

50 of 179

# Get access to the microphone

audio = pyaudio.PyAudio()

51 of 179

# Start listening

stream = audio.open(...)

52 of 179

# Read a chunk of frames

stream.read(CHUNK)

53 of 179

# Stop and close stream
stream.stop_stream()
stream.close()

54 of 179

# Terminate access to microphone

audio.terminate()
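
Put together, a minimal record-and-save sketch. The 3-second duration is an arbitrary choice, and saving to .wav uses the standard-library wave module, which the slides do not cover:

import wave
import pyaudio

FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
CHUNK = 1024
SECONDS = 3

audio = pyaudio.PyAudio()
stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                    input=True, frames_per_buffer=CHUNK)

frames = []
for _ in range(int(RATE / CHUNK * SECONDS)):
    frames.append(stream.read(CHUNK))

stream.stop_stream()
stream.close()
sample_width = audio.get_sample_size(FORMAT)
audio.terminate()

# save the recording as a .wav file
with wave.open("recording.wav", "wb") as f:
    f.setnchannels(CHANNELS)
    f.setsampwidth(sample_width)
    f.setframerate(RATE)
    f.writeframes(b"".join(frames))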

55 of 179

keyboard

56 of 179

import keyboard

57 of 179

# Define a callback function for a key
def record_audio():
    print("Recording audio…")

58 of 179

# Add key listener

keyboard.add_hotkey("r", record_audio)

59 of 179

# Wait until a specific key was pressed

keyboard.wait("esc")
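
The three pieces combine into a small, complete script (the hotkey callback here only prints; in a real prototype it would start a recording):

import keyboard

# callback for the 'r' key
def record_audio():
    print("Recording audio…")

keyboard.add_hotkey("r", record_audio)

# block until ESC is pressed
keyboard.wait("esc")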

60 of 179

ACTUATORS

61 of 179

62 of 179

rgb led

63 of 179

led = BrickletRGBLEDV2(UID, ipcon)

led.set_rgb_value(255, 0, 0)

64 of 179

OLED display

65 of 179

oled = BrickletOLED128x64V2(UID, ipcon)

oled.clear_display()

oled.write_line(0, 0, "Welcome!")
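
Both actuators follow the same pattern as the sensors: create the object, connect, call its functions. A combined sketch with hypothetical UIDs and a local Brick Daemon assumed:

from tinkerforge.ip_connection import IPConnection
from tinkerforge.bricklet_rgb_led_v2 import BrickletRGBLEDV2
from tinkerforge.bricklet_oled_128x64_v2 import BrickletOLED128x64V2

LED_UID = "AB1"   # hypothetical UIDs: replace with your own
OLED_UID = "CD2"

ipcon = IPConnection()
led = BrickletRGBLEDV2(LED_UID, ipcon)
oled = BrickletOLED128x64V2(OLED_UID, ipcon)
ipcon.connect("localhost", 4223)

led.set_rgb_value(255, 0, 0)       # red
oled.clear_display()
oled.write_line(0, 0, "Welcome!")  # line 0, column 0

ipcon.disconnect()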

66 of 179

speaker

67 of 179

import simpleaudio as sa

68 of 179

# Create a wave object from a .wav file and play it
wav = sa.WaveObject.from_wave_file("sound.wav")
wav.play().wait_done()

69 of 179

COMPUTER VISION

70 of 179

finding oranges in images

? → output

71 of 179

Image source: Wikimedia

72 of 179

Image source: Wikimedia

Image source: Wikimedia

73 of 179

what set of rules can solve this?

74 of 179

machine learning algorithms

75 of 179

rule-based program: rules + data → answer

76 of 179

machine learning: data + answers → rules

77 of 179

images in a computer

78 of 179

(image-only slides: zooming in on an image until single pixels become visible)

84 of 179

each pixel has three color values: R, G, B

85 of 179

example: R = 172, G = 137, B = 9

86 of 179

image classification

87 of 179

Q: Which class, from a fixed set of classes, does an image belong to?

88 of 179

Cat or Dog?

image → model → "cat"

image → model → "dog"

90 of 179

Google's Teachable Machine

https://teachablemachine.withgoogle.com

91 of 179

pip install keras

pip install tensorflow==2.12.0

92 of 179

# Load the classifier and class names
model = load_model("my_model.h5")
class_names = open("labels.txt", "r").readlines()

93 of 179

# Resize the image to 224 x 224
image = cv2.resize(image, (224, 224), interpolation=cv2.INTER_AREA)

# Turn it into an array of pixels
image = np.asarray(image, dtype=np.float32).reshape(1, 224, 224, 3)

# Normalize each pixel's color values to the range -1..1
image = (image / 127.5) - 1

94 of 179

# Make a prediction for the class
prediction = model.predict(image)

# Get the class with the highest confidence value
index = np.argmax(prediction)
class_name = class_names[index]

# Get the confidence score for the predicted class
confidence_score = prediction[0][index]
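
Assembled into one runnable script — a sketch: cat.jpg is a placeholder input, and my_model.h5 / labels.txt are the files exported from Teachable Machine:

import cv2
import numpy as np
from keras.models import load_model

model = load_model("my_model.h5")
class_names = open("labels.txt", "r").readlines()

# read and prepare a test image
image = cv2.imread("cat.jpg")
image = cv2.resize(image, (224, 224), interpolation=cv2.INTER_AREA)
image = np.asarray(image, dtype=np.float32).reshape(1, 224, 224, 3)
image = (image / 127.5) - 1

prediction = model.predict(image)
index = np.argmax(prediction)
print(class_names[index].strip(), prediction[0][index])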

95 of 179

96 of 179

YOLO v8 Image Classification

https://docs.ultralytics.com/

97 of 179

pip install ultralytics

98 of 179

# Load the classifier
from ultralytics import YOLO

model = YOLO("yolov8n-cls.pt")

99 of 179

# Make a prediction

results = model('cat.jpg')

100 of 179

# Show result

results[0].show()

101 of 179

# Get the top result
top = results[0].probs.top1
class_name = results[0].names[top]
print(class_name)

102 of 179

zero-shot image classification

103 of 179

Q: Which classes do you train your model on?

104 of 179

GPT-4 Vision

105 of 179

pip install openai

106 of 179

# Import the OpenAI API and set the API key
from openai import OpenAI
import os

os.environ["OPENAI_API_KEY"] = "..."
client = OpenAI()

107 of 179

# define a suitable prompt for the task

prompt = "Classify the image into 'dog' or 'cat'. Return only the word for the class of the image."

108 of 179

# This function is needed to encode an image to base64 for OpenAI's API
import base64

def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

image_path = "cat.webp"
image = encode_image(image_path)

109 of 179

response = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[
        { "role": "user", "content": [
            { "type": "text", "text": prompt },
            { "type": "image_url", "image_url": { "url": f"data:image/jpeg;base64,{image}" } }
        ] }
    ],
    max_tokens=300,
)

110 of 179

# Show the answer of the classification

print(response.choices[0].message.content)

111 of 179

object detection

112 of 179

Q: Which objects are in the image and where?

113 of 179

image → AI → "dog", "bee"

114 of 179

image → AI → "cat", "frog"

115 of 179

YOLO v8 Object Detection

https://docs.ultralytics.com/

116 of 179

# Load the detector
from ultralytics import YOLO

model = YOLO("yolov8n.pt")

117 of 179

# Make a prediction on each frame
results = model(frame)

# Annotate the frame
annotated_frame = results[0].plot()
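
On the slides, frame comes from the webcam. A minimal live-detection loop combining this with the OpenCV code from earlier might look like this (a sketch; press 'q' to quit):

import cv2
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
webcam = cv2.VideoCapture(0)

while True:
    success, frame = webcam.read()
    if not success:
        break
    results = model(frame)
    cv2.imshow("Detection", results[0].plot())
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

webcam.release()
cv2.destroyAllWindows()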

118 of 179

119 of 179

Q: Which objects do you teach your model to recognize?

120 of 179

zero-shot object detection

121 of 179

"Simple Open-Vocabulary Object Detection with Vision Transformers"
https://arxiv.org/abs/2205.06230

122 of 179

# Load the open world detector
from ultralytics import YOLO

model = YOLO("yolov8s-world.pt")

123 of 179

# Define custom objects to look for

model.set_classes(["person with glasses"])

124 of 179

# Make a prediction on each frame
results = model(frame)

# Annotate the frame
annotated_frame = results[0].plot()

125 of 179

optical character recognition (OCR)

126 of 179

tesseract

127 of 179

GPT-4 Vision

128 of 179

# define a suitable prompt for the task

prompt = "Extract all food and beverage items with their quantity and price from this receipt into a JSON list. The receipt is in German."

129 of 179

response = client.chat.completions.create(
    model="gpt-4o",
    response_format={ "type": "json_object" },
    messages=[
        { "role": "user", "content": [
            { "type": "text", "text": prompt },
            { "type": "image_url", "image_url": { "url": f"data:image/jpeg;base64,{image}" } }
        ] }
    ],
    max_tokens=300,
)
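
Because response_format forces a JSON object, the answer can be parsed directly. The exact structure of the parsed data depends on the prompt and the model's answer, so treat this as a sketch:

import json

data = json.loads(response.choices[0].message.content)
print(data)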

130 of 179

GENERATIVE AI

131 of 179

LARGE LANGUAGE MODELS

132 of 179

what has been said so far? (history + prompt)

→ prediction of the next token based on a learnt probability distribution (+ randomness)

→ filter (discriminating, insulting content)

→ next word (token)
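
A toy illustration of the prediction step (not how a real LLM is implemented — the vocabulary and probabilities here are invented): the model assigns probabilities to candidate next tokens, and sampling from them instead of always taking the most likely one is where the randomness comes from.

import random

# invented next-token distribution for "the cat sat on the"
probs = { "mat": 0.6, "sofa": 0.25, "roof": 0.1, "piano": 0.05 }

tokens = list(probs.keys())
weights = list(probs.values())
print(random.choices(tokens, weights=weights)[0])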

137 of 179

PROMPTING

138 of 179

Prompt → Language Model → Answer

139 of 179

elements of a prompt

<instruction>

<context>

<input data>

<output indicator>

140 of 179

example prompt (start simple)

Explain the binary number system.

142 of 179

example prompt (with context)

You are a friendly tutor and your task is to explain complex concepts as simply as possible.

Your answers are never longer than 10 sentences.

Explain the binary number system.

144 of 179

ZERO-SHOT PROMPTING

145 of 179

example prompt

Classify the text into neutral, negative or positive.

Text: "What a great dinner!"   ← this will be replaced with data later…

Sentiment:
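
Replacing the input data is plain string templating. A sketch of how that looks in code, assuming the OpenAI client that is set up a few slides later:

def sentiment_prompt(text):
    return f"""Classify the text into neutral, negative or positive.

Text: "{text}"

Sentiment:"""

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{ "role": "user", "content": sentiment_prompt("What a great dinner!") }],
)
print(response.choices[0].message.content)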

147 of 179

FEW-SHOT PROMPTING

IN-CONTEXT LEARNING

148 of 179

examples in the context to learn from

Extract all references to countries and their continent in the following text using the format from the examples below.

Example 1: "They played the team called 'Die Mannschaft' in the world cup final"

Correct answer: Germany, Europe

Example 2: "The Three Lions once again lost to Germany in a semi final"

Correct answer: England, Europe, Germany, Europe

Text: "The Selecao was destroyed 1:7 by the DFB selection in their home stadium."

Answer:


150 of 179

more prompting strategies

chain-of-thought (CoT)

self-consistency

generated knowledge prompting

prompt chaining (subtasks)

tree-of-thoughts (ToT)

retrieval-augmented generation (RAG)

…

151 of 179

OpenAI

152 of 179

pip install openai

153 of 179

from openai import OpenAI
import os

os.environ["OPENAI_API_KEY"] = "<YOUR_API_KEY>"
client = OpenAI()

154 of 179

# define a system message
system_message = """You are a world-famous 5-star chef. Based on ingredients the user has at home,
you suggest easy-to-cook recipes."""

155 of 179

# define a prompt for the task
prompt = """Suggest a recipe for lunch.

List of ingredients:
- butter
- eggs
- flour
- salt
- milk

Recipe: """

156 of 179

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        { "role": "system", "content": system_message },
        { "role": "user", "content": prompt },
    ],
    max_tokens=2000
)
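
The slides end with the request; reading the suggested recipe from the response works the same way as on the earlier vision slides:

print(response.choices[0].message.content)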

157 of 179

USER INTERFACES

158 of 179

streamlit

https://docs.streamlit.io/ # official documentation

https://streamlit.io/components # third-party extensions

159 of 179

pip install streamlit

160 of 179

- Home.py              ← entry point to our UI
- pages/               ← more pages in our app
  - 1_Speech.py
  - 2_Webcam.py
  - 3_Microphone.py
- lib/                 ← our custom functions we'd like to use from our UI
  - speech_to_text.py
  - text_to_speech.py
  - vision.py
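
With this layout, the app is started from the terminal. Streamlit serves the entry point and automatically lists the files in pages/ as separate pages:

streamlit run Home.py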

164 of 179

Home.py

import streamlit as st

st.title("My first UI")
st.write("This is a simple UI for prototyping our application.")

name = st.text_input("Enter your name")
if st.button("Greet me"):
    st.write(f"Hello {name} 🤞")

166 of 179

1_Speech.py

import streamlit as st
from lib.text_to_speech import text_to_speech   # from lib/text_to_speech.py

st.title("Speech demo")
st.write("Enter a text and it will be converted to speech.")

text = st.text_input("Enter some text")
voice = st.selectbox("Select a voice", ["alloy", … "shimmer"])

if st.button("Turn to speech"):
    audio_file = text_to_speech(text, voice=voice)
    st.audio(audio_file.as_posix(), format="audio/mpeg")

168 of 179

lib/text_to_speech.py

from pathlib import Path
import os
from openai import OpenAI

# setup OpenAI API
os.environ["OPENAI_API_KEY"] = "..."
client = OpenAI()

# define custom function
def text_to_speech(text, voice="alloy"):
    speech_file_path = Path(__file__).parent / "speech.mp3"
    response = client.audio.speech.create(
        model="tts-1",
        voice=voice,
        input=text
    )
    response.write_to_file(speech_file_path)
    return speech_file_path

171 of 179

2_Webcam.py

import streamlit as st
from lib.vision import ask_gpt4o   # from lib/vision.py

st.title("Video camera test")

picture = st.camera_input("Take a picture")
if picture:
    st.image(picture)
    answer = ask_gpt4o("What is in this picture?", picture)
    st.write(answer)

173 of 179

lib/vision.py

import base64
import os
from openai import OpenAI

# setup OpenAI API
os.environ["OPENAI_API_KEY"] = "..."
client = OpenAI()

# define custom functions
def encode_image(image_buffer):
    # one way to fill in the body left open on the slide:
    # the camera input behaves like a file object
    return base64.b64encode(image_buffer.getvalue()).decode("utf-8")

def ask_gpt4o(prompt, image_buffer):
    image = encode_image(image_buffer)
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            { "role": "user", "content": [
                { "type": "text", "text": prompt },
                { "type": "image_url", "image_url": { "url": f"data:image/jpeg;base64,{image}" } }
            ] }
        ]
    )
    return response.choices[0].message.content

175 of 179

3_Microphone.py

pip install streamlit-mic-recorder

import streamlit as st
from streamlit_mic_recorder import mic_recorder
from lib.speech_to_text import speech_to_text   # from lib/speech_to_text.py

st.title("Microphone test")

def callback():
    if st.session_state.my_recorder_output:
        audio = st.session_state.my_recorder_output
        text = speech_to_text(audio)
        st.success(text)

audio = mic_recorder(key='my_recorder', callback=callback)

178 of 179

lib/speech_to_text.py

import io
import os
from openai import OpenAI

# setup OpenAI API
os.environ["OPENAI_API_KEY"] = "..."
client = OpenAI()

# define custom function
def speech_to_text(audio):
    # mic_recorder hands over a dict; the raw recording is under 'bytes'
    audio_bio = io.BytesIO(audio['bytes'])
    audio_bio.name = 'audio.mp3'
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_bio
    )
    return transcription.text