# Source: llmware/examples/UI/multimedia_bot.py (llmware-ai/llmware, main branch)

""" This example shows a multimedia bot created in less than 100 lines of code that
leverages the CPU, GPU and NPU

    -- designed to run on an AI PC with Intel Lunar Lake with CPU, GPU and NPU
    -- if you do not have GPU, it will auto-fallback to CPU
    -- if you do not have NPU, you can change the option to GPU

    To run this example, we will need the following dependencies in addition to llmware:

    -- pip3 install openvino_genai
    -- pip3 install pywebio

"""

from llmware.models import ModelCatalog
from llmware.configs import LLMWareConfig

import os
import threading

from pywebio.input import input_group, textarea, actions
from pywebio.output import put_text, put_markdown, put_image, use_scope, put_info
from pywebio.session import set_env


def text_gen_bot(**kwargs):

    """ Simple text generation streaming bot - will run using GGUF on CPU.

    Keyword arguments:
        user_msg    -- text from the user, appended to the story instruction
                       (defaults to an empty string)
        img_counter -- turn counter, used to give the output scope a name
                       that is unique per turn (defaults to 0)

    Streams each generated token to the page as it arrives, then writes the
    complete text to "txt_tmp.txt" under the llmware path so that the caller
    can pick it up after the thread has finished.

    Returns the complete generated text.
    """

    user_msg = kwargs.get("user_msg", "")
    img_counter = kwargs.get("img_counter", 0)

    # llmware load_model
    text_gen_model = ModelCatalog().load_model("phi-3-gguf",
                                               max_output=200)

    inst = "Complete this story: "
    prompt = inst + user_msg
    tokens = []

    with use_scope("text_gen" + str(img_counter)):

        # llmware stream generation - show each token as soon as it is produced
        for token in text_gen_model.stream(prompt):
            put_text(token, inline=True)
            tokens.append(token)

        put_text("\nTo be continued ...")

    text_output = "".join(tokens)

    # for demo example, we will write the text from the thread to a tmp file
    # -- mode "w" truncates any earlier file, so no separate delete is needed
    # -- the context manager closes the handle even if the write fails
    # -- default encoding is kept on purpose: the reader in run_bot also
    #    opens the file with the default encoding
    fp = os.path.join(LLMWareConfig().get_llmware_path(), "txt_tmp.txt")
    with open(fp, "w") as f:
        f.write(text_output)

    return text_output


def image_gen_bot(**kwargs):

    """ Image generation bot that will run on GPU.

    Keyword arguments:
        user_msg    -- text from the user, appended to the drawing instruction
                       (defaults to an empty string)
        img_counter -- turn counter, used in the generated image name and in
                       the name of the output scope (defaults to 0)

    Generates an image from the prompt, reads the resulting file and displays
    it on the page.

    Returns the path of the generated image file.
    """

    user_msg = kwargs.get("user_msg", "")
    img_counter = kwargs.get("img_counter", 0)

    # llmware load_model
    model = ModelCatalog().load_model("lcm-dreamshaper-ov")

    inst = "Draw an image: "
    prompt = inst + user_msg

    # specialized pipeline on the model
    img_path = model.text_to_image_gen(prompt, f"test_image_{img_counter}")

    # read the image bytes - the context manager makes sure the file handle
    # is closed rather than left for the garbage collector
    with open(img_path, "rb") as img_file:
        content = img_file.read()

    # display the image on the screen with pywebio
    with use_scope("img_gen" + str(img_counter)):
        put_image(content)

    return img_path


def classifier_agent_bot(**kwargs):

    """ Simple classification agent running on NPU.

    Keyword arguments:
        text_output -- text to classify (defaults to an empty string)
        npu_model   -- already-loaded model object whose function_call method
                       is invoked on the text (defaults to None, in which
                       case the call below fails)

    Displays the "llm_response" entry of the model response on the page and
    always returns True.
    """

    story_text = kwargs.get("text_output", "")
    classifier = kwargs.get("npu_model", None)

    # the model is loaded by the caller and handed to this thread
    result = classifier.function_call(story_text)

    label = str(result["llm_response"])
    put_text("\n\nNPU Classification Agent: " + label)

    return True


def run_bot():

    """ Main function - starts a user prompt loop and, for every message sent,
    runs text generation and image generation in two parallel threads while
    the NPU model is loaded in the main thread.  Once both threads have
    finished, the generated text is handed to a third thread for
    classification.

    The loop ends when the user presses 'Exit'.  Always returns True.
    """

    set_env(input_panel_fixed=False, output_animation=False)
    put_markdown("""# Multimedia Bot with LLMWare, OpenVINO, & PyWebio""")

    img_counter = 0

    # tmp file used by the text gen thread to hand its output back to main
    fp = os.path.join(LLMWareConfig().get_llmware_path(), "txt_tmp.txt")

    while True:

        # user input chat box

        form = input_group('', [
            textarea(name='msg', placeholder='Ask LLMWare Bot', rows=3),
            actions(name='cmd', buttons=['Send', 'Exit'])
        ])

        if form['cmd'] == "Exit":
            break

        user_msg = form['msg']

        # display the user prompt
        put_info(user_msg)

        # drop any tmp file left over from an earlier turn, so that a failed
        # text gen thread cannot cause stale text to be classified below
        try:
            os.remove(fp)
        except FileNotFoundError:
            pass

        # thread 1 - CPU - text gen
        text_gen_thread = threading.Thread(target=text_gen_bot,
                                           kwargs={"user_msg": user_msg,
                                                   "img_counter": img_counter})
        text_gen_thread.start()

        # thread 2 - GPU - text to image gen
        image_gen_thread = threading.Thread(target=image_gen_bot,
                                            kwargs={"user_msg": user_msg,
                                                    "img_counter": img_counter})
        image_gen_thread.start()

        # load the npu model in main and pass to thread
        # -- this happens while the two threads above are still working
        npu_model = ModelCatalog().load_model("slim-topics-npu-ov",
                                              sample=False,temperature=0.0,
                                              device="NPU")

        image_gen_thread.join()
        text_gen_thread.join()

        # pull the text output file created in the text gen thread
        # -- read through a context manager so the handle is always closed
        text_output = ""
        if os.path.exists(fp):
            with open(fp, "r") as f:
                text_output = f.read()

        # kick off NPU thread
        # -- not joined: the next prompt is shown while classification runs
        npu_gen_thread = threading.Thread(target=classifier_agent_bot,
                                          kwargs={"text_output": text_output,
                                                  "npu_model": npu_model})

        npu_gen_thread.start()

        img_counter += 1

    return True


# start the bot only when this file is executed directly as a script
if __name__ == "__main__":
    run_bot()