File size: 15,493 Bytes
d0e3307
77c4795
d0e3307
77c4795
d0e3307
77c4795
fa44311
10458ab
d0e3307
 
093632f
77c4795
 
d0e3307
 
 
 
 
 
 
 
093632f
77c4795
 
d0e3307
77c4795
 
 
 
d0e3307
 
 
 
 
 
 
9ade43d
d0e3307
 
 
 
 
 
 
 
 
 
 
77c4795
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74c9bd0
77c4795
d0e3307
 
77c4795
4f26c60
d0e3307
77c4795
d0e3307
 
77c4795
d0e3307
 
 
 
 
77c4795
d0e3307
 
 
 
 
 
 
 
fa44311
d0e3307
125f8fe
d0e3307
 
 
125f8fe
d0e3307
 
125f8fe
 
 
 
 
 
 
fa44311
125f8fe
4f26c60
10458ab
125f8fe
4f26c60
093632f
d0e3307
 
4c91236
 
 
 
 
 
 
 
 
d31c125
f474755
4d858d6
f474755
fa44311
 
 
4c91236
 
 
 
f474755
d0e3307
fa44311
d0e3307
d31c125
d0e3307
 
 
 
fa44311
f24aba3
 
d0e3307
f24aba3
d0e3307
 
 
 
 
 
 
fa44311
 
b864e48
f474755
4c91236
 
 
d0e3307
 
d31c125
aebb122
d31c125
d0e3307
 
fa44311
5f7776b
 
 
fa44311
 
 
b864e48
 
fa44311
 
f7be280
fa44311
 
 
d0e3307
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b942b6b
 
 
 
 
 
 
 
 
 
77c4795
 
d0e3307
b942b6b
77c4795
 
 
b942b6b
9b1726d
 
 
 
 
 
 
 
 
d0e3307
b942b6b
 
 
 
 
 
 
 
 
d0e3307
 
77c4795
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d0e3307
 
5f8d745
d0e3307
 
5f8d745
 
d0e3307
c309583
35a1eaf
5f8d745
37fd8e1
 
4d858d6
5f7776b
d3ae060
77c4795
d3ae060
ce32f1f
d3ae060
c309583
35a1eaf
37fd8e1
b783618
37fd8e1
 
 
 
35a1eaf
4d858d6
5f8d745
 
d0e3307
5f8d745
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
import json
import random
import time
import pandas as pd
from openai import OpenAI
from Messaging_system.LLMR import LLMR
from Messaging_system.PromptGenerator import PromptGenerator
from Messaging_system.PromptEng import PromptEngine
from Messaging_system.protection_layer import ProtectionLayer
import openai
from Messaging_system.LLM import LLM
from copy import deepcopy
from Messaging_system.Homepage_Recommender import DefaultRec

class MultiMessage:
    """Builds a sequence of push-notification messages (multi-step) per user.

    The first message is assumed to already exist on the user row
    (``user["message"]``); each subsequent message is generated by an LLM with
    the previously generated messages as context so the sequence stays varied.
    """

    def __init__(self, CoreConfig):
        """
        :param CoreConfig: project-wide configuration/state object. Methods on
            this class read (at least) ``involve_recsys_result``,
            ``subsequent_examples``, ``users_df``, ``content_info``,
            ``config_file``, ``brand``, ``messaging_mode``, ``personalization``
            and ``get_instrument()`` from it.
        """
        self.Core = CoreConfig
        self.llm = LLM(CoreConfig)
        self.defaultRec = DefaultRec(CoreConfig)
        self.promptGen = PromptGenerator(self.Core)

        # The recommender is only needed when messages embed recsys results.
        if self.Core.involve_recsys_result:
            self.llmr = LLMR(CoreConfig, random=True)

    # ==============================================================
    def generate_multi_messages(self, user):
        """
        Generates multiple messages per user, storing them in a single JSON structure.

        The first message is assumed to already exist in user["message"];
        subsequent messages are generated by referencing previously generated
        ones. Returns a JSON string of the form
        ``{"messages_sequence": [...]}`` or None if no valid first message exists.
        """
        first_message_str = user.get("message", None)
        if first_message_str is None:
            print("No initial message found; cannot build a multi-message sequence.")
            return None

        try:
            first_message_dict = json.loads(first_message_str)
        except (json.JSONDecodeError, TypeError):
            print("Could not parse the first message as JSON. Returning None.")
            return None

        message_sequence = [first_message_dict]

        # Total messages wanted: one per configured example set, plus the
        # pre-existing first message. (self.Core.subsequent_examples is keyed
        # by step number — see get_examples.)
        total_configured = len(self.Core.subsequent_examples) + 1  # includes the first
        to_generate = max(0, total_configured - 1)

        # Resolve the user's DataFrame index once, up front.
        idx = self._get_user_idx(user)

        for i in range(to_generate):
            # The ordinal number of the next message in the sequence (first was #1)
            msg_number = i + 2

            # ---- (A) pick the next recommendation BEFORE generating the text if required ----
            recommendation_info = content_info = recsys_json = None

            if getattr(self.Core, "involve_recsys_result", False):
                recommendation_info, content_info, recsys_json = self.select_next_recommendation(user)

                if recommendation_info is None:
                    # Fall back to the default recommendation when recsys
                    # yields nothing for this user.
                    recommendation_info = self.defaultRec.recommendation
                    content_info = self.defaultRec.recommendation_info
                    recsys_json = self.defaultRec.for_you_url
                # Update DF and local user snapshot so the prompt builder and
                # the output parser see the recommendation chosen for THIS step.
                user = self._update_user_fields(idx, user, {
                    "recommendation": recommendation_info,
                    "recommendation_info": content_info,
                    "recsys_result": recsys_json
                })

            # ---- (B) actually generate the next message; hand it the UPDATED user ----
            next_msg_raw = self.generate_next_messages(message_sequence, msg_number, user)
            if next_msg_raw is None:
                print(f"Could not generate the message for step {msg_number}. Stopping.")
                break

            # If you have a protection layer, call it here (omitted for brevity)
            criticized_msg = next_msg_raw

            # ---- (C) Parse & validate ----
            parsed_output_str = self.parsing_output_message(criticized_msg, user)
            if not parsed_output_str:
                print(f"Parsing output failed for step {msg_number}. Stopping.")
                break

            try:
                parsed_output_dict = json.loads(parsed_output_str)
            except json.JSONDecodeError:
                print(f"Could not parse the new message as JSON for step {msg_number}. Stopping.")
                break

            message_sequence.append(parsed_output_dict)

        final_structure = {"messages_sequence": message_sequence}
        return json.dumps(final_structure, ensure_ascii=False)

    # --------------------------------------------------------------
    def generate_next_messages(self, previous_messages, step, user):
        """
        Uses only the last two previously generated messages to produce the next message.

        :param previous_messages: A list of dicts, each containing at least "header" and "message".
        :param step: The 1-based ordinal of the message we're about to generate.
        :param user: The user row (dict or pandas Series) used for personalization.
        :return: Whatever self.llm.get_response returns (raw LLM output), or
                 None if generation fails.
        """
        # Only keep up to the last two messages as context.
        context = previous_messages[-2:] if len(previous_messages) > 2 else previous_messages

        # 1) Build a prompt that includes only those last messages
        prompt = self.generate_prompt(context, step, user)

        # 2) Call our existing LLM routine
        return self.llm.get_response(prompt=prompt, instructions=self.llm_instructions())

    # ===============================================================
    def get_examples(self, step):
        """
        Returns the example/instruction text for the given step, or an empty
        string when no subsequent examples are configured.

        :param step: Key into self.Core.subsequent_examples for this message step.
        """
        if self.Core.subsequent_examples is not None:
            instructions = f"""
Below are the available options to select the best header and message: 
### **Available options:**

    {self.Core.subsequent_examples[step]} 
"""
            return instructions
        else:
            return ""

    # --------------------------------------------------------------
    def generate_prompt(self, previous_messages, step, user):
        """
        Creates a prompt to feed to the LLM, incorporating up to the last three
        previously generated messages.

        :param previous_messages: A list of dicts, each containing 'header' and 'message'.
        :param step: The 1-based ordinal of the message being generated.
        :param user: The user row used to build the personalized profile section.
        :return: A user-facing prompt string instructing the model to produce a new message.
        """
        # Build a textual summary of previous messages (at most the last three;
        # the caller already narrows to two, so this is a defensive cap).
        recent_messages = previous_messages[-3:]

        previous_text = []
        for i, m in enumerate(recent_messages, start=1):
            header = m.get("header", "").strip()
            body   = m.get("message", "").strip()
            previous_text.append(f"Message {i}: (Header) {header}\n           (Body) {body}")

        # Combine into a single string
        previous_text_str = "\n\n".join(previous_text)

        user_info = self.promptGen.get_user_profile(user=user)
        input_context = self.promptGen.input_context()
        recommendation_instructions = self.promptGen.recommendations_instructions(user)
        output_instructions = self.promptGen.output_instruction()
        examples = self.get_examples(step)

        # Craft the prompt
        prompt = f"""
We have previously sent these push notifications to the user and The user has not re-engaged yet:

** Previous messages **
{previous_text_str}

{input_context}
- The final header and message  should be different from previous headers and messages and we should not have similar words and phrases from previous sends.

{examples}

{user_info}

{recommendation_instructions}

{output_instructions}
"""

        return prompt

    # =============================================================================
    def parsing_output_message(self, message, user):
        """
        Parses the output JSON from the LLM and enriches it with additional content
        information if needed (e.g., from recsys). Re-uses the logic from the single-message
        pipeline to keep the results consistent.

        :param message: Output JSON *dictionary* from the LLM (with at least "message" and "header").
        :param user: The user row dictionary.
        :return: A valid JSON string or None if the structure is invalid.
        """
        if self.Core.involve_recsys_result:
            # If recsys is used, fetch recommendation data
            output_message = self.fetch_recommendation_data(user, message)
        elif self.Core.messaging_mode == "recommend_playlist":
            # If recommending a playlist, add the relevant fields
            if "playlist_id" in message and "message" in message:
                playlist_id = str(message["playlist_id"])
                web_url_path = f"https://www.musora.com/{self.Core.brand}/playlist/{playlist_id}"
                output_message = {
                    "header": message.get("header", ""),
                    "message": message.get("message", ""),
                    "playlist_id": int(message["playlist_id"]),
                    "web_url_path": web_url_path,
                }
            else:
                print("LLM output is missing either 'playlist_id' or 'message'.")
                return None
        else:
            # Basic scenario: Only 'header' and 'message' expected
            if "message" not in message or "header" not in message:
                print("LLM output is missing 'header' or 'message'.")
                return None
            output_message = {
                "header": message["header"],
                "message": message["message"]
            }

        return json.dumps(output_message, ensure_ascii=False)

    # --------------------------------------------------------------
    def fetch_recommendation_data(self, user, message):
        """
        Merges the user's current recommendation fields into the LLM output.

        :param user: User row; must carry "recommendation" and (for the
            "for_you" case) "recsys_result".
        :param message: LLM output dict with "header" and "message".
        :return: Output dict enriched with content_id/web_url_path/title/thumbnail_url.
        """
        if user["recommendation"] == "for_you":
            # Generic "for you" page recommendation: no specific content item.
            output_message = {
                "header": message.get("header"),
                "message": message.get("message"),
                "content_id": None,
                "web_url_path": user["recsys_result"],
                "title": user["recommendation"],
                "thumbnail_url": None
            }
        else:
            # Specific content recommendation: presumably a dict produced by
            # the recommender (see select_next_recommendation).
            recommendation_dict = user["recommendation"]
            content_id = int(recommendation_dict["content_id"])

            # Extract required fields from the recommended item
            web_url_path = recommendation_dict["web_url_path"]
            title = recommendation_dict["title"]
            thumbnail_url = recommendation_dict["thumbnail_url"]

            # Strip backslashes and double quotes so the message embeds cleanly
            # in downstream JSON payloads.
            msg = message.get("message")
            if isinstance(msg, str):
                msg = msg.replace('\\', '').replace('"', '')
            else:
                msg = str(msg)  # or handle it differently if this shouldn't happen

            message["message"] = msg

            # Add these to the message dict
            output_message = {
                "header": message.get("header"),
                "message": message.get("message"),
                "content_id": content_id,
                "web_url_path": web_url_path,
                "title": title,
                "thumbnail_url": thumbnail_url
            }
        return output_message

    # ===============================================================
    def _remove_from_all(self, recsys_dict, cid):
        """Remove every item with content_id == cid from each list-valued section."""
        for sec, recs in list(recsys_dict.items()):
            if isinstance(recs, list):
                recsys_dict[sec] = [r for r in recs if r.get("content_id") != cid]
        return recsys_dict

    # ===============================================================
    def _lookup_content_info(self, cid):
        """Return the content_info string for cid from Core.content_info, or None."""
        row = self.Core.content_info[self.Core.content_info["content_id"] == cid]
        return row["content_info"].iloc[0] if not row.empty else None

    # ===============================================================
    def select_next_recommendation(self, user):
        """
        Select next recommendation from the user's current recsys_result.
        Returns: content_id, content_info, updated_recsys_json
        """
        self.llmr.user = user  # _get_recommendation expects self.user to be set
        cid, cinfo, updated_json, _ = self.llmr._get_recommendation()
        return cid, cinfo, updated_json

    # ==============================================================
    def _get_user_idx(self, u):
        """Best-effort lookup of the user's row index in Core.users_df (or None)."""
        # If it's a Series, its index label is usually the row index
        if isinstance(u, pd.Series) and u.name in self.Core.users_df.index:
            return u.name
        # Otherwise try a stable key like user_id (change if your key is different)
        key_col = "user_id" if "user_id" in self.Core.users_df.columns else None
        if key_col and key_col in u:
            matches = self.Core.users_df.index[self.Core.users_df[key_col] == u[key_col]]
            if len(matches):
                return matches[0]
        # Fallback: try exact row equality (last resort; slower)
        try:
            return self.Core.users_df.index[self.Core.users_df.eq(pd.Series(u)).all(axis=1)][0]
        except Exception:
            return None

    # =============================================================
    def _update_user_fields(self, idx, user, fields: dict):
        """Update DF row and return a fresh copy of the user row (Series) with those fields reflected."""
        if idx is None:
            # No index known: mutate only the local dict/Series snapshot.
            for k, v in fields.items():
                user[k] = v
            return user
        for k, v in fields.items():
            self.Core.users_df.at[idx, k] = v
        return self.Core.users_df.loc[idx]

    # --------------------------------------------------------------
    def llm_instructions(self):
        """
        Setting instructions for llm
        :return: instructions as string
        """
        banned_phrases = "\n".join(f"- {word}" for word in self.Core.config_file["AI_Jargon"])
        jargon_list = "\n".join(f"- {word}" for word in self.Core.config_file[f"AI_phrases_{self.Core.brand}"])

        if self.Core.personalization:
            instructions = f"""
Your task is to select the best 'header' and a 'message' for a {self.Core.get_instrument()} student as a push notification. 
Based on the user instructions, you might need to **modify the selected option** very minimal and slightly to improve personalization if capable while preserving the original brand voice, tone, rhythm, and structure.

**Important Note**: header < {self.Core.config_file["header_limit"]} and message < {self.Core.config_file["message_limit"]} characters.
**Important Note**: NEVER use time-related words (“new,” “recent,” “latest,” etc.) and NEVER imply recency in any way.

### Don't use below phrases, words, or similar variations of them:
{banned_phrases}
{jargon_list}
"""

        else:
            instructions = f"""
Your task is to select the best 'header' and a 'message' for a {self.Core.get_instrument()} student as a push notification. 
DO NOT **change** or **modify** or **add to** the selected option in any shape or form. **Use the exact original selected header and message without ANY change**

"""

        return instructions