Spaces:

Mahbodez
/

knee_report_checklist

Runtime error

App Files Files Community

Mahbodez committed on Jul 10, 2023

Commit

1d80bec

1 Parent(s): 1a15844

Upload 5 files

Browse files

Files changed (5) hide show

app.py +65 -0
interface.py +406 -0
knee_template.json +359 -0
treegraph.py +226 -0
utils.py +361 -0

app.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import gradio as gr
+import interface
+import utils
+import treegraph as tg
+system_prompt = """
+You are a critical AI radiology assistant.
+You are helping a radiologist correctly fill out a radiology report.
+The report is regarding a Knee MRI.
+"""
+graph, nodes_dict = tg.build_tree_from_file("knee_template.json")
+report_interface = interface.ReportChecklistInterface(
+    llm=utils.LLM(model="gpt-3.5-turbo"),
+    system_prompt=system_prompt,
+    graph=graph,
+    nodes_dict=nodes_dict,
+)
+if report_interface.prime_model() is False:
+    print("Model priming failed. Please try again.")
+    exit()
+else:
+    print("Model priming successful.")
+with gr.Blocks(theme="soft") as demo:
+    gr.Markdown("## Radiology Report Assistant")
+    gr.Markdown(report_interface.help_message)
+    running = gr.components.Variable(True)
+    report_textbox = gr.TextArea(label="Report", lines=20, max_lines=50)
+    check_btn = gr.Button(
+        value="Check Report",
+    )
+    clear_btn = gr.ClearButton(
+        value="Clear Messages",
+    )
+    quit_btn = gr.Button(
+        value="Quit",
+    )
+    results_textbox = gr.TextArea(label="Results", lines=20, max_lines=50)
+    clear_btn.add([results_textbox, report_textbox])
+    def check_report(report):
+        if running:
+            results = report_interface.process_input(report)
+            if results == "quit":
+                quit_fn()
+            elif results == "help":
+                return report_interface.help_message
+            elif results == "exception":
+                return "An exception occurred. Please try again."
+            else:
+                return results
+        else:
+            return "Model has been stopped."
+    def quit_fn():
+        running.value = False
+        results_textbox.value = "Model has been stopped."
+    check_btn.click(fn=check_report, inputs=[report_textbox], outputs=[results_textbox])
+    quit_btn.click(fn=quit_fn)
+demo.launch()

interface.py ADDED Viewed

	@@ -0,0 +1,406 @@

+import json
+import numpy as np
+import treegraph as tg
+import colorama
+from colorama import Fore
+import networkx as nx
+import utils
+import re
+DEBUG = True
+INPUT_COLOR = Fore.LIGHTGREEN_EX
+DEBUG_COLOR = Fore.LIGHTBLACK_EX
+OUTPUT_COLOR = Fore.LIGHTMAGENTA_EX
+INFO_COLOR = Fore.BLUE
+HELP_COLOR = Fore.CYAN
+def print_debug(*args, color=DEBUG_COLOR):
+    """
+    Prints debug messages if DEBUG is set to True.
+    """
+    if DEBUG:
+        for arg in args:
+            print(color + str(arg))
+class ReportInterface:
+    def __init__(
+        self,
+        llm: utils.LLM,
+        system_prompt: str,
+        tree_graph: nx.Graph,
+        nodes_dict: dict[str, tg.Node],
+        api_key: str = None,
+    ):
+        self.llm = llm
+        self.system_prompt = system_prompt
+        self.tree_graph = tree_graph
+        self.nodes_dict = nodes_dict
+        self.api_key = api_key
+        self.build()
+    def build(self):
+        utils.set_api_key(self.api_key)
+        self.system_prompt = utils.make_message("system", self.system_prompt)
+        self.visitable_nodes = self._get_visitable_nodes()
+        self.report_dict = self._get_report_dict()
+        self.active_node: tg.Node = self.nodes_dict["root"]
+        self.unique_visited_nodes = set()  # set of nodes visited
+        self.node_journey = []  # list of nodes visited
+        self.distance_travelled = 0  # number of edges travelled
+        self.jumps = 0  # number of jumps
+        self.jump_lengths = []  # list of jump lengths
+        self.counter = 0  # number of questions asked
+        colorama.init(autoreset=True)  # to reset the color after each print statement
+        self.help_message = f"""You are presented with a Knee MRI.
+        You are asked to fill out a radiology report.
+        Please only report the findings in the MRI.
+        Please mention your findings with the corresponding anatomical structures.
+        There are {len(self.visitable_nodes.keys())} visitable nodes in the tree.
+        You must visit as many nodes as possible, while avoiding too many jumps."""
+    def _get_visitable_nodes(self):
+        return dict(
+            zip(
+                [
+                    node.name
+                    for node in self.tree_graph.nodes
+                    if node.name != "root" and node.has_children() is False
+                ],
+                [
+                    node
+                    for node in self.tree_graph.nodes
+                    if node.name != "root" and node.has_children() is False
+                ],
+            )
+        )
+    def _get_report_dict(self):
+        return {
+            node.name: tg.Node(node.name, "", node.children)
+            for node in self.visitable_nodes.values()
+        }
+    @utils.debug(DEBUG, print_debug)
+    def _check_question_validity(
+        self,
+        question: str,
+    ):
+        # let's ask the question from the model and check if it's valid
+        template_json = json.dumps(
+            {key: node.value for key, node in self.visitable_nodes.items()},
+            indent=4,
+        )
+        q = f"""the following is a Knee MRI report "template" in a JSON format with keys and values.
+        You are given a "finding" phrase from a radiologist.
+        Match as best as possible the "finding" with one of keys in the "template".
+        <template>
+        {template_json}
+        </template>
+        <finding>
+        {question}
+        </finding>
+        "available": [Is the "finding" relevant to any key in the "template"? say "yes" or "no".
+        Make sure the "finding" is relevant to Knee MRI and knee anatomy otherwise say 'no'.
+        Do not answer irrelevant phrases.]
+        "node": [if the above answer is 'yes', write only the KEY of the most relevant node to the "finding". otherwise, say 'none'.]
+        """
+        keys = ["available", "node"]
+        prompt = [self.system_prompt] + [
+            utils.make_question(utils.JSON_TEMPLATE, question=q, keys=keys)
+        ]
+        response = self.llm(prompt)
+        print_debug(
+            prompt,
+            response,
+        )
+        available = utils.json2dict(response)["available"].strip().lower()
+        node = utils.json2dict(response)["node"]
+        return available, node
+    def _update_node(self, node_name, findings):
+        self.report_dict[node_name].value += str(findings) + "\n"
+        response = f"Updated node '{node_name}' with finding '{findings}'"
+        print(OUTPUT_COLOR + response)
+        return response
+    def save_report(self, filename: str):
+        # convert performance metrics to json
+        metrics = {
+            "distance_travelled": self.distance_travelled,
+            "jumps": self.jumps,
+            "jump_lengths": self.jump_lengths,
+            "unique_visited_nodes": [node.name for node in self.unique_visited_nodes],
+            "node_journey": [node.name for node in self.node_journey],
+            "report": {
+                node_name: node.value for node_name, node in self.report_dict.items()
+            },
+        }
+        # save the report
+        with open(filename, "w") as file:
+            json.dump(metrics, file, indent=4)
+    def prime_model(self):
+        """
+        Primes the model with the system prompt.
+        """
+        q = "Are you ready to begin?\nSay 'yes' or 'no'."
+        keys = ["answer"]
+        response = self.llm(
+            [
+                self.system_prompt,
+                utils.make_question(utils.JSON_TEMPLATE, question=q, keys=keys),
+            ],
+        )
+        print_debug(q, response)
+        if utils.json2dict(response)["answer"].lower() == "yes":
+            print(INFO_COLOR + "The model is ready.")
+            return True
+        else:
+            print(INFO_COLOR + "The model is not ready.")
+            return False
+    def performance_summary(self):
+        # print out the summary info
+        print(INFO_COLOR + "Performance Summary:")
+        print(
+            INFO_COLOR + f"Total distance travelled: {self.distance_travelled} edge(s)"
+        )
+        print(INFO_COLOR + f"Jump lengths: {self.jump_lengths}")
+        print(INFO_COLOR + f"Jump lengths mean: {np.mean(self.jump_lengths):.1f}")
+        print(INFO_COLOR + f"Jump lengths SD: {np.std(self.jump_lengths):.1f}")
+        print(INFO_COLOR + f"Nodes visited in order: {self.node_journey}")
+        print(INFO_COLOR + f"Unique nodes visited: {self.unique_visited_nodes}")
+        print(
+            INFO_COLOR
+            + f"You have explored {len(self.unique_visited_nodes)/len(self.visitable_nodes):.1%} ({len(self.unique_visited_nodes)}/{len(self.visitable_nodes)}) of the tree."
+        )
+        print_debug("\n")
+        print_debug("Report Summary:".rjust(20))
+        for name, node in self.report_dict.items():
+            if node.value != "":
+                print_debug(f"{name}: {node.value}")
+        print(INFO_COLOR + f"total cost: ${self.llm.cost:.4f}")
+        print(INFO_COLOR + f"total tokens used: {self.llm.token_counter}")
+    def get_stats(self):
+        report_string = ""
+        for name, node in self.report_dict.items():
+            if node.value != "":
+                report_string += f"{name}: <{node.value}> \n"
+        return {
+            "Lengths travelled": self.distance_travelled,
+            "Number of jumps": self.jumps,
+            "Jump lengths": self.jump_lengths,
+            "Unique nodes visited": [node.name for node in self.unique_visited_nodes],
+            "Visited Nodes": [node.name for node in self.node_journey],
+            "Report": report_string,
+        }
+    def visualize_tree(self, **kwargs):
+        tg.visualize_graph(tg.from_list(self.node_journey), self.tree_graph, **kwargs)
+    def get_plot(self, **kwargs):
+        return tg.get_graph(tg.from_list(self.node_journey), self.tree_graph, **kwargs)
+    def process_input(self, input_text: str):
+        res = "n/a"
+        try:
+            finding = input_text
+            if finding.strip().lower() == "quit":
+                print(INFO_COLOR + "Exiting...")
+                return "quit"
+            elif finding.strip().lower() == "help":
+                return "help"
+            available, node = self._check_question_validity(finding)
+            if available != "yes":
+                print(
+                    OUTPUT_COLOR
+                    + "Could not find a relevant node.\nWrite more clearly and provide more details."
+                )
+                return "n/a"
+            if node not in self.visitable_nodes.keys():
+                print(
+                    OUTPUT_COLOR
+                    + "Could not find a relevant node.\nWrite more clearly and provide more details."
+                )
+                return "n/a"
+            else:
+                # modify the tree to update the node with findings
+                res = self._update_node(node, finding)
+                print(
+                    INFO_COLOR
+                    + f"jumping from node '{self.active_node}' to node '{node}'..."
+                )
+                distance = tg.num_edges_between_nodes(
+                    self.tree_graph, self.active_node, self.nodes_dict[node]
+                )
+                print(INFO_COLOR + f"distance travelled: {distance} edge(s)")
+                self.active_node = self.nodes_dict[node]
+                self.jumps += 1
+                self.jump_lengths.append(distance)
+                self.distance_travelled += distance
+                if self.active_node.name != "root":
+                    self.unique_visited_nodes.add(self.active_node)
+                    self.node_journey.append(self.active_node)
+        except Exception as ex:
+            print_debug(ex, color=Fore.LIGHTRED_EX)
+            return "exception"
+        self.counter += 1
+        try:
+            self.performance_summary()
+        except Exception as ex:
+            print_debug(ex, color=Fore.LIGHTRED_EX)
+        return res
+class ReportChecklistInterface:
+    def __init__(
+        self,
+        llm: utils.LLM,
+        system_prompt: str,
+        graph: nx.Graph,
+        nodes_dict: dict[str, tg.Node],
+        api_key: str = None,
+    ):
+        self.llm = llm
+        self.system_prompt = system_prompt
+        self.tree_graph: nx.Graph = graph
+        self.nodes_dict = nodes_dict
+        self.api_key = api_key
+        self.build()
+    def build(self):
+        utils.set_api_key(self.api_key)
+        self.system_prompt = utils.make_message("system", self.system_prompt)
+        self.visitable_nodes = self._get_visitable_nodes()
+        colorama.init(autoreset=True)  # to reset the color after each print statement
+        self.help_message = f"""You are presented with a Knee MRI.
+        You are asked to fill out a radiology report.
+        Please only report the findings in the MRI.
+        Please mention your findings with the corresponding anatomical structures.
+        There are {len(self.visitable_nodes.keys())} visitable nodes in the tree."""
+    def _get_visitable_nodes(self):
+        return dict(
+            zip(
+                [
+                    node.name
+                    for node in self.tree_graph.nodes
+                    if node.name != "root" and node.has_children() is False
+                ],
+                [
+                    node
+                    for node in self.tree_graph.nodes
+                    if node.name != "root" and node.has_children() is False
+                ],
+            )
+        )
+    @utils.debug(DEBUG, print_debug)
+    def _check_report(
+        self,
+        report: str,
+    ):
+        # let's ask the question from the model and check if it's valid
+        checklist_json = json.dumps(
+            {key: node.value for key, node in self.visitable_nodes.items()},
+            indent=4,
+        )
+        q = f"""the following is a Knee MRI "checklist" in JSON format with keys as items and values as findings:
+        A knee MRI "report" is also provided in raw text format written by a radiologist:
+        <checklist>
+        {checklist_json}
+        </checklist>
+        <report>
+        {report}
+        </report>
+        Your task is to find all the corresponding items from the "checklist" in the "report" and fill out a JSON with the same keys as the "checklist" but extract the corresponding values from the "report".
+        If a key is not found in the "report", please set the value to "n/a", otherwise set it to the corresponding finding from the "report".
+        You must check the "report" phrases one by one and find a corresponding key(s) for EACH phrase in the "report" from the "checklist" and fill out the "report_checked" JSON.
+        Try to fill out as many items as possible.
+        ALL of the items in the "checklist" must be filled out.
+        Don't generate findings that are not present in the "report" (new findings).
+        Be comprehensive and don't miss any findings that are present in the "report".
+        Watch out for encompassing terms (e.g., "cruciate ligaments" means both "ACL" and "PCL").
+        "thought_process": [Think in steps on how you would do this task.]
+        "report_ckecked" : [a JSON with the same keys as the "checklist" but take the values from the "report", as described above.]
+        """
+        keys = ["thought_process", "report_checked"]
+        prompt = [self.system_prompt] + [
+            utils.make_question(utils.JSON_TEMPLATE, question=q, keys=keys)
+        ]
+        response = self.llm(prompt)
+        print_debug(
+            prompt,
+            response,
+        )
+        report_checked = utils.json2dict(response)
+        return report_checked["report_checked"]
+    def prime_model(self):
+        """
+        Primes the model with the system prompt.
+        """
+        q = "Are you ready to begin?\nSay 'yes' or 'no'."
+        keys = ["answer"]
+        response = self.llm(
+            [
+                self.system_prompt,
+                utils.make_question(utils.JSON_TEMPLATE, question=q, keys=keys),
+            ],
+        )
+        print_debug(q, response)
+        if utils.json2dict(response)["answer"].lower() == "yes":
+            print(INFO_COLOR + "The model is ready.")
+            return True
+        else:
+            print(INFO_COLOR + "The model is not ready.")
+            return False
+    def process_input(self, input_text: str):
+        try:
+            report = input_text
+            if report.strip().lower() == "quit":
+                print(INFO_COLOR + "Exiting...")
+                return "quit"
+            elif report.strip().lower() == "help":
+                return "help"
+            checked_report: dict = self._check_report(report)
+            # make a string of the report
+            # replace true with [checkmark emoji] and false with [cross emoji]
+            report_string = ""
+            CHECKMARK = "\u2705"
+            CROSS = "\u274C"
+            # we need a regex to convert the camelCase keys to a readable format
+            def camel2readable(camel: str):
+                string = re.sub("([a-z])([A-Z])", r"\1 \2", camel)
+                # captialize every word
+                string = " ".join([word.capitalize() for word in string.split()])
+                return string
+            for key, value in checked_report.items():
+                if str(value).lower() == "true":
+                    report_string += f"{camel2readable(key)}: {CHECKMARK}\n"
+                elif str(value).lower() == "n/a":
+                    report_string += f"{camel2readable(key)}: {CROSS}\n"
+                else:
+                    report_string += f"{camel2readable(key)}: <{value}> {CHECKMARK}\n"
+            return report_string
+        except Exception as ex:
+            print_debug(ex, color=Fore.LIGHTRED_EX)
+            return "exception"

knee_template.json ADDED Viewed

	@@ -0,0 +1,359 @@

+{
+    "root": {
+        "value": "root",
+        "parent": null,
+        "children": [
+            "kneeJointEffusion",
+            "kneeMeniscus",
+            "kneeAclPcl",
+            "kneeMcl",
+            "kneePosterolateralCorner",
+            "kneeExtensorMechanism",
+            "kneeCartilage",
+            "kneeBone",
+            "kneeOther"
+        ]
+    },
+    "kneeJointEffusion": {
+        "value": "Presence and/or extent of joint effusion.",
+        "parent": "root",
+        "children": []
+    },
+    "kneeMeniscus": {
+        "value": "",
+        "parent": "root",
+        "children": [
+            "kneeMeniscusMedialTearing",
+            "kneeMeniscusLateralTearing",
+            "kneeMeniscusWrisberg",
+            "kneeMeniscusRootTearing",
+            "kneeMeniscusRampLesion"
+        ]
+    },
+    "kneeMeniscusMedialTearing": {
+        "value": "Presence and/or severity of medial meniscus tearing",
+        "parent": "kneeMeniscus",
+        "children": []
+    },
+    "kneeMeniscusLateralTearing": {
+        "value": "Presence and/or severity of lateral meniscus tearing",
+        "parent": "kneeMeniscus",
+        "children": []
+    },
+    "kneeMeniscusWrisberg": {
+        "value": "Presence and/or severity of Wrisberg variant",
+        "parent": "kneeMeniscus",
+        "children": []
+    },
+    "kneeMeniscusRootTearing": {
+        "value": "Presence and/or severity of meniscus root tearing",
+        "parent": "kneeMeniscus",
+        "children": []
+    },
+    "kneeMeniscusRampLesion": {
+        "value": "Presence and/or severity of ramp lesion",
+        "parent": "kneeMeniscus",
+        "children": []
+    },
+    "kneeAclPcl": {
+        "value": "",
+        "parent": "root",
+        "children": [
+            "kneeAcl",
+            "kneePcl"
+        ]
+    },
+    "kneeAcl": {
+        "value": "",
+        "parent": "kneeAclPcl",
+        "children": [
+            "kneeAclTearing",
+            "kneeAclDegeneration",
+            "kneeAclReconstruction"
+        ]
+    },
+    "kneeAclTearing": {
+        "value": "Presence and/or severity of ACL tearing",
+        "parent": "kneeAcl",
+        "children": []
+    },
+    "kneeAclDegeneration": {
+        "value": "Presence and/or severity of ACL degeneration",
+        "parent": "kneeAcl",
+        "children": []
+    },
+    "kneeAclReconstruction": {
+        "value": "ACL reconstruction status",
+        "parent": "kneeAcl",
+        "children": []
+    },
+    "kneePcl": {
+        "value": "",
+        "parent": "kneeAclPcl",
+        "children": [
+            "kneePclTearing",
+            "kneePclDegeneration",
+            "kneePclReconstruction"
+        ]
+    },
+    "kneePclTearing": {
+        "value": "Presence and/or severity of PCL tearing",
+        "parent": "kneePcl",
+        "children": []
+    },
+    "kneePclDegeneration": {
+        "value": "Presence and/or severity of PCL degeneration",
+        "parent": "kneePcl",
+        "children": []
+    },
+    "kneePclReconstruction": {
+        "value": "PCL reconstruction status",
+        "parent": "kneePcl",
+        "children": []
+    },
+    "kneeMcl": {
+        "value": "",
+        "parent": "root",
+        "children": [
+            "kneeMclTearing",
+            "kneeMclDeepFibers",
+            "kneeMclSuperficialFibers"
+        ]
+    },
+    "kneeMclTearing": {
+        "value": "Presence and/or severity of MCL tearing",
+        "parent": "kneeMcl",
+        "children": []
+    },
+    "kneeMclDeepFibers": {
+        "value": "MCL deep fibers status",
+        "parent": "kneeMcl",
+        "children": []
+    },
+    "kneeMclSuperficialFibers": {
+        "value": "MCL superficial fibers status",
+        "parent": "kneeMcl",
+        "children": []
+    },
+    "kneePosterolateralCorner": {
+        "value": "",
+        "parent": "root",
+        "children": [
+            "kneeIlioTibialBand",
+            "kneeBicepsFemorisTendon",
+            "kneeLateralCollateralLigament"
+        ]
+    },
+    "kneeIlioTibialBand": {
+        "value": "Presence and/or severity of ilio-tibial band findings",
+        "parent": "kneePosterolateralCorner",
+        "children": []
+    },
+    "kneeBicepsFemorisTendon": {
+        "value": "Presence and/or severity of biceps femoris tendon findings",
+        "parent": "kneePosterolateralCorner",
+        "children": []
+    },
+    "kneeLateralCollateralLigament": {
+        "value": "Presence and/or severity of lateral collateral ligament findings",
+        "parent": "kneePosterolateralCorner",
+        "children": []
+    },
+    "kneeExtensorMechanism": {
+        "value": "",
+        "parent": "root",
+        "children": [
+            "kneeQuadricepsTendon",
+            "kneePatellarTendon"
+        ]
+    },
+    "kneeQuadricepsTendon": {
+        "value": "",
+        "parent": "kneeExtensorMechanism",
+        "children": [
+            "kneeQuadricepsTendonTearing",
+            "kneeQuadricepsTendinopathy"
+        ]
+    },
+    "kneeQuadricepsTendonTearing": {
+        "value": "Presence and/or severity of quadriceps tendon tearing",
+        "parent": "kneeQuadricepsTendon",
+        "children": []
+    },
+    "kneeQuadricepsTendinopathy": {
+        "value": "Presence and/or severity of quadriceps tendinopathy",
+        "parent": "kneeQuadricepsTendon",
+        "children": []
+    },
+    "kneePatellarTendon": {
+        "value": "",
+        "parent": "kneeExtensorMechanism",
+        "children": [
+            "kneePatellarTendonTearing",
+            "kneePatellarTendinopathy"
+        ]
+    },
+    "kneePatellarTendonTearing": {
+        "value": "Presence and/or severity of patellar tendon tearing",
+        "parent": "kneePatellarTendon",
+        "children": []
+    },
+    "kneePatellarTendinopathy": {
+        "value": "Presence and/or severity of patellar tendinopathy",
+        "parent": "kneePatellarTendon",
+        "children": []
+    },
+    "kneeCartilage": {
+        "value": "Articular cartilage status",
+        "parent": "root",
+        "children": [
+            "kneeCartilageFemoral",
+            "kneeCartilageTibial",
+            "kneeCartilagePatellar",
+            "kneeOsteochondralLesion"
+        ]
+    },
+    "kneeCartilageFemoral": {
+        "value": "",
+        "parent": "kneeCartilage",
+        "children": [
+            "kneeCartilageFemoralMedial",
+            "kneeCartilageFemoralLateral"
+        ]
+    },
+    "kneeCartilageFemoralMedial": {
+        "value": "Presence and/or severity of knee medial femoral cartilage findings",
+        "parent": "kneeCartilageFemoral",
+        "children": []
+    },
+    "kneeCartilageFemoralLateral": {
+        "value": "Presence and/or severity of knee lateral femoral cartilage findings",
+        "parent": "kneeCartilageFemoral",
+        "children": []
+    },
+    "kneeCartilageTibial": {
+        "value": "",
+        "parent": "kneeCartilage",
+        "children": [
+            "kneeCartilageTibialMedial",
+            "kneeCartilageTibialLateral"
+        ]
+    },
+    "kneeCartilageTibialMedial": {
+        "value": "Presence and/or severity of knee medial tibial cartilage findings",
+        "parent": "kneeCartilageTibial",
+        "children": []
+    },
+    "kneeCartilageTibialLateral": {
+        "value": "Presence and/or severity of knee lateral tibial cartilage findings",
+        "parent": "kneeCartilageTibial",
+        "children": []
+    },
+    "kneeCartilagePatellar": {
+        "value": "",
+        "parent": "kneeCartilage",
+        "children": [
+            "kneeCartilagePatellarMedial",
+            "kneeCartilagePatellarLateral"
+        ]
+    },
+    "kneeOsteochondralLesion": {
+        "value": "Presence and/or severity of knee osteochondral lesions/defects",
+        "parent": "kneeCartilage",
+        "children": []
+    },
+    "kneeCartilagePatellarMedial": {
+        "value": "Presence and/or severity of knee medial patellar cartilage findings",
+        "parent": "kneeCartilagePatellar",
+        "children": []
+    },
+    "kneeCartilagePatellarLateral": {
+        "value": "Presence and/or severity of knee lateral patellar cartilage findings",
+        "parent": "kneeCartilagePatellar",
+        "children": []
+    },
+    "kneeBone": {
+        "value": "",
+        "parent": "root",
+        "children": [
+            "kneeBoneFracture",
+            "kneeBoneMarrowEdema",
+            "kneeSubchondralFracture",
+            "kneeOsteonecrosis",
+            "kneeBoneAvn"
+        ]
+    },
+    "kneeBoneFracture": {
+        "value": "Presence and/or severity and/or location and/or type of knee bone fracture",
+        "parent": "kneeBone",
+        "children": []
+    },
+    "kneeBoneMarrowEdema": {
+        "value": "Presence and/or severity of knee bone marrow edema/contusion",
+        "parent": "kneeBone",
+        "children": []
+    },
+    "kneeSubchondralFracture": {
+        "value": "Presence and/or severity of knee subchondral fractures",
+        "parent": "kneeBone",
+        "children": []
+    },
+    "kneeOsteonecrosis": {
+        "value": "Presence and/or severity of knee osteonecrosis",
+        "parent": "kneeBone",
+        "children": []
+    },
+    "kneeBoneAvn": {
+        "value": "Presence and/or severity of knee avascular necrosis",
+        "parent": "kneeBone",
+        "children": []
+    },
+    "kneeOther": {
+        "value": "Other knee findings",
+        "parent": "root",
+        "children": [
+            "kneeBursa",
+            "kneePoplitealCyst",
+            "kneeGanglionCyst",
+            "kneeLipoma",
+            "kneeMass",
+            "kneeSynovium",
+            "other"
+        ]
+    },
+    "kneeBursa": {
+        "value": "Presence and/or severity of knee bursa findings, e.g. bursitis",
+        "parent": "kneeOther",
+        "children": []
+    },
+    "kneePoplitealCyst": {
+        "value": "Presence and/or extent of knee popliteal/Baker's cyst",
+        "parent": "kneeOther",
+        "children": []
+    },
+    "kneeGanglionCyst": {
+        "value": "Presence and/or extent of knee ganglion cyst",
+        "parent": "kneeOther",
+        "children": []
+    },
+    "kneeLipoma": {
+        "value": "Presence and/or extent of knee lipoma",
+        "parent": "kneeOther",
+        "children": []
+    },
+    "kneeMass": {
+        "value": "Presence and/or extent of knee mass",
+        "parent": "kneeOther",
+        "children": []
+    },
+    "kneeSynovium": {
+        "value": "Presence and/or extent of knee synovial findings, e.g. synovitis, thickening",
+        "parent": "kneeOther",
+        "children": []
+    },
+    "other": {
+        "value": "Any other findings not listed above",
+        "parent": "kneeOther",
+        "children": []
+    }
+}

treegraph.py ADDED Viewed

	@@ -0,0 +1,226 @@

+import networkx as nx
+import json
+import matplotlib.pyplot as plt
+class Node:
+    def __init__(self, name: str, value=None, parent=None, children: list = []):
+        self.name = name
+        self.children = set(children)
+        self.parent = parent
+        self.value = value
+    def __repr__(self):
+        return self.name
+    def __str__(self):
+        return self.name
+    def __eq__(self, other):
+        return self.name == other.name
+    def __hash__(self) -> int:
+        return hash(self.name)
+    # make serializable for json
+    def __getstate__(self):
+        return self.__dict__
+    def __dict__(self):
+        # return a dict of the node's attributes
+        return {
+            "name": self.name,
+            "children": self.children,
+            "parent": self.parent,
+            "value": self.value,
+        }
+    def to_json(self):
+        """
+        Returns a JSON string representation of the node.
+        """
+        return json.dumps(self.__dict__)
+    def add_child(self, child):
+        self.children.add(child)
+    def has_children(self):
+        return len(self.children) > 0
+    def set_parent(self, new_parent):
+        self.parent = new_parent
+    def set_value(self, new_value):
+        self.value = new_value
+def read_json(fname: str) -> dict:
+    assert fname.endswith(".json"), "File must be a json file"
+    with open(fname, "r") as f:
+        data = json.load(f)
+    return dict(data)
+def build_tree_from_dict(data: dict, connect_children: bool = True):
+    # every dict key is a node's name
+    # dict value is a dict with keys "value", "parent", "children"
+    # "value" is the node's value
+    # "parent" is the node's parent's name
+    # "children" is a list of the node's children's names
+    # create a networkx graph
+    G = nx.Graph()
+    nodes_dict = dict()
+    # build the nodes
+    for name, info in data.items():
+        value = info["value"]
+        parent = info["parent"]
+        children: list = info["children"]
+        nodes_dict[name] = Node(
+            name=name, parent=parent, children=children, value=value
+        )
+        G.add_node(nodes_dict[name], value=value)
+    # build the edges
+    for _, node in nodes_dict.items():
+        for child in node.children:
+            G.add_edge(node, nodes_dict[child])
+            # connect children to each other if connect_children is True
+            if connect_children:
+                for child2 in node.children:
+                    if child != child2:
+                        G.add_edge(nodes_dict[child], nodes_dict[child2])
+    return G, nodes_dict
+def build_tree_from_file(fname: str):
+    data = read_json(fname)
+    return build_tree_from_dict(data)
+# calculate the number of edges between two nodes
+def num_edges_between_nodes(G, node1, node2):
+    return len(nx.shortest_path(G, node1, node2)) - 1
+def explore_bfs(G: nx.Graph, source: Node, nodes_dict: dict[str, Node]):
+    # start from a source node and explore the graph in a breadth-first manner
+    # prioritize nodes with non-empty values
+    # explore the graph and return a list of nodes in the order they were explored
+    explored_nodes = []
+    queue = [source]
+    while queue:
+        node = queue.pop(0)
+        explored_nodes.append(node)
+        for child in node.children:
+            if nodes_dict[child].value:
+                queue.insert(0, nodes_dict[child])
+            else:
+                queue.append(nodes_dict[child])
+    return explored_nodes
+def from_list(node_list: list[Node], directional=True):
+    # create a tree from a list of nodes
+    # and label the edges from the first node to the last node from 1 to n
+    if directional:
+        G = nx.DiGraph()
+    else:
+        G = nx.Graph()
+    G.add_nodes_from(node_list)
+    for i in range(len(node_list) - 1):
+        G.add_edge(node_list[i], node_list[i + 1], label=i + 1)
+    return G
+def visualize_graph(
+    graph: nx.Graph,
+    layout_graph: nx.Graph,
+    title="BFS Tree",
+    fig_size=(30, 20),
+    title_fontsize=20,
+    edge_width=1,
+    font_size=9,
+    node_size=500,
+    node_shape="o",
+    prog="dot",
+):
+    graphviz_args = "-Goverlap=false -Gsplines=true -Gsep=0.1 -Gnodesep=0.1 -Gmaxiter=1000 -Gepsilon=0.0001 -Gstart=0"
+    _, ax = plt.subplots(figsize=fig_size)
+    ax.set_title(title, fontsize=title_fontsize)
+    # also draw edge labels
+    nx.draw(
+        graph,
+        ax=ax,
+        with_labels=True,
+        # color every node lightblue except the root which is colored red
+        node_color=(["lightgreen"] + ["lightblue"] * (len(graph.nodes) - 2) + ["red"])
+        if len(graph.nodes) > 2
+        else ["lightgreen", "red"]
+        if len(graph.nodes) == 2
+        else ["lightgreen"],
+        edge_color="gray",
+        width=edge_width,
+        font_size=font_size,
+        # node size to be proportional to the node's value
+        node_size=node_size,
+        # shape set to rectangle
+        node_shape=node_shape,
+        pos=nx.nx_agraph.graphviz_layout(
+            layout_graph, prog=prog, root="root", args=graphviz_args
+        ),
+    )
+    nx.draw_networkx_edge_labels(
+        graph,
+        pos=nx.nx_agraph.graphviz_layout(
+            layout_graph, prog=prog, root="root", args=graphviz_args
+        ),
+        edge_labels=nx.get_edge_attributes(graph, "label"),
+        font_size=font_size,
+    )
+    plt.show()
+def get_graph(
+    graph: nx.Graph,
+    layout_graph: nx.Graph,
+    title="BFS Tree",
+    fig_size=(30, 20),
+    title_fontsize=20,
+    edge_width=1,
+    font_size=9,
+    node_size=500,
+    node_shape="o",
+    prog="dot",
+):
+    graphviz_args = "-Goverlap=false -Gsplines=true -Gsep=0.1 -Gnodesep=0.1 -Gmaxiter=1000 -Gepsilon=0.0001 -Gstart=0"
+    fig, ax = plt.subplots(figsize=fig_size)
+    ax.set_title(title, fontsize=title_fontsize)
+    nx.draw(
+        graph,
+        ax=ax,
+        with_labels=True,
+        # color every node lightblue except the root which is colored red
+        node_color=(["lightgreen"] + ["lightblue"] * (len(graph.nodes) - 2) + ["red"])
+        if len(graph.nodes) > 2
+        else ["lightgreen", "red"]
+        if len(graph.nodes) == 2
+        else ["lightgreen"],
+        edge_color="gray",
+        width=edge_width,
+        font_size=font_size,
+        # node size to be proportional to the node's value
+        node_size=node_size,
+        # shape set to rectangle
+        node_shape=node_shape,
+        pos=nx.nx_agraph.graphviz_layout(
+            layout_graph, prog=prog, root="root", args=graphviz_args
+        ),
+    )
+    nx.draw_networkx_edge_labels(
+        graph,
+        pos=nx.nx_agraph.graphviz_layout(
+            layout_graph, prog=prog, root="root", args=graphviz_args
+        ),
+        edge_labels=nx.get_edge_attributes(graph, "label"),
+        font_size=font_size,
+    )
+    return fig, ax

utils.py ADDED Viewed

	@@ -0,0 +1,361 @@

+import colorama
+from colorama import Fore, Style
+import openai
+from tenacity import retry, stop_after_attempt, wait_fixed
+import json
+import os
+import tiktoken
+import functools as ft
+import time
+# Default prompt template used by make_question(). It is filled in with
+# str.format, so {question} and {keys} are the placeholders to supply.
+JSON_TEMPLATE = """
+{question}
+The required key(s) are: {keys}.
+Only and only respond with the key(s) and value(s) mentioned above.
+Your answer in valid JSON format:\n
+"""
+# Per-model prices read by LLM.chat(), which divides token counts by 1000
+# before multiplying: "input" applies to prompt tokens, "output" to completion
+# tokens. NOTE(review): currency is presumably USD per 1,000 tokens — confirm.
+MODEL_COST_DICT = {
+    "gpt-3.5-turbo": {
+        "input": 0.0015,
+        "output": 0.002,
+    },
+    "gpt-4": {
+        "input": 0.03,
+        "output": 0.06,
+    },
+}
+def set_api_key(key=None):
+    """Sets the OpenAI API key."""
+    if key is None:
+        key = os.environ.get("OPENAI_API_KEY")
+    openai.api_key = key
+def num_tokens_from_string(string: str, encoding_name: str) -> int:
+    """Returns the number of tokens in a text string."""
+    encoding = tiktoken.get_encoding(encoding_name)
+    num_tokens = len(encoding.encode(string))
+    return num_tokens
+def num_tokens_from_messages(messages: list[dict], model="gpt-3.5-turbo-0613"):
+    """Returns the number of tokens used by a list of messages."""
+    try:
+        encoding = tiktoken.encoding_for_model(model)
+    except KeyError:
+        encoding = tiktoken.get_encoding("cl100k_base")
+    if model == "gpt-3.5-turbo-0613":  # note: future models may deviate from this
+        num_tokens = 0
+        for message in messages:
+            num_tokens += (
+                4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
+            )
+            for key, value in message.items():
+                num_tokens += len(encoding.encode(value))
+                if key == "name":  # if there's a name, the role is omitted
+                    num_tokens += -1  # role is always required and always 1 token
+        num_tokens += 2  # every reply is primed with <im_start>assistant
+        return num_tokens
+    else:
+        raise NotImplementedError(
+            f"""num_tokens_from_messages() is not presently implemented for model {model}.
+  See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
+        )
+@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
+def chat(messages: list[dict], model="gpt-3.5-turbo", temperature=0.0):
+    response = openai.ChatCompletion().create(
+        model=model,
+        messages=messages,
+        temperature=temperature,
+    )
+    return response["choices"][0]["message"]["content"]
+def make_message(role: str, content: str) -> dict:
+    return {
+        "role": role,
+        "content": content,
+    }
+def make_prompt(template: str, **kwargs):
+    return template.format(**kwargs)
+def unravel_messages(messages: list[dict]) -> list[str]:
+    """Returns a string representation of a list of messages."""
+    return [f"{message['role']}: {message['content']}" for message in messages]
+class LLM:
+    def __init__(self, model="gpt-3.5-turbo", temperature=0.0):
+        self.model = model
+        self.temperature = temperature
+        self.token_counter = 0
+        self.cost = 0.0
+    @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
+    def chat(self, messages: list[dict]):
+        response = openai.ChatCompletion().create(
+            model=self.model,
+            messages=messages,
+            temperature=self.temperature,
+        )
+        self.token_counter += int(response["usage"]["total_tokens"])
+        self.cost += (
+            response["usage"]["prompt_tokens"]
+            / 1000
+            * MODEL_COST_DICT[self.model]["input"]
+            + response["usage"]["completion_tokens"]
+            / 1000
+            * MODEL_COST_DICT[self.model]["output"]
+        )
+        return response["choices"][0]["message"]["content"]
+    def reset(self):
+        self.token_counter = 0
+        self.cost = 0.0
+    def __call__(self, messages: list[dict]):
+        return self.chat(messages)
+class SummaryMemory:
+    """
+    A class that manages a memory of messages and automatically summarizes them when the maximum token limit is reached.
+    Attributes:
+        max_token_limit (int): The maximum number of tokens allowed in the memory before summarization occurs.
+        messages (list[dict]): A list of messages in the memory.
+        model (str): The name of the GPT model to use for chat completion.
+        ai_role (str): The role of the AI in the conversation.
+        human_role (str): The role of the human in the conversation.
+        auto_summarize (bool): Whether to automatically summarize the messages when the maximum token limit is reached.
+    """
+    # ...
+    summary_template = "Summarize the following messages into a paragraph and replace '{user}' with '{human_role}', and '{assistant}' with '{ai_role}':\n{messages}"
+    def __init__(
+        self,
+        system_prompt="",
+        max_token_limit=4000,
+        model="gpt-3.5-turbo",
+        ai_role="answer",
+        human_role="question/exam",
+        auto_summarize=False,
+    ):
+        self.max_token_limit = max_token_limit
+        self.messages: list[dict] = []
+        self.model = model
+        self.ai_role = ai_role
+        self.human_role = human_role
+        self.auto_summarize = auto_summarize
+        self.system_prompt = system_prompt
+        self.reset()
+    def reset(self):
+        self.messages = [self.system_prompt]
+    def remove_last(self):
+        if len(self.messages) > 1:  # don't remove the system prompt
+            self.messages.pop()
+    def remove(
+        self, index: int
+    ):  # don't remove the system prompt and start counting from 1
+        if index > 0 and index < len(self.messages):
+            self.messages.pop(index)
+    def replace(self, index: int, message: dict):
+        if index > 0 and index < len(self.messages):
+            self.messages[index] = message
+    def change_system_prompt(self, new_prompt: str):
+        self.system_prompt = new_prompt
+        self.messages[0] = new_prompt
+    def remove_first(self):
+        # dont remove the system prompt
+        if len(self.messages) > 1:
+            self.messages.pop(1)  # remove the first message after the system prompt
+    def append(self, message: dict):
+        total_tokens = num_tokens_from_messages(self.messages + [message])
+        while (
+            self.auto_summarize and total_tokens > self.max_token_limit
+        ):  # keep summarizing until we're under the limit
+            self.summarize()
+            total_tokens = num_tokens_from_messages(self.messages + [message])
+        self.messages.append(message)
+    def summarize(self):
+        prompt = make_prompt(
+            self.summary_template,
+            user="user",
+            human_role=self.human_role,
+            assistant="assistant",
+            ai_role=self.ai_role,
+            messages="\n".join(
+                unravel_messages(self.messages[1:])
+            ),  # don't include the system prompt
+        )
+        summary = chat(
+            messages=[make_message("user", prompt)],
+            model=self.model,
+        )
+        self.reset()
+        self.append(make_message("user", summary))
+    def get_messages(self):
+        return self.messages[1:]  # don't include the system prompt
+    def get_unraveled_messages(self):
+        return unravel_messages(self.messages[1:])
+class MemoryBuffer:
+    """
+    A class that manages a buffer of messages and clips them to a maximum token limit.
+    Attributes:
+        max_token_limit (int): The maximum number of tokens allowed in the buffer.
+        messages (list[dict]): A list of messages in the buffer.
+    """
+    def __init__(
+        self,
+        system_prompt,
+        max_token_limit=1000,
+    ):
+        """
+        Initializes a new instance of the MemoryBuffer class.
+        Args:
+            max_token_limit (int, optional): The maximum number of tokens allowed in the buffer. Defaults to 1000.
+        """
+        self.max_token_limit = max_token_limit
+        self.messages = []
+        self.system_prompt = system_prompt
+        self.reset()
+    def reset(self):
+        """
+        Resets the buffer by clearing all messages.
+        """
+        self.messages = [self.system_prompt]
+    def add(self, message: dict):
+        """
+        Adds a message to the buffer and clips the buffer to the maximum token limit.
+        Args:
+            message (dict): The message to add to the buffer.
+        """
+        total_tokens = num_tokens_from_messages(self.messages + [message])
+        if total_tokens > self.max_token_limit:
+            # clip the messages to the max token limit
+            # from the end of the list
+            # remove messages from the beginning of the list
+            # until the total number of tokens is less than the max token limit
+            while total_tokens > self.max_token_limit:
+                self.messages = self.messages[1:]
+                total_tokens = num_tokens_from_messages(self.messages + [message])
+        self.messages.append(message)
+    def remove(self, message: dict):
+        """
+        Removes a message from the buffer.
+        Args:
+            message (dict): The message to remove from the buffer.
+        """
+        if message in self.messages:
+            self.messages.remove(message)
+    def remove_last(self):
+        """
+        Removes the last message from the buffer.
+        """
+        if len(self.messages) > 0:
+            self.messages.pop()
+    def remove_first(self):
+        """
+        Removes the first message from the buffer.
+        """
+        if len(self.messages) > 0:
+            self.messages.pop(0)
+def json2dict(string: str) -> dict:
+    """Returns a dictionary of variables from a string containing JSON."""
+    try:
+        return json.loads(string)
+    except json.decoder.JSONDecodeError:
+        print("Error: JSONDecodeError")
+        return {}
+def print_help(num_nodes, color):
+    """
+    Prints the help message for the AI assistant.
+    """
+    colorama.init()
+    print(color + "The AI assistant presents a clinical case and asks for a diagnosis.")
+    print(
+        color + "You need to explore the case by asking questions to the AI assistant."
+    )
+    print(
+        color
+        + "You have to ask questions in a logical order, conforming to the clinical guidelines."
+    )
+    print(
+        color
+        + "You need to minimize the number of jump between subjects, while covering as many subjects as possible."
+    )
+    print(color + f"there are a total of {num_nodes} visitable nodes in the tree")
+    print(
+        color
+        + "you have to explore the tree as much as possible while avoiding jumps and travelling excessively."
+    )
+    print(Style.RESET_ALL)
+def make_question(template=JSON_TEMPLATE, role="user", **kwargs) -> dict:
+    prompt = make_prompt(template=template, **kwargs)
+    message = make_message(role, prompt)
+    return message
+# a debugging decorator and use functools to preserve the function name and docstring
+# the decorator gets DEBUG as an argument to turn on or off debugging
+def debug(DEBUG, print_func, measure_time=True):
+    def decorator(func):
+        @ft.wraps(func)
+        def wrapper(*args, **kwargs):
+            if DEBUG:
+                print_func(f"\nCalling {func.__name__}")
+            if measure_time and DEBUG:
+                start = time.time()
+            result = func(*args, **kwargs)
+            if measure_time and DEBUG:
+                end = time.time()
+                print_func(f"Elapsed time: {end - start:.2f}s")
+            if DEBUG:
+                print_func(f"Returning {func.__name__}")
+            return result
+        return wrapper
+    return decorator
+# to use the decorator, add @debug(DEBUG, print_func) above the function definition