{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "540be26eab654b3e90c0b8ca2ba94f70": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_c102ae74d328466fae6141cacd8f3e25", "IPY_MODEL_5f8d4dccc23c475e8502cd3d46e988e6", "IPY_MODEL_67e6a66fc4df46beb14b13717a5c7654" ], "layout": "IPY_MODEL_82ee2a082d35495b9b65403c033fee36" } }, "c102ae74d328466fae6141cacd8f3e25": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_4c7f39566e9c44c8840a8fbdf4af4927", "placeholder": "​", "style": "IPY_MODEL_2b2f87c2c83b4e708dd9e03e56cb8956", "value": "Extracting pages: 100%" } }, "5f8d4dccc23c475e8502cd3d46e988e6": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_54893bd5490643fba052ee0e5effe686", "max": 275, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_7bde998b894c46679c12fbc4c7bdeb9f", "value": 275 } }, "67e6a66fc4df46beb14b13717a5c7654": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_97bd2a6fda164953b97c79075e53b5e5", "placeholder": "​", "style": "IPY_MODEL_4a625d3edd18469abf0c7bf808bd1aa2", "value": " 275/275 [00:36<00:00, 103.10it/s]" } }, "82ee2a082d35495b9b65403c033fee36": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4c7f39566e9c44c8840a8fbdf4af4927": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2b2f87c2c83b4e708dd9e03e56cb8956": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "54893bd5490643fba052ee0e5effe686": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7bde998b894c46679c12fbc4c7bdeb9f": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "97bd2a6fda164953b97c79075e53b5e5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4a625d3edd18469abf0c7bf808bd1aa2": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "98b3694ab2cc4876ab8bab653cb4634c": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_fa5b5c063f1f4e0980907d5df3c4412f", "IPY_MODEL_6f5cbbe713eb4ba99f386fd5f7427b16", "IPY_MODEL_48764d985ca54feba32a4bf6ecbb8a05" ], "layout": "IPY_MODEL_a709de25b193460db265b354813fadb5" } }, "fa5b5c063f1f4e0980907d5df3c4412f": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_ae2b5fadbcf0480b8a4d38423159ed47", "placeholder": "​", "style": "IPY_MODEL_f142164079814e8f87afdaced20780f0", "value": "model.safetensors: 100%" } }, "6f5cbbe713eb4ba99f386fd5f7427b16": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_196fc1d4033a47aa8b7c4053c4958f06", "max": 1625222120, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_f2ab964e5b654b14af87a34417b73cc4", "value": 1625222120 } }, "48764d985ca54feba32a4bf6ecbb8a05": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_305a23ed5f8f485eb8e0137fa519aaab", "placeholder": "​", "style": "IPY_MODEL_25de583d901e493aa0daf755450b67c3", "value": " 1.63G/1.63G [00:17<00:00, 97.1MB/s]" } }, "a709de25b193460db265b354813fadb5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ae2b5fadbcf0480b8a4d38423159ed47": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f142164079814e8f87afdaced20780f0": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "196fc1d4033a47aa8b7c4053c4958f06": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f2ab964e5b654b14af87a34417b73cc4": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "305a23ed5f8f485eb8e0137fa519aaab": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "25de583d901e493aa0daf755450b67c3": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "03af7e4d500b4078a541642d6c854e47": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_12964e5261734dcf9120d87f7213b238", "IPY_MODEL_5a62e8567c2444a8ba2ceccb31e72f06", "IPY_MODEL_ed1e430df02b413f952bb9dc75e544be" ], "layout": "IPY_MODEL_8205712874d843859071378bd3cd194a" } }, "12964e5261734dcf9120d87f7213b238": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_ca139255d2cb44c794c00500d2b393ab", "placeholder": "​", "style": "IPY_MODEL_9a4048ffa42a44248611c6ee656b1b62", "value": "Loading weights: 100%" } }, "5a62e8567c2444a8ba2ceccb31e72f06": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f9c3834a70a049e28a1d815e812f9385", "max": 511, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_3b3034bc1c7546b6ba6c611a34a3bf4e", "value": 511 } }, "ed1e430df02b413f952bb9dc75e544be": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_4eca6c5ea9554f419f1f2a107228a646", "placeholder": "​", "style": "IPY_MODEL_1e077e02745b4f0d97b790c48c57f0d5", "value": " 511/511 [00:01<00:00, 309.70it/s, Materializing param=model.encoder.layers.11.self_attn_layer_norm.weight]" } }, "8205712874d843859071378bd3cd194a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ca139255d2cb44c794c00500d2b393ab": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9a4048ffa42a44248611c6ee656b1b62": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f9c3834a70a049e28a1d815e812f9385": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3b3034bc1c7546b6ba6c611a34a3bf4e": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "4eca6c5ea9554f419f1f2a107228a646": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1e077e02745b4f0d97b790c48c57f0d5": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "4722c48c787442f1a429e7706dbbb744": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_d283fdef0ecc419b8c8c94a822a0571f", "IPY_MODEL_9f8789723f24444891168b9daebacee9", "IPY_MODEL_84f7c67464c44d9f9a453c0eede0105d" ], "layout": "IPY_MODEL_39e903a957824d728b7d336510e491e1" } }, "d283fdef0ecc419b8c8c94a822a0571f": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_70cea971619c4fcfab70f0b9ab3d36ce", "placeholder": "​", "style": "IPY_MODEL_476ad5b638de4a50b24a56c6289dc1df", "value": "generation_config.json: 100%" } }, "9f8789723f24444891168b9daebacee9": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_4674fc3d2ef54a44afabaf172cb3e814", "max": 363, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_b2fb74e32b184f9eb489b68f1c56cc34", "value": 363 } }, "84f7c67464c44d9f9a453c0eede0105d": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e16c764e6692499288595619696d90dc", "placeholder": "​", "style": "IPY_MODEL_cc8ca7da2c7b4c338314c27f4f197573", "value": " 363/363 [00:00<00:00, 9.11kB/s]" } }, "39e903a957824d728b7d336510e491e1": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "70cea971619c4fcfab70f0b9ab3d36ce": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "476ad5b638de4a50b24a56c6289dc1df": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "4674fc3d2ef54a44afabaf172cb3e814": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b2fb74e32b184f9eb489b68f1c56cc34": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "e16c764e6692499288595619696d90dc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "cc8ca7da2c7b4c338314c27f4f197573": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "5ab4dd0db7ea444780a43b31823ac8b4": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_aa198fc463d34ebf91fa8d0c5a0fd796", "IPY_MODEL_b7383ab084034dfdbd979d2572514114", "IPY_MODEL_17453ebf93ab47b28984657e1e33c3f4" ], "layout": "IPY_MODEL_4661fa64489f499e9f4cc4c94973a733" } }, "aa198fc463d34ebf91fa8d0c5a0fd796": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0f4795ba0d9a4d7587b79bfd44d0c412", "placeholder": "​", "style": "IPY_MODEL_95f0d14f4ff64bc6a1ca68e936c1f57e", "value": "Summarizing chapters: 100%" } }, "b7383ab084034dfdbd979d2572514114": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_21c899d6314e4f39ac51787659348918", "max": 17, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_469438ba3a8040669961be285cde7261", "value": 17 } }, "17453ebf93ab47b28984657e1e33c3f4": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3a30fc7003414385a0ea5bee6fd4fffb", "placeholder": "​", "style": "IPY_MODEL_86c4b494d5354ee097efcc42ef08261c", "value": " 17/17 [07:18<00:00, 28.08s/it]" } }, "4661fa64489f499e9f4cc4c94973a733": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0f4795ba0d9a4d7587b79bfd44d0c412": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "95f0d14f4ff64bc6a1ca68e936c1f57e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "21c899d6314e4f39ac51787659348918": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "469438ba3a8040669961be285cde7261": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "3a30fc7003414385a0ea5bee6fd4fffb": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "86c4b494d5354ee097efcc42ef08261c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "8902daf7a1ce4cb9ab41f148fb15e64b": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_a6f33682264a48659521aff622244258", "IPY_MODEL_77e0551a12a744e7bc232047c71c94a8", "IPY_MODEL_6116184457a644d6a85ed024e2d11b18" ], "layout": "IPY_MODEL_0093d035e241484f828ab4b62b9674ae" } }, "a6f33682264a48659521aff622244258": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f3bdd3f55bc543e5a07707e4857afa36", "placeholder": "​", "style": "IPY_MODEL_285b043225b1474db5a58608b2ec940c", "value": "Building big organized summary: 100%" } }, "77e0551a12a744e7bc232047c71c94a8": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_81e3478fe6a04384a7c054c9aad0c215", "max": 6, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_73a22c3867044f008471a022fde09548", "value": 6 } }, "6116184457a644d6a85ed024e2d11b18": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_66532e20e18e4160b1bf25e1857a4078", "placeholder": "​", "style": "IPY_MODEL_b8ffee0f3cd043468951fedc96d0b9d1", "value": " 6/6 [00:19<00:00,  3.19s/it]" } }, "0093d035e241484f828ab4b62b9674ae": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f3bdd3f55bc543e5a07707e4857afa36": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "285b043225b1474db5a58608b2ec940c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "81e3478fe6a04384a7c054c9aad0c215": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "73a22c3867044f008471a022fde09548": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "66532e20e18e4160b1bf25e1857a4078": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b8ffee0f3cd043468951fedc96d0b9d1": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "e0412ff5e1fc4ae0bc8292049940759b": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_45ab3f11316e4f5eb4567d0eb9b9e0d2", "IPY_MODEL_a81b6eb8e69548138b67a4cb7da83184", "IPY_MODEL_f0401ee52b6048aa8761e4329f5be659" ], "layout": "IPY_MODEL_a7969073a909412bb5cdee30bb89ae5d" } }, "45ab3f11316e4f5eb4567d0eb9b9e0d2": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_67462fc9e2b94cf6ad0362560b33dd7b", "placeholder": "​", "style": "IPY_MODEL_4a3269c514894e46b54046f185ddbc04", "value": "Writing model shards: 100%" } }, "a81b6eb8e69548138b67a4cb7da83184": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0aed905240d748c9aed4ed055dc6bf09", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_96a72d959eec4cb497ccdb4872cad1b3", "value": 1 } }, "f0401ee52b6048aa8761e4329f5be659": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_97561257035b4e068011fd8d250399d1", "placeholder": "​", "style": "IPY_MODEL_b06fb620635b4a1384f14ecac6da299e", "value": " 1/1 [00:17<00:00, 17.59s/it]" } }, "a7969073a909412bb5cdee30bb89ae5d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "67462fc9e2b94cf6ad0362560b33dd7b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4a3269c514894e46b54046f185ddbc04": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "0aed905240d748c9aed4ed055dc6bf09": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "96a72d959eec4cb497ccdb4872cad1b3": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "97561257035b4e068011fd8d250399d1": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b06fb620635b4a1384f14ecac6da299e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "markdown", "source": [ "Book summarization (general-purpose) using Hugging Face seq2seq models like facebook/bart-large-cnn\n", "- Accepts any book as a long text\n", "- Intelligently divides it into paragraphs/sentences, respecting the token limit\n", "- Summarizes each part sequentially with optional overlap to maintain context/line of events\n", "- Performs hierarchical map-reduction until a single final summary is generated\n", "\n", "\n", "Notes:\n", "- bart-large-cnn is excellent for English. If the books are in Arabic, the quality will likely be poor—choose an Arabic/multilingual model." ], "metadata": { "id": "0b-FKmNdv8jd" } }, { "cell_type": "code", "source": [ "# =========================\n", "# Cell 1 — Install deps (Colab)\n", "# =========================\n", "!apt-get -qq update\n", "!apt-get -qq install -y poppler-utils tesseract-ocr tesseract-ocr-eng tesseract-ocr-ara\n", "!pip -q install -U transformers accelerate sentencepiece pymupdf pdf2image pytesseract pillow tqdm" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "B5NuIDkSOUlZ", "outputId": "ffa24621-beb9-40c9-f403-9112d3cf6ffd" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)\n" ] } ] }, { "cell_type": "code", "source": [ "# =========================\n", "# Cell 2 — Imports + Config\n", "# =========================\n", "import os, re, json\n", "from pathlib import Path\n", "from math import ceil\n", "\n", "import torch\n", "from tqdm.auto import tqdm\n", "\n", "import fitz # pymupdf\n", "from pdf2image import convert_from_path\n", "import pytesseract\n", "from PIL import ImageOps, ImageEnhance\n", "\n", "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM\n", "\n", "OUTPUT_DIR = Path(\"/content/output\")\n", "OUTPUT_DIR.mkdir(parents=True, exist_ok=True)\n", "\n", "# Model (English-focused)\n", "MODEL_NAME = \"facebook/bart-large-cnn\" # https://huggingface.co/facebook/bart-large-cnn\n", "\n", "# OCR\n", "OCR_LANG = \"eng+ara\"\n", "OCR_DPI = 250\n", "NATIVE_MIN_CHARS_PER_PAGE = 60 # if native extracted text < this => OCR that page\n", "\n", "# Summarization quality/speed knobs\n", "BATCH_SIZE = 4\n", "NUM_BEAMS = 4\n", "NO_REPEAT_NGRAM_SIZE = 3\n", "EARLY_STOPPING = False\n", "\n", "# Chunking\n", "MAX_INPUT_TOKENS = 1024\n", "HEADROOM_TOKENS = 16\n", "EFFECTIVE_MAX_INPUT = MAX_INPUT_TOKENS - HEADROOM_TOKENS\n", "OVERLAP_SENTENCES = 2\n", "\n", "# Output size (big + محترم)\n", "CHAPTER_MAX_NEW_TOKENS_CAP = 320 # max tokens generated per chapter summary\n", "CHAPTER_MIN_NEW_TOKENS_FLOOR = 120\n", "BOOK_PARTS = 8 # final organized \"big\" summary in N parts\n", "\n", "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "print(\"Device:\", device)\n", "print(\"Output folder:\", OUTPUT_DIR)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "djCQTdpuOUh4", "outputId": "3e09c430-5dce-4bf6-d274-47a002d3fba1" }, "execution_count": 3, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Device: cuda\n", "Output folder: /content/output\n" ] } ] }, { "cell_type": "code", "source": [ "# =========================\n", "# Cell 3 — Upload input (PDF or TXT)\n", "# =========================\n", "from google.colab import files\n", "\n", "uploaded = files.upload()\n", "INPUT_PATH = Path(next(iter(uploaded.keys()))).resolve()\n", "\n", "print(\"Uploaded:\", INPUT_PATH)\n", "print(\"Suffix:\", INPUT_PATH.suffix.lower())" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 145 }, "id": "V0Agkn2COUfL", "outputId": "5848ff76-ea10-4bfa-941c-161d8c9ef652" }, "execution_count": 4, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " \n", " Upload widget is only available when the cell has been executed in the\n", " current browser session. Please rerun this cell to enable.\n", " \n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Saving Harry Potter- Complete Collection-1-275.pdf to Harry Potter- Complete Collection-1-275.pdf\n", "Uploaded: /content/Harry Potter- Complete Collection-1-275.pdf\n", "Suffix: .pdf\n" ] } ] }, { "cell_type": "code", "source": [ "# =========================\n", "# Cell 4 — PDF/TXT -> Clean TXT (robust native + per-page OCR fallback)\n", "# =========================\n", "_SENT_BOUNDARY_RE = re.compile(r\"(?<=[\\.\\!\\?\\u061F\\u06D4\\u061B…])\\s+\") # . ! ? ؟ ۔ ؛ …\n", "\n", "def normalize_text(text: str) -> str:\n", " text = text.replace(\"\\r\\n\", \"\\n\").replace(\"\\r\", \"\\n\")\n", " text = re.sub(r\"[ \\t]+\", \" \", text)\n", " text = re.sub(r\"\\n{3,}\", \"\\n\\n\", text)\n", " return text.strip()\n", "\n", "def ocr_image_pil(img):\n", " # Light preprocessing to improve OCR\n", " img = img.convert(\"RGB\")\n", " img = ImageOps.grayscale(img)\n", " img = ImageEnhance.Contrast(img).enhance(1.6)\n", " return img\n", "\n", "def ocr_pdf_page(pdf_path: Path, page_number_1based: int, dpi: int = OCR_DPI, lang: str = OCR_LANG) -> str:\n", " images = convert_from_path(\n", " str(pdf_path),\n", " dpi=dpi,\n", " first_page=page_number_1based,\n", " last_page=page_number_1based,\n", " fmt=\"png\",\n", " thread_count=2,\n", " )\n", " img = images[0]\n", " img = ocr_image_pil(img)\n", " return pytesseract.image_to_string(img, lang=lang)\n", "\n", "def pdf_to_text_smart(pdf_path: Path,\n", " native_min_chars_per_page: int = NATIVE_MIN_CHARS_PER_PAGE) -> str:\n", " doc = fitz.open(str(pdf_path))\n", " parts = []\n", "\n", " for i in tqdm(range(doc.page_count), desc=\"Extracting pages\"):\n", " page = doc.load_page(i)\n", " native = (page.get_text(\"text\") or \"\").strip()\n", " native_compact_len = len(re.sub(r\"\\s+\", \"\", native))\n", "\n", " if native_compact_len >= native_min_chars_per_page:\n", " parts.append(native)\n", " else:\n", " ocr = ocr_pdf_page(pdf_path, page_number_1based=i+1)\n", " parts.append(ocr)\n", "\n", " doc.close()\n", " return normalize_text(\"\\n\\n\".join(parts))\n", "\n", "def ensure_txt(input_path: Path) -> Path:\n", " out_txt = OUTPUT_DIR / f\"{input_path.stem}.txt\"\n", " suf = input_path.suffix.lower()\n", "\n", " if suf == \".txt\":\n", " raw = input_path.read_text(encoding=\"utf-8\", errors=\"ignore\")\n", " out_txt.write_text(normalize_text(raw), encoding=\"utf-8\")\n", " return out_txt\n", "\n", " if suf == \".pdf\":\n", " text = pdf_to_text_smart(input_path)\n", " out_txt.write_text(text, encoding=\"utf-8\")\n", " return out_txt\n", "\n", " raise ValueError(\"Unsupported type. Upload .pdf or .txt only.\")\n", "\n", "BOOK_TXT_PATH = ensure_txt(INPUT_PATH)\n", "BOOK_TEXT = BOOK_TXT_PATH.read_text(encoding=\"utf-8\", errors=\"ignore\")\n", "\n", "print(\"Saved TXT:\", BOOK_TXT_PATH)\n", "print(\"Chars:\", len(BOOK_TEXT))\n", "print(\"Head preview:\\n\", BOOK_TEXT[:800])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000, "referenced_widgets": [ "540be26eab654b3e90c0b8ca2ba94f70", "c102ae74d328466fae6141cacd8f3e25", "5f8d4dccc23c475e8502cd3d46e988e6", "67e6a66fc4df46beb14b13717a5c7654", "82ee2a082d35495b9b65403c033fee36", "4c7f39566e9c44c8840a8fbdf4af4927", "2b2f87c2c83b4e708dd9e03e56cb8956", "54893bd5490643fba052ee0e5effe686", "7bde998b894c46679c12fbc4c7bdeb9f", "97bd2a6fda164953b97c79075e53b5e5", "4a625d3edd18469abf0c7bf808bd1aa2" ] }, "id": "sT4ax8ScOUcs", "outputId": "b6d1f587-6b7d-45c0-f21e-dfeca06302e9" }, "execution_count": 5, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Extracting pages: 0%| | 0/275 [00:00 int:\n", " return len(tokenizer.encode(s, add_special_tokens=False))\n", "\n", "def split_by_tokens(s: str, max_len: int, overlap_tokens: int = 64):\n", " ids = tokenizer.encode(s, add_special_tokens=False)\n", " if len(ids) <= max_len:\n", " return [s.strip()]\n", " overlap_tokens = max(0, min(overlap_tokens, max_len // 3))\n", " step = max(1, max_len - overlap_tokens)\n", " parts = []\n", " for i in range(0, len(ids), step):\n", " chunk_ids = ids[i:i+max_len]\n", " if not chunk_ids:\n", " continue\n", " t = tokenizer.decode(chunk_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True).strip()\n", " if t:\n", " parts.append(t)\n", " return parts\n", "\n", "def chunk_text(text: str, max_input_tokens: int = EFFECTIVE_MAX_INPUT, overlap_sentences: int = OVERLAP_SENTENCES):\n", " \"\"\"\n", " Professional chunking:\n", " - pack sentences under token limit\n", " - add sentence overlap between chunks for continuity\n", " - if a single sentence is too long => token-split it\n", " \"\"\"\n", " text = normalize_text(text)\n", " if not text:\n", " return []\n", "\n", " chunks = []\n", " cur_sents, cur_tok = [], 0\n", "\n", " def flush():\n", " nonlocal cur_sents, cur_tok\n", " if cur_sents:\n", " ch = \" \".join(cur_sents).strip()\n", " if ch:\n", " chunks.append(ch)\n", " cur_sents, cur_tok = [], 0\n", "\n", " for para in iter_paragraphs(text):\n", " for sent in split_sentences(para):\n", " st = sent.strip()\n", " if not st:\n", " continue\n", " st_tok = tok_len(st)\n", "\n", " if st_tok > max_input_tokens:\n", " flush()\n", " chunks.extend(split_by_tokens(st, max_len=max_input_tokens, overlap_tokens=64))\n", " continue\n", "\n", " if cur_tok + st_tok <= max_input_tokens:\n", " cur_sents.append(st)\n", " cur_tok += st_tok\n", " else:\n", " prev = cur_sents[:]\n", " flush()\n", " overlap = prev[-overlap_sentences:] if overlap_sentences and prev else []\n", " cur_sents = overlap + [st]\n", " cur_tok = tok_len(\" \".join(cur_sents))\n", "\n", " flush()\n", " return chunks" ], "metadata": { "id": "-YXsLU2-OUXN" }, "execution_count": 11, "outputs": [] }, { "cell_type": "code", "source": [ "# =========================\n", "# Cell 7 — Summarization helpers (map -> reduce) + \"organized big summary\"\n", "# =========================\n", "@torch.no_grad()\n", "def generate_summaries(texts, min_new_tokens, max_new_tokens, batch_size=BATCH_SIZE):\n", " outs = []\n", " for i in range(0, len(texts), batch_size):\n", " batch = texts[i:i+batch_size]\n", " enc = tokenizer(\n", " batch, return_tensors=\"pt\",\n", " truncation=True, padding=True,\n", " max_length=EFFECTIVE_MAX_INPUT\n", " ).to(device)\n", "\n", " try:\n", " gen = model.generate(\n", " **enc,\n", " num_beams=NUM_BEAMS,\n", " no_repeat_ngram_size=NO_REPEAT_NGRAM_SIZE,\n", " min_new_tokens=min_new_tokens,\n", " max_new_tokens=max_new_tokens,\n", " early_stopping=EARLY_STOPPING,\n", " )\n", " except TypeError:\n", " # fallback for older transformers\n", " gen = model.generate(\n", " **enc,\n", " num_beams=NUM_BEAMS,\n", " no_repeat_ngram_size=NO_REPEAT_NGRAM_SIZE,\n", " min_length=min_new_tokens,\n", " max_length=max_new_tokens,\n", " early_stopping=EARLY_STOPPING,\n", " )\n", "\n", " decoded = tokenizer.batch_decode(gen, skip_special_tokens=True, clean_up_tokenization_spaces=True)\n", " outs.extend([d.strip() for d in decoded])\n", " return outs\n", "\n", "def summarize_long_text(text: str, min_new: int, max_new: int):\n", " \"\"\"\n", " Summarize very long text reliably:\n", " - chunk -> summarize each chunk\n", " - if multiple chunk summaries, reduce them into one (still ordered)\n", " \"\"\"\n", " chunks = chunk_text(text)\n", " if not chunks:\n", " return \"\"\n", "\n", " # summarize chunks\n", " chunk_summaries = []\n", " for ch in chunks:\n", " tlen = tok_len(ch)\n", " # dynamic summary size per chunk (keeps it detailed)\n", " dyn_max = int(min(max_new, max(min_new, round(tlen * 0.18))))\n", " dyn_min = max(30, min(min_new, dyn_max - 10))\n", " chunk_summaries.append(generate_summaries([ch], dyn_min, dyn_max, batch_size=1)[0])\n", "\n", " if len(chunk_summaries) == 1:\n", " return chunk_summaries[0]\n", "\n", " # reduce in groups (keeps order)\n", " current = chunk_summaries\n", " for _ in range(6):\n", " combined = \"\\n\".join([f\"Part {i+1}: {t}\" for i, t in enumerate(current)])\n", " if tok_len(combined) <= EFFECTIVE_MAX_INPUT:\n", " return generate_summaries([combined], min_new, max_new, batch_size=1)[0]\n", "\n", " # too long -> chunk combined summaries and summarize each chunk\n", " sub_chunks = chunk_text(combined, overlap_sentences=1)\n", " current = generate_summaries(\n", " sub_chunks,\n", " min_new_tokens=max(60, min_new // 2),\n", " max_new_tokens=max(180, max_new // 2),\n", " batch_size=BATCH_SIZE\n", " )\n", " return \"\\n\".join(current).strip()\n", "\n", "def make_big_book_summary(chapter_summaries, parts=BOOK_PARTS):\n", " \"\"\"\n", " Organized \"big\" summary:\n", " - group chapter summaries into N parts\n", " - summarize each group into a longer part-summary\n", " - output stays structured and chronological\n", " \"\"\"\n", " chap_summaries = [s for s in chapter_summaries if s.strip()]\n", " if not chap_summaries:\n", " return []\n", "\n", " n = len(chap_summaries)\n", " group_size = max(1, ceil(n / parts))\n", " groups = [chap_summaries[i:i+group_size] for i in range(0, n, group_size)]\n", "\n", " part_summaries = []\n", " for gi, g in enumerate(tqdm(groups, desc=\"Building big organized summary\")):\n", " combined = \"\\n\".join([f\"ChapterSummary {gi+1}.{i+1}: {t}\" for i, t in enumerate(g)])\n", " ps = summarize_long_text(combined, min_new=220, max_new=520)\n", " part_summaries.append(ps.strip())\n", " return part_summaries" ], "metadata": { "id": "wA5TgbUROUUi" }, "execution_count": 12, "outputs": [] }, { "cell_type": "code", "source": [ "# =========================\n", "# Cell 8 — RUN: chapter summaries + big organized summary + save all outputs\n", "# =========================\n", "chapters = split_into_chapters(BOOK_TEXT)\n", "print(\"Detected chapters:\", len(chapters))\n", "print(\"First chapter title:\", chapters[0][0])\n", "\n", "# Save chapters as separate txt files (for debugging)\n", "chapters_dir = OUTPUT_DIR / f\"{BOOK_TXT_PATH.stem}_chapters\"\n", "chapters_dir.mkdir(parents=True, exist_ok=True)\n", "\n", "chapter_summaries = []\n", "chapter_meta = []\n", "\n", "for idx, (title, body) in enumerate(tqdm(chapters, desc=\"Summarizing chapters\")):\n", " safe_title = re.sub(r\"[^A-Za-z0-9 _-]+\", \"\", title)[:80].strip().replace(\" \", \"_\")\n", " ch_txt_path = chapters_dir / f\"{idx+1:03d}_{safe_title or 'CHAPTER'}.txt\"\n", " ch_txt_path.write_text(body, encoding=\"utf-8\")\n", "\n", " # chapter summary (detailed)\n", " # (إذا الفصل طويل جدًا summarize_long_text هيعمل chunking داخليًا)\n", " summary = summarize_long_text(\n", " body,\n", " min_new=CHAPTER_MIN_NEW_TOKENS_FLOOR,\n", " max_new=CHAPTER_MAX_NEW_TOKENS_CAP\n", " )\n", "\n", " chapter_summaries.append(summary)\n", " chapter_meta.append({\"index\": idx+1, \"title\": title, \"txt_path\": str(ch_txt_path)})\n", "\n", "# 1) Save per-chapter summaries (organized)\n", "chapter_summaries_path = OUTPUT_DIR / f\"{BOOK_TXT_PATH.stem}.chapter_summaries.txt\"\n", "with chapter_summaries_path.open(\"w\", encoding=\"utf-8\") as f:\n", " for i, (meta, summ) in enumerate(zip(chapter_meta, chapter_summaries), start=1):\n", " f.write(f\"===== CHAPTER {i}: {meta['title']} =====\\n\")\n", " f.write(summ.strip() + \"\\n\\n\")\n", "\n", "# 2) Save \"big organized book summary\" (multi-part, محترم وكبير)\n", "big_parts = make_big_book_summary(chapter_summaries, parts=BOOK_PARTS)\n", "big_summary_path = OUTPUT_DIR / f\"{BOOK_TXT_PATH.stem}.BIG_book_summary_parts.txt\"\n", "big_summary_path.write_text(\n", " \"\\n\\n\".join([f\"=== BOOK SUMMARY PART {i+1} ===\\n{p}\" for i, p in enumerate(big_parts)]),\n", " encoding=\"utf-8\"\n", ")\n", "\n", "# 3) Also save a single-file \"full\" summary by concatenating chapter summaries (very long, but super clear)\n", "full_concat_path = OUTPUT_DIR / f\"{BOOK_TXT_PATH.stem}.FULL_chapter_summaries_concat.txt\"\n", "full_concat_path.write_text(\"\\n\\n\".join(chapter_summaries), encoding=\"utf-8\")\n", "\n", "# 4) Metadata\n", "meta_path = OUTPUT_DIR / f\"{BOOK_TXT_PATH.stem}.meta.json\"\n", "meta_path.write_text(json.dumps({\n", " \"input_file\": str(INPUT_PATH),\n", " \"book_txt\": str(BOOK_TXT_PATH),\n", " \"model\": MODEL_NAME,\n", " \"device\": device,\n", " \"chapters_detected\": len(chapters),\n", " \"chapter_files_dir\": str(chapters_dir),\n", " \"outputs\": {\n", " \"chapter_summaries\": str(chapter_summaries_path),\n", " \"big_book_summary_parts\": str(big_summary_path),\n", " \"full_concat\": str(full_concat_path),\n", " }\n", "}, ensure_ascii=False, indent=2), encoding=\"utf-8\")\n", "\n", "print(\"\\nSaved outputs:\")\n", "print(\" - Chapter summaries:\", chapter_summaries_path)\n", "print(\" - BIG organized parts:\", big_summary_path)\n", "print(\" - FULL concat:\", full_concat_path)\n", "print(\" - Meta:\", meta_path)\n", "\n", "print(\"\\nPreview BIG summary part 1:\\n\")\n", "print(big_parts[0][:1500] if big_parts else \"N/A\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 309, "referenced_widgets": [ "5ab4dd0db7ea444780a43b31823ac8b4", "aa198fc463d34ebf91fa8d0c5a0fd796", "b7383ab084034dfdbd979d2572514114", "17453ebf93ab47b28984657e1e33c3f4", "4661fa64489f499e9f4cc4c94973a733", "0f4795ba0d9a4d7587b79bfd44d0c412", "95f0d14f4ff64bc6a1ca68e936c1f57e", "21c899d6314e4f39ac51787659348918", "469438ba3a8040669961be285cde7261", "3a30fc7003414385a0ea5bee6fd4fffb", "86c4b494d5354ee097efcc42ef08261c", "8902daf7a1ce4cb9ab41f148fb15e64b", "a6f33682264a48659521aff622244258", "77e0551a12a744e7bc232047c71c94a8", "6116184457a644d6a85ed024e2d11b18", "0093d035e241484f828ab4b62b9674ae", "f3bdd3f55bc543e5a07707e4857afa36", "285b043225b1474db5a58608b2ec940c", "81e3478fe6a04384a7c054c9aad0c215", "73a22c3867044f008471a022fde09548", "66532e20e18e4160b1bf25e1857a4078", "b8ffee0f3cd043468951fedc96d0b9d1" ] }, "id": "CkJ5UGcoOUR6", "outputId": "a48c0377-84e4-4a96-fb46-eca2504a44e6" }, "execution_count": 13, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Detected chapters: 17\n", "First chapter title: CHAPTER ONE\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Summarizing chapters: 0%| | 0/17 [00:00" ], "application/javascript": [ "\n", " async function download(id, filename, size) {\n", " if (!google.colab.kernel.accessAllowed) {\n", " return;\n", " }\n", " const div = document.createElement('div');\n", " const label = document.createElement('label');\n", " label.textContent = `Downloading \"${filename}\": `;\n", " div.appendChild(label);\n", " const progress = document.createElement('progress');\n", " progress.max = size;\n", " div.appendChild(progress);\n", " document.body.appendChild(div);\n", "\n", " const buffers = [];\n", " let downloaded = 0;\n", "\n", " const channel = await google.colab.kernel.comms.open(id);\n", " // Send a message to notify the kernel that we're ready.\n", " channel.send({})\n", "\n", " for await (const message of channel.messages) {\n", " // Send a message to notify the kernel that we're ready.\n", " channel.send({})\n", " if (message.buffers) {\n", " for (const buffer of message.buffers) {\n", " buffers.push(buffer);\n", " downloaded += buffer.byteLength;\n", " progress.value = downloaded;\n", " }\n", " }\n", " }\n", " const blob = new Blob(buffers, {type: 'application/binary'});\n", " const a = document.createElement('a');\n", " a.href = window.URL.createObjectURL(blob);\n", " a.download = filename;\n", " div.appendChild(a);\n", " a.click();\n", " div.remove();\n", " }\n", " " ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "application/javascript": [ "download(\"download_21b1dac7-802d-42cc-aa63-a5175256f68b\", \"litvision_output.zip\", 750893135)" ] }, "metadata": {} } ] }, { "cell_type": "code", "source": [], "metadata": { "id": "fIi8im7HS2h1" }, "execution_count": null, "outputs": [] } ] }