XA7 committed on
Commit
e037628
·
1 Parent(s): 40c9e65
.dockerignore ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ venv/
8
+ env/
9
+ ENV/
10
+
11
+ # Git
12
+ .git/
13
+ .gitignore
14
+
15
+ # Environment files
16
+ .env
17
+
18
+ # IDE
19
+ .vscode/
20
+ .idea/
21
+ *.swp
22
+ *.swo
23
+
24
+ # OS
25
+ .DS_Store
26
+ Thumbs.db
27
+
28
+ # Documentation and notes
29
+ *.md
30
+ !README.md
31
+
32
+ # Images and media (optional - remove if needed in the app)
33
+ *.jpg
34
+ *.jpeg
35
+ *.png
36
+ image.jpg
37
+
38
+ # Temporary files
39
+ *.pickle
40
+ token.pickle
41
+
42
+ # Scripts
43
+ *.sh
44
+
45
+ # Logs
46
+ *.log
.gitignore ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ dooms_day.sh
6
+ /dooms_day.sh
7
+ dooms_day.sh/
8
+ dooms_day.sh*
9
+ /Story Sessions
10
+ /Story-Generation
11
+ asd/
12
+ /asd/
13
+ /asd
14
+ # C extensions
15
+ *.so
16
+ venv
17
+ # Distribution / packaging
18
+ .Python
19
+ build/
20
+ develop-eggs/
21
+ dist/
22
+ downloads/
23
+ eggs/
24
+ .eggs/
25
+ lib/
26
+ lib64/
27
+ parts/
28
+ sdist/
29
+ var/
30
+ wheels/
31
+ share/python-wheels/
32
+ *.egg-info/
33
+ .installed.cfg
34
+ *.egg
35
+ MANIFEST
36
+
37
+ # PyInstaller
38
+ # Usually these files are written by a python script from a template
39
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
40
+ *.manifest
41
+ *.spec
42
+
43
+ # Installer logs
44
+ pip-log.txt
45
+ pip-delete-this-directory.txt
46
+
47
+ # Unit test / coverage reports
48
+ htmlcov/
49
+ .tox/
50
+ .nox/
51
+ .coverage
52
+ .coverage.*
53
+ .cache
54
+ nosetests.xml
55
+ coverage.xml
56
+ *.cover
57
+ *.py,cover
58
+ .hypothesis/
59
+ .pytest_cache/
60
+ cover/
61
+
62
+ # Translations
63
+ *.mo
64
+ *.pot
65
+
66
+ # Django stuff:
67
+ *.log
68
+ local_settings.py
69
+ db.sqlite3
70
+ db.sqlite3-journal
71
+
72
+ # Flask stuff:
73
+ instance/
74
+ .webassets-cache
75
+
76
+ # Scrapy stuff:
77
+ .scrapy
78
+
79
+ # Sphinx documentation
80
+ docs/_build/
81
+
82
+ # PyBuilder
83
+ .pybuilder/
84
+ target/
85
+
86
+ # Jupyter Notebook
87
+ .ipynb_checkpoints
88
+
89
+ # IPython
90
+ profile_default/
91
+ ipython_config.py
92
+
93
+ # pyenv
94
+ # For a library or package, you might want to ignore these files since the code is
95
+ # intended to run in multiple environments; otherwise, check them in:
96
+ # .python-version
97
+
98
+ # pipenv
99
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
100
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
101
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
102
+ # install all needed dependencies.
103
+ #Pipfile.lock
104
+
105
+ # UV
106
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
107
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
108
+ # commonly ignored for libraries.
109
+ #uv.lock
110
+
111
+ # poetry
112
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
113
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
114
+ # commonly ignored for libraries.
115
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
116
+ #poetry.lock
117
+
118
+ # pdm
119
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
120
+ #pdm.lock
121
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
122
+ # in version control.
123
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
124
+ .pdm.toml
125
+ .pdm-python
126
+ .pdm-build/
127
+
128
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
129
+ __pypackages__/
130
+
131
+ # Celery stuff
132
+ celerybeat-schedule
133
+ celerybeat.pid
134
+
135
+ # SageMath parsed files
136
+ *.sage.py
137
+
138
+ # Environments
139
+ .env
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Ruff stuff:
179
+ .ruff_cache/
180
+
181
+ # PyPI configuration file
182
+ .pypirc
.idea/.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
.idea/Hekaya3.iml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ <component name="PyDocumentationSettings">
9
+ <option name="format" value="GOOGLE" />
10
+ <option name="myDocStringFormat" value="Google" />
11
+ </component>
12
+ </module>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
5
+ <inspection_tool class="JSHint" enabled="true" level="ERROR" enabled_by_default="true" />
6
+ <inspection_tool class="JupyterPackageInspection" enabled="false" level="WARNING" enabled_by_default="false" />
7
+ <inspection_tool class="PyInterpreterInspection" enabled="false" level="WARNING" enabled_by_default="false" />
8
+ <inspection_tool class="PyPackageRequirementsInspection" enabled="false" level="WARNING" enabled_by_default="false" />
9
+ <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
10
+ <option name="ignoredErrors">
11
+ <list>
12
+ <option value="N806" />
13
+ </list>
14
+ </option>
15
+ </inspection_tool>
16
+ <inspection_tool class="PyShadowingNamesInspection" enabled="true" level="TEXT ATTRIBUTES" enabled_by_default="true" editorAttributes="CONSIDERATION_ATTRIBUTES" />
17
+ <inspection_tool class="PyUnresolvedReferencesInspection" enabled="false" level="WARNING" enabled_by_default="false" />
18
+ </profile>
19
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/Hekaya3.iml" filepath="$PROJECT_DIR$/.idea/Hekaya3.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use Python 3.10 slim image
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies for OpenCV and other libraries.
# --no-install-recommends keeps optional "recommended" packages out of the
# image; the apt lists are removed in the same layer so they are never stored.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better caching: the dependency layer is only
# rebuilt when requirements.txt itself changes.
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application files
COPY . .

# Create necessary directories
RUN mkdir -p content Story-Generation

# Expose Gradio default port
EXPOSE 7860

# Set environment variables for Gradio
ENV GRADIO_SERVER_NAME="0.0.0.0"
ENV GRADIO_SERVER_PORT=7860

# Run the application
CMD ["python", "app.py"]
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
TimeLog.md ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Key Performance Insights
2
+
3
+ ## 🔴 Slowest Functions
4
+ - **generate_direct_comic**: 118.74s total (2 calls, avg 59.37s)
5
+ - **generate_comic**: 76.33s total (2 calls, avg 38.16s)
6
+ - **generate_image_fn**: 76.20s total (2 calls, avg 38.10s)
7
+ - **generate_image_narration**: 36.40s total (2 calls, avg 18.20s)
8
+
9
+ ## 🟡 Medium Duration Functions
10
+ - **enhance_user_story**: 10.01s total (6 calls, avg 1.67s)
11
+ - **extract_comic_scenes**: 2.71s total (2 calls, avg 1.35s)
12
+
13
+ ## 🟢 Fast Functions
14
+ - All other functions executed in **milliseconds (< 0.01s each)**
15
+
16
+ ---
17
+
18
+ # Notable Observations
19
+
20
+ - **Huge Performance Difference**:
21
+ The second run of `generate_image_fn` took **73.82s** vs only **2.38s** in the first run – likely due to the API error in the first attempt vs successful generation in the second.
22
+
23
+ - **Error Impact**:
24
+ The first `generate_direct_comic` run (**22.15s**) was much faster than the second (**96.58s**) because the first failed at image generation due to the OpenAI verification error.
25
+
26
+ - **Most Function Calls**:
27
+ `enhance_user_story` and `_create_detail_focused_enhancement_prompt` were called **6 times each**, indicating retry logic during the enhancement process.
28
+
29
+ - **Total Runtime**:
30
+ The entire process took approximately **5.34 minutes (320.39s)** across **48 function calls**.
31
+
32
+ ---
33
+
34
+ ⚠️ **Main Bottleneck**: The **image generation and processing pipeline** accounts for **over 90%** of the total execution time.
TimeLog2.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ⏱️ Story Generation Time Log
2
+
3
+ ---
4
+
5
+ ## ✨ Story Enhancement
6
+ - 📝 **_create_detail_focused_enhancement_prompt (1)** → 0.0000s
7
+ - 📝 **_create_detail_focused_enhancement_prompt (2)** → 0.0000s
8
+ - 📝 **_create_detail_focused_enhancement_prompt (3)** → 0.0000s
9
+ - 🚀 **enhance_user_story (final)** → 0.0005s
10
+ - ⏳ **enhance_user_story (attempt span)** → 2.0014s
11
+ - ⏳ **enhance_user_story (overall span)** → 3.0035s
12
+
13
+ ---
14
+
15
+ ## 🏗️ Layout & Description Generation
16
+ - 🔢 **_find_all_factorizations** → 0.0000s
17
+ - 🧮 **_calculate_optimal_layout** → 0.0000s
18
+ - 📐 **_calculate_optimal_grid_layout** → 0.0010s
19
+ - 📝 **_get_optimal_layout_description** → 0.0010s
20
+ - 📖 **_create_detailed_story_description** → 0.0000s
21
+ - 🎨 **_get_enhanced_style_specifications** → 0.0000s
22
+ - 🔢 **_find_all_factorizations (2nd)** → 0.0000s
23
+ - 🧮 **_calculate_optimal_layout (2nd)** → 0.0000s
24
+ - 📐 **_calculate_optimal_grid_layout (2nd)** → 0.0000s
25
+ - 📊 **_create_comprehensive_technical_specifications** → 0.0010s
26
+ - 🛠️ **_create_advanced_quality_and_flow_instructions** → 0.0000s
27
+ - ✂️ **_assemble_prompt_with_smart_truncation** → 0.0000s
28
+ - 🖼️ **_create_comic_prompt** → 0.0020s
29
+
30
+ ---
31
+
32
+ ## 🎨 Comic Generation
33
+ - 🖌️ **generate_image_fn** → 69.6515s
34
+ - 🎭 **generate_comic** → 69.7712s
35
+ - 🎤 **generate_image_narration** → 29.2157s
36
+ - 📚 **generate_direct_comic** → 101.9924s
37
+
38
+ ---
39
+
40
+ ## 📸 Scene Extraction
41
+ - 🗂️ **extract_comic_scenes (1st run)** → 2.4218s
42
+ - 🗂️ **extract_comic_scenes (2nd run)** → 2.7060s
43
+
44
+ ---
45
+
46
+ # 📊 Performance Overview
47
+ - ✨ **Enhancement Total:** ~3.0s
48
+ - 🏗️ **Layout/Description Total:** ~0.005s
49
+ - 🎨 **Comic Generation Total:** ~201.6s
50
+ - 📸 **Scene Extraction Total:** ~5.1s
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from google.generativeai import configure
4
+ from ui.story_interface import create_story_interface
5
+ import config
6
+
7
def main():
    """Configure the Google client, build the Gradio UI, and serve it.

    The bind address and port are taken from the ``GRADIO_SERVER_NAME`` and
    ``GRADIO_SERVER_PORT`` environment variables, falling back to
    ``0.0.0.0`` and ``7860`` when they are unset, so the deployment
    environment can override them without a code change.

    Raises:
        ValueError: If ``GRADIO_SERVER_PORT`` is set to a non-integer value.
    """
    import warnings

    # Configure Google API. A missing key is reported up front instead of
    # surfacing later as an opaque failure on the first request.
    if not config.GOOGLE_API_KEY:
        warnings.warn(
            "GOOGLE_API_KEY is not set; Google API requests will fail.",
            RuntimeWarning,
            stacklevel=2,
        )
    configure(api_key=config.GOOGLE_API_KEY)

    # Custom CSS for better narration display
    custom_css = """
    .story-narration-box {
        background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%) !important;
        border: 3px solid #6366f1 !important;
        border-radius: 16px !important;
        padding: 24px !important;
        margin: 24px 0 !important;
        box-shadow: 0 8px 20px rgba(99, 102, 241, 0.15) !important;
        line-height: 1.8 !important;
        color: #1e293b !important;
        font-size: 16px !important;
        min-height: 120px !important;
    }

    .story-narration-box h3 {
        color: #4338ca !important;
        border-bottom: 3px solid #6366f1 !important;
        padding-bottom: 12px !important;
        margin-bottom: 20px !important;
        font-size: 20px !important;
        font-weight: bold !important;
        text-align: center !important;
    }

    .story-narration-box p {
        margin-bottom: 16px !important;
        text-align: left !important;
        color: #334155 !important;
        font-size: 16px !important;
        line-height: 1.7 !important;
        font-weight: 500 !important;
    }

    .story-narration-box div {
        color: #334155 !important;
        font-size: 16px !important;
    }

    #story_narration {
        max-height: 600px !important;
        overflow-y: auto !important;
        border: 2px solid #e2e8f0 !important;
        border-radius: 12px !important;
        background-color: #f8fafc !important;
    }

    #scene_narration {
        max-height: 500px !important;
        overflow-y: auto !important;
        border: 2px solid #e2e8f0 !important;
        border-radius: 12px !important;
    }

    /* Ensure all text in narration boxes is visible and prominent */
    #story_narration *, #scene_narration * {
        color: #334155 !important;
        font-size: 16px !important;
    }

    #story_narration h3, #scene_narration h3 {
        color: #4338ca !important;
        font-size: 20px !important;
        text-align: center !important;
    }

    /* Style for quick narration lines */
    .story-narration-box em {
        color: #64748b !important;
        font-style: italic !important;
        text-align: center !important;
        display: block !important;
        margin: 20px 0 !important;
    }
    """

    # Explicit launch() arguments are passed below, so the environment has to
    # be read here for an override to take effect. Defaults match the
    # previously hard-coded values.
    server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
    server_port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))

    # Create and launch the Gradio interface
    with gr.Blocks(
        theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"),
        css=custom_css,
    ) as demo:
        create_story_interface(demo)

    demo.launch(
        server_name=server_name,  # 0.0.0.0 allows external access
        server_port=server_port,
        share=False,  # Set to True if you want a public link
        debug=True,
    )
100
+
101
+ if __name__ == "__main__":
102
+ main()
config.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
+ load_dotenv()
5
+
6
# API credentials, read from the process environment. Each one is None when
# the corresponding variable is not set.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
BFL_API_KEY = os.getenv("BFL_API_KEY")

# Image style name -> descriptive text for that style.
# Insertion order is significant: IMAGE_STYLES below is derived from it.
IMAGE_STYLE_INFO = {
    "Comic Book Style": (
        "Professional comic book art with bold outlines, vibrant colors, and dynamic action poses. "
        "Perfect for multi-panel consistency with uniform character designs across panels."
    ),
    "Manga Style": (
        "Japanese manga aesthetic with distinctive toning, speed lines, and expressive character eyes. "
        "Excellent consistency for sequential storytelling with consistent character designs."
    ),
    "Cartoon Style": (
        "Animated cartoon style with exaggerated features, bright colors, and simplified designs. "
        "Ideal for maintaining character consistency across multiple panels in one image."
    ),
    "Photorealistic": (
        "Highly detailed photographic quality with accurate lighting, textures, and proportions. "
        "Consistent realistic style perfect for single-image multi-panel storytelling."
    ),
    "Cinematic Realism": (
        "Movie-like visuals with dramatic lighting, depth of field, and realistic composition. "
        "Cinematic consistency across panels with unified lighting and color grading."
    ),
    "Digital Painting": (
        "Digital art with realistic elements but visible brushwork and artistic interpretation. "
        "Consistent artistic style throughout multi-panel compositions."
    ),
}

# Style names in declaration order.
IMAGE_STYLES = [*IMAGE_STYLE_INFO]

# Default comic settings; image_style is one of the IMAGE_STYLE_INFO keys.
DEFAULT_COMIC_SETTINGS = dict(
    num_panels=12,
    max_scenes=12,
    image_style="Comic Book Style",
)

# Age-group label -> writing guidance for that audience.
# Insertion order is significant: AGE_GROUPS below is derived from it.
AGE_GROUP_INFO = {
    "3-5 (Pre-school)": "Very simple vocabulary, short sentences, playful tone suitable for pre-school children.",
    "6-8 (Kids)": "Simple vocabulary and sentences with lively, descriptive language for early readers.",
    "9-12 (Pre-teen)": "Balanced vocabulary with engaging narrative style appropriate for pre-teens.",
    "13-18 (Teen)": "Richer vocabulary, deeper themes and emotions suitable for teenagers.",
    "18+ (Adult)": "Full vocabulary, complex themes, and immersive detail suitable for adults.",
}

# Age-group labels in declaration order.
AGE_GROUPS = [*AGE_GROUP_INFO]
content/log.txt ADDED
File without changes
content/logs.txt ADDED
The diff for this file is too large to render. See raw diff
 
handover.md ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Comic Story Generator: Code Handover Document
2
+
3
+ **Date:** 2025-07-22
4
+ **Document Purpose:** This document provides a comprehensive technical handover for the Comic Story Generator project. It is intended for developers and future maintainers responsible for the deployment, maintenance, and extension of the application.
5
+
6
+ ---
7
+
8
+ ## 1. Project Overview
9
+
10
+ The Comic Story Generator is a web application that automatically creates multi-page, textless comic stories from a user-provided description. The application leverages generative AI to produce visually coherent narratives, focusing on character consistency, expressive emotion, and logical panel sequencing.
11
+
12
+ ### 1.1. Core Functionality
13
+
14
+ The application is designed to translate a textual story concept into a purely visual comic strip. Key characteristics include:
15
+
16
+ * **AI-Powered Narrative:** Utilizes Google's Gemini to interpret the user's concept and break it down into a structured, panel-by-panel narrative.
17
+ * **Visual Generation:** Employs a GPT-based image model to render complete comic pages based on the AI-generated narrative structure.
18
+ * **Intelligent Panel Detection:** Uses Gemini Vision to analyze the generated full-page image and accurately detect the boundaries of each panel, ensuring precise splitting.
19
+ * **Customization:** Offers users control over the output, including:
20
+ * **Layout:** Choice of panel count (from 4 to 24).
21
+ * **Length:** Generation of 1 to 10 pages.
22
+ * **Art Style:** A selection of visual styles, including "Classic Comic," "Manga," "Cartoon," "Digital Paint," and a high-contrast "Accessible" style designed for users with special needs.
23
+
24
+ ### 1.2. High-Level Workflow
25
+
26
+ The generation process follows a clear, multi-step pipeline:
27
+
28
+ 1. **User Input:** The user submits a short description of the desired story.
29
+ 2. **Story Generation:** The `StoryGenerator` component uses Gemini to create a detailed, scene-by-scene description for each comic panel.
30
+ 3. **Page Generation:** The `ComicGenerator` takes the panel descriptions and instructs the GPT-Image model to generate a single, composite image representing a full comic page with panels arranged in a grid.
31
+ 4. **Layout Analysis:** The generated page is passed to the `GeminiVision` component, which analyzes the image to identify the precise coordinates and boundaries of each panel.
32
+ 5. **Panel Splitting:** The application uses the coordinates from the vision analysis to accurately split the composite image into individual panel images.
33
+ 6. **Final Output:** The processed panels are presented to the user as a complete, multi-page visual story.
34
+
35
+ ---
36
+
37
+ ## 2. System Architecture
38
+
39
+ The application is built on a modular architecture composed of three primary classes, each responsible for a distinct part of the generation pipeline.
40
+
41
+ ### 2.1. System Diagram
42
+
43
+ ```mermaid
44
+ classDiagram
45
+ class StoryGenerator{
46
+ +generate_story(description: string) : list[string]
47
+ +enhance_visuals(panel_descriptions: list) : list[string]
48
+ }
49
+ class ComicGenerator{
50
+ +generate_page(panel_descriptions: list) : Image
51
+ +split_panels(page_image: Image, grid_layout: dict) : list[Image]
52
+ }
53
+ class GeminiVision{
54
+ +analyze_layout(page_image: Image) : dict
55
+ }
56
+
57
+ StoryGenerator "1" -- "1" ComicGenerator : Provides panel descriptions
58
+ ComicGenerator "1" -- "1" GeminiVision : Uses for layout analysis
59
+ ```
60
+
61
+ ### 2.2. Data Flow
62
+
63
+ The end-to-end data flow illustrates the interaction between the user, the application, and the underlying AI models.
64
+
65
+ ```mermaid
66
+ sequenceDiagram
67
+ participant User
68
+ participant App
69
+ participant Gemini as Gemini (Text/Story)
70
+ participant GPTImage as GPT-Image (Visuals)
71
+ participant GeminiVision as Gemini Vision (Analysis)
72
+
73
+ User->>+App: Submits story description
74
+ App->>+Gemini: Requests story structure from description
75
+ Gemini-->>-App: Returns panel-by-panel text descriptions
76
+ App->>+GPTImage: Requests comic page generation from descriptions
77
+ GPTImage-->>-App: Returns single full-page image
78
+ App->>+GeminiVision: Requests layout analysis of the image
79
+ GeminiVision-->>-App: Returns coordinates of each panel
80
+ App->>User: Displays final, split-panel comic
81
+ ```
82
+
83
+ ---
84
+
85
+ ## 3. Setup & Installation
86
+
87
+ ### 3.1. Prerequisites
88
+
89
+ * **Python:** Version 3.9 or higher.
90
+ * **API Keys:**
91
+ * An active OpenAI API key.
92
+ * An active Google API key with access to the Gemini family of models.
93
+
94
+ ### 3.2. Installation Steps
95
+
96
+ 1. **Clone the Repository:**
97
+ ```bash
98
+ git clone https://github.com/yourusername/Comic-Story-Generator.git
99
+ cd Comic-Story-Generator
100
+ ```
101
+
102
+ 2. **Create and Activate a Virtual Environment:**
103
+ ```bash
104
+ # Create the environment
105
+ python -m venv venv
106
+
107
+ # Activate the environment (macOS/Linux)
108
+ source venv/bin/activate
109
+
110
+ # Or, activate on Windows
111
+ # venv\Scripts\activate
112
+ ```
113
+
114
+ 3. **Install Dependencies:**
115
+ ```bash
116
+ pip install -r requirements.txt
117
+ ```
118
+
119
+ 4. **Configure Environment Variables:**
120
+ Create a `.env` file in the project root and add your API keys.
121
+ ```bash
122
+ echo "OPENAI_API_KEY=your_openai_key" > .env
123
+ echo "GOOGLE_API_KEY=your_google_key" >> .env
124
+ ```
125
+ *Note: Ensure the `.env` file is added to your `.gitignore` file to prevent committing secrets.*
126
+
127
+ ---
128
+
129
+ ## 4. Environment Variables / Secrets
130
+
131
+ The application requires the following environment variables to be set in a `.env` file at the project's root.
132
+
133
+ | Variable | Description | Required | Example |
134
+ | :--- | :--- | :--- | :--- |
135
+ | `OPENAI_API_KEY` | API key for the OpenAI service, used for GPT-Image generation. | Yes | `sk-xxxxxxxxxxxxxxxxxxxxxxxx` |
136
+ | `GOOGLE_API_KEY` | API key for Google AI services, used for Gemini (story structure) and Gemini Vision (layout analysis). | Yes | `AIzaSyxxxxxxxxxxxxxxxxxxxxx` |
137
+
138
+ ---
139
+
140
+ ## 5. How to Run
141
+
142
+ After completing the setup and installation steps, launch the application with the following command from the project's root directory:
143
+
144
+ ```bash
145
+ python app.py
146
+ ```
147
+
148
+ The application will start a local web server, and the interface will be accessible at the URL provided in the console (typically `http://127.0.0.1:7860`).
149
+
150
+ ---
151
+
152
+ ## 6. Deployment Instructions
153
+
154
+ [TODO] This section requires documentation for deploying the application to a production environment. Steps should include:
155
+ * Recommended hosting provider (e.g., AWS, Heroku, DigitalOcean).
156
+ * Instructions for setting up a production-grade web server (e.g., Gunicorn).
157
+ * Configuration of a reverse proxy (e.g., Nginx).
158
+ * Management of production environment variables/secrets.
159
+ * Process management (e.g., using `systemd`).
160
+
161
+ ---
162
+
163
+ ## 7. Core Components & Logic
164
+
165
+ The application logic is encapsulated in three main classes.
166
+
167
+ ### 7.1. `StoryGenerator`
168
+
169
+ * **Responsibility:** Handles the narrative creation phase.
170
+ * **`generate_story()`:** Takes the raw user description as input. It constructs a prompt for the Gemini model to elicit a structured response containing a list of detailed text descriptions, one for each comic panel.
171
+ * **`enhance_visuals()`:** Processes the panel descriptions to add specific visual cues and optimizations, particularly for the "Accessible" style, ensuring high contrast and simplified object representation.
172
+
173
+ ### 7.2. `ComicGenerator`
174
+
175
+ * **Responsibility:** Manages the visual generation and processing of the comic page.
176
+ * **`generate_page()`:** Aggregates the panel descriptions from `StoryGenerator` into a single, complex prompt for the GPT-Image model. This prompt instructs the AI to create one composite image with all panels laid out in a grid.
177
+ * **`split_panels()`:** Receives the generated page image and the layout data from `GeminiVision`. It uses this data to crop the page into individual panel images with high precision.
178
+
179
+ ### 7.3. `GeminiVision`
180
+
181
+ * **Responsibility:** Performs visual analysis on the generated comic page.
182
+ * **`analyze_layout()`:** This is the core of the intelligent panel-splitting feature. It takes the full-page image as input and uses the Gemini Vision model to visually identify the boundaries of each panel. It returns a dictionary containing the coordinates and dimensions of the detected grid. Detecting the layout from the rendered image is more robust than assuming a fixed grid layout, because the generated page does not always match the requested grid exactly.
183
+
184
+ ---
185
+
186
+ ## 8. Third-party Dependencies
187
+
188
+ The complete list of Python packages is specified in `requirements.txt`. Key dependencies include:
189
+
190
+ * **`openai`**: Python client for the OpenAI API.
191
+ * **`google-generativeai`**: Python client for the Google AI (Gemini) API.
192
+ * **`python-dotenv`**: For loading environment variables from the `.env` file.
193
+ * **`Pillow`**: For image manipulation (cropping and saving).
194
+ * **[Info Needed]**: The web framework used to build `app.py` (e.g., `gradio`, `flask`, `fastapi`).
195
+
196
+ ---
197
+
198
+ ## 9. Testing Instructions
199
+
200
+ [TODO] A testing framework has not been established for this project. Future work should include:
201
+ * **Test Suite Setup:** Choose and configure a testing framework (e.g., `pytest`).
202
+ * **Unit Tests:** Create unit tests for individual methods in `StoryGenerator`, `ComicGenerator`, and `GeminiVision`. This should involve mocking the API calls to AI services to test the data processing logic in isolation.
203
+ * **Integration Tests:** Develop tests for the entire generation pipeline, from user input to final split panels.
204
+ * **Continuous Integration:** Set up a CI pipeline (e.g., using GitHub Actions) to run tests automatically on pull requests.
205
+
206
+ ---
207
+
208
+ ## 10. Troubleshooting & Common Issues
209
+
210
+ [TODO] This section should be populated as common issues are identified. Potential areas to document include:
211
+ * **API Key Errors:** Steps to verify that API keys are correctly configured and have the necessary permissions.
212
+ * **Incoherent Stories:** Guidance on how to write effective initial descriptions to improve narrative quality.
213
+ * **Poor Panel Splitting:** Troubleshooting steps for when Gemini Vision fails to detect the layout correctly (e.g., checking image complexity, trying a different art style).
214
+ * **Long Generation Times:** Explanation of typical performance and factors that can cause delays (e.g., API provider latency, number of panels).
215
+
216
+ ---
217
+
218
+ ## 11. TODOs / Future Work
219
+
220
+ Based on the project's focus areas, the following are key areas for future development and contribution:
221
+
222
+ * **Core Generation Logic:**
223
+ * Improve character consistency across multiple pages.
224
+ * Experiment with different AI models for potentially better visual or narrative results.
225
+ * Add support for including text (dialogue, captions) as an optional feature.
226
+ * **UI/UX Enhancements:**
227
+ * Develop a more interactive interface for viewing and arranging panels.
228
+ * Allow users to regenerate individual panels without restarting the entire process.
229
+ * Add an option to export the final comic as a PDF or other formats.
230
+ * **Accessibility Improvements:**
231
+ * Further refine the "Accessible" art style based on user feedback.
232
+ * Implement ARIA attributes and ensure full keyboard navigability for the web interface.
233
+ * Add an "image description" feature where a text-to-speech engine can describe the generated panels.
234
+ * **Documentation:**
235
+ * Create a detailed API reference for developers looking to build on the platform.
236
+ * Write user-facing guides on how to get the best results from the generator.
237
+
238
+ ---
239
+
240
+ ## 12. Contact / Ownership Info
241
+
242
+ * **Source Code:** [https://github.com/yourusername/Comic-Story-Generator](https://github.com/yourusername/Comic-Story-Generator)
243
+ * **License:** This project is licensed under the **MIT License**. For full details, see the `LICENSE` file in the repository.
244
+ * **Primary Contact:** [Info Needed: Add primary maintainer's name and contact information (e.g., GitHub handle or email).]
image.jpg ADDED
kill.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ echo "Killing all Python processes..."
4
+ pkill -f python
models/__init__.py ADDED
File without changes
models/comic_image_generator.py ADDED
@@ -0,0 +1,1292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import base64
3
+ import os
4
+ from PIL import Image, ImageDraw, ImageFont
5
+ import config
6
+ import warnings
7
+ import textwrap
8
+ from pathlib import Path
9
+ import time
10
+ from models.image_generation import generate_image_fn
11
+ from google.generativeai import GenerativeModel
12
+ import json
13
+ import re
14
+ import tempfile
15
+ import shutil
16
+ from google.generativeai.types import GenerationConfig
17
+ from utils.comic_panel_splitter import split_comic_panels
18
+ import cv2
19
+ import numpy as np
20
+ from datetime import datetime
21
+ warnings.filterwarnings("ignore", message="IMAGE_SAFETY is not a valid FinishReason")
22
+
23
+
24
def log_execution(func):
    """Decorator that records the wall-clock timing of each successful call.

    After the wrapped callable returns, one line with its name, start time,
    end time and duration is appended to ``content/logs.txt`` (relative to the
    current working directory) and printed to stdout. If the wrapped callable
    raises, the exception propagates and nothing is logged.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same call signature that returns whatever ``func``
        returns. ``functools.wraps`` keeps the original ``__name__`` and
        ``__doc__`` visible on the wrapper.
    """
    # Local import keeps the decorator self-contained without touching the
    # module-level import block.
    import functools

    log_path = 'content/logs.txt'

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        start_str = datetime.fromtimestamp(start_time).strftime('%Y-%m-%d %H:%M:%S')

        result = func(*args, **kwargs)

        end_time = time.time()
        end_str = datetime.fromtimestamp(end_time).strftime('%Y-%m-%d %H:%M:%S')
        duration = end_time - start_time
        line = f"{func.__name__}, start: {start_str}, end: {end_str}, duration: {duration:.4f}s"

        # Timing logs are best-effort: a missing directory or an unwritable
        # file must not discard the result the wrapped call already produced.
        try:
            log_dir = os.path.dirname(log_path)
            if log_dir:
                os.makedirs(log_dir, exist_ok=True)
            with open(log_path, 'a', encoding='utf-8') as f:
                f.write(line + "\n")
        except OSError as exc:
            print(f"Could not write execution log to {log_path}: {exc}")

        # Also print so the timing is visible immediately.
        print(line)

        return result
    return wrapper
44
+
45
+ class ComicImageGenerator:
46
+ """
47
+ Generates a comic-style image.
48
+ """
49
+
50
+ def __init__(self):
51
+ pass
52
+
53
+
54
@log_execution
def generate_comic(self, story_data, output_path=None, style=None):
    """
    Generate a comic-style image based on the provided story data.

    The prompt is built from the story's title, description, characters and
    settings, then passed to ``generate_image_fn``. If image generation
    returns ``None`` or any step inside the guarded section raises, the result
    of ``_create_placeholder_comic`` is used instead.

    Args:
        story_data: Dictionary containing the story information. The keys
            read here are "title", "description", "characters" and "settings".
        output_path: Optional path to save the resulting image. Missing parent
            directories are created.
        style: Optional comic style to use

    Returns:
        tuple: ``(image, data_url)`` where ``image`` is the generated (or
        placeholder) image and ``data_url`` is a ``data:image/png;base64,...``
        string holding its PNG encoding.
    """
    title = story_data.get("title", "My Story")
    description = story_data.get("description", "")
    characters = story_data.get("characters", [])
    settings = story_data.get("settings", [])
    num_scenes = 9  # this entry point always requests a fixed nine-scene comic

    def _persist_and_encode(image):
        """Save *image* to ``output_path`` (when given) and return ``(image, data_url)``."""
        if output_path:
            directory = os.path.dirname(output_path)
            if directory:
                # exist_ok avoids the race between an existence check and creation.
                os.makedirs(directory, exist_ok=True)
            image.save(output_path)

        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
        return image, f"data:image/png;base64,{img_b64}"

    # Prompt construction stays outside the guarded section, as before: a
    # failure while building the prompt propagates to the caller.
    prompt = self._create_comic_prompt(title, description, characters, settings, style, num_scenes)

    try:
        print(f"Generating comic with {num_scenes} scenes...")

        comic_image = generate_image_fn(
            selected_prompt=prompt,
            output_path=output_path
        )

        if comic_image is None:
            comic_image = self._create_placeholder_comic(title, description)

        return _persist_and_encode(comic_image)

    except Exception as e:
        # Deliberately broad: any generation/saving failure degrades to a
        # placeholder image instead of failing the whole request.
        print(f"Error generating comic: {str(e)}")
        placeholder = self._create_placeholder_comic(title, description)
        return _persist_and_encode(placeholder)
118
+
119
@log_execution
def _create_comic_prompt(self, title, description, characters=None, settings=None, style=None, num_scenes=1):
    """
    Build the text prompt used to generate a multi-panel comic image.

    Sections are collected in priority order (layout, compact-panel guidance
    for 20 or more panels, characters, story, environments, technical specs,
    quality/flow), combined by ``_assemble_prompt_with_smart_truncation`` and
    then passed once to ``generate_panel_descriptions``.

    Args:
        title: Title of the story
        description: Visual description of the story
        characters: List of character data
        settings: List of setting data
        style: Optional visual style
        num_scenes: Number of scenes to include (1-24)

    Returns:
        str: The value returned by ``generate_panel_descriptions`` when it is
        truthy, otherwise the assembled prompt.
    """
    priority_sections = []

    layout_specs = self._get_optimal_layout_description(num_scenes)
    priority_sections.append(f"CRITICAL LAYOUT: {layout_specs}")

    if num_scenes >= 20:
        # The header names the requested panel count so it cannot contradict
        # the layout section above (it previously always said "20").
        # NOTE(review): "No text text" in the last entry looks like an editing
        # slip in the prompt wording; left unchanged pending confirmation.
        compact_instructions = [
            f"🎯 COMPACT SCENE MASTERY FOR {num_scenes} PANELS:",
            "SMALL EFFICIENT SCENES: Each panel must tell its story moment with maximum visual economy - focus on ONE key action, emotion, or story beat per panel",
            "CLEAR FOCAL POINTS: Every panel needs ONE main subject in sharp focus with minimal background distractions",
            "ESSENTIAL ELEMENTS ONLY: Include only the most crucial visual elements needed to advance the story - remove ALL unnecessary details",
            "READABLE AT SMALL SIZE: No text text, expressions, and actions must be clearly visible even when the panel is small - use bold, simple compositions"
        ]
        priority_sections.extend(compact_instructions)

    if characters:
        character_details = self._create_detailed_character_specifications(characters, num_scenes)
        priority_sections.extend(character_details)

    enhanced_story = self._create_detailed_story_description(description, title)
    priority_sections.append(enhanced_story)

    if settings:
        environment_details = self._create_detailed_environment_specifications(settings, num_scenes)
        priority_sections.extend(environment_details)

    technical_specs = self._create_comprehensive_technical_specifications(style, num_scenes)
    priority_sections.extend(technical_specs)

    quality_flow = self._create_advanced_quality_and_flow_instructions(num_scenes)
    priority_sections.extend(quality_flow)

    assembled_prompt = self._assemble_prompt_with_smart_truncation(priority_sections)

    # Call the refinement step exactly once and reuse its result. The earlier
    # code invoked it twice with the same input (once to test, once to use),
    # doubling the work and risking two different answers.
    # NOTE(review): presumably this is a model call -- confirm.
    refined_prompt = self.generate_panel_descriptions(assembled_prompt)
    final_prompt = refined_prompt if refined_prompt else assembled_prompt

    print(f"\n XXXXXX {final_prompt} XXXXXX \n")

    return final_prompt
179
+
180
@log_execution
def _create_detailed_character_specifications(self, characters, num_scenes):
    """Build the character-consistency prompt lines.

    Only the first three entries of ``characters`` are considered, and an
    entry contributes a line only when it is a dict carrying a
    "visual_description" key. Up to five "traits" are appended per character.
    A multi-character rule is added when more than one entry qualified.

    Returns:
        list[str]: Two protocol header lines, then one line per qualifying
        character, then the optional multi-character rule.
    """
    lines = [
        "🎭 CRITICAL CHARACTER CONSISTENCY PROTOCOL:",
        "ABSOLUTE REQUIREMENT: Characters MUST look identical in every single panel - same face, hair, clothes, proportions, expressions style",
    ]

    described = 0
    for position, entry in enumerate(characters[:3], start=1):
        if not isinstance(entry, dict) or "visual_description" not in entry:
            continue
        described += 1

        name = entry.get("name", f"Character_{position}")
        pieces = [f"CHARACTER {position} - {name}: {entry['visual_description']}"]

        trait_list = entry.get("traits")
        if trait_list:
            pieces.append(f" | DISTINCTIVE FEATURES: {', '.join(trait_list[:5])}")

        pieces.append(
            f" | CONSISTENCY RULE: This exact appearance must be maintained across all {num_scenes} panels with zero variation in facial features, hair, clothing, or body proportions"
        )
        lines.append("".join(pieces))

    if described > 1:
        lines.append(
            f"MULTI-CHARACTER RULE: All characters must maintain their exact individual appearances simultaneously across all {num_scenes} panels - no character design drift allowed"
        )

    return lines
207
+
208
@log_execution
def _create_detailed_story_description(self, description, title):
    """Return the story section of the prompt as a single string.

    The section starts with the title and description and is followed by
    three fixed " | "-separated directives (visual narrative focus,
    atmospheric details, character expression clarity).

    Args:
        description: Visual description of the story.
        title: Title of the story.

    Returns:
        str: The combined story section.
    """
    # The unused ``story_elements`` list was removed; the section is a single
    # string assembled from fixed segments.
    segments = (
        f"STORY CONTENT: {title} - {description}",
        " | VISUAL NARRATIVE FOCUS: Every detail must be clearly visible and contribute to story comprehension through imagery alone",
        " | ATMOSPHERIC DETAILS: Include specific lighting, weather, time of day, and environmental mood indicators that enhance the narrative",
        " | CHARACTER EXPRESSION CLARITY: All emotions, reactions, and character intentions must be immediately readable through facial expressions, body language, and positioning",
    )
    return "".join(segments)
222
@log_execution
def _create_detailed_environment_specifications(self, settings, num_scenes):
    """Build the environment-consistency prompt lines.

    Only the first three entries of ``settings`` are considered, and an entry
    contributes a line only when it is a dict carrying a "description" key.
    Up to five "visual_elements" and the "mood" (when the key is present) are
    appended to that line. ``num_scenes`` is accepted but not used here.

    Returns:
        list[str]: The protocol header followed by one line per qualifying
        setting.
    """
    lines = [" ENVIRONMENTAL CONSISTENCY PROTOCOL:"]

    for position, place in enumerate(settings[:3], start=1):
        if not isinstance(place, dict) or "description" not in place:
            continue

        label = place.get("name", f"Location_{position}")
        pieces = [f"LOCATION {position} - {label}: {place['description']}"]

        markers = place.get("visual_elements")
        if markers:
            pieces.append(f" | KEY VISUAL MARKERS: {', '.join(markers[:5])}")

        # Presence of the key is what matters; an empty mood is still emitted.
        if "mood" in place:
            pieces.append(f" | ATMOSPHERE: {place['mood']}")

        pieces.append(
            " | LOCATION CONSISTENCY: When this location appears across multiple panels, all architectural details, lighting, and distinctive features must remain identical"
        )
        lines.append("".join(pieces))

    return lines
248
@log_execution
def _create_comprehensive_technical_specifications(self, style, num_scenes):
    """Build the technical section of the prompt.

    The result is the style lines from ``_get_enhanced_style_specifications``,
    then the panel-composition lines (with four extra compact-panel lines
    when ``num_scenes`` is 20 or more), then the detail-preservation lines.

    Returns:
        list[str]: The technical prompt lines in that order.
    """
    specs = list(self._get_enhanced_style_specifications(style))

    grid = self._calculate_optimal_grid_layout(num_scenes)
    specs += [
        " PANEL COMPOSITION MASTERY:",
        f"Grid Layout: Precisely arranged {grid} grid with professional comic book spacing and clear panel borders",
        "Visual Hierarchy: Each panel must have a clear focal point with supporting details that enhance rather than distract from the main action",
        "Depth and Perspective: Use foreground, midground, and background elements to create visual depth and spatial relationships",
        "Color Harmony: Maintain consistent color palette across all panels while using color psychology to enhance mood and narrative flow",
    ]

    if num_scenes >= 20:
        specs += [
            "COMPACT PANEL OPTIMIZATION: Design each panel for MAXIMUM visual impact in minimal space",
            "SIMPLE BACKGROUNDS: Use minimal, clean backgrounds that don't compete with main subjects",
            "BOLD CHARACTER POSES: Use clear, distinctive poses and gestures that read well at small sizes",
            "HIGH CONTRAST: Ensure strong contrast between characters and backgrounds for clarity",
        ]

    specs += [
        " DETAIL PRESERVATION PROTOCOL:",
        "Facial Detail Consistency: All character faces must maintain identical features - eye shape, nose structure, mouth proportions, facial hair, scars, or distinctive marks",
        "Clothing and Accessory Continuity: Every piece of clothing, jewelry, weapons, or accessories must appear identical across panels",
        "Environmental Detail Tracking: Background objects, architectural elements, vegetation, and atmospheric effects must remain consistent when locations reappear",
        "Lighting Continuity: Maintain logical light sources and shadow patterns that reflect time of day and weather conditions consistently",
    ]

    return specs
284
@log_execution
def _get_enhanced_style_specifications(self, style):
    """Return the prompt lines describing the requested art style.

    Dedicated line sets exist for "Comic Book Style", "Manga Style",
    "Photorealistic" and "Cinematic Realism". Any other value (including
    ``None``) yields the generic digital-comic-illustration lines.

    Returns:
        list[str]: A header line followed by the style directives.
    """
    generic_style = [
        " MODERN DIGITAL COMIC ILLUSTRATION (NO SKETCH LINES, NO DEFORMITIES):",
        "Professional Art: Gallery-quality digital illustration with masterful composition, color theory, and technical execution",
        "Visual Clarity: Crystal-clear details with optimal contrast and saturation for maximum visual impact and readability",
        "Artistic Consistency: Unified artistic approach across all panels maintaining consistent quality and style treatment—absolutely no sketch artefacts",
    ]

    style_catalog = {
        "Comic Book Style": [
            " MODERN DIGITAL COMIC BOOK STYLE (NO SKETCH LINES, NO DEFORMITIES):",
            "Line Art: Bold, ultra-clean digital inking with consistent stroke weight—absolutely no rough sketch lines or unfinished strokes",
            "Color Treatment: Vibrant, saturated colors with polished cel-shading and sharp highlights for a glossy modern finish",
            "Shading: Precise digital shadows and highlights—avoid gradient banding or painterly strokes associated with traditional sketches",
            "Panel Borders: Clean, geometric panel borders with consistent gutters and professional comic book page layout standards",
        ],
        "Manga Style": [
            " MODERN DIGITAL MANGA STYLE (NO SKETCH LINES, NO DEFORMITIES):",
            "Line Quality: Razor-sharp digital line work with deliberate varying weights—completely free of rough sketches",
            "Character Design: Classic manga proportions rendered crisply with expressive eyes and flawless facial details—no distortions",
            "Tone Work: High-resolution screentones and digitally applied hatching for a refined finish",
            "Panel Layout: Dynamic panel flow with polished angles that enhance narrative pacing",
        ],
        "Photorealistic": [
            " MODERN DIGITAL PHOTOREALISM (NO SKETCH LINES, NO DEFORMITIES):",
            "Rendering Quality: Cinema-quality realistic rendering with accurate lighting physics and atmospheric effects—faces and limbs must appear intact and natural",
            "Detail Level: Ultra-high detail textures with crisp edges—no painterly or sketch artefacts",
            "Color Accuracy: Natural color grading with realistic skin tones, environmental colors, and accurate material reflectance",
            "Depth of Field: Professional photography-style focus effects with realistic camera perspective and depth relationships",
        ],
        "Cinematic Realism": [
            " MODERN DIGITAL CINEMATIC REALISM (NO SKETCH LINES, NO DEFORMITIES):",
            "Film Quality: Movie-grade digital rendering with crisp edges and zero sketch artefacts",
            "Color Grading: Cinematic color treatment with cohesive palette—maintain realistic skin and material fidelity",
            "Camera Work: Dynamic camera angles translated into polished panel compositions",
            "Lighting Design: Professional film lighting with atmospheric effects—ensure characters remain fully detailed, no distortions",
        ],
    }

    if style in style_catalog:
        return style_catalog[style]
    return generic_style
324
@log_execution
def _create_advanced_quality_and_flow_instructions(self, num_scenes):
    """Build the quality and panel-flow section of the prompt.

    Quality requirements are always included. Flow instructions are added
    when ``num_scenes`` is greater than 1, and a four-part story-arc block is
    added on top of that when ``num_scenes`` is 20 or more.

    Args:
        num_scenes: Number of panels requested.

    Returns:
        list[str]: Quality lines followed by any flow lines.
    """
    # The "Visual Flow" entry was already disabled (commented out) in this
    # list and remains omitted.
    quality_instructions = [
        " ADVANCED QUALITY REQUIREMENTS:",
        "Technical Excellence: Ultra-high resolution output with crisp details, optimal contrast, and professional-grade visual quality",
        "Narrative Clarity: Every panel must advance the story visibly - clear cause and effect relationships between sequential panels",
        "Line Art: Sharp digital lines, clean and precise, emphasizing dynamic movement and emotional clarity.",
        "Emotional Impact: Each panel must convey specific emotions through character expressions, body language, and environmental mood"
    ]

    if num_scenes > 1:
        # The "Sequential Continuity" entry was already disabled (commented
        # out) in this list and remains omitted.
        flow_instructions = [
            f" {num_scenes}-PANEL FLOW MASTERY:",
            "Action Sequences: Break complex actions into clear, understandable steps across multiple panels",
            "Character Tracking: Maintain character positions and movements logically across panel transitions",
            "Pacing Control: Balance action panels with character moments and environmental establishing shots for optimal narrative rhythm",
            "EACH PANEL IS A DISTINCT SCENE : Each panel must depict a unique, self-contained moment or tableau from the story. No visual elements or action should flow directly from one panel to another"
        ]

        if num_scenes >= 20:
            # Derive the act boundaries from the requested panel count. The
            # text used to hard-code 24 panels (1-6 / 7-12 / 13-18 / 19-24),
            # which contradicted the layout for 20-23 panels. For 24 panels
            # the generated wording is unchanged; any remainder panels fall
            # into the resolution.
            quarter = num_scenes // 4
            flow_instructions.extend([
                f"STORY ARC FOR {num_scenes} PANELS: Create a complete story with beginning (panels 1-{quarter}), rising action (panels {quarter + 1}-{2 * quarter}), climax (panels {2 * quarter + 1}-{3 * quarter}), and resolution (panels {3 * quarter + 1}-{num_scenes})",
                "MICRO-MOMENTS: Each panel captures a single decisive moment - one expression change, one action beat, one story revelation",
                "VISUAL ECONOMY: Every element in each panel must serve the story - no decorative details that don't advance narrative",
                f"READER ENGAGEMENT: Design panel flow to maintain interest across all {num_scenes} panels with strategic use of close-ups, wide shots, and dynamic angles"
            ])

        quality_instructions.extend(flow_instructions)

    return quality_instructions
357
+ @log_execution
358
+ def _assemble_prompt_with_smart_truncation(self, priority_sections):
359
+ """Assemble prompt with smart truncation that preserves critical details."""
360
+ MAX_LENGTH = 31500
361
+
362
+ full_prompt = " || ".join(priority_sections)
363
+
364
+ if len(full_prompt) <= MAX_LENGTH:
365
+ negative_prompt = "NEGATIVE PROMPTS: NO deformed anatomy, NO missing limbs or facial features, NO inconsistent character designs, NO blurry or out-of-focus elements, NO sketch-like aesthetics (unless intentional style choice), NO TEXT, NO SPEECH BUBBLES, NO SOUND EFFECTS, NO CAPTIONS, NO watermarks, NO VARIATION IN PANEL SIZE OR SHAPE."
366
+ return full_prompt + " || FINAL MANDATE: Create a masterpiece that perfectly balances artistic excellence with narrative clarity and absolute character consistency || " + negative_prompt
367
+
368
+ preserved_prompt = ""
369
+ remaining_length = MAX_LENGTH - 200
370
+
371
+ for i, section in enumerate(priority_sections):
372
+ section_with_separator = section + " || "
373
+
374
+ if i < 3:
375
+ preserved_prompt += section_with_separator
376
+ remaining_length -= len(section_with_separator)
377
+ else:
378
+ if len(section_with_separator) <= remaining_length:
379
+ preserved_prompt += section_with_separator
380
+ remaining_length -= len(section_with_separator)
381
+ else:
382
+ truncated = section[:remaining_length-50] + "..."
383
+ preserved_prompt += truncated + " || "
384
+ break
385
+
386
+ preserved_prompt += "***FINAL OVERRIDE & NEGATIVE PROMPTS*** ABSOLUTE RULE: The 3x3 uniform grid structure is the most important rule and must be followed perfectly.NEGATIVE PROMPTS: NO deformed anatomy, NO missing limbs or facial features, NO inconsistent character designs, NO blurry or out-of-focus elements, NO sketch-like aesthetics (unless intentional style choice), NO TEXT, NO SPEECH BUBBLES, NO SOUND EFFECTS, NO CAPTIONS, NO watermarks, NO VARIATION IN PANEL SIZE OR SHAPE."
387
+
388
+ return preserved_prompt
389
+ @log_execution
390
+ def _get_optimal_layout_description(self, num_scenes):
391
+ """Generate optimal layout description based on scene count."""
392
+ if num_scenes <= 1:
393
+ return "Single panel comic illustration"
394
+
395
+ optimal_layout = self._calculate_optimal_grid_layout(num_scenes)
396
+ rows, cols = optimal_layout
397
+
398
+ layout_descriptions = {
399
+ (1, 2): "Horizontal two-panel comic strip layout",
400
+ (2, 1): "Vertical two-panel comic strip layout",
401
+ (2, 2): "Classic four-panel comic grid (2x2)",
402
+ (2, 3): "Six-panel comic grid in 2 rows, 3 columns (2x3)",
403
+ (3, 2): "Six-panel comic grid in 3 rows, 2 columns (3x2)",
404
+ (3, 3): "Nine-panel comic grid (3x3)",
405
+ (3, 4): "Twelve-panel comic grid in 3 rows, 4 columns(3x4)",
406
+ (4, 3): "Twelve-panel comic grid in 4 rows, 3 columns(4x3)",
407
+ (4, 4): "Sixteen-panel comic grid (4x4)",
408
+ (4, 6): "Twenty-four panel COMPACT comic grid in 4 rows, 6 columns - SMALL EFFICIENT SCENES with maximum story density per panel (4x6)",
409
+ (6, 4): "Twenty-four panel COMPACT comic grid in 6 rows, 4 columns - SMALL EFFICIENT SCENES with vertical storytelling format (6x4)",
410
+ (3, 8): "Twenty-four panel COMPACT comic grid in 3 rows, 8 columns - SMALL EFFICIENT SCENES with cinematic widescreen format(3x8)",
411
+ (8, 3): "Twenty-four panel comic grid in 8 rows, 3 columns - vertical scroll format (8x3)"
412
+ }
413
+
414
+ layout_desc = layout_descriptions.get((rows, cols), f"{rows}x{cols} comic panel grid layout")
415
+
416
+ return f"COMIC LAYOUT: {layout_desc} with clear panel borders, consistent gutters, and professional comic book formatting"
417
+ @log_execution
418
+ def _enhance_description_for_visual_consistency(self, description): # No Use?
419
+ """Enhance the core description with visual consistency keywords."""
420
+ consistency_enhancers = [
421
+ "maintaining perfect visual consistency throughout all panels",
422
+ "identical character appearances across every scene",
423
+ "unified lighting and color palette",
424
+ "consistent artistic style and perspective"
425
+ ]
426
+
427
+ enhanced = f"STORY CONTENT: {description}. "
428
+ enhanced += "VISUAL CONSISTENCY REQUIREMENTS: " + ", ".join(consistency_enhancers)
429
+
430
+ return enhanced
431
+ @log_execution
432
+ def _create_character_consistency_anchors(self, characters, num_scenes): # No Use?
433
+ """Create sophisticated character consistency instructions."""
434
+ anchors = []
435
+
436
+ if characters:
437
+ anchors.append("CHARACTER CONSISTENCY ANCHORS:")
438
+
439
+ for i, character in enumerate(characters[:2]):
440
+ if isinstance(character, dict) and "visual_description" in character:
441
+ char_desc = character["visual_description"]
442
+
443
+ anchor = f"Character {i+1}: {char_desc} - MUST appear IDENTICAL in every single panel with exact same: facial features, hair style, clothing, proportions, and distinctive visual elements"
444
+ anchors.append(anchor)
445
+
446
+ if num_scenes > 1:
447
+ anchors.append(f"CRITICAL: All {len([c for c in characters[:2] if isinstance(c, dict) and 'visual_description' in c])} characters must look exactly the same across all {num_scenes} panels - same faces, same outfits, same proportions, same artistic rendering")
448
+
449
+ return anchors
450
+ @log_execution
451
+ def _create_environment_consistency_anchors(self, settings, num_scenes): # No Use?
452
+ """Create environmental consistency instructions."""
453
+ anchors = []
454
+
455
+ if settings:
456
+ anchors.append("ENVIRONMENTAL CONSISTENCY:")
457
+
458
+ for setting in settings:
459
+ if isinstance(setting, dict) and "description" in setting:
460
+ setting_desc = setting["description"]
461
+ anchors.append(f"Setting: {setting_desc} - maintain consistent architectural details, lighting, and atmospheric elements when this location appears")
462
+
463
+ if num_scenes > 1:
464
+ anchors.append(f"Ensure environmental continuity across all {num_scenes} panels with logical spatial relationships and consistent time-of-day lighting")
465
+
466
+ return anchors
467
+ @log_execution
468
+ def _create_advanced_style_instructions(self, style, num_scenes):
469
+ """Create advanced style instructions with technical specifications."""
470
+ instructions = []
471
+
472
+ advanced_style_map = {
473
+ "Comic Book Style": [
474
+ "modern digital comic book illustration style (no sketch-like strokes, no deformities)",
475
+ "bold ultra-clean line art with consistent stroke weight",
476
+ "vibrant saturated colors with polished highlights and shadows",
477
+ "dynamic panel compositions with varied camera angles",
478
+ "classic comic book rendering techniques executed with a contemporary digital finish"
479
+ ],
480
+ "Manga Style": [
481
+ "modern digital manga illustration style (no sketch artefacts, no deformities)",
482
+ "razor-sharp line work with deliberate varying weights",
483
+ "subtle color palette with high-resolution screentone effects",
484
+ "expressive character designs with flawless facial details",
485
+ "dynamic manga panel composition and flow"
486
+ ],
487
+ "Cartoon Style": [
488
+ "polished digital cartoon style (clean vectors, no sketch lines, no deformities)",
489
+ "smooth rounded character designs with appealing proportions",
490
+ "bright harmonious color schemes with soft lighting",
491
+ "clear readable expressions and body language",
492
+ "family-friendly visual appeal with consistent character models"
493
+ ],
494
+ "Photorealistic": [
495
+ "high-quality digital photorealism (no sketch artefacts, no deformities)",
496
+ "detailed realistic lighting and shadows",
497
+ "natural color grading with realistic materials and textures",
498
+ "cinematic composition with depth of field effects",
499
+ "professional photography-inspired visual quality"
500
+ ],
501
+ "Cinematic Realism": [
502
+ "digital cinematic realism (crisp, no sketch lines, no deformities)",
503
+ "dramatic lighting with atmospheric effects",
504
+ "rich color grading with cinematic color palette",
505
+ "dynamic camera angles and professional composition",
506
+ "film-quality character rendering and environmental detail"
507
+ ],
508
+ "Digital Painting": [
509
+ "masterful digital painting technique with a polished finish (no sketch lines, no deformities)",
510
+ "controlled painterly brushwork with intentional texture and depth",
511
+ "rich color harmony with sophisticated lighting",
512
+ "artistic composition with traditional painting principles",
513
+ "high-end digital art gallery quality"
514
+ ]
515
+ }
516
+
517
+ if style and style in advanced_style_map:
518
+ instructions.append("ARTISTIC STYLE SPECIFICATIONS:")
519
+ instructions.extend(advanced_style_map[style])
520
+ else:
521
+ instructions.extend([
522
+ "ARTISTIC STYLE: High-quality illustration with professional comic book aesthetics",
523
+ "clean precise line work with consistent artistic rendering",
524
+ "harmonious color palette with strategic lighting effects",
525
+ "polished visual presentation with attention to detail"
526
+ ])
527
+
528
+ if num_scenes > 1:
529
+ instructions.append(f"STYLE CONSISTENCY: Maintain identical artistic style, line weight, color saturation, and rendering quality across all {num_scenes} panels")
530
+
531
+ return instructions
532
+ @log_execution
533
+ def _create_panel_flow_instructions(self, num_scenes):
534
+ """Create instructions for optimal panel flow and transitions."""
535
+ flow_instructions = []
536
+
537
+ if num_scenes > 1:
538
+ flow_instructions.extend([
539
+ "PANEL FLOW AND TRANSITIONS:",
540
+ "create smooth visual flow from panel to panel following standard left-to-right, top-to-bottom reading order",
541
+ "design panel compositions that guide the eye naturally through the sequence",
542
+ "establish clear visual relationships between consecutive panels",
543
+ "use consistent perspective and scale to maintain spatial continuity",
544
+ "create visual rhythm through varied but harmonious panel compositions"
545
+ ])
546
+
547
+ if num_scenes >= 10:
548
+ flow_instructions.extend([
549
+ "COMPREHENSIVE STORYTELLING FLOW: Design a compelling visual narrative that maintains engagement across all 12 panels",
550
+ "balance action panels with character moments and environmental establishing shots",
551
+ "create visual crescendos and quiet beats for optimal pacing",
552
+ "ensure each panel contributes meaningfully to the overall story progression"
553
+ ])
554
+
555
+ return flow_instructions
556
+ @log_execution
557
+ def _create_quality_specifications(self, num_scenes):
558
+ """Create technical quality specifications."""
559
+ quality_specs = [
560
+ "TECHNICAL QUALITY REQUIREMENTS:",
561
+ "ultra-high resolution with crisp clean details",
562
+ "professional comic book production quality",
563
+ "optimal contrast and saturation for visual clarity",
564
+ "balanced composition with clear focal points in each panel",
565
+ "masterful use of negative space and visual hierarchy"
566
+ ]
567
+
568
+ if num_scenes > 1:
569
+ quality_specs.extend([
570
+ f"perfect grid alignment with consistent panel spacing across all {num_scenes} panels",
571
+ "clear panel borders with professional gutters and margins",
572
+ "unified visual presentation suitable for professional comic publication"
573
+ ])
574
+
575
+ return quality_specs
576
+ @log_execution
577
+ def _optimize_prompt_structure(self, prompt_parts):
578
+ """Optimize the prompt structure for maximum AI comprehension."""
579
+ structured_prompt = []
580
+
581
+ for i, part in enumerate(prompt_parts):
582
+ if isinstance(part, list):
583
+ structured_prompt.append(" | ".join(part))
584
+ else:
585
+ structured_prompt.append(part)
586
+
587
+ final_prompt = " || ".join(structured_prompt)
588
+
589
+ final_prompt += " || FINAL REQUIREMENT: Create a masterpiece-quality comic that perfectly balances artistic excellence with clear storytelling"
590
+
591
+ return final_prompt
592
+ @log_execution
593
+ def _calculate_optimal_grid_layout(self, num_scenes):
594
+ """Calculate the most visually appealing grid layout for the given number of scenes."""
595
+ optimal_layouts = {
596
+ 1: (1, 1),
597
+ 2: (1, 2),
598
+ 3: (1, 3),
599
+ 4: (2, 2),
600
+ 5: (1, 5),
601
+ 6: (2, 3),
602
+ 7: (1, 7),
603
+ 8: (2, 4),
604
+ 9: (3, 3),
605
+ 10: (2, 5),
606
+ 11: (1, 11),
607
+ 12: (3, 4),
608
+ 13: (1, 13),
609
+ 14: (2, 7),
610
+ 15: (3, 5),
611
+ 16: (4, 4),
612
+ 17: (1, 17),
613
+ 18: (3, 6),
614
+ 19: (1, 19),
615
+ 20: (4, 5),
616
+ 21: (3, 7),
617
+ 22: (2, 11),
618
+ 23: (1, 23),
619
+ 24: (4, 6),
620
+ }
621
+
622
+ return optimal_layouts.get(num_scenes, self._calculate_optimal_layout(num_scenes, 1024, 768))
623
+
624
def _create_placeholder_comic(self, title, description):
    """
    Draw a simple 800x600 stand-in image for when image generation fails.

    The image has a white background, the title at the top-left, a grey
    framed box, and (when a description is given) up to 300 characters of
    the description wrapped at 70 columns inside the box.

    Args:
        title: Title of the comic
        description: Visual description of the comic

    Returns:
        PIL.Image.Image: Placeholder comic image
    """
    canvas_size = (800, 600)
    canvas_width, canvas_height = canvas_size
    black = (0, 0, 0)

    comic = Image.new("RGB", canvas_size, (255, 255, 255))
    pen = ImageDraw.Draw(comic)

    try:
        title_font = ImageFont.truetype("Arial.ttf", 36)
        desc_font = ImageFont.truetype("Arial.ttf", 18)
    except IOError:
        # Arial could not be loaded; use Pillow's built-in font for both.
        fallback_font = ImageFont.load_default()
        title_font = fallback_font
        desc_font = fallback_font

    pen.text((20, 20), title, fill=black, font=title_font)

    pen.rectangle(
        [50, 80, canvas_width - 50, canvas_height - 50],
        outline=black,
        fill=(220, 220, 220),
    )

    if description:
        max_chars = 300
        if len(description) > max_chars:
            short_desc = description[:max_chars] + "..."
        else:
            short_desc = description
        pen.text((60, 100), textwrap.fill(short_desc, width=70), fill=black, font=desc_font)

    return comic
657
+ @log_execution
658
def split_comic_into_scenes(self, comic_image, num_scenes, preferred_layout=None, use_gemini_analysis=True):  # No Use?
    """
    Split a comic image into individual scene images.

    Two strategies are available. With ``use_gemini_analysis`` the grid is
    taken from ``preferred_layout`` or detected through
    ``analyze_comic_layout_with_enhanced_gemini``, validated, and the image
    is cut on that grid. Otherwise the image is written to a temporary
    directory and handed to ``split_comic_panels``; the resulting
    ``panel_*.png`` files are loaded back.

    Args:
        comic_image: PIL.Image.Image object of the comic
        num_scenes: Expected number of scenes (caps the grid extraction;
            not passed to the OpenCV splitter)
        preferred_layout: Optional tuple (rows, cols) that replaces the
            Gemini detection (ignored on the OpenCV path)
        use_gemini_analysis: True for the Gemini grid path (default),
            False for the OpenCV path.

    Returns:
        list: List of PIL.Image.Image objects, one for each extracted scene.
        On the OpenCV path the original image is returned as the only
        element when nothing could be extracted or an error occurred.

    Raises:
        ValueError: If ``comic_image`` is not a PIL image.
    """
    if not isinstance(comic_image, Image.Image):
        raise ValueError("comic_image must be a PIL.Image.Image object")

    if num_scenes <= 1 and not use_gemini_analysis:
        return [comic_image]

    width, height = comic_image.size
    print(f"🎯 Splitting {width}x{height} comic into scenes (Target: {num_scenes} scenes if using grid, auto-detect if OpenCV)...")

    if use_gemini_analysis:
        print("🔍 Analyzing comic layout with enhanced Gemini Vision...")
        if preferred_layout:
            rows, cols = preferred_layout
            print(f"🎯 Using manual override for Gemini: {rows}×{cols} layout")
        else:
            rows, cols = self.analyze_comic_layout_with_enhanced_gemini(comic_image, num_scenes)

        rows, cols = self._validate_and_optimize_layout(rows, cols, num_scenes, width, height)

        actual_panels = rows * cols
        print(f"✅ Using Gemini-derived {rows}×{cols} grid layout - will extract {min(actual_panels, num_scenes)} panels")

        return self._extract_scenes_with_quality_check(comic_image, rows, cols, num_scenes)

    print("🔩 Using OpenCV for panel splitting...")
    temp_dir = tempfile.mkdtemp()
    temp_image_path = os.path.join(temp_dir, "source_comic.png")
    panels_output_dir = os.path.join(temp_dir, "output_panels")

    try:
        comic_image.save(temp_image_path, "PNG")

        split_comic_panels(temp_image_path, panels_output_dir)

        extracted_scenes = []
        if os.path.exists(panels_output_dir):
            # NOTE(review): plain string sort; "panel_10" sorts before
            # "panel_2" unless the splitter zero-pads its file names - confirm.
            panel_files = sorted(
                f for f in os.listdir(panels_output_dir)
                if f.startswith("panel_") and f.endswith(".png")
            )
            for panel_file in panel_files:
                try:
                    panel_image_path = os.path.join(panels_output_dir, panel_file)
                    # Image.open() reads pixel data lazily. Copy the panel
                    # into memory and close the file here, because the temp
                    # directory is removed before the caller sees the images.
                    with Image.open(panel_image_path) as panel:
                        extracted_scenes.append(panel.copy())
                except Exception as e:
                    print(f"Error loading panel image {panel_file}: {e}")

        if not extracted_scenes:
            print("⚠️ OpenCV panel splitter did not return any panels. Returning original image.")
            return [comic_image]

        print(f"✅ OpenCV successfully extracted {len(extracted_scenes)} panels.")
        return extracted_scenes

    except Exception as e:
        print(f"❌ Error during OpenCV panel splitting: {e}")
        return [comic_image]
    finally:
        # Best-effort cleanup: a failure to delete must not replace the
        # value being returned with an exception.
        shutil.rmtree(temp_dir, ignore_errors=True)
734
+ @log_execution
735
+ def _validate_and_optimize_layout(self, rows, cols, num_scenes, image_width, image_height):
736
+ """Validate and optimize the layout based on image properties and panel count."""
737
+ panel_width = image_width / cols
738
+ panel_height = image_height / rows
739
+ panel_aspect_ratio = panel_width / panel_height
740
+
741
+ if panel_width < 50 or panel_height < 50:
742
+ print(f"⚠️ Panels too small ({panel_width:.0f}x{panel_height:.0f}). Recalculating layout...")
743
+ return self._calculate_optimal_grid_layout(num_scenes)
744
+
745
+ if panel_aspect_ratio < 0.2 or panel_aspect_ratio > 5.0:
746
+ print(f"⚠️ Panel aspect ratio {panel_aspect_ratio:.2f} is extreme. Optimizing layout...")
747
+ return self._calculate_optimal_grid_layout(num_scenes)
748
+
749
+ if num_scenes == 12:
750
+ optimal_12_layouts = [(3, 4), (4, 3), (2, 6), (6, 2)]
751
+ current_layout = (rows, cols)
752
+
753
+ if current_layout not in optimal_12_layouts:
754
+ image_aspect = image_width / image_height
755
+ best_layout = (3, 4)
756
+ best_score = float('inf')
757
+
758
+ for opt_rows, opt_cols in optimal_12_layouts:
759
+ layout_aspect = opt_cols / opt_rows
760
+ score = abs(layout_aspect - image_aspect)
761
+ if score < best_score:
762
+ best_score = score
763
+ best_layout = (opt_rows, opt_cols)
764
+
765
+ print(f"📋 Optimizing 12-panel layout from {rows}×{cols} to {best_layout[0]}×{best_layout[1]}")
766
+ return best_layout
767
+
768
+ if num_scenes == 24:
769
+ optimal_24_layouts = [(4, 6), (6, 4), (3, 8), (8, 3)]
770
+ current_layout = (rows, cols)
771
+
772
+ if current_layout not in optimal_24_layouts:
773
+ image_aspect = image_width / image_height
774
+ best_layout = (4, 6)
775
+ best_score = float('inf')
776
+
777
+ for opt_rows, opt_cols in optimal_24_layouts:
778
+ layout_aspect = opt_cols / opt_rows
779
+ score = abs(layout_aspect - image_aspect)
780
+ if score < best_score:
781
+ best_score = score
782
+ best_layout = (opt_rows, opt_cols)
783
+
784
+ print(f"📋 Optimizing 24-panel layout from {rows}×{cols} to {best_layout[0]}×{best_layout[1]} for compact scenes")
785
+ return best_layout
786
+
787
+ return (rows, cols)
788
+ @log_execution
789
def _extract_scenes_with_quality_check(self, comic_image, rows, cols, num_scenes):
    """Cut the comic into grid cells, row by row, up to ``num_scenes`` cells.

    Each cell is cropped with a 2-pixel margin on every side (clamped to the
    image bounds). Cells that fail ``_validate_scene_quality`` are reported
    but still kept, so the result always holds one image per visited cell.

    Args:
        comic_image: Image exposing ``size`` and ``crop`` (a PIL image).
        rows: Number of grid rows.
        cols: Number of grid columns.
        num_scenes: Maximum number of cells to extract.

    Returns:
        list: Cropped scene images in left-to-right, top-to-bottom order.
    """
    width, height = comic_image.size

    cell_width = width // cols
    cell_height = height // rows

    margin = 2

    scenes = []

    for cell_index in range(rows * cols):
        if len(scenes) >= num_scenes:
            break

        row, col = divmod(cell_index, cols)

        left = max(0, col * cell_width - margin)
        top = max(0, row * cell_height - margin)
        right = min(width, (col + 1) * cell_width + margin)
        bottom = min(height, (row + 1) * cell_height + margin)

        scene = comic_image.crop((left, top, right, bottom))

        if not self._validate_scene_quality(scene):
            print(f"⚠️ Scene {len(scenes) + 1} failed quality check, keeping anyway")
        scenes.append(scene)

    print(f"✅ Successfully extracted {len(scenes)} scenes")
    return scenes
826
+ @log_execution
827
+ def _validate_scene_quality(self, scene):
828
+ """Validate that a scene contains meaningful content."""
829
+ try:
830
+ import numpy as np
831
+
832
+ scene_array = np.array(scene)
833
+
834
+ if len(scene_array.shape) == 3:
835
+ variance = np.var(scene_array)
836
+ if variance < 10:
837
+ return False
838
+
839
+ if scene.width < 20 or scene.height < 20:
840
+ return False
841
+
842
+ return True
843
+
844
+ except Exception as e:
845
+ print(f"Scene quality check failed: {e}")
846
+ return True
847
+ @log_execution
848
def analyze_comic_layout_with_enhanced_gemini(self, comic_image, num_scenes):
    """
    Ask a Gemini vision model for the panel grid of a comic image.

    The model is prompted to answer with a JSON object; the first
    "{...}" span of the reply is parsed and its row/column counts are
    accepted only when both are positive and their product equals the
    reported panel total. Up to two attempts are made. Whenever no valid
    answer is obtained (or any error occurs) the layout comes from
    ``_calculate_optimal_grid_layout`` instead.

    Args:
        comic_image: PIL.Image.Image object of the comic
        num_scenes: Expected number of scenes (interpolated into the prompt;
            a value of 12 enables an extra plausibility check)

    Returns:
        tuple: (rows, cols) representing the detected grid layout
    """
    try:
        model = GenerativeModel('gemini-2.5-flash')

        # NOTE(review): the PNG bytes produced here are not used below; the
        # PIL image itself is what gets passed to generate_content().
        buffered = io.BytesIO()
        comic_image.save(buffered, format="PNG")
        img_bytes = buffered.getvalue()

        # Doubled braces are literal braces in the f-string; only
        # {num_scenes} is interpolated.
        analysis_prompt = f"""
You are a professional comic book layout analyst. Examine this comic image carefully to determine its precise panel grid structure.

ANALYSIS TASK:
- Count the exact number of ROWS (horizontal divisions)
- Count the exact number of COLUMNS (vertical divisions)
- Expected panels: {num_scenes} (use as context, but trust what you see)

DETECTION GUIDELINES:
1. Look for panel borders, gutters, or visual separations
2. Identify consistent grid patterns
3. Count horizontal lines that divide rows
4. Count vertical lines that divide columns
5. For 12 panels, common layouts are: 3×4, 4×3, 2×6, or 6×2
6. Trust visual evidence over expected numbers

VISUAL INDICATORS TO LOOK FOR:
- Black border lines between panels
- White gutters or spacing between sections
- Consistent rectangular divisions
- Grid-like organization of content
- Clear separation of distinct visual areas

IMPORTANT: Be precise about what you actually observe. If you see a clear grid pattern, report it exactly.

Respond with ONLY this JSON format:
{{
"detected_rows": [number of rows you count],
"detected_cols": [number of columns you count],
"total_panels_detected": [rows × cols],
"confidence": "high/medium/low",
"layout_description": "detailed description of the grid structure you observe",
"visual_evidence": "description of the visual cues that led to this conclusion"
}}

Be extremely precise in your counting.
"""

        max_retries = 2
        for attempt in range(max_retries):
            try:
                response = model.generate_content([analysis_prompt, comic_image])
                response_text = response.text.strip()

                print(f"Gemini Vision analysis (attempt {attempt + 1}): {response_text[:200]}...")

                # Greedy match from the first "{" to the last "}" in the reply.
                json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                if json_match:
                    json_str = json_match.group()
                    analysis_result = json.loads(json_str)

                    rows = analysis_result.get("detected_rows", 0)
                    cols = analysis_result.get("detected_cols", 0)
                    total_detected = analysis_result.get("total_panels_detected", 0)
                    confidence = analysis_result.get("confidence", "unknown")
                    # NOTE(review): `description` is read but not used afterwards.
                    description = analysis_result.get("layout_description", "")
                    evidence = analysis_result.get("visual_evidence", "")

                    if rows > 0 and cols > 0:
                        # Accept only answers whose own arithmetic is consistent.
                        if total_detected == rows * cols:
                            print(f"✅ Gemini detected {rows}×{cols} layout ({total_detected} panels) with {confidence} confidence")
                            print(f"Evidence: {evidence}")

                            if num_scenes == 12:
                                # For 12-panel comics, a detected total of
                                # 10-18 is accepted as-is; anything else is
                                # replaced by the calculated layout.
                                if total_detected in [10, 11, 12, 13, 14, 15, 16, 17, 18]:
                                    print(f"📋 Layout reasonable for 12-panel comic")
                                    return (rows, cols)
                                else:
                                    print(f"⚠️ Detected {total_detected} panels for 12-panel comic. Using optimized layout.")
                                    return self._calculate_optimal_grid_layout(num_scenes)
                            else:
                                return (rows, cols)
                        else:
                            print(f"❌ Math inconsistency: {rows}×{cols} ≠ {total_detected}")
                    else:
                        print(f"❌ Invalid dimensions: {rows}×{cols}")

            except json.JSONDecodeError as e:
                print(f"❌ JSON parsing error on attempt {attempt + 1}: {e}")
                if attempt == max_retries - 1:
                    break

            except Exception as e:
                # Also reached when the reply holds non-numeric row/column
                # values, since the comparisons above then raise.
                print(f"❌ Analysis error on attempt {attempt + 1}: {e}")
                if attempt == max_retries - 1:
                    break

    except Exception as e:
        print(f"❌ Gemini Vision analysis completely failed: {e}")

    # Reached when every attempt ended without returning a layout.
    print("⚠️ Using optimized grid calculation as fallback")
    return self._calculate_optimal_grid_layout(num_scenes)
959
+ @log_execution
960
+ def _find_all_factorizations(self, n):
961
+ """
962
+ Find all possible factorizations of a number into rows × columns.
963
+ Enhanced with better algorithm for large numbers like 24.
964
+
965
+ Args:
966
+ n: Number to factorize
967
+
968
+ Returns:
969
+ list: List of tuples (rows, cols) where rows * cols = n, sorted by preference
970
+ """
971
+ factorizations = []
972
+ for i in range(1, int(n**0.5) + 1):
973
+ if n % i == 0:
974
+ rows, cols = i, n // i
975
+ factorizations.append((rows, cols))
976
+ if rows != cols:
977
+ factorizations.append((cols, rows))
978
+
979
+ factorizations.sort(key=lambda x: (abs(x[0] - x[1]), max(x[0], x[1])))
980
+ return factorizations
981
+ @log_execution
982
+ def _calculate_optimal_layout(self, num_scenes, image_width, image_height):
983
+ """
984
+ Calculate the optimal grid layout based on image aspect ratio and scene count.
985
+ Enhanced algorithm with better preferences for different panel counts.
986
+
987
+ Args:
988
+ num_scenes: Number of scenes to arrange
989
+ image_width: Width of the comic image
990
+ image_height: Height of the comic image
991
+
992
+ Returns:
993
+ tuple: (rows, cols) representing the optimal grid layout
994
+ """
995
+ image_aspect_ratio = image_width / image_height
996
+
997
+ factorizations = self._find_all_factorizations(num_scenes)
998
+
999
+ if not factorizations:
1000
+ import math
1001
+ sqrt_scenes = math.sqrt(num_scenes)
1002
+ rows = int(sqrt_scenes)
1003
+ cols = math.ceil(num_scenes / rows)
1004
+ return (rows, cols)
1005
+
1006
+ best_layout = factorizations[0]
1007
+ best_score = float('inf')
1008
+
1009
+ for rows, cols in factorizations:
1010
+ layout_aspect_ratio = cols / rows
1011
+
1012
+ aspect_diff = abs(layout_aspect_ratio - image_aspect_ratio)
1013
+
1014
+ panel_aspect = (image_width / cols) / (image_height / rows)
1015
+ extremeness_penalty = 0
1016
+ if panel_aspect < 0.3 or panel_aspect > 3.0:
1017
+ extremeness_penalty = 2.0
1018
+
1019
+ total_score = aspect_diff + extremeness_penalty
1020
+
1021
+ if total_score < best_score:
1022
+ best_score = total_score
1023
+ best_layout = (rows, cols)
1024
+
1025
+ return best_layout
1026
+ @log_execution
1027
def get_possible_layouts(self, num_scenes):
    """
    List the grid layouts available for a given number of scenes.

    For the panel counts named below, the layout from
    ``_calculate_optimal_grid_layout`` is placed first and the remaining
    factorizations follow in their own order. For every other count the
    factorizations are returned unchanged.

    Args:
        num_scenes: Number of scenes

    Returns:
        list: List of tuples (rows, cols) representing possible layouts, sorted by preference
    """
    counts_with_preferred_grid = (1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 18, 20, 21, 24)

    if num_scenes not in counts_with_preferred_grid:
        return self._find_all_factorizations(num_scenes)

    optimal = self._calculate_optimal_grid_layout(num_scenes)
    alternatives = self._find_all_factorizations(num_scenes)

    layouts = [optimal]
    for layout in alternatives:
        if layout != optimal:
            layouts.append(layout)
    return layouts
1047
+ @log_execution
1048
def generate_comic_with_quality_metrics(self, story_data, output_path=None, style=None):
    """
    Generate a comic image and report metrics about the generation run.

    The panel count is fixed at 9. The prompt is built with
    ``_create_comic_prompt`` and passed to ``generate_image_fn``; when that
    returns None, or any step raises, an enhanced placeholder image is
    returned instead and the status is recorded in the metrics.

    Args:
        story_data: Dictionary containing the story information ("title",
            "description", "characters", "settings" are read)
        output_path: Optional path to save the resulting image
        style: Optional comic style to use

    Returns:
        tuple: (comic_image, data_url, quality_metrics) where ``data_url``
        is a base64 PNG data URL and ``quality_metrics`` is a dict whose
        "generation_status" is "success", "placeholder" or "error".
    """

    def _to_data_url(image):
        """Encode *image* as a base64 PNG data URL."""
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
        return f"data:image/png;base64,{img_b64}"

    start_time = time.time()

    title = story_data.get("title", "Enhanced Comic")
    description = story_data.get("description", "")
    characters = story_data.get("characters", [])
    settings = story_data.get("settings", [])
    num_scenes = 9

    quality_metrics = {
        "character_count": len([c for c in characters if isinstance(c, dict) and "visual_description" in c]),
        "setting_count": len([s for s in settings if isinstance(s, dict) and "description" in s]),
        "description_length": len(description),
        "optimal_layout": self._calculate_optimal_grid_layout(num_scenes),
        "generation_complexity": "high" if num_scenes >= 20 else "medium" if num_scenes >= 10 else "low"
    }

    try:
        prompt = self._create_comic_prompt(title, description, characters, settings, style, num_scenes)

        print(f"🎨 Generating {num_scenes}-panel comic with enhanced prompt ({len(prompt)} characters)")

        comic_image = generate_image_fn(
            selected_prompt=prompt,
            output_path=output_path
        )

        if comic_image is None:
            comic_image = self._create_enhanced_placeholder_comic(title, description, num_scenes)
            quality_metrics["generation_status"] = "placeholder"
        else:
            quality_metrics["generation_status"] = "success"

        if output_path:
            directory = os.path.dirname(output_path)
            if directory:
                # exist_ok avoids failing when the directory appears between
                # a separate existence check and the creation call.
                os.makedirs(directory, exist_ok=True)
            comic_image.save(output_path)

        data_url = _to_data_url(comic_image)

        quality_metrics["generation_time"] = time.time() - start_time
        quality_metrics["image_size"] = (comic_image.width, comic_image.height)
        quality_metrics["prompt_complexity"] = len(prompt.split())

        return comic_image, data_url, quality_metrics

    except Exception as e:
        print(f"Error in enhanced generation: {str(e)}")
        placeholder = self._create_enhanced_placeholder_comic(title, description, num_scenes)
        data_url = _to_data_url(placeholder)

        quality_metrics["generation_status"] = "error"
        quality_metrics["error_message"] = str(e)
        # Report elapsed time on failures too, as the success path does.
        quality_metrics["generation_time"] = time.time() - start_time

        return placeholder, data_url, quality_metrics
1126
+ @log_execution
1127
def _create_enhanced_placeholder_comic(self, title, description, num_scenes):
    """
    Draw a stand-in comic page that previews the intended panel grid.

    Args:
        title: Title of the comic
        description: Description of the comic
        num_scenes: Number of scenes the comic should have

    Returns:
        PIL.Image.Image: Placeholder page with a heading, the grid summary,
        one numbered box per scene and (when given) the wrapped description.
    """
    # The canvas grows with the number of panels to draw.
    if num_scenes <= 4:
        canvas_size = (800, 600)
    elif num_scenes <= 12:
        canvas_size = (1200, 900)
    else:
        canvas_size = (1600, 1200)
    width, height = canvas_size

    comic = Image.new("RGB", canvas_size, (248, 248, 248))
    draw = ImageDraw.Draw(comic)

    try:
        title_font = ImageFont.truetype("Arial.ttf", max(24, width // 40))
        panel_font = ImageFont.truetype("Arial.ttf", max(12, width // 80))
        desc_font = ImageFont.truetype("Arial.ttf", max(10, width // 100))
    except IOError:
        # Arial is not available: use Pillow's built-in font everywhere.
        fallback_font = ImageFont.load_default()
        title_font = panel_font = desc_font = fallback_font

    draw.text((20, 20), f"{title} - {num_scenes} Panel Layout Preview", fill=(50, 50, 50), font=title_font)

    rows, cols = self._calculate_optimal_grid_layout(num_scenes)
    draw.text((20, 60), f"Layout: {rows}×{cols} grid ({rows * cols} panels)", fill=(100, 100, 100), font=panel_font)

    grid_top = 100
    cell_width = (width - 60) // cols
    cell_height = (height - grid_top - 60) // rows

    # Walk the grid row by row and stop once every scene has a box.
    cells = [(grid_row, grid_col) for grid_row in range(rows) for grid_col in range(cols)]
    for index, (grid_row, grid_col) in enumerate(cells):
        if index >= num_scenes:
            break

        left = 30 + grid_col * cell_width
        top = grid_top + grid_row * cell_height

        draw.rectangle([left, top, left + cell_width - 10, top + cell_height - 10],
                       outline=(150, 150, 150), fill=(255, 255, 255))
        draw.text((left + 10, top + 10), f"Panel {index + 1}", fill=(100, 100, 100), font=panel_font)

    if description:
        if len(description) > 200:
            snippet = description[:200] + "..."
        else:
            snippet = description
        draw.text((30, height - 50), textwrap.fill(snippet, width=80), fill=(80, 80, 80), font=desc_font)

    return comic
1196
+
1197
+
1198
+ @log_execution
1199
+
1200
def generate_panel_descriptions(self, final_prompt, num_scenes=9):
    """
    Ask Gemini for a panel-by-panel storyboard and embed it in the image prompt.

    Args:
        final_prompt: The complete story/prompt text
        num_scenes: Number of panels (default: 9)

    Returns:
        str: Complete formatted prompt ready for image generation, or None
        when any step raises (the error is printed).
    """
    try:
        storyboard_model = GenerativeModel('gemini-2.0-flash-exp')

        # Step 1: have the model split the story into distinct panels.
        storyboard_request = f"""You are a master comic book storyteller. Break down this story into {num_scenes} COMPLETELY DIFFERENT panels.

STORY:
{final_prompt}

ABSOLUTE REQUIREMENTS FOR UNIQUENESS:

1. STORY STRUCTURE - Divide the story into {num_scenes} distinct narrative beats:
- Each panel = ONE specific story moment that happens at a DIFFERENT time
- Panel 1 happens BEFORE Panel 2, Panel 2 BEFORE Panel 3, etc.
- NO panel should show the same moment or similar action
- Think of it like a movie: each panel is a different scene

2. VISUAL VARIETY - Each panel MUST have:
- DIFFERENT location or setting (if story allows)
- DIFFERENT character positions and poses
- DIFFERENT camera angle/shot type
- DIFFERENT action or emotional beat
- DIFFERENT time of day or lighting (if applicable)

3. SHOT TYPES - Use variety:
- Extreme Wide Shot, Wide Shot, Medium Shot, Close-Up, Extreme Close-Up, Over-the-Shoulder, Low Angle, High Angle, Bird's Eye View

FORMAT EXACTLY LIKE THIS:
Panel 1: [Title]
Shot Type: [Type]
Content: [Detailed description]

Panel 2: [Different title]
Shot Type: [Different type]
Content: [Completely different scene]

Generate all {num_scenes} panels now:"""

        reply = storyboard_model.generate_content(
            storyboard_request,
            generation_config=GenerationConfig(
                temperature=0.9,
                top_p=0.95,
            ),
        )
        panel_descriptions = reply.text.strip()

        # Step 2: wrap the storyboard in the fixed layout/style instructions.
        if num_scenes == 9:
            grid_layout = "3x3 grid (3 rows, 3 columns)"
        else:
            grid_layout = f"{num_scenes} panels"

        complete_prompt = f'''"""CRITICAL COMMAND: UNIFORM {grid_layout.upper()} (NON-NEGOTIABLE)

Layout: Generate exactly {num_scenes} panels in a {grid_layout}.
Panel Integrity: Every panel MUST be identical in size and shape. Do not change panel dimensions for any reason.
Formatting: Use clean, equal-width white gutters between all panels and a uniform thin black border around each panel.
CRITICAL RULE: SILENT COMIC - NO TEXT, NO SPEECH BUBBLES, NO SOUND EFFECTS, NO CAPTIONS EVER.
CRITICAL RULE: EACH PANEL IS A DISTINCT SCENE.
Each panel must depict a unique, self-contained moment or tableau from the story.
PANEL-BY-PANEL STORYBOARD (READ LEFT-TO-RIGHT, TOP-TO-BOTTOM)
{panel_descriptions}

GLOBAL STYLE & CONSISTENCY MANDATES
Art Style: Modern Digital Manga

Line Art: Sharp digital lines, clean and precise, emphasizing dynamic movement and emotional clarity.
Tones & Shading: Cel shading with clear, distinct shadows and highlights, giving a vibrant yet defined look.
Composition: Every panel must have a clear focal point and excellent use of foreground, midground, and background elements.
Character Consistency: Characters must maintain consistent facial features, hair, and design throughout all panels while showing progression in age, clothing, or emotional state as the story requires.
Environmental & Lighting Continuity: Lighting and atmosphere should support the narrative progression and emotional tone of each scene.
Color Palette: A vibrant and saturated palette that enhances the story's emotional journey.

FINAL OVERRIDE & NEGATIVE PROMPTS
ABSOLUTE RULE: The {grid_layout} uniform grid structure is the most important rule and must be followed perfectly.
NEGATIVE PROMPTS: NO deformed anatomy, NO missing limbs or facial features, NO inconsistent character designs, NO blurry or out-of-focus elements, NO sketch-like aesthetics (unless intentional style choice), NO TEXT, NO SPEECH BUBBLES, NO SOUND EFFECTS, NO CAPTIONS, NO watermarks, NO VARIATION IN PANEL SIZE OR SHAPE.
"""'''

        print(f"Generated complete prompt with {num_scenes} panels")

        return complete_prompt

    except Exception as e:
        print(f"Error generating complete prompt: {e}")
        return None
models/content/log.txt ADDED
File without changes
models/image_generation.py ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import base64
3
+ import os
4
+ from PIL import Image
5
+ import config
6
+ from openai import OpenAI
7
+ import warnings
8
+ import time
9
+ from google.generativeai import GenerativeModel
10
+ from datetime import datetime
11
+ warnings.filterwarnings("ignore", message="IMAGE_SAFETY is not a valid FinishReason")
12
+
13
# Module-level record of the most recent generation. The image functions in
# this module overwrite the first two on every call.
global_image_data_url = None  # last generated image as a base64 PNG data URL
global_image_prompt = None  # last prompt used (after any truncation)
# NOTE(review): nothing in this part of the module assigns this name — confirm it is still used.
global_image_description = None
16
+
17
def log_execution(func):
    """
    Decorator that records the start time, end time and duration of each call.

    One line per completed call is appended to ``content/logs.txt`` and also
    printed. Calls that raise are not logged; the exception propagates.

    Args:
        func: The callable to wrap.

    Returns:
        The wrapped callable; it returns whatever ``func`` returns and keeps
        ``func``'s name and docstring.
    """
    import functools

    log_path = 'content/logs.txt'

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        start_str = datetime.fromtimestamp(start_time).strftime('%Y-%m-%d %H:%M:%S')

        result = func(*args, **kwargs)

        end_time = time.time()
        end_str = datetime.fromtimestamp(end_time).strftime('%Y-%m-%d %H:%M:%S')
        duration = end_time - start_time
        record = f"{func.__name__}, start: {start_str}, end: {end_str}, duration: {duration:.4f}s"

        # Logging is diagnostic only: a missing directory or an unwritable
        # file must not discard the result of a call that already succeeded.
        try:
            os.makedirs(os.path.dirname(log_path), exist_ok=True)
            with open(log_path, 'a', encoding='utf-8') as f:
                f.write(record + "\n")
        except OSError as log_error:
            print(f"Warning: could not write execution log to {log_path}: {log_error}")

        # Also print to see output immediately
        print(record)

        return result
    return wrapper
37
+
38
+ @log_execution
39
def generate_image_fn_deprecated(selected_prompt, model="gpt-image-1", output_path="models/benchmark"):
    """
    Generate an image from the prompt via the OpenAI API using gpt-image-1.
    Convert the image to a data URL and optionally save it to a file.

    Side effects: updates the module globals ``global_image_prompt`` and
    ``global_image_data_url``.

    Args:
        selected_prompt (str): The prompt to generate the image from. Prompts
            longer than 32000 characters are shortened with
            ``smart_truncate_prompt``.
        model (str): Should be "gpt-image-1". Parameter kept for compatibility;
            the value is ignored.
        output_path (str, optional): If truthy, the raw image bytes are
            written to this path. Defaults to "models/benchmark".

    Returns:
        PIL.Image.Image or None: The generated image as a PIL Image object, or None on error.
    """
    global global_image_data_url, global_image_prompt

    MAX_PROMPT_LENGTH = 32000
    if len(selected_prompt) > MAX_PROMPT_LENGTH:
        selected_prompt = smart_truncate_prompt(selected_prompt, MAX_PROMPT_LENGTH)
        print(f"Warning: Prompt was smartly truncated to {len(selected_prompt)} characters while preserving critical details")

    global_image_prompt = selected_prompt

    # The request parameters below are specific to gpt-image-1, so the
    # caller-supplied model name is deliberately overridden.
    model = "gpt-image-1"

    try:
        client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", config.OPENAI_API_KEY))

        api_params = {
            "model": model,
            "prompt": selected_prompt,
            "size": "1024x1536",
            "quality": "high",
            "moderation": "low"
        }

        result = client.images.generate(**api_params)

        # gpt-image-1 returns the picture as base64 in ``b64_json``. The
        # original code used ``image_base64`` without ever assigning it.
        image_base64 = result.data[0].b64_json
        image_bytes = base64.b64decode(image_base64)

        image = Image.open(io.BytesIO(image_bytes))

        if output_path:
            try:
                # A bare filename has no directory part; makedirs('') would fail.
                directory = os.path.dirname(output_path)
                if directory:
                    os.makedirs(directory, exist_ok=True)
                with open(output_path, "wb") as f:
                    f.write(image_bytes)
                print(f"Successfully saved image to {output_path}")
            except Exception as e:
                print(f"Error saving image to {output_path}: {str(e)}")

        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_bytes = buffered.getvalue()
        img_b64 = base64.b64encode(img_bytes).decode("utf-8")
        global_image_data_url = f"data:image/png;base64,{img_b64}"

        print(f"Successfully generated image with prompt: {selected_prompt[:50]}...")
        return image
    except Exception as e:
        print(f"Error generating image: {str(e)}")
        return None
102
+ @log_execution
103
def generate_image_fn(selected_prompt, model="gemini-2.5-flash-image-preview", output_path="models/benchmark"):
    """
    Generate an image from the prompt with a Gemini model (google.generativeai).
    Convert the image to a data URL and optionally save it to a file.

    Side effects: updates the module globals ``global_image_prompt`` and
    ``global_image_data_url``.

    Args:
        selected_prompt (str): The prompt to generate the image from. Prompts
            longer than 32000 characters are shortened with
            ``smart_truncate_prompt``.
        model (str): The Gemini model to use. Defaults to "gemini-2.5-flash-image-preview".
        output_path (str, optional): If truthy, saves the image to this path;
            ".png" is appended when the path has no .png/.jpg/.jpeg extension.
            Defaults to "models/benchmark".

    Returns:
        PIL.Image.Image or None: The generated image as a PIL Image object, or None on error.
    """
    global global_image_data_url, global_image_prompt

    MAX_PROMPT_LENGTH = 32000
    if len(selected_prompt) > MAX_PROMPT_LENGTH:
        selected_prompt = smart_truncate_prompt(selected_prompt, MAX_PROMPT_LENGTH)
        print(f"Warning: Prompt was smartly truncated to {len(selected_prompt)} characters while preserving critical details")

    global_image_prompt = selected_prompt

    try:
        # Initialize the Gemini model
        gemini_model = GenerativeModel(model)

        # Generate content with the prompt
        response = gemini_model.generate_content([selected_prompt])

        # With no candidates there is nothing to index into.
        if not response.candidates:
            print("No image data found in response")
            return None

        # Extract the generated image from the response
        image = None
        has_text_response = False

        for part in response.candidates[0].content.parts:
            # Check for text responses (ignore these)
            if hasattr(part, 'text') and part.text:
                has_text_response = True
                print(f"Ignoring text response from API: {part.text[:100]}...")
                continue

            # Look for image data
            if hasattr(part, 'inline_data') and part.inline_data is not None:
                image_bytes = part.inline_data.data

                # Verify we have valid data
                if not image_bytes:
                    print("Warning: inline_data.data is empty, skipping...")
                    continue

                # Decode into a temporary name so that a picture which opens
                # but fails to load is never mistaken for a valid result.
                try:
                    candidate_image = Image.open(io.BytesIO(image_bytes))
                    candidate_image.load()  # Force load to verify it's valid
                except Exception as img_error:
                    print(f"Invalid image data received, skipping: {img_error}")
                    continue

                image = candidate_image
                print(f"Successfully loaded image: {len(image_bytes)} bytes")
                break

        # If we only got text and no image, return None
        if image is None:
            if has_text_response:
                print("API returned text instead of image - skipping this response")
            else:
                print("No image data found in response")
            return None

        # Save image to file if output_path is provided
        if output_path:
            try:
                # Ensure output_path has an image extension
                if not output_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                    output_path = f"{output_path}.png"

                # A bare filename has no directory part; makedirs('') would fail.
                directory = os.path.dirname(output_path)
                if directory:
                    os.makedirs(directory, exist_ok=True)

                image.save(output_path)
                print(f"Successfully saved image to {output_path}")
            except Exception as e:
                print(f"Error saving image to {output_path}: {str(e)}")

        # Create data URL for the image
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_bytes = buffered.getvalue()
        img_b64 = base64.b64encode(img_bytes).decode("utf-8")
        global_image_data_url = f"data:image/png;base64,{img_b64}"

        print(f"Successfully generated image with prompt: {selected_prompt[:50]}...")
        return image

    except Exception as e:
        print(f"Error generating image: {str(e)}")
        import traceback
        traceback.print_exc()
        return None
206
+
207
+
208
+ @log_execution
209
def smart_truncate_prompt(prompt, max_length):
    """
    Shorten a " || "-separated prompt to at most ``max_length`` characters.

    Sections are kept in their original order. A section containing one of
    the first eight critical markers is always kept; other sections are kept
    while they fit (200 characters are held in reserve). The first section
    that does not fit is cut short and ends the scan, provided less than 70%
    of the budget has been used. A closing "FINAL MANDATE" sentence is
    appended when there is room for it.

    Args:
        prompt: The full prompt text.
        max_length: Maximum number of characters allowed in the result.

    Returns:
        str: ``prompt`` unchanged when it already fits, otherwise the
        shortened prompt, never longer than ``max_length``.
    """
    if len(prompt) <= max_length:
        return prompt

    critical_sections = [
        "CRITICAL LAYOUT:",
        "🎭 CRITICAL CHARACTER CONSISTENCY PROTOCOL:",
        "CHARACTER 1",
        "CHARACTER 2",
        "CHARACTER 3",
        "STORY CONTENT:",
        "🏗️ ENVIRONMENTAL CONSISTENCY PROTOCOL:",
        "🎨 COMIC BOOK STYLE MASTERY:",
        "🎨 AUTHENTIC MANGA STYLE:",
        "🎨 PHOTOREALISTIC EXCELLENCE:",
        "🎨 CINEMATIC VISUAL MASTERY:",
        "🎨 HIGH-QUALITY ILLUSTRATION:",
        "📐 PANEL COMPOSITION MASTERY:",
        "🔍 DETAIL PRESERVATION PROTOCOL:",
        "⚡ ADVANCED QUALITY REQUIREMENTS:"
    ]
    # NOTE(review): only the first eight markers force a section to be kept;
    # presumably the remaining style markers are optional — confirm.
    forced_markers = critical_sections[:8]

    preserved_sections = []
    preserved_length = 0

    for section in prompt.split(" || "):
        section_trimmed = section.strip()
        if not section_trimmed:
            continue

        is_critical = any(marker in section_trimmed for marker in forced_markers)

        # The +4 accounts for the " || " separator re-added by the join below.
        if is_critical or (preserved_length + len(section_trimmed) + 4 < max_length - 200):
            preserved_sections.append(section_trimmed)
            preserved_length += len(section_trimmed) + 4
        elif preserved_length < max_length * 0.7:
            available_space = max_length - preserved_length - 200
            if available_space > 100:
                preserved_sections.append(section_trimmed[:available_space - 20] + "...")
            break

    preserved_prompt = " || ".join(preserved_sections)

    final_mandate = " || FINAL MANDATE: Create a masterpiece with perfect character consistency and narrative clarity"
    if len(preserved_prompt) + len(final_mandate) <= max_length:
        preserved_prompt += final_mandate

    # Critical sections are kept regardless of size, so the text can still be
    # too long here; enforce the limit the caller asked for.
    if len(preserved_prompt) > max_length:
        preserved_prompt = preserved_prompt[:max(max_length, 0)]

    return preserved_prompt
264
+
models/story_generator.py ADDED
@@ -0,0 +1,562 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google.generativeai import GenerativeModel
2
+ import json
3
+ import re
4
+ import os
5
+ import datetime
6
+ import openai
7
+ import config
8
+ import time
9
+ from datetime import datetime
10
+
11
+
12
def log_execution(func):
    """
    Decorator that records the start time, end time and duration of each call.

    One line per completed call is appended to ``content/logs.txt`` and also
    printed. Calls that raise are not logged; the exception propagates.

    Args:
        func: The callable to wrap.

    Returns:
        The wrapped callable; it returns whatever ``func`` returns and keeps
        ``func``'s name and docstring.
    """
    import functools

    log_path = 'content/logs.txt'

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        start_str = datetime.fromtimestamp(start_time).strftime('%Y-%m-%d %H:%M:%S')

        result = func(*args, **kwargs)

        end_time = time.time()
        end_str = datetime.fromtimestamp(end_time).strftime('%Y-%m-%d %H:%M:%S')
        duration = end_time - start_time
        record = f"{func.__name__}, start: {start_str}, end: {end_str}, duration: {duration:.4f}s"

        # Logging is diagnostic only: a missing directory or an unwritable
        # file must not discard the result of a call that already succeeded.
        try:
            os.makedirs(os.path.dirname(log_path), exist_ok=True)
            with open(log_path, 'a', encoding='utf-8') as f:
                f.write(record + "\n")
        except OSError as log_error:
            print(f"Warning: could not write execution log to {log_path}: {log_error}")

        # Also print to see output immediately
        print(record)

        return result
    return wrapper
32
+
33
+ class StoryGenerator:
34
+ """
35
+ Direct story generator that creates comic panel style stories from user input.
36
+ """
37
+
38
def __init__(self):
    """Create the generator and bind the Gemini model used by the generation methods."""
    model_name = 'gemini-2.5-flash'
    self.model = GenerativeModel(model_name)
40
+ @log_execution
41
def log_prompt(self, prompt, log_file="story_prompt_logs.jsonl"):
    """
    Append the prompt to a JSON-lines file for debugging and improvement purposes.

    Args:
        prompt: The prompt text to record.
        log_file: Path of the JSON-lines file to append to.

    Each line written is a JSON object with "timestamp" (ISO 8601, local
    time) and "prompt".
    """
    # Imported locally under an alias because this module binds the name
    # ``datetime`` twice (``import datetime`` and then ``from datetime import
    # datetime``). The later binding is the class, so the previous
    # ``datetime.datetime.now()`` raised AttributeError on every call.
    from datetime import datetime as _datetime

    log_entry = {
        "timestamp": _datetime.now().isoformat(),
        "prompt": prompt
    }
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")
49
+ @log_execution
50
def enhance_user_story(self, user_description, max_retries=3, current_retry=0):
    """
    Ask the model for a richer, more visual version of the user's story.

    A model failure falls back to ``_simplified_enhancement`` while retries
    remain. Any other failure is retried through a recursive call after a
    growing pause. When nothing works the original text is returned.

    Args:
        user_description: The user's original story idea or prompt
        max_retries: Maximum number of retry attempts (default: 3)
        current_retry: Current retry attempt number (default: 0)

    Returns:
        enhanced_story: The enhanced story, a minimal or simplified
        enhancement, or ``user_description`` itself as the last resort
    """
    attempt_number = current_retry + 1
    print(f"[StoryGenerator] Enhancing user story (attempt {attempt_number}/{max_retries}): {user_description[:100]}...")

    if current_retry >= max_retries:
        print("[StoryGenerator] Max retries reached, returning original description")
        return user_description

    retries_remain = current_retry < max_retries - 1

    try:
        enhancement_prompt = self._create_detail_focused_enhancement_prompt(user_description)
        self.log_prompt(enhancement_prompt)

        try:
            reply = self.model.generate_content(enhancement_prompt)
            candidate = reply.text.strip()

            if not self._validate_enhancement_quality(candidate, user_description):
                print("[StoryGenerator] Enhancement quality insufficient, using original with minimal enhancement")
                return self._create_minimal_enhancement(user_description)

            print("[StoryGenerator] Story successfully enhanced with detail preservation")
            return candidate

        except Exception as gemini_error:
            print(f"[StoryGenerator] Gemini API error: {gemini_error}")
            if not retries_remain:
                # Last attempt: hand the error to the outer handler.
                raise gemini_error
            print("[StoryGenerator] Retrying with simplified approach...")
            return self._simplified_enhancement(user_description)

    except Exception as e:
        print(f"[StoryGenerator] Enhancement error: {e}")
        if retries_remain:
            # Pause a little longer before each successive retry.
            time.sleep(1 * (current_retry + 1))
            return self.enhance_user_story(user_description, max_retries, current_retry + 1)

        print("[StoryGenerator] All enhancement attempts failed, returning original")
        return user_description
102
+ @log_execution
103
+
104
+ def _create_detail_focused_enhancement_prompt(self, user_description):
105
+ """Create a concise enhancement prompt that adds coherence and enough detail for the required number of scenes."""
106
+ return f"""
107
+ You are an expert visual storytelling assistant. Enhance the user's story concept to create a rich visual narrative.
108
+
109
+ ORIGINAL STORY: "{user_description}"
110
+
111
+ ENHANCEMENT GOALS:
112
+ • Define key character appearances (visual features, clothing).
113
+ • Establish a clear setting and atmosphere.
114
+ • Outline a logical scene progression that can be broken down into multiple action-focused panels.
115
+ • Ensure visual consistency for characters and locations.
116
+ • Descriptions should be concise yet vivid, focusing on elements crucial for an action-oriented digital comic.
117
+
118
+ OUTPUT: Enhanced story description (2-3 paragraphs maximum) that provides a strong foundation for a multi-panel, action-focused visual story. Ensure the tone is suitable for a modern digital comic.
119
+ """
120
+
121
+ def _validate_enhancement_quality(self, enhanced_story, original_story):
122
+ """Validate that the enhancement adds coherence and appropriate detail."""
123
+ if not enhanced_story or len(enhanced_story) < 50:
124
+ return False
125
+
126
+ enhanced_words = len(enhanced_story.split())
127
+ original_words = len(original_story.split())
128
+
129
+ if enhanced_words < original_words or enhanced_words > original_words * 5:
130
+ return False
131
+
132
+ story_elements = ['character', 'scene', 'story', 'visual', 'setting', 'action']
133
+ has_story_elements = sum(1 for element in story_elements if element.lower() in enhanced_story.lower())
134
+
135
+ if has_story_elements < 2:
136
+ return False
137
+
138
+ similarity_threshold = 0.8
139
+ original_lower = original_story.lower()
140
+ enhanced_lower = enhanced_story.lower()
141
+
142
+ common_words = set(original_lower.split()) & set(enhanced_lower.split())
143
+ original_unique = len(set(original_lower.split()))
144
+
145
+ if original_unique > 0:
146
+ similarity = len(common_words) / original_unique
147
+ if similarity > similarity_threshold and enhanced_words < original_words * 1.5:
148
+ return False
149
+
150
+ return True
151
+ @log_execution
152
+
153
+ def _create_minimal_enhancement(self, user_description):
154
+ """Create minimal enhancement that preserves original while adding basic coherence for the required number of scenes."""
155
+
156
+ enhanced = f"""
157
+ Enhanced Story: {user_description}
158
+
159
+ Visual Coherence Elements:
160
+ - Main character with consistent appearance throughout all scenes
161
+ - Clear setting that remains visually consistent
162
+ - Logical progression suitable for the required number of sequential panels
163
+ - Simple but complete story arc with beginning, middle, and end
164
+
165
+ This story will unfold across the required number of scenes showing the character's journey with visual consistency and narrative coherence.
166
+ """
167
+
168
+ return enhanced.strip()
169
+ @log_execution
170
+
171
+ def _simplified_enhancement(self, user_description):
172
+ """
173
+ Simplified enhancement fallback when the main enhancement fails.
174
+
175
+ Args:
176
+ user_description: Original user story description
177
+
178
+ Returns:
179
+ str: Simplified enhanced description focused on coherence for the required number of scenes.
180
+ """
181
+ try:
182
+ simplified_prompt = f"""
183
+ Briefly enhance this story for an action-focused visual narrative. Keep it concise and coherent.
184
+
185
+ Original: "{user_description}"
186
+
187
+ Focus on:
188
+ - Core character appearance notes.
189
+ - Main setting description.
190
+ - Basic story flow suitable for action scenes.
191
+ - Visual consistency hints.
192
+
193
+ Enhanced story (1-2 sentences):
194
+ """
195
+
196
+ response = self.model.generate_content(simplified_prompt)
197
+ enhanced_story = response.text.strip()
198
+
199
+ if enhanced_story and len(enhanced_story) > 20:
200
+ print(f"[StoryGenerator] Used simplified enhancement successfully")
201
+ return enhanced_story
202
+ else:
203
+ return user_description
204
+
205
+ except Exception as e:
206
+ print(f"[StoryGenerator] Simplified enhancement also failed: {e}")
207
+ return user_description
208
+ @log_execution
209
def generate_story(self, user_description, panels_per_page=9, num_pages=1):
    """
    Generate a comic panel style story directly from user input.

    The description is first passed through ``enhance_user_story``, then the
    model is asked for a JSON story. If prompt logging, the model call, JSON
    extraction or validation fails, the result of
    ``_create_fallback_story`` is returned instead of raising.

    Args:
        user_description: The user's story idea or prompt
        panels_per_page: Number of panels per comic page (default is 9).
            The value is currently overridden with 9 inside the method.
        num_pages: Number of pages to generate (default is 1)

    Returns:
        story_data: Structured data for the story with panels organized by pages
    """
    enhanced_story = self.enhance_user_story(user_description)

    # NOTE(review): the argument is overridden unconditionally; presumably the
    # rest of the pipeline assumes 9-panel pages — confirm before honouring it.
    panels_per_page = 9
    total_panels = panels_per_page * num_pages
    print(f"[StoryGenerator] Generating comic story with {num_pages} pages, {panels_per_page} panels per page ({total_panels} total panels) from enhanced story...")

    query = f"""
You are a world-class comic book writer and visual storyteller. Your task is to create a SINGLE CONTINUOUS STORY.
The story will span exactly {num_pages} pages. Each page must contain exactly {panels_per_page} sequential action-focused panels (total of {total_panels} panels).
The final output must be a modern, digital-style comic with high quality and resolution, suitable for a 1024x1536 image size. **All {panels_per_page} panels must fit entirely within the page with clear gutters—no panel content may be cropped or cut off.**
Avoid any deformities, missing limbs, distorted or missing facial features, blurry visuals, or sketch styles. Ensure all panels are exactly the same size.

STORY CONCEPT:
"{enhanced_story}"

KEY REQUIREMENTS:
1. **Panel Count & Style**: Strictly {panels_per_page} action scenes per page. No filler. All scenes must be dynamic and contribute to the story's momentum.
2. **Visual Quality**: Generate ultra-high quality, modern digital comic art. Ensure no visual defects (deformities, missing limbs, distorted faces). All panels must be suitable for a combined 1024x1536 page layout.
3. **Continuity**:
* Story must flow seamlessly page-to-page and panel-to-panel.
* Maintain consistent character appearances (detailed in a character sheet you will generate) and settings (detailed in a setting guide you will generate).
* Logical plot progression: actions have clear causes and effects.
* Show passage of time clearly (e.g., "later," "next day").
4. **Narrative Structure**:
* Complete arc: beginning, rising action, climax, resolution.
* Meaningful character development and motivations.
5. **Visual Storytelling Focus**:
* Descriptions should emphasize actions, expressions, and settings to make the story understandable through visuals alone.
* Each panel description needs: camera angle, character positions, expressions, environment details, color palette, and mood.
* Focus on clear, dynamic action sequences.

JSON OUTPUT STRUCTURE:
{{
"title": "Overall Story Title",
"premise": "Brief story overview, themes, and setting.",
"characters": [
{{
"name": "Character Name",
"visual_description": "DETAILED visual description: height, build, face, hair, clothing. CRITICAL for consistency.",
"traits": ["Key visual trait 1", "Key visual trait 2"],
"background": "Brief backstory.",
"arc": "Character's journey/change."
}}
// ... (add more characters as needed)
],
"settings": [
{{
"name": "Setting Name",
"description": "DETAILED visual description of the location, including key elements for consistency.",
"visual_elements": ["Notable visual element 1", "Notable visual trait 2"],
"mood": "Atmosphere of the location."
}}
// ... (add more settings as needed)
],
"pages": [
{{
"page_number": 1,
"panels": [ // Exactly {panels_per_page} panels
{{
"panel_number": 1,
"title": "Action-Oriented Panel Title",
"visual_description": "ACTION-FOCUSED, extremely detailed description: character actions, expressions, positions, environment, lighting, colors, camera angle. Ensure it fits 1024x1536 page context. NO FILLER.",
"text": "Dialogue/narration (context only, not for image)",
"purpose": "How this ACTION panel drives the story.",
"symbolism": "Any visual symbols."
}}
// ... (repeat for all {panels_per_page} panels on page 1)
]
}}
// ... (repeat for all {num_pages} pages)
]
}}

REMEMBER:
- Focus on ACTION scenes. Eliminate all filler.
- Visuals are paramount. Descriptions must be rich and allow for image generation that tells the story without text.
- Adhere strictly to {panels_per_page} panels per page.
- Ensure top-tier digital art quality with no visual errors.
- All panels on a page contribute to a single 1024x1536 image.
"""

    # Prompt logging is a debugging aid; a logging failure must not abort generation.
    try:
        self.log_prompt(query)
    except Exception as log_error:
        print(f"[StoryGenerator] Could not log prompt: {log_error}")

    try:
        # The model call sits inside the try so that API failures also lead
        # to the fallback story instead of propagating to the caller.
        response = self.model.generate_content(query)

        # Take everything from the first "{" to the last "}" in the reply.
        json_match = re.search(r'\{[\s\S]*\}', response.text)
        if not json_match:
            print("[StoryGenerator] No valid JSON found in response.")
            raise ValueError("No valid JSON found in response")

        json_str = self._fix_json(json_match.group(0))
        story_data = json.loads(json_str)
        story_data = self._validate_and_fix_structure(story_data, panels_per_page, num_pages)

        print(f"[StoryGenerator] Successfully generated story: {story_data.get('title', 'Untitled')}")
        return story_data
    except Exception as e:
        print(f"Error in StoryGenerator: {e}")
        return self._create_fallback_story(user_description, panels_per_page, num_pages)
324
+ @log_execution
325
+ def _validate_and_fix_structure(self, story_data, panels_per_page, num_pages):
326
+ """Validate and fix the story structure if needed."""
327
+ if "title" not in story_data:
328
+ story_data["title"] = "Untitled Comic"
329
+
330
+ if "premise" not in story_data:
331
+ story_data["premise"] = "A visual story."
332
+
333
+ if "characters" not in story_data:
334
+ story_data["characters"] = []
335
+
336
+ for character in story_data.get("characters", []):
337
+ if "visual_description" not in character:
338
+ character["visual_description"] = "A character in the story."
339
+ if "traits" not in character:
340
+ character["traits"] = []
341
+ if "background" not in character:
342
+ character["background"] = "Unknown background."
343
+ if "arc" not in character:
344
+ character["arc"] = "Experiences events throughout the story."
345
+
346
+ if "settings" not in story_data:
347
+ story_data["settings"] = []
348
+
349
+ for setting in story_data.get("settings", []):
350
+ if "description" not in setting:
351
+ setting["description"] = "A location in the story."
352
+ if "visual_elements" not in setting:
353
+ setting["visual_elements"] = []
354
+ if "mood" not in setting:
355
+ setting["mood"] = "Neutral."
356
+
357
+ if "pages" not in story_data:
358
+ if "panels" in story_data:
359
+ panels = story_data.pop("panels")
360
+ story_data["pages"] = []
361
+
362
+ for i in range(num_pages):
363
+ start_idx = i * panels_per_page
364
+ end_idx = start_idx + panels_per_page
365
+ page_panels = panels[start_idx:end_idx] if start_idx < len(panels) else []
366
+
367
+ while len(page_panels) < panels_per_page:
368
+ panel_num = len(page_panels) + 1 + (i * panels_per_page)
369
+ page_panels.append({
370
+ "panel_number": panel_num,
371
+ "title": f"Panel {panel_num}",
372
+ "visual_description": "A placeholder panel",
373
+ "text": "",
374
+ "purpose": "Continuation of the story",
375
+ "symbolism": ""
376
+ })
377
+
378
+ story_data["pages"].append({
379
+ "page_number": i + 1,
380
+ "panels": page_panels
381
+ })
382
+ else:
383
+ story_data["pages"] = []
384
+ for i in range(num_pages):
385
+ page_panels = []
386
+ for j in range(panels_per_page):
387
+ panel_num = j + 1 + (i * panels_per_page)
388
+ page_panels.append({
389
+ "panel_number": panel_num,
390
+ "title": f"Panel {panel_num}",
391
+ "visual_description": "A placeholder panel",
392
+ "text": "",
393
+ "purpose": "Continuation of the story",
394
+ "symbolism": ""
395
+ })
396
+
397
+ story_data["pages"].append({
398
+ "page_number": i + 1,
399
+ "panels": page_panels
400
+ })
401
+
402
+ for i in range(len(story_data["pages"]) - 1):
403
+ current_page = story_data["pages"][i]
404
+ next_page = story_data["pages"][i + 1]
405
+
406
+ if "panels" in current_page and "panels" in next_page and current_page["panels"] and next_page["panels"]:
407
+ last_panel = current_page["panels"][-1]
408
+ first_panel = next_page["panels"][0]
409
+
410
+ last_panel_desc = last_panel.get("visual_description", "")
411
+ last_panel_action = last_panel.get("text", "")
412
+
413
+ continuity_note = f"Continues directly from page {current_page.get('page_number', i+1)}, panel {last_panel.get('panel_number', len(current_page['panels']))}: {last_panel_desc[:100]}..."
414
+
415
+ first_panel["continuity_note"] = continuity_note
416
+
417
+ if "visual_description" in first_panel:
418
+ if not first_panel["visual_description"].startswith("CONTINUING DIRECTLY"):
419
+ first_panel["visual_description"] = "CONTINUING DIRECTLY from previous page: " + first_panel["visual_description"]
420
+
421
+ for i, page in enumerate(story_data["pages"]):
422
+ if "page_number" not in page:
423
+ page["page_number"] = i + 1
424
+
425
+ if "panels" not in page:
426
+ page["panels"] = []
427
+
428
+ if len(page["panels"]) > panels_per_page:
429
+ page["panels"] = page["panels"][:panels_per_page]
430
+
431
+ while len(page["panels"]) < panels_per_page:
432
+ panel_num = len(page["panels"]) + 1 + (i * panels_per_page)
433
+
434
+ context_desc = ""
435
+ if page["panels"]:
436
+ prev_panel = page["panels"][-1]
437
+ prev_desc = prev_panel.get("visual_description", "")
438
+ context_desc = f"Continuing from previous panel: {prev_desc[:50]}... "
439
+
440
+ page["panels"].append({
441
+ "panel_number": panel_num,
442
+ "title": f"Panel {panel_num}",
443
+ "visual_description": f"{context_desc}A scene related to the story, moving the narrative forward.",
444
+ "text": "",
445
+ "purpose": "Continuation of the story progression",
446
+ "symbolism": ""
447
+ })
448
+
449
+ for j, panel in enumerate(page["panels"]):
450
+ panel_num = j + 1 + (i * panels_per_page)
451
+
452
+ if "panel_number" not in panel:
453
+ panel["panel_number"] = panel_num
454
+
455
+ if "title" not in panel or not panel["title"]:
456
+ panel["title"] = f"Panel {panel_num}"
457
+
458
+ if "visual_description" not in panel or not panel["visual_description"]:
459
+ context_desc = ""
460
+ if j > 0:
461
+ prev_panel = page["panels"][j-1]
462
+ prev_desc = prev_panel.get("visual_description", "")
463
+ context_desc = f"Following from previous panel: {prev_desc[:50]}... "
464
+
465
+ panel["visual_description"] = f"{context_desc}A scene that advances the story narrative."
466
+
467
+ if "text" not in panel:
468
+ panel["text"] = ""
469
+
470
+ if "purpose" not in panel:
471
+ panel["purpose"] = "Advancing the story progression"
472
+
473
+ if "symbolism" not in panel:
474
+ panel["symbolism"] = ""
475
+
476
+ while len(story_data["pages"]) < num_pages:
477
+ page_num = len(story_data["pages"]) + 1
478
+ page_panels = []
479
+
480
+ context_from_prev_page = ""
481
+ if story_data["pages"]:
482
+ prev_page = story_data["pages"][-1]
483
+ if prev_page.get("panels"):
484
+ last_panel = prev_page["panels"][-1]
485
+ last_desc = last_panel.get("visual_description", "")
486
+ context_from_prev_page = f"Continuing directly from the previous page: {last_desc[:100]}... "
487
+
488
+ for j in range(panels_per_page):
489
+ panel_num = j + 1 + ((page_num - 1) * panels_per_page)
490
+
491
+ panel_desc = "A scene that advances the story narrative."
492
+ if j == 0 and context_from_prev_page:
493
+ panel_desc = context_from_prev_page + panel_desc
494
+ elif j > 0 and page_panels:
495
+ prev_panel = page_panels[j-1]
496
+ prev_desc = prev_panel.get("visual_description", "")
497
+ panel_desc = f"Following from previous panel: {prev_desc[:50]}... " + panel_desc
498
+
499
+ page_panels.append({
500
+ "panel_number": panel_num,
501
+ "title": f"Panel {panel_num}",
502
+ "visual_description": panel_desc,
503
+ "text": "",
504
+ "purpose": "Advancing the story progression",
505
+ "symbolism": ""
506
+ })
507
+
508
+ story_data["pages"].append({
509
+ "page_number": page_num,
510
+ "panels": page_panels
511
+ })
512
+
513
+ return story_data
514
+ @log_execution
515
+ def _create_fallback_story(self, user_description, panels_per_page, num_pages):
516
+ """Create a basic fallback story structure if generation fails."""
517
+ pages = []
518
+
519
+ for i in range(num_pages):
520
+ page_panels = []
521
+ for j in range(panels_per_page):
522
+ panel_num = j + 1 + (i * panels_per_page)
523
+ page_panels.append({
524
+ "panel_number": panel_num,
525
+ "title": f"Panel {panel_num}",
526
+ "visual_description": f"A scene related to {user_description[:30]}...",
527
+ "text": f"Text for panel {panel_num}",
528
+ "purpose": f"Part of the story progression",
529
+ "symbolism": ""
530
+ })
531
+
532
+ pages.append({
533
+ "page_number": i + 1,
534
+ "panels": page_panels
535
+ })
536
+
537
+ return {
538
+ "title": f"A Story About {user_description[:30]}...",
539
+ "premise": f"A comic story about {user_description[:50]}...",
540
+ "pages": pages
541
+ }
542
+ @log_execution
543
+
544
+ def _fix_json(self, json_str):
545
+ """Attempt to fix common JSON issues from LLM responses."""
546
+ json_str = re.sub(r'//.*?', '', json_str)
547
+ json_str = re.sub(r'/\*[\s\S]*?\*/', '', json_str, flags=re.DOTALL)
548
+
549
+ json_str = re.sub(r'([{, ]\s*)([a-zA-Z_][a-zA-Z0-9_]*)(\s*:)', r'\1"\2"\3', json_str)
550
+
551
+ json_str = re.sub(r',(\s*[}\\]])', r'\1', json_str)
552
+ return json_str
553
+ @log_execution
554
+
555
def generate_panel_image_prompt(self, panel_data, style=None):
    """Build the image-generation prompt for a single panel.

    Args:
        panel_data: Panel dict; ``visual_description`` is required and a
            non-empty ``text`` entry is appended as dialogue.
        style: Optional style name inserted as " in <style> style".

    Returns:
        The prompt string (it ends with a trailing space).
    """
    pieces = ["Create a comic book panel"]
    if style:
        pieces.append(f" in {style} style")
    pieces.append(f" showing: {panel_data['visual_description']}. ")

    dialogue = panel_data.get('text')
    if dialogue:
        pieces.append(f"The panel includes the dialogue: '{dialogue}'. ")

    return "".join(pieces)
new_image_splitting.py ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import os
4
+ from pathlib import Path
5
+ from PIL import Image
6
+
7
class AutomatedCollageSplitter:
    """Split a multi-panel comic page into one image file per panel.

    Panels are located with adaptive thresholding and external-contour
    detection, filtered by area, aspect ratio and solidity, de-duplicated
    with non-maximum suppression, then cropped, optionally upscaled and
    saved as PNG files.
    """

    def __init__(self):
        # Contour filters, expressed relative to the whole page.
        self.min_segment_area_ratio = 0.01
        self.max_segment_area_ratio = 0.95
        self.min_aspect_ratio = 0.2
        self.max_aspect_ratio = 5.0
        self.min_solidity = 0.9
        # Boxes whose IoU with an already-kept box reaches this are dropped.
        self.nms_threshold = 0.3

        self.upscaler = None
        self._initialize_upscaler()

    def _initialize_upscaler(self):
        """Load the FSRCNN x4 super-resolution model, if it is available.

        Any failure (missing weights file, missing cv2 module support, ...)
        is reported on stdout and leaves ``self.upscaler`` as ``None``.
        """
        if self.upscaler is not None:
            return
        try:
            model_name = 'fsrcnn'
            model_path = os.path.join('models', 'weights', 'FSRCNN-small_x4.pb')
            scale = 4
            if not os.path.isfile(model_path):
                raise FileNotFoundError(f"Model file not found at {model_path}")
            self.upscaler = cv2.dnn_superres.DnnSuperResImpl_create()
            self.upscaler.readModel(model_path)
            self.upscaler.setModel(model_name, scale)
            print(f"✓ OpenCV DNN upscaler model loaded: {model_name} with scale x{scale}")
        except Exception as e:
            print(f"⚠ Could not initialize OpenCV DNN upscaler: {e}. Proceeding without upscaling.")
            self.upscaler = None

    def _upscale_image(self, image_array):
        """Return the upscaled image, or the input unchanged on any failure."""
        if not self.upscaler:
            print("❌ Upscaling skipped because the upscaler is not available.")
            return image_array
        try:
            return self.upscaler.upsample(image_array)
        except Exception as e:
            print(f"❌ Error during image upscaling: {e}")
            return image_array

    def preprocess_for_contours(self, image):
        """Pre-process the image to make panel borders stand out for contour detection."""
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        binary = cv2.adaptiveThreshold(
            gray, 255,
            cv2.ADAPTIVE_THRESH_MEAN_C,
            cv2.THRESH_BINARY_INV,
            21,
            8
        )

        # Closing bridges small gaps so each panel becomes one blob.
        kernel = np.ones((5, 5), np.uint8)
        closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=2)

        return closed

    def find_panel_contours(self, processed_image, original_shape):
        """Find and filter contours that are likely to be comic panels.

        Returns:
            A numpy array with one ``[x1, y1, x2, y2, contour_area]`` row per
            accepted contour (an empty array when nothing qualifies).
        """
        contours, _ = cv2.findContours(processed_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        height, width = original_shape[:2]
        total_area = height * width
        min_area = total_area * self.min_segment_area_ratio
        max_area = total_area * self.max_segment_area_ratio

        potential_panels = []
        for contour in contours:
            area = cv2.contourArea(contour)

            if not (min_area < area < max_area):
                continue

            x, y, w, h = cv2.boundingRect(contour)

            if h == 0:
                continue
            aspect_ratio = w / h
            if not (self.min_aspect_ratio < aspect_ratio < self.max_aspect_ratio):
                continue

            # Solidity = contour area / convex hull area; low values mean a
            # ragged, non-rectangular shape.
            hull = cv2.convexHull(contour)
            hull_area = cv2.contourArea(hull)
            if hull_area == 0:
                continue
            solidity = float(area) / hull_area
            if solidity < self.min_solidity:
                continue

            potential_panels.append([x, y, x + w, y + h, area])

        return np.array(potential_panels)

    def apply_non_maximum_suppression(self, boxes):
        """Apply NMS to merge overlapping bounding boxes.

        Boxes are visited from largest to smallest contour area (column 4);
        each kept box removes every remaining box whose IoU with it is not
        below ``self.nms_threshold``.

        Returns:
            ``[]`` for empty input, otherwise a numpy array of kept rows.
        """
        if len(boxes) == 0:
            return []

        boxes = boxes[boxes[:, 4].argsort()[::-1]]

        picked_boxes = []
        while len(boxes) > 0:
            best_box = boxes[0]
            picked_boxes.append(best_box)

            remaining_boxes = boxes[1:]

            x1 = np.maximum(best_box[0], remaining_boxes[:, 0])
            y1 = np.maximum(best_box[1], remaining_boxes[:, 1])
            x2 = np.minimum(best_box[2], remaining_boxes[:, 2])
            y2 = np.minimum(best_box[3], remaining_boxes[:, 3])

            inter_w = np.maximum(0, x2 - x1)
            inter_h = np.maximum(0, y2 - y1)
            intersection_area = inter_w * inter_h

            best_box_area = (best_box[2] - best_box[0]) * (best_box[3] - best_box[1])
            remaining_boxes_area = (remaining_boxes[:, 2] - remaining_boxes[:, 0]) * (remaining_boxes[:, 3] - remaining_boxes[:, 1])

            union_area = best_box_area + remaining_boxes_area - intersection_area
            iou = intersection_area / union_area

            boxes = remaining_boxes[iou < self.nms_threshold]

        return np.array(picked_boxes)

    def _discard_truncated_bottom_panels(self, panels, img_height):
        """Drop panels that touch the bottom edge and are much shorter than the tallest one."""
        if len(panels) == 0:
            return panels

        max_panel_height = max(box[3] - box[1] for box in panels)
        if max_panel_height <= 0:
            return panels

        bottom_margin = 10
        height_threshold_ratio = 0.8

        kept = []
        for box in panels:
            h = box[3] - box[1]
            is_at_bottom = box[3] >= (img_height - bottom_margin)
            is_too_short = h < (max_panel_height * height_threshold_ratio)

            if is_at_bottom and is_too_short:
                print(f"Skipping potentially incomplete panel at the bottom (h={h} vs max_h={max_panel_height})")
                continue
            kept.append(box)
        return kept

    def split_collage(self, image_path, output_dir=None, debug=False):
        """Main function to automatically split collage using contour detection and NMS.

        Args:
            image_path: Path of the page image to split.
            output_dir: Directory for the segment files; defaults to
                ``<image stem>_segments`` next to the image. Missing parent
                directories are created.
            debug: When true, also write the three debug images.

        Returns:
            A list of ``{"path": ..., "caption": ...}`` dicts, one per saved
            segment, ordered top-to-bottom then left-to-right.

        Raises:
            ValueError: If the image cannot be loaded.
        """
        img = cv2.imread(str(image_path))
        if img is None:
            raise ValueError(f"Could not load image from {image_path}")

        print(f"Processing image: {image_path}")
        print(f"Image dimensions: {img.shape[1]}x{img.shape[0]}")

        processed_image = self.preprocess_for_contours(img)
        print("✓ Preprocessed image for contour detection")

        potential_panels = self.find_panel_contours(processed_image, img.shape)
        print(f"✓ Found {len(potential_panels)} potential panel contours")

        final_panels = self.apply_non_maximum_suppression(potential_panels)
        print(f"✓ Refined to {len(final_panels)} panels after Non-Maximum Suppression")

        final_panels = self._discard_truncated_bottom_panels(final_panels, img.shape[0])

        if len(final_panels) < 4:
            print("⚠ Contour detection found too few panels. Creating fallback 2x2 grid...")
            h, w = img.shape[:2]
            final_panels = np.array([
                [0, 0, w//2, h//2, 0],
                [w//2, 0, w, h//2, 0],
                [0, h//2, w//2, h, 0],
                [w//2, h//2, w, h, 0]
            ])

        # Reading order: by top edge, then by left edge.
        final_panels = sorted(final_panels, key=lambda b: (b[1], b[0]))

        if output_dir is None:
            output_dir = Path(image_path).parent / f"{Path(image_path).stem}_segments"
        output_dir = Path(output_dir)
        # parents=True so a nested, not-yet-existing output path works too.
        output_dir.mkdir(parents=True, exist_ok=True)

        saved_segments_info = []
        for i, box in enumerate(final_panels):
            x1, y1, x2, y2, _ = map(int, box)

            # Small margin around the detected box, clamped to the image.
            padding = 3
            x1 = max(0, x1 - padding)
            y1 = max(0, y1 - padding)
            x2 = min(img.shape[1], x2 + padding)
            y2 = min(img.shape[0], y2 + padding)

            segment = img[y1:y2, x1:x2]
            original_dims = (segment.shape[1], segment.shape[0])

            upscaled_segment_np = self._upscale_image(segment)

            final_image = Image.fromarray(cv2.cvtColor(upscaled_segment_np, cv2.COLOR_BGR2RGB))

            final_dims = (final_image.width, final_image.height)
            output_path = output_dir / f"segment_{i+1:02d}.png"
            final_image.save(str(output_path))

            caption = (
                f"Panel {i+1}<br>"
                f"Original: {original_dims[0]}x{original_dims[1]}<br>"
                f"Upscaled: {final_dims[0]}x{final_dims[1]}"
            )

            saved_segments_info.append({
                "path": str(output_path),
                "caption": caption
            })
            print(f"  Saved segment {i+1}: {final_dims[0]}x{final_dims[1]} pixels to {output_path}")

        if debug:
            # Reuse the boxes found above instead of running detection again.
            self.create_debug_images(img, processed_image, potential_panels, final_panels, output_dir)

        print(f"\n🎉 Successfully split collage into {len(saved_segments_info)} segments!")
        print(f"📁 Segments saved in: {output_dir}")
        return saved_segments_info

    def create_debug_images(self, original, processed, potential_boxes, final_boxes, output_dir):
        """Create debug images showing the processing steps."""
        cv2.imwrite(str(output_dir / "debug_01_binary_closed.png"), processed)

        potential_img = original.copy()
        if len(potential_boxes) > 0:
            for x1, y1, x2, y2, _ in potential_boxes:
                cv2.rectangle(potential_img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 255), 2)
        cv2.imwrite(str(output_dir / "debug_02_potential_boxes.png"), potential_img)

        final_img = original.copy()
        if len(final_boxes) > 0:
            for x1, y1, x2, y2, _ in final_boxes:
                cv2.rectangle(final_img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 3)
        cv2.imwrite(str(output_dir / "debug_03_final_panels.png"), final_img)

        print("🔍 Debug images saved:")
        print("   - debug_01_binary_closed.png (preprocessed)")
        print("   - debug_02_potential_boxes.png (before NMS)")
        print("   - debug_03_final_panels.png (after NMS)")
251
+
252
def main():
    """Example usage: split one collage image with debug output enabled."""
    collage_splitter = AutomatedCollageSplitter()

    image_path = "path/to/your/comic_image.png"

    try:
        if Path(image_path).exists():
            segments = collage_splitter.split_collage(image_path=image_path, debug=True)

            print("\n📊 Processing complete!")
            print(f"Generated {len(segments)} separate images from the collage")
        else:
            print(f"❌ Image file not found: {image_path}")
            print("Please update the image_path variable with the correct path to your collage image.")
    except Exception as e:
        # Top-level boundary of the example script: report and show the trace.
        print(f"❌ Error processing image: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    main()
notes.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Image Processing Pipeline Notes
2
+
3
+ ## Task Tracking
4
+
5
+ | Task | Status |
6
+ |------|--------|
7
+ | Split images correctly and discard half-generated images | ✅ |
8
+ | Increase split image sizes | ✅ |
9
+ | Upscale images and increase resolution | ✅ |
10
+ | Ensure everything is working correctly | ✅ |
11
+ | Review and adjust images | ✅ |
12
+ | Use better Image processing techniques and models | ⬜ |
13
+
14
+ ## Original Task List
15
+ 1. Split the images correctly and discard half-generated images.
16
+ 2. Increase split image sizes.
17
+ 3. Upscale images and increase resolution.
18
+ 4. Ensure everything is working correctly.
19
+
20
+ ## Process Flow
21
+
22
+ ```mermaid
23
+ graph TD
24
+ A[Input Image] --> B[Split Images]
25
+ B --> C{Quality Check}
26
+ C -->|Good Quality| D[Size Increase]
27
+ C -->|Poor Quality| E[Discard]
28
+ D --> F[Upscale]
29
+ F --> G[Resolution Enhancement]
30
+ G --> H[Final Quality Check]
31
+ H -->|Pass| I[Final Image]
32
+ H -->|Fail| J[Review & Adjust]
33
+ J --> B
34
+ ```
35
+
36
+ ## Notes
37
+ - ⬜ = Not Started/In Progress
38
+ - ✅ = Completed
39
+ - Update checkboxes as tasks are completed
40
+ - Follow the process flow for each image processing task
41
+ - Ensure quality checks are performed at each stage
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==5.35.0
2
+ pillow==11.2.1
3
+ python-dotenv==1.1.0
4
+ google-generativeai==0.8.4
5
+ google-auth-oauthlib==1.2.1
6
+ google-api-python-client==2.166.0
7
+ requests==2.32.3
8
+ numpy==2.2.5
9
+ openai==1.78.0
10
+ opencv-contrib-python
11
+ matplotlib
start.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+
4
+ nohup python3 app.py > output.log 2>&1 &
5
+
6
+ echo "starting server"
7
+
token.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d9f3572ff848d00c9fdface71ac47338c510ae286d1dfd7128625eb75a5956a
3
+ size 1002
ui/Compumacy-Logo-Trans2.png ADDED
ui/__init__.py ADDED
File without changes
ui/content/log.txt ADDED
File without changes
ui/story_interface.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import time
3
+ from utils.story_management import (
4
+ generate_direct_comic,
5
+ extract_comic_scenes,
6
+ load_narration_from_file
7
+ )
8
+ from config import IMAGE_STYLES, IMAGE_STYLE_INFO, AGE_GROUPS
9
+ from datetime import datetime
10
+
11
def log_execution(func):
    """Decorator that records the start time, end time and duration of each call.

    One line per successful call is appended to ``content/logs.txt``
    (relative to the current working directory) and echoed to stdout.
    A failure to write the log file is reported but never propagates, so
    logging cannot turn a successful call into an error. Calls that raise
    are not logged.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same name, docstring and return value as ``func``.
    """
    import functools
    import os

    log_path = 'content/logs.txt'

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        start_str = datetime.fromtimestamp(start_time).strftime('%Y-%m-%d %H:%M:%S')

        result = func(*args, **kwargs)

        end_time = time.time()
        end_str = datetime.fromtimestamp(end_time).strftime('%Y-%m-%d %H:%M:%S')
        duration = end_time - start_time

        entry = f"{func.__name__}, start: {start_str}, end: {end_str}, duration: {duration:.4f}s"

        # Write to file (works in Colab); create the directory on first use.
        try:
            os.makedirs(os.path.dirname(log_path), exist_ok=True)
            with open(log_path, 'a', encoding='utf-8') as f:
                f.write(entry + "\n")
        except OSError as exc:
            print(f"[log_execution] Could not write to {log_path}: {exc}")

        # Also print to see output immediately
        print(entry)

        return result
    return wrapper
31
+ @log_execution
32
def create_story_interface(demo: gr.Blocks) -> gr.Blocks:
    """Build the main story UI and hand the Blocks instance back.

    The only thing added at the moment is the quick comic tab; ``demo``
    itself is not modified by this function directly.

    Args:
        demo (gr.Blocks): The Gradio Blocks instance the interface belongs to.

    Returns:
        gr.Blocks: The same ``demo`` object that was passed in.
    """
    # NOTE(review): presumably called inside a `with demo:` context so the
    # components attach to `demo` -- confirm against the caller.
    create_quick_comic_tab()
    return demo
48
def create_quick_comic_tab() -> None:
    """Create a simple tab for direct prompt-to-image comic generation.

    Sets up the main comic generation interface with the following components:
    - Story prompt input field
    - AI prompt enhancement option
    - Visual style, age group and image quality selectors
    - Generation controls
    - Image display area and narration box
    - Scene navigation system

    The interface allows users to:
    1. Input their story description
    2. Configure generation parameters
    3. Generate a multi-panel comic
    4. View and navigate through individual scenes
    """

    with gr.Column():

        gr.Markdown("Welcome to Hekaya ")

        with gr.Row():
            with gr.Column(scale=3):
                user_prompt = gr.Textbox(
                    label="What Hekaya story would you like to visualize?",
                    placeholder="Describe your story with main characters and settings... (e.g., 'A young wizard learning magic in an ancient castle')",
                    lines=4
                )

                enrich_prompt = gr.Checkbox(
                    label="Enhance prompt with AI for coherence",
                    value=True,
                    info="Use AI to add just enough detail and coherence for consistent visual storytelling across all scenes"
                )

            with gr.Column(scale=1):
                comic_style = gr.Dropdown(
                    label="Visual Style",
                    choices=IMAGE_STYLES,
                    value="Comic Book Style"
                )

                style_description = gr.Markdown(
                    value=f"*{IMAGE_STYLE_INFO['Comic Book Style']}*",
                    label="Style Description"
                )

                age_group = gr.Dropdown(
                    label="Target Age Group",
                    choices=AGE_GROUPS,
                    value="9-12 (Pre-teen)",
                    info="Select the audience age group. Narration language, detail, and length will adapt automatically."
                )

                image_quality = gr.Dropdown(
                    label="Image Quality",
                    choices=["Low", "Medium", "High"],
                    value="Low",
                    info="Select the quality level for generated images. Higher quality may take longer to generate."
                )

        generate_btn = gr.Button("Generate Hekaya Story", variant="primary")
        status_display = gr.Markdown("")

        with gr.Row():
            with gr.Column(scale=2):
                comic_image = gr.Image(label="Generated Hekaya Story", type="filepath")

            with gr.Column(scale=1, elem_id="save_info_container"):

                gr.Markdown("Your generated story images are automatically saved locally.")
                save_path_display = gr.Markdown("", elem_id="save_path_info")

        narration_display = gr.Markdown(
            visible=True,
            elem_id="story_narration",
            elem_classes=["story-narration-box"]
        )

        with gr.Column(visible=False) as scene_viewer_container:

            gr.Markdown("Use the navigation buttons to view each upscaled scene individually.")

            with gr.Row(equal_height=True):
                prev_scene_btn = gr.Button("← Previous Scene", variant="secondary")
                scene_counter = gr.Markdown("Scene 1 of 1", elem_id="scene_counter")
                next_scene_btn = gr.Button("Next Scene →", variant="secondary")

            scene_image = gr.Image(label="Current Scene", type="filepath", height=768)
            scene_caption_display = gr.Markdown("", elem_id="scene_caption", elem_classes=["scene-caption-box"])
            scene_save_path = gr.Markdown("", elem_id="scene_save_path_info")

            # Per-session state: extracted scene dicts and the viewer position.
            scene_info = gr.State([])
            current_scene_index = gr.State(0)

    def update_style_description(style: str) -> str:
        """Update the style description text when a new style is selected."""
        # A cleared or unknown selection yields an empty description rather
        # than a KeyError inside the event handler.
        description = IMAGE_STYLE_INFO.get(style)
        return f"*{description}*" if description else ""

    def show_generating_message() -> str:
        """Display a loading message while story scenes are being generated."""
        return "🔄 Generating your story scenes... Please wait..."

    def generate_comic_with_length(user_prompt, comic_style, enrich_prompt, age_group, image_quality):
        """Wrapper that handles the fixed num_scenes value while passing the age group and image quality."""
        # NOTE(review): image_quality is received from the UI but is not
        # forwarded; generate_direct_comic's signature is not visible here,
        # so the call is left as it was -- confirm whether it should be passed.
        comic_image, save_path_display, status_display, narration = generate_direct_comic(
            user_prompt,
            comic_style,
            12,
            enrich_prompt,
            3,
            age_group,
        )

        # Show the generated narration; previously an empty string was always
        # written to the box, so the narration never reached the user.
        if narration and narration.strip():
            narration_update = gr.update(visible=True, value=narration)
        else:
            narration_update = gr.update(visible=True, value="")

        return comic_image, save_path_display, status_display, narration_update

    def init_scene_viewer(comic_path: str | None) -> tuple:
        """Initialize the scene viewer with extracted scenes from the comic image."""
        if not comic_path:
            return [], 0, gr.update(visible=False), None, "", "Scene 0 of 0", "No story image generated"

        scene_data, save_message = extract_comic_scenes(comic_path, 0)

        if not scene_data:
            return [], 0, gr.update(visible=False), None, "", "Scene 0 of 0", "Failed to extract scenes"

        first_scene = scene_data[0]

        return (
            scene_data,
            0,
            gr.update(visible=True),
            first_scene['path'],
            first_scene['caption'],
            f"Scene 1 of {len(scene_data)}",
            save_message
        )

    def update_scene_display(scene_data: list, current_index: int) -> tuple:
        """Update the scene viewer display with the current scene."""
        if not scene_data:
            return None, "", "Scene 0 of 0"

        # Clamp so a stale index can never fall outside the scene list.
        index = max(0, min(current_index, len(scene_data) - 1))
        scene = scene_data[index]

        return scene['path'], scene['caption'], f"Scene {index + 1} of {len(scene_data)}"

    def navigate_to_previous_scene(idx: int) -> int:
        """Navigate to the previous scene in the sequence."""
        return max(0, idx - 1)

    def navigate_to_next_scene(paths: list, idx: int) -> int:
        """Navigate to the next scene in the sequence."""
        return min(len(paths) - 1, idx + 1) if paths else 0

    comic_style.change(
        fn=update_style_description,
        inputs=[comic_style],
        outputs=[style_description]
    )

    # Three chained steps: status message, generation, scene extraction.
    generate_btn.click(
        fn=show_generating_message,
        inputs=None,
        outputs=status_display
    ).then(
        fn=generate_comic_with_length,
        inputs=[user_prompt, comic_style, enrich_prompt, age_group, image_quality],
        outputs=[comic_image, save_path_display, status_display, narration_display]
    ).then(
        fn=init_scene_viewer,
        inputs=[comic_image],
        outputs=[
            scene_info,
            current_scene_index,
            scene_viewer_container,
            scene_image,
            scene_caption_display,
            scene_counter,
            scene_save_path
        ]
    )

    prev_scene_btn.click(
        fn=navigate_to_previous_scene,
        inputs=[current_scene_index],
        outputs=[current_scene_index]
    ).then(
        fn=update_scene_display,
        inputs=[scene_info, current_scene_index],
        outputs=[scene_image, scene_caption_display, scene_counter]
    )

    next_scene_btn.click(
        fn=navigate_to_next_scene,
        inputs=[scene_info, current_scene_index],
        outputs=[current_scene_index]
    ).then(
        fn=update_scene_display,
        inputs=[scene_info, current_scene_index],
        outputs=[scene_image, scene_caption_display, scene_counter]
    )
utils/__init__.py ADDED
File without changes
utils/comic_panel_splitter.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import os
4
+ import time
5
+ from datetime import datetime
6
+
7
+
8
def log_execution(func):
    """Decorate ``func`` so each successful call is timed and recorded.

    After the wrapped function returns, one line with the function name,
    start time, end time and duration is appended to ``content/logs.txt``
    (relative to the current working directory) and printed to stdout.

    The log directory is created on demand, and a failure to write the log
    file is reported but never discards the wrapped function's result.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same name/docstring as ``func`` that returns
        whatever ``func`` returns. Exceptions raised by ``func`` propagate
        unchanged and produce no log entry.
    """
    import functools  # local import keeps this block self-contained

    log_path = 'content/logs.txt'
    time_format = '%Y-%m-%d %H:%M:%S'

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()

        start_str = datetime.fromtimestamp(start_time).strftime(time_format)
        end_str = datetime.fromtimestamp(end_time).strftime(time_format)
        duration = end_time - start_time
        record = f"{func.__name__}, start: {start_str}, end: {end_str}, duration: {duration:.4f}s"

        # Logging is best-effort: the wrapped call already succeeded, so a
        # missing or unwritable log location must not turn into a crash.
        try:
            os.makedirs(os.path.dirname(log_path), exist_ok=True)
            with open(log_path, 'a') as f:
                f.write(record + "\n")
        except OSError as log_error:
            print(f"Warning: could not write execution log to {log_path}: {log_error}")

        # Also print to see output immediately
        print(record)

        return result
    return wrapper
28
+
29
@log_execution
def split_comic_panels(image_path, output_dir, min_area=10000, row_height=50):
    """Detect the panels of a comic page and save each one as its own PNG.

    The page is converted to grayscale, adaptively thresholded (inverted),
    morphologically closed, and its external contours are extracted. Every
    contour whose area is at least ``min_area`` is cropped by its bounding
    rectangle and written to ``output_dir`` as ``panel_01.png``,
    ``panel_02.png``, ... in reading order (top-to-bottom rows, then
    left-to-right within a row).

    Args:
        image_path: Path of the comic page image to read with OpenCV.
        output_dir: Directory that receives the panel files; created if
            it does not exist.
        min_area: Contours with a smaller area (in pixels) are ignored.
        row_height: Height in pixels of the buckets used to group panels
            into rows when sorting.

    Returns:
        None. If the image cannot be read, an error is printed and nothing
        is written.
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error: Could not read image from {image_path}")
        return

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    th = cv2.adaptiveThreshold(gray, 255,
                               cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,
                               blockSize=51, C=10)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 15))
    clean = cv2.morphologyEx(th, cv2.MORPH_CLOSE, kernel)

    cnts, _ = cv2.findContours(clean, cv2.RETR_EXTERNAL,
                               cv2.CHAIN_APPROX_SIMPLE)

    panel_rects = []
    for c in cnts:
        if cv2.contourArea(c) < min_area:
            continue
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)
        panel_rects.append(cv2.boundingRect(approx))

    # Plain (undecorated) key function: it runs once per panel during the
    # sort, so it must not append to the execution log on every call.
    def panel_key(rect):
        x, y, _w, _h = rect
        return (y // row_height, x)

    panel_rects.sort(key=panel_key)

    os.makedirs(output_dir, exist_ok=True)

    for i, (x, y, w, h) in enumerate(panel_rects, 1):
        crop = img[y:y + h, x:x + w]
        panel_path = os.path.join(output_dir, f"panel_{i:02d}.png")
        if not cv2.imwrite(panel_path, crop):
            print(f"Error: Could not write panel to {panel_path}")
67
+
68
if __name__ == "__main__":
    # Demo entry point: build a synthetic page if none exists, then split it.
    demo_page = "comic_page.jpg"
    demo_output = "output_panels"
    white = (255, 255, 255)

    if not os.path.exists(demo_page):
        canvas = np.zeros((600, 800, 3), dtype=np.uint8)
        cv2.putText(canvas, "Comic Page", (300, 300), cv2.FONT_HERSHEY_SIMPLEX, 1, white, 2, cv2.LINE_AA)
        # Three outlined rectangles stand in for comic panels.
        demo_panels = (
            ((50, 50), (350, 250)),
            ((400, 50), (750, 250)),
            ((50, 300), (750, 550)),
        )
        for top_left, bottom_right in demo_panels:
            cv2.rectangle(canvas, top_left, bottom_right, white, 5)
        cv2.imwrite(demo_page, canvas)

    if not os.path.exists(demo_output):
        os.makedirs(demo_output)
    split_comic_panels(demo_page, demo_output)
    print("Comic panels split and saved to output_panels directory.")
utils/content/log.txt ADDED
File without changes
utils/story_management.py ADDED
@@ -0,0 +1,455 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import base64
4
+ import time
5
+ from models.story_generator import StoryGenerator
6
+ from models.comic_image_generator import ComicImageGenerator
7
+ from datetime import datetime
8
+ from PIL import Image
9
+ import io
10
+ from google.generativeai import GenerativeModel, configure
11
+ import config
12
+ from new_image_splitting import AutomatedCollageSplitter
13
+
14
+
15
def log_execution(func):
    """Decorate ``func`` so each successful call is timed and recorded.

    After the wrapped function returns, one line with the function name,
    start time, end time and duration is appended to ``content/logs.txt``
    (relative to the current working directory) and printed to stdout.

    The log directory is created on demand, and a failure to write the log
    file is reported but never discards the wrapped function's result.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same name/docstring as ``func`` that returns
        whatever ``func`` returns. Exceptions raised by ``func`` propagate
        unchanged and produce no log entry.
    """
    import functools  # local import keeps this block self-contained

    log_path = 'content/logs.txt'
    time_format = '%Y-%m-%d %H:%M:%S'

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()

        start_str = datetime.fromtimestamp(start_time).strftime(time_format)
        end_str = datetime.fromtimestamp(end_time).strftime(time_format)
        duration = end_time - start_time
        record = f"{func.__name__}, start: {start_str}, end: {end_str}, duration: {duration:.4f}s"

        # Logging is best-effort: the wrapped call already succeeded, so a
        # missing or unwritable log location must not turn into a crash.
        try:
            os.makedirs(os.path.dirname(log_path), exist_ok=True)
            with open(log_path, 'a') as f:
                f.write(record + "\n")
        except OSError as log_error:
            print(f"⚠️ Could not write execution log to {log_path}: {log_error}")

        # Also print to see output immediately
        print(record)

        return result
    return wrapper
35
@log_execution
def save_image_from_data_url(data_url, filename):
    """Decode a base64 ``data:image...`` URL and write the bytes to ``filename``.

    Args:
        data_url: String starting with ``data:image``; everything after the
            first comma is treated as the base64 payload.
        filename: Destination path. Its parent directory is created when the
            path has one; a bare file name is written to the current
            working directory.

    Returns:
        bool: True when the file was written, False when the URL is missing,
        malformed, has no payload, or decoding/writing fails.
    """
    if not data_url or not data_url.startswith("data:image"):
        print(f"Invalid data URL: {data_url[:30] if data_url else None}")
        return False

    try:
        _header, separator, image_data = data_url.partition(",")
        if not separator or not image_data:
            raise ValueError("data URL contains no encoded image payload")
        image_bytes = base64.b64decode(image_data)

        # os.makedirs("") raises, so only create a directory when the
        # target path actually has a directory component.
        target_dir = os.path.dirname(filename)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        with open(filename, "wb") as f:
            f.write(image_bytes)

        return True
    except Exception as e:
        # Best-effort helper: report the problem and signal failure.
        print(f"Error saving image: {e}")
        return False
55
@log_execution
def generate_direct_comic(
    user_prompt,
    image_style=None,
    num_scenes: int = 12,
    enrich_prompt=True,
    narration_length=3,
    age_group: str | None = None,
):
    """
    Generate a comic directly from a user prompt with optional story enhancement.

    NOTE: The system is standardized to generate exactly 12 scenes arranged in a 3×4 grid. Any value passed into
    `num_scenes` is overridden to maintain this consistency.

    Args:
        user_prompt: The user's description of what they want in the comic.
            Must be non-empty; prompts longer than 10000 characters are truncated.
        image_style: Style to use for comic generation
        num_scenes: Ignored—kept for backward compatibility. Always forced to 12.
        enrich_prompt: Whether to enhance the prompt using the story generator.
            If enhancement fails or returns nothing, the original prompt is used.
        narration_length: Length of narration (1=Brief, 2=Medium, 3=Detailed, 4=Very Detailed).
            Overridden when `age_group` is one of the known age-group labels.
        age_group: Target age group that determines the language complexity (e.g., "6-8 (Kids)")

    Returns:
        A 4-tuple ``(comic_output_path, save_message, status_message, narration)``:
            comic_output_path: Path to the generated comic image, or None on failure
            save_message: HTML snippet describing where the image was saved ("" on failure)
            status_message: Status message about generation success/failure
            narration: Generated story narration ("" if unavailable)
    """
    start_time = time.time()

    try:
        if not user_prompt or not user_prompt.strip():
            raise ValueError("User prompt cannot be empty")

        user_prompt = user_prompt.strip()
        if len(user_prompt) > 10000:
            user_prompt = user_prompt[:10000] + "..."
            print("⚠️ Prompt truncated to prevent processing issues")

        num_scenes = 12

        # Track whether enhancement actually happened so the final message
        # does not claim AI enhancement after a fallback to the raw prompt.
        final_prompt = user_prompt
        was_enhanced = False

        if enrich_prompt:
            try:
                story_generator = StoryGenerator()

                print(f"Enhancing user prompt: {user_prompt[:100]}...")
                enhanced_description = story_generator.enhance_user_story(user_prompt)

                if enhanced_description and enhanced_description.strip():
                    print("Prompt successfully enhanced")
                    final_prompt = enhanced_description
                    was_enhanced = True
                else:
                    print("⚠️ Enhancement failed, using original prompt")

            except Exception as enhancement_error:
                print(f"⚠️ Story enhancement failed: {enhancement_error}")
                print("Using original prompt without enhancement")
        else:
            print(f"Using original prompt without enhancement: {user_prompt[:100]}...")

        try:
            image_generator = ComicImageGenerator()
        except Exception as generator_error:
            print(f"❌ Failed to create image generator: {generator_error}")
            raise

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        # Build a filesystem-friendly folder name from the start of the prompt.
        safe_title = "".join(c if c.isalnum() or c in [' ', '_', '-'] else '_' for c in user_prompt[:20])
        safe_title = safe_title.strip().replace(" ", "_")
        if not safe_title:
            safe_title = "story"

        image_dir = f"Story-Generation/generated/images/{safe_title}_{timestamp}"
        try:
            os.makedirs(image_dir, exist_ok=True)
        except Exception as dir_error:
            # Fall back to a folder directly under the working directory.
            print(f"❌ Failed to create directory: {dir_error}")
            image_dir = f"{safe_title}_{timestamp}"
            os.makedirs(image_dir, exist_ok=True)

        comic_output_path = os.path.join(image_dir, "story_scenes.png")

        try:
            comic_image, data_url = image_generator.generate_comic(
                {
                    "title": safe_title,
                    "description": final_prompt,
                    "num_scenes": num_scenes
                },
                output_path=comic_output_path,
                style=image_style
            )

            if comic_image is None and data_url is None:
                raise ValueError("Comic generation returned no results")

        except Exception as generation_error:
            print(f"❌ Comic generation failed: {generation_error}")
            raise

        if age_group:
            age_to_length = {
                "3-5 (Pre-school)": 1,
                "6-8 (Kids)": 2,
                "9-12 (Pre-teen)": 3,
                "13-18 (Teen)": 3,
                "18+ (Adult)": 4,
            }

            mapped_length = age_to_length.get(age_group)
            if mapped_length is not None:
                narration_length = mapped_length

        narration = ""
        if comic_output_path and os.path.exists(comic_output_path):
            try:
                narration = generate_image_narration(
                    comic_output_path, narration_length, age_group=age_group
                )
                if narration:
                    narration_path = os.path.join(image_dir, "narration.txt")
                    with open(narration_path, "w", encoding="utf-8") as f:
                        f.write(narration)
                    print(f"💾 Narration saved to: {narration_path}")
            except Exception as narration_error:
                # Narration is optional; the comic itself is still returned.
                print(f"⚠️ Narration generation failed: {narration_error}")
                narration = ""

        end_time = time.time()
        generation_time = end_time - start_time

        try:
            absolute_path = os.path.abspath(comic_output_path)
        except Exception:
            absolute_path = comic_output_path

        enhancement_status = "with AI enhancement" if was_enhanced else "using original prompt"
        save_message = f"""
        <div style="padding: 10px; border: 1px solid;">
        <p><strong>💾 Story Scenes Saved Successfully</strong></p>
        <p>Generated {enhancement_status}</p>
        <p>Location: <code>{absolute_path}</code></p>
        <p>You can find all your saved story images in the images directory.</p>
        </div>
        """

        return (
            comic_output_path,
            save_message,
            f"✅ Story scenes generated successfully in {generation_time:.2f} seconds!",
            narration
        )

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"❌ Error generating story scenes: {e}")
        print(f"Full error details: {error_details}")

        return None, "", f"❌ Error generating story scenes: {str(e)}", ""
219
+
220
@log_execution
def extract_comic_scenes(comic_image_path, num_scenes):
    """
    Extract individual scenes from a story image and save them as separate files.
    This version uses the AutomatedCollageSplitter.

    Segments are written to a ``<image name>_segments`` folder next to the
    source image.

    Args:
        comic_image_path: Path to the story image
        num_scenes: This parameter is kept for compatibility but is not used
                    by the automated splitter.

    Returns:
        list: The scene entries returned by ``AutomatedCollageSplitter.split_collage``
              (each entry is expected to provide a ``'path'`` key), or an empty
              list when the image is missing, no panels are found, or an error occurs
        str: Display message (HTML on success, plain text on failure) about
             where the scenes were saved
    """
    if not comic_image_path or not os.path.exists(comic_image_path):
        return [], "Error: Comic image not found."

    try:
        base_dir = os.path.dirname(comic_image_path)
        image_stem = os.path.splitext(os.path.basename(comic_image_path))[0]
        output_dir = os.path.join(base_dir, f"{image_stem}_segments")

        splitter = AutomatedCollageSplitter()

        scene_info = splitter.split_collage(
            image_path=comic_image_path,
            output_dir=output_dir,
            debug=False
        )

        if not scene_info:
            return [], "Automated splitting failed to find any panels."

        # Reading 'path' from every entry also validates the splitter output:
        # a malformed entry is reported through the except branch below.
        scene_paths = [info['path'] for info in scene_info]

        save_message = f"""
        <div style="padding: 10px; border: 1px solid;">
        <p><strong>💾 Individual Scenes Saved Successfully</strong></p>
        <p>Found and saved {len(scene_paths)} scenes.</p>
        <p>Location: <code>{os.path.abspath(output_dir)}</code></p>
        </div>
        """

        return scene_info, save_message

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"❌ Error extracting scenes: {e}")
        print(f"Full error details: {error_details}")
        return [], f"Error extracting scenes: {e}"
272
@log_execution
def generate_image_narration(image_path, narration_length=3, age_group: str | None = None):
    """
    Generate story narration for the scenes shown in a comic image using Gemini.

    The image and a formatting prompt are sent to the model. The reply is
    cleaned (wrapping quotes, instruction-echo lines and "Scene N:" prefixes
    are removed; remaining lines are joined with blank lines). If the cleaned
    text is shorter than the minimum for the selected length, one follow-up
    request asks the model to expand it; a failure of that optional request
    keeps the narration already obtained.

    Args:
        image_path: Path to the generated comic image
        narration_length: Length of narration (1=Brief, 2=Medium, 3=Detailed, 4=Very Detailed).
            Unknown values fall back to the settings for 3.
        age_group: Optional target age group string that informs the language difficulty

    Returns:
        str: Generated narration text or empty string if failed
    """
    import re

    # Lines starting with these look like echoed instructions, not narration.
    instruction_prefixes = ('Note:', 'Remember:', 'Format:', 'Your narration should:', 'Think of this as')

    try:
        configure(api_key=config.GOOGLE_API_KEY)

        model = GenerativeModel('gemini-2.5-flash')

        length_specs = {
            1: {
                "description": "Quick scene summary",
                "min_chars": 100,
                "style": "concise, 2-3 lines per scene - capture the essence and emotion of each moment"
            },
            2: {
                "description": "Brief scene descriptions",
                "min_chars": 200,
                "style": "short and engaging, 2-3 lines per scene - focus on key actions and feelings"
            },
            3: {
                "description": "Quick narrative",
                "min_chars": 300,
                "style": "brief but vivid, 2-3 lines per scene - capture emotion, action, and atmosphere"
            },
            4: {
                "description": "Concise story flow",
                "min_chars": 400,
                "style": "quick and immersive, 2-3 lines per scene - paint rich pictures efficiently"
            }
        }

        spec = length_specs.get(narration_length, length_specs[3])

        age_group_specs = {
            "3-5 (Pre-school)": {
                "audience": "very young children ages 3-5",
                "language": "Narrate like a playful adult reading aloud. Use extremely simple words, cheerful tone, repetition, and lots of sound effects and emotions."
            },
            "6-8 (Kids)": {
                "audience": "children ages 6-8",
                "language": "Use an enthusiastic and descriptive tone. Keep vocabulary simple but expressive. Include action, feelings, and dialogue they can relate to."
            },
            "9-12 (Pre-teen)": {
                "audience": "pre-teens ages 9-12",
                "language": "Use a lively, adventurous tone with accessible vocabulary. Add humor, suspense, and vivid action to keep engagement high."
            },
            "13-18 (Teen)": {
                "audience": "teenagers ages 13-18",
                "language": "Use present-tense narration with emotional depth and realism. Include internal thoughts, intense scenes, and natural dialogue."
            },
            "18+ (Adult)": {
                "audience": "adults",
                "language": "Use mature, nuanced storytelling. Allow complex emotions, sensory detail, and deeper themes to emerge naturally."
            }
        }

        selected_age_spec = age_group_specs.get(age_group) if age_group else None

        audience_desc = selected_age_spec['audience'] if selected_age_spec else "a general audience"
        language_guideline = selected_age_spec['language'] if selected_age_spec else "Use vivid, engaging language appropriate to the story."

        narration_mode = "Provide quick, engaging narration with 2-3 lines per scene. Each scene gets 2-3 concise, standalone sentences that capture the key moment, emotion, and atmosphere. Do NOT prefix scenes with 'Scene'"

        narration_prompt = f"""
        You are a concise narrator creating quick scene descriptions for {audience_desc}.

        **NARRATION STYLE:**
        {narration_mode}

        **FORMAT REQUIREMENTS:**
        • Identify each distinct scene and write 2-3 sentences (2 minimum, 3 maximum) **without** any numeric prefixes or headings.
        • First sentence: What's happening (action/event)
        • Second sentence: How it feels or what it means (emotion/impact)
        • Optional third sentence: Additional detail or atmosphere
        • Keep each sentence under 20 words
        • Use present tense and active voice
        • Separate scenes with a blank line for readability
        • Make it flow naturally from scene to scene

        **EXAMPLE FORMAT:**
        The cat stretches lazily in the morning sunlight streaming through the window. A new day full of adventure awaits her curious spirit.

        She bounds toward the garden gate with graceful excitement. Something magical calls to her curious heart.

        **STYLE:** {spec['style']}
        **LANGUAGE:** {language_guideline}

        Now provide your quick, 2-line narration for each scene. Remember: NO headings or prefixes, just the narration lines separated by blank lines:
        """

        # Keep the image open only for the duration of the request so the
        # underlying file handle is always released.
        with Image.open(image_path) as comic_image:
            print(f"🔍 Generating {spec['description']} using Gemini Vision...")
            response = model.generate_content([narration_prompt, comic_image])
        narration = response.text.strip()

        if narration:
            narration = narration.strip('"\'`')

            clean_lines = []
            for line in narration.split('\n'):
                line = line.strip()
                if not line:
                    continue

                if line.startswith(instruction_prefixes):
                    continue

                # Drop any "Scene 3:" style prefix the model added anyway.
                line = re.sub(r'^Scene\s*\d+\s*[:.\-]\s*', '', line, flags=re.IGNORECASE)

                clean_lines.append(line)

            narration = '\n\n'.join(clean_lines)

            if len(narration) < spec['min_chars']:
                print("⚠️ Initial narration too short for selected length, requesting more detail...")

                detailed_prompt = f"""
                The narration needs to be more detailed while keeping the concise 2-line-per-scene format.

                CURRENT NARRATION:
                {narration}

                Expand this following the same format:
                • **Style:** {narration_mode}
                • **Format:** 2-3 lines per scene, but make them more vivid and detailed
                • **Voice:** {spec['style']}
                • **Language:** {language_guideline}
                • **Focus:** Keep it concise but add more sensory details, emotion, and atmosphere

                Provide the enhanced 2-3-lines-per-scene narration:
                """

                # The expansion is optional: if it fails, keep the narration
                # we already have instead of returning nothing.
                try:
                    response = model.generate_content(detailed_prompt)
                    expanded_narration = response.text.strip()
                except Exception as expansion_error:
                    print(f"⚠️ Narration expansion failed, keeping initial narration: {expansion_error}")
                    expanded_narration = ""

                if expanded_narration and len(expanded_narration) > len(narration):
                    narration = expanded_narration.strip('"\'`')

            print(f"✅ Generated {spec['description']}: {len(narration)} characters")
            return narration

    except Exception as e:
        print(f"⚠️ Failed to generate narration: {e}")

    return ""
430
@log_execution
def load_narration_from_file(comic_image_path):
    """
    Read the ``narration.txt`` stored in the same folder as a comic image.

    Args:
        comic_image_path: Path to the comic image

    Returns:
        str: The stripped file contents, or an empty string when the path is
        empty, the file does not exist, or reading fails
    """
    loaded_text = ""

    try:
        if comic_image_path:
            candidate = os.path.join(os.path.dirname(comic_image_path), "narration.txt")
            if os.path.exists(candidate):
                with open(candidate, "r", encoding="utf-8") as handle:
                    loaded_text = handle.read().strip()
    except Exception as e:
        print(f"⚠️ Could not load narration: {e}")

    return loaded_text