First
Browse files- .dockerignore +46 -0
- .gitignore +182 -0
- .idea/.gitignore +5 -0
- .idea/Hekaya3.iml +12 -0
- .idea/inspectionProfiles/Project_Default.xml +19 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/modules.xml +8 -0
- .idea/vcs.xml +6 -0
- Dockerfile +34 -0
- LICENSE +201 -0
- TimeLog.md +34 -0
- TimeLog2.md +50 -0
- app.py +102 -0
- config.py +35 -0
- content/log.txt +0 -0
- content/logs.txt +0 -0
- handover.md +244 -0
- image.jpg +0 -0
- kill.sh +4 -0
- models/__init__.py +0 -0
- models/comic_image_generator.py +1292 -0
- models/content/log.txt +0 -0
- models/image_generation.py +264 -0
- models/story_generator.py +562 -0
- new_image_splitting.py +278 -0
- notes.md +41 -0
- requirements.txt +11 -0
- start.sh +7 -0
- token.pickle +3 -0
- ui/Compumacy-Logo-Trans2.png +0 -0
- ui/__init__.py +0 -0
- ui/content/log.txt +0 -0
- ui/story_interface.py +261 -0
- utils/__init__.py +0 -0
- utils/comic_panel_splitter.py +80 -0
- utils/content/log.txt +0 -0
- utils/story_management.py +455 -0
.dockerignore
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
venv/
|
| 8 |
+
env/
|
| 9 |
+
ENV/
|
| 10 |
+
|
| 11 |
+
# Git
|
| 12 |
+
.git/
|
| 13 |
+
.gitignore
|
| 14 |
+
|
| 15 |
+
# Environment files
|
| 16 |
+
.env
|
| 17 |
+
|
| 18 |
+
# IDE
|
| 19 |
+
.vscode/
|
| 20 |
+
.idea/
|
| 21 |
+
*.swp
|
| 22 |
+
*.swo
|
| 23 |
+
|
| 24 |
+
# OS
|
| 25 |
+
.DS_Store
|
| 26 |
+
Thumbs.db
|
| 27 |
+
|
| 28 |
+
# Documentation and notes
|
| 29 |
+
*.md
|
| 30 |
+
!README.md
|
| 31 |
+
|
| 32 |
+
# Images and media (optional - remove if needed in the app)
|
| 33 |
+
*.jpg
|
| 34 |
+
*.jpeg
|
| 35 |
+
*.png
|
| 36 |
+
image.jpg
|
| 37 |
+
|
| 38 |
+
# Temporary files
|
| 39 |
+
*.pickle
|
| 40 |
+
token.pickle
|
| 41 |
+
|
| 42 |
+
# Scripts
|
| 43 |
+
*.sh
|
| 44 |
+
|
| 45 |
+
# Logs
|
| 46 |
+
*.log
|
.gitignore
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
dooms_day.sh
|
| 6 |
+
/dooms_day.sh
|
| 7 |
+
dooms_day.sh/
|
| 8 |
+
dooms_day.sh*
|
| 9 |
+
/Story Sessions
|
| 10 |
+
/Story-Generation
|
| 11 |
+
asd/
|
| 12 |
+
/asd/
|
| 13 |
+
/asd
|
| 14 |
+
# C extensions
|
| 15 |
+
*.so
|
| 16 |
+
venv
|
| 17 |
+
# Distribution / packaging
|
| 18 |
+
.Python
|
| 19 |
+
build/
|
| 20 |
+
develop-eggs/
|
| 21 |
+
dist/
|
| 22 |
+
downloads/
|
| 23 |
+
eggs/
|
| 24 |
+
.eggs/
|
| 25 |
+
lib/
|
| 26 |
+
lib64/
|
| 27 |
+
parts/
|
| 28 |
+
sdist/
|
| 29 |
+
var/
|
| 30 |
+
wheels/
|
| 31 |
+
share/python-wheels/
|
| 32 |
+
*.egg-info/
|
| 33 |
+
.installed.cfg
|
| 34 |
+
*.egg
|
| 35 |
+
MANIFEST
|
| 36 |
+
|
| 37 |
+
# PyInstaller
|
| 38 |
+
# Usually these files are written by a python script from a template
|
| 39 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 40 |
+
*.manifest
|
| 41 |
+
*.spec
|
| 42 |
+
|
| 43 |
+
# Installer logs
|
| 44 |
+
pip-log.txt
|
| 45 |
+
pip-delete-this-directory.txt
|
| 46 |
+
|
| 47 |
+
# Unit test / coverage reports
|
| 48 |
+
htmlcov/
|
| 49 |
+
.tox/
|
| 50 |
+
.nox/
|
| 51 |
+
.coverage
|
| 52 |
+
.coverage.*
|
| 53 |
+
.cache
|
| 54 |
+
nosetests.xml
|
| 55 |
+
coverage.xml
|
| 56 |
+
*.cover
|
| 57 |
+
*.py,cover
|
| 58 |
+
.hypothesis/
|
| 59 |
+
.pytest_cache/
|
| 60 |
+
cover/
|
| 61 |
+
|
| 62 |
+
# Translations
|
| 63 |
+
*.mo
|
| 64 |
+
*.pot
|
| 65 |
+
|
| 66 |
+
# Django stuff:
|
| 67 |
+
*.log
|
| 68 |
+
local_settings.py
|
| 69 |
+
db.sqlite3
|
| 70 |
+
db.sqlite3-journal
|
| 71 |
+
|
| 72 |
+
# Flask stuff:
|
| 73 |
+
instance/
|
| 74 |
+
.webassets-cache
|
| 75 |
+
|
| 76 |
+
# Scrapy stuff:
|
| 77 |
+
.scrapy
|
| 78 |
+
|
| 79 |
+
# Sphinx documentation
|
| 80 |
+
docs/_build/
|
| 81 |
+
|
| 82 |
+
# PyBuilder
|
| 83 |
+
.pybuilder/
|
| 84 |
+
target/
|
| 85 |
+
|
| 86 |
+
# Jupyter Notebook
|
| 87 |
+
.ipynb_checkpoints
|
| 88 |
+
|
| 89 |
+
# IPython
|
| 90 |
+
profile_default/
|
| 91 |
+
ipython_config.py
|
| 92 |
+
|
| 93 |
+
# pyenv
|
| 94 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 95 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 96 |
+
# .python-version
|
| 97 |
+
|
| 98 |
+
# pipenv
|
| 99 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 100 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 101 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 102 |
+
# install all needed dependencies.
|
| 103 |
+
#Pipfile.lock
|
| 104 |
+
|
| 105 |
+
# UV
|
| 106 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
| 107 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 108 |
+
# commonly ignored for libraries.
|
| 109 |
+
#uv.lock
|
| 110 |
+
|
| 111 |
+
# poetry
|
| 112 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 113 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 114 |
+
# commonly ignored for libraries.
|
| 115 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 116 |
+
#poetry.lock
|
| 117 |
+
|
| 118 |
+
# pdm
|
| 119 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 120 |
+
#pdm.lock
|
| 121 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 122 |
+
# in version control.
|
| 123 |
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
| 124 |
+
.pdm.toml
|
| 125 |
+
.pdm-python
|
| 126 |
+
.pdm-build/
|
| 127 |
+
|
| 128 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 129 |
+
__pypackages__/
|
| 130 |
+
|
| 131 |
+
# Celery stuff
|
| 132 |
+
celerybeat-schedule
|
| 133 |
+
celerybeat.pid
|
| 134 |
+
|
| 135 |
+
# SageMath parsed files
|
| 136 |
+
*.sage.py
|
| 137 |
+
|
| 138 |
+
# Environments
|
| 139 |
+
.env
|
| 140 |
+
.venv
|
| 141 |
+
env/
|
| 142 |
+
venv/
|
| 143 |
+
ENV/
|
| 144 |
+
env.bak/
|
| 145 |
+
venv.bak/
|
| 146 |
+
|
| 147 |
+
# Spyder project settings
|
| 148 |
+
.spyderproject
|
| 149 |
+
.spyproject
|
| 150 |
+
|
| 151 |
+
# Rope project settings
|
| 152 |
+
.ropeproject
|
| 153 |
+
|
| 154 |
+
# mkdocs documentation
|
| 155 |
+
/site
|
| 156 |
+
|
| 157 |
+
# mypy
|
| 158 |
+
.mypy_cache/
|
| 159 |
+
.dmypy.json
|
| 160 |
+
dmypy.json
|
| 161 |
+
|
| 162 |
+
# Pyre type checker
|
| 163 |
+
.pyre/
|
| 164 |
+
|
| 165 |
+
# pytype static type analyzer
|
| 166 |
+
.pytype/
|
| 167 |
+
|
| 168 |
+
# Cython debug symbols
|
| 169 |
+
cython_debug/
|
| 170 |
+
|
| 171 |
+
# PyCharm
|
| 172 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 173 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 174 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 175 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 176 |
+
#.idea/
|
| 177 |
+
|
| 178 |
+
# Ruff stuff:
|
| 179 |
+
.ruff_cache/
|
| 180 |
+
|
| 181 |
+
# PyPI configuration file
|
| 182 |
+
.pypirc
|
.idea/.gitignore
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Default ignored files
|
| 2 |
+
/shelf/
|
| 3 |
+
/workspace.xml
|
| 4 |
+
# Editor-based HTTP Client requests
|
| 5 |
+
/httpRequests/
|
.idea/Hekaya3.iml
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<module type="PYTHON_MODULE" version="4">
|
| 3 |
+
<component name="NewModuleRootManager">
|
| 4 |
+
<content url="file://$MODULE_DIR$" />
|
| 5 |
+
<orderEntry type="inheritedJdk" />
|
| 6 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
| 7 |
+
</component>
|
| 8 |
+
<component name="PyDocumentationSettings">
|
| 9 |
+
<option name="format" value="GOOGLE" />
|
| 10 |
+
<option name="myDocStringFormat" value="Google" />
|
| 11 |
+
</component>
|
| 12 |
+
</module>
|
.idea/inspectionProfiles/Project_Default.xml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<profile version="1.0">
|
| 3 |
+
<option name="myName" value="Project Default" />
|
| 4 |
+
<inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
|
| 5 |
+
<inspection_tool class="JSHint" enabled="true" level="ERROR" enabled_by_default="true" />
|
| 6 |
+
<inspection_tool class="JupyterPackageInspection" enabled="false" level="WARNING" enabled_by_default="false" />
|
| 7 |
+
<inspection_tool class="PyInterpreterInspection" enabled="false" level="WARNING" enabled_by_default="false" />
|
| 8 |
+
<inspection_tool class="PyPackageRequirementsInspection" enabled="false" level="WARNING" enabled_by_default="false" />
|
| 9 |
+
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
| 10 |
+
<option name="ignoredErrors">
|
| 11 |
+
<list>
|
| 12 |
+
<option value="N806" />
|
| 13 |
+
</list>
|
| 14 |
+
</option>
|
| 15 |
+
</inspection_tool>
|
| 16 |
+
<inspection_tool class="PyShadowingNamesInspection" enabled="true" level="TEXT ATTRIBUTES" enabled_by_default="true" editorAttributes="CONSIDERATION_ATTRIBUTES" />
|
| 17 |
+
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="false" level="WARNING" enabled_by_default="false" />
|
| 18 |
+
</profile>
|
| 19 |
+
</component>
|
.idea/inspectionProfiles/profiles_settings.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<settings>
|
| 3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
| 4 |
+
<version value="1.0" />
|
| 5 |
+
</settings>
|
| 6 |
+
</component>
|
.idea/modules.xml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ProjectModuleManager">
|
| 4 |
+
<modules>
|
| 5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/Hekaya3.iml" filepath="$PROJECT_DIR$/.idea/Hekaya3.iml" />
|
| 6 |
+
</modules>
|
| 7 |
+
</component>
|
| 8 |
+
</project>
|
.idea/vcs.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="VcsDirectoryMappings">
|
| 4 |
+
<mapping directory="" vcs="Git" />
|
| 5 |
+
</component>
|
| 6 |
+
</project>
|
Dockerfile
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use Python 3.10 slim image
|
| 2 |
+
FROM python:3.10-slim
|
| 3 |
+
|
| 4 |
+
# Set working directory
|
| 5 |
+
WORKDIR /app
|
| 6 |
+
|
| 7 |
+
# Install system dependencies for OpenCV and other libraries
|
| 8 |
+
RUN apt-get update && apt-get install -y \
|
| 9 |
+
libgl1 \
|
| 10 |
+
libglib2.0-0 \
|
| 11 |
+
libgomp1 \
|
| 12 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 13 |
+
|
| 14 |
+
# Copy requirements first for better caching
|
| 15 |
+
COPY requirements.txt .
|
| 16 |
+
|
| 17 |
+
# Install Python dependencies
|
| 18 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 19 |
+
|
| 20 |
+
# Copy application files
|
| 21 |
+
COPY . .
|
| 22 |
+
|
| 23 |
+
# Create necessary directories
|
| 24 |
+
RUN mkdir -p content Story-Generation
|
| 25 |
+
|
| 26 |
+
# Expose Gradio default port
|
| 27 |
+
EXPOSE 7860
|
| 28 |
+
|
| 29 |
+
# Set environment variables for Gradio
|
| 30 |
+
ENV GRADIO_SERVER_NAME="0.0.0.0"
|
| 31 |
+
ENV GRADIO_SERVER_PORT=7860
|
| 32 |
+
|
| 33 |
+
# Run the application
|
| 34 |
+
CMD ["python", "app.py"]
|
LICENSE
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Apache License
|
| 2 |
+
Version 2.0, January 2004
|
| 3 |
+
http://www.apache.org/licenses/
|
| 4 |
+
|
| 5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 6 |
+
|
| 7 |
+
1. Definitions.
|
| 8 |
+
|
| 9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
| 10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
| 11 |
+
|
| 12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
| 13 |
+
the copyright owner that is granting the License.
|
| 14 |
+
|
| 15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
| 16 |
+
other entities that control, are controlled by, or are under common
|
| 17 |
+
control with that entity. For the purposes of this definition,
|
| 18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
| 19 |
+
direction or management of such entity, whether by contract or
|
| 20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 22 |
+
|
| 23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
| 24 |
+
exercising permissions granted by this License.
|
| 25 |
+
|
| 26 |
+
"Source" form shall mean the preferred form for making modifications,
|
| 27 |
+
including but not limited to software source code, documentation
|
| 28 |
+
source, and configuration files.
|
| 29 |
+
|
| 30 |
+
"Object" form shall mean any form resulting from mechanical
|
| 31 |
+
transformation or translation of a Source form, including but
|
| 32 |
+
not limited to compiled object code, generated documentation,
|
| 33 |
+
and conversions to other media types.
|
| 34 |
+
|
| 35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
| 36 |
+
Object form, made available under the License, as indicated by a
|
| 37 |
+
copyright notice that is included in or attached to the work
|
| 38 |
+
(an example is provided in the Appendix below).
|
| 39 |
+
|
| 40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
| 41 |
+
form, that is based on (or derived from) the Work and for which the
|
| 42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
| 43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
| 44 |
+
of this License, Derivative Works shall not include works that remain
|
| 45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
| 46 |
+
the Work and Derivative Works thereof.
|
| 47 |
+
|
| 48 |
+
"Contribution" shall mean any work of authorship, including
|
| 49 |
+
the original version of the Work and any modifications or additions
|
| 50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
| 51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
| 52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
| 53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
| 54 |
+
means any form of electronic, verbal, or written communication sent
|
| 55 |
+
to the Licensor or its representatives, including but not limited to
|
| 56 |
+
communication on electronic mailing lists, source code control systems,
|
| 57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
| 58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
| 59 |
+
excluding communication that is conspicuously marked or otherwise
|
| 60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
| 61 |
+
|
| 62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
| 64 |
+
subsequently incorporated within the Work.
|
| 65 |
+
|
| 66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
| 70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
| 71 |
+
Work and such Derivative Works in Source or Object form.
|
| 72 |
+
|
| 73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
| 74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 76 |
+
(except as stated in this section) patent license to make, have made,
|
| 77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 78 |
+
where such license applies only to those patent claims licensable
|
| 79 |
+
by such Contributor that are necessarily infringed by their
|
| 80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
| 81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
| 82 |
+
institute patent litigation against any entity (including a
|
| 83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 84 |
+
or a Contribution incorporated within the Work constitutes direct
|
| 85 |
+
or contributory patent infringement, then any patent licenses
|
| 86 |
+
granted to You under this License for that Work shall terminate
|
| 87 |
+
as of the date such litigation is filed.
|
| 88 |
+
|
| 89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
| 90 |
+
Work or Derivative Works thereof in any medium, with or without
|
| 91 |
+
modifications, and in Source or Object form, provided that You
|
| 92 |
+
meet the following conditions:
|
| 93 |
+
|
| 94 |
+
(a) You must give any other recipients of the Work or
|
| 95 |
+
Derivative Works a copy of this License; and
|
| 96 |
+
|
| 97 |
+
(b) You must cause any modified files to carry prominent notices
|
| 98 |
+
stating that You changed the files; and
|
| 99 |
+
|
| 100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
| 101 |
+
that You distribute, all copyright, patent, trademark, and
|
| 102 |
+
attribution notices from the Source form of the Work,
|
| 103 |
+
excluding those notices that do not pertain to any part of
|
| 104 |
+
the Derivative Works; and
|
| 105 |
+
|
| 106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
| 107 |
+
distribution, then any Derivative Works that You distribute must
|
| 108 |
+
include a readable copy of the attribution notices contained
|
| 109 |
+
within such NOTICE file, excluding those notices that do not
|
| 110 |
+
pertain to any part of the Derivative Works, in at least one
|
| 111 |
+
of the following places: within a NOTICE text file distributed
|
| 112 |
+
as part of the Derivative Works; within the Source form or
|
| 113 |
+
documentation, if provided along with the Derivative Works; or,
|
| 114 |
+
within a display generated by the Derivative Works, if and
|
| 115 |
+
wherever such third-party notices normally appear. The contents
|
| 116 |
+
of the NOTICE file are for informational purposes only and
|
| 117 |
+
do not modify the License. You may add Your own attribution
|
| 118 |
+
notices within Derivative Works that You distribute, alongside
|
| 119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
| 120 |
+
that such additional attribution notices cannot be construed
|
| 121 |
+
as modifying the License.
|
| 122 |
+
|
| 123 |
+
You may add Your own copyright statement to Your modifications and
|
| 124 |
+
may provide additional or different license terms and conditions
|
| 125 |
+
for use, reproduction, or distribution of Your modifications, or
|
| 126 |
+
for any such Derivative Works as a whole, provided Your use,
|
| 127 |
+
reproduction, and distribution of the Work otherwise complies with
|
| 128 |
+
the conditions stated in this License.
|
| 129 |
+
|
| 130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
| 132 |
+
by You to the Licensor shall be under the terms and conditions of
|
| 133 |
+
this License, without any additional terms or conditions.
|
| 134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
| 135 |
+
the terms of any separate license agreement you may have executed
|
| 136 |
+
with Licensor regarding such Contributions.
|
| 137 |
+
|
| 138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
| 139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
| 140 |
+
except as required for reasonable and customary use in describing the
|
| 141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
| 142 |
+
|
| 143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 144 |
+
agreed to in writing, Licensor provides the Work (and each
|
| 145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 147 |
+
implied, including, without limitation, any warranties or conditions
|
| 148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 150 |
+
appropriateness of using or redistributing the Work and assume any
|
| 151 |
+
risks associated with Your exercise of permissions under this License.
|
| 152 |
+
|
| 153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
| 154 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 155 |
+
unless required by applicable law (such as deliberate and grossly
|
| 156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
| 157 |
+
liable to You for damages, including any direct, indirect, special,
|
| 158 |
+
incidental, or consequential damages of any character arising as a
|
| 159 |
+
result of this License or out of the use or inability to use the
|
| 160 |
+
Work (including but not limited to damages for loss of goodwill,
|
| 161 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 162 |
+
other commercial damages or losses), even if such Contributor
|
| 163 |
+
has been advised of the possibility of such damages.
|
| 164 |
+
|
| 165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
| 166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
| 167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 168 |
+
or other liability obligations and/or rights consistent with this
|
| 169 |
+
License. However, in accepting such obligations, You may act only
|
| 170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
| 171 |
+
of any other Contributor, and only if You agree to indemnify,
|
| 172 |
+
defend, and hold each Contributor harmless for any liability
|
| 173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
| 174 |
+
of your accepting any such warranty or additional liability.
|
| 175 |
+
|
| 176 |
+
END OF TERMS AND CONDITIONS
|
| 177 |
+
|
| 178 |
+
APPENDIX: How to apply the Apache License to your work.
|
| 179 |
+
|
| 180 |
+
To apply the Apache License to your work, attach the following
|
| 181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
| 182 |
+
replaced with your own identifying information. (Don't include
|
| 183 |
+
the brackets!) The text should be enclosed in the appropriate
|
| 184 |
+
comment syntax for the file format. We also recommend that a
|
| 185 |
+
file or class name and description of purpose be included on the
|
| 186 |
+
same "printed page" as the copyright notice for easier
|
| 187 |
+
identification within third-party archives.
|
| 188 |
+
|
| 189 |
+
Copyright [yyyy] [name of copyright owner]
|
| 190 |
+
|
| 191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 192 |
+
you may not use this file except in compliance with the License.
|
| 193 |
+
You may obtain a copy of the License at
|
| 194 |
+
|
| 195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 196 |
+
|
| 197 |
+
Unless required by applicable law or agreed to in writing, software
|
| 198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 200 |
+
See the License for the specific language governing permissions and
|
| 201 |
+
limitations under the License.
|
TimeLog.md
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Key Performance Insights
|
| 2 |
+
|
| 3 |
+
## 🔴 Slowest Functions
|
| 4 |
+
- **generate_direct_comic**: 118.74s total (2 calls, avg 59.37s)
|
| 5 |
+
- **generate_comic**: 76.33s total (2 calls, avg 38.16s)
|
| 6 |
+
- **generate_image_fn**: 76.20s total (2 calls, avg 38.10s)
|
| 7 |
+
- **generate_image_narration**: 36.40s total (2 calls, avg 18.20s)
|
| 8 |
+
|
| 9 |
+
## 🟡 Medium Duration Functions
|
| 10 |
+
- **enhance_user_story**: 10.01s total (6 calls, avg 1.67s)
|
| 11 |
+
- **extract_comic_scenes**: 2.71s total (2 calls, avg 1.35s)
|
| 12 |
+
|
| 13 |
+
## 🟢 Fast Functions
|
| 14 |
+
- All other functions executed in **milliseconds (< 0.01s each)**
|
| 15 |
+
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
# Notable Observations
|
| 19 |
+
|
| 20 |
+
- **Huge Performance Difference**:
|
| 21 |
+
The second run of `generate_image_fn` took **73.82s**, compared with only **2.38s** for the first run – most likely because the first attempt failed early with an API error, whereas the second completed a full image generation.
|
| 22 |
+
|
| 23 |
+
- **Error Impact**:
|
| 24 |
+
The first `generate_direct_comic` run (**22.15s**) was much faster than the second (**96.58s**) because the first failed at image generation due to the OpenAI verification error.
|
| 25 |
+
|
| 26 |
+
- **Most Function Calls**:
|
| 27 |
+
`enhance_user_story` and `_create_detail_focused_enhancement_prompt` were called **6 times each**, indicating retry logic during the enhancement process.
|
| 28 |
+
|
| 29 |
+
- **Total Runtime**:
|
| 30 |
+
The per-function totals listed above sum to approximately **5.34 minutes (320.39s)** across **48 function calls**.
|
| 31 |
+
|
| 32 |
+
---
|
| 33 |
+
|
| 34 |
+
⚠️ **Main Bottleneck**: The **image generation and processing pipeline** accounts for **over 90%** of the total execution time.
|
TimeLog2.md
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ⏱️ Story Generation Time Log
|
| 2 |
+
|
| 3 |
+
---
|
| 4 |
+
|
| 5 |
+
## ✨ Story Enhancement
|
| 6 |
+
- 📝 **_create_detail_focused_enhancement_prompt (1)** → 0.0000s
|
| 7 |
+
- 📝 **_create_detail_focused_enhancement_prompt (2)** → 0.0000s
|
| 8 |
+
- 📝 **_create_detail_focused_enhancement_prompt (3)** → 0.0000s
|
| 9 |
+
- 🚀 **enhance_user_story (final)** → 0.0005s
|
| 10 |
+
- ⏳ **enhance_user_story (attempt span)** → 2.0014s
|
| 11 |
+
- ⏳ **enhance_user_story (overall span)** → 3.0035s
|
| 12 |
+
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
+
## 🏗️ Layout & Description Generation
|
| 16 |
+
- 🔢 **_find_all_factorizations** → 0.0000s
|
| 17 |
+
- 🧮 **_calculate_optimal_layout** → 0.0000s
|
| 18 |
+
- 📐 **_calculate_optimal_grid_layout** → 0.0010s
|
| 19 |
+
- 📝 **_get_optimal_layout_description** → 0.0010s
|
| 20 |
+
- 📖 **_create_detailed_story_description** → 0.0000s
|
| 21 |
+
- 🎨 **_get_enhanced_style_specifications** → 0.0000s
|
| 22 |
+
- 🔢 **_find_all_factorizations (2nd)** → 0.0000s
|
| 23 |
+
- 🧮 **_calculate_optimal_layout (2nd)** → 0.0000s
|
| 24 |
+
- 📐 **_calculate_optimal_grid_layout (2nd)** → 0.0000s
|
| 25 |
+
- 📊 **_create_comprehensive_technical_specifications** → 0.0010s
|
| 26 |
+
- 🛠️ **_create_advanced_quality_and_flow_instructions** → 0.0000s
|
| 27 |
+
- ✂️ **_assemble_prompt_with_smart_truncation** → 0.0000s
|
| 28 |
+
- 🖼️ **_create_comic_prompt** → 0.0020s
|
| 29 |
+
|
| 30 |
+
---
|
| 31 |
+
|
| 32 |
+
## 🎨 Comic Generation
|
| 33 |
+
- 🖌️ **generate_image_fn** → 69.6515s
|
| 34 |
+
- 🎭 **generate_comic** → 69.7712s
|
| 35 |
+
- 🎤 **generate_image_narration** → 29.2157s
|
| 36 |
+
- 📚 **generate_direct_comic** → 101.9924s
|
| 37 |
+
|
| 38 |
+
---
|
| 39 |
+
|
| 40 |
+
## 📸 Scene Extraction
|
| 41 |
+
- 🗂️ **extract_comic_scenes (1st run)** → 2.4218s
|
| 42 |
+
- 🗂️ **extract_comic_scenes (2nd run)** → 2.7060s
|
| 43 |
+
|
| 44 |
+
---
|
| 45 |
+
|
| 46 |
+
# 📊 Performance Overview
|
| 47 |
+
- ✨ **Enhancement Total:** ~3.0s
|
| 48 |
+
- 🏗️ **Layout/Description Total:** ~0.005s
|
| 49 |
+
- 🎨 **Comic Generation Total:** ~270.6s (sum of the four timings above; `generate_direct_comic` alone took ~102.0s)
|
| 50 |
+
- 📸 **Scene Extraction Total:** ~5.1s
|
app.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import gradio as gr
|
| 3 |
+
from google.generativeai import configure
|
| 4 |
+
from ui.story_interface import create_story_interface
|
| 5 |
+
import config
|
| 6 |
+
|
| 7 |
+
def main():
    """Configure the Google client, build the Gradio UI and start the server."""
    # The Google generative AI client is configured before the interface is built.
    configure(api_key=config.GOOGLE_API_KEY)

    # Stylesheet for the story-level and per-scene narration panels.
    narration_css = """
    .story-narration-box {
        background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%) !important;
        border: 3px solid #6366f1 !important;
        border-radius: 16px !important;
        padding: 24px !important;
        margin: 24px 0 !important;
        box-shadow: 0 8px 20px rgba(99, 102, 241, 0.15) !important;
        line-height: 1.8 !important;
        color: #1e293b !important;
        font-size: 16px !important;
        min-height: 120px !important;
    }

    .story-narration-box h3 {
        color: #4338ca !important;
        border-bottom: 3px solid #6366f1 !important;
        padding-bottom: 12px !important;
        margin-bottom: 20px !important;
        font-size: 20px !important;
        font-weight: bold !important;
        text-align: center !important;
    }

    .story-narration-box p {
        margin-bottom: 16px !important;
        text-align: left !important;
        color: #334155 !important;
        font-size: 16px !important;
        line-height: 1.7 !important;
        font-weight: 500 !important;
    }

    .story-narration-box div {
        color: #334155 !important;
        font-size: 16px !important;
    }

    #story_narration {
        max-height: 600px !important;
        overflow-y: auto !important;
        border: 2px solid #e2e8f0 !important;
        border-radius: 12px !important;
        background-color: #f8fafc !important;
    }

    #scene_narration {
        max-height: 500px !important;
        overflow-y: auto !important;
        border: 2px solid #e2e8f0 !important;
        border-radius: 12px !important;
    }

    /* Ensure all text in narration boxes is visible and prominent */
    #story_narration *, #scene_narration * {
        color: #334155 !important;
        font-size: 16px !important;
    }

    #story_narration h3, #scene_narration h3 {
        color: #4338ca !important;
        font-size: 20px !important;
        text-align: center !important;
    }

    /* Style for quick narration lines */
    .story-narration-box em {
        color: #64748b !important;
        font-style: italic !important;
        text-align: center !important;
        display: block !important;
        margin: 20px 0 !important;
    }
    """

    # Build the interface inside a Blocks context using the indigo/blue Soft theme.
    soft_theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue")
    with gr.Blocks(theme=soft_theme, css=narration_css) as demo:
        create_story_interface(demo)

    launch_options = {
        "server_name": "0.0.0.0",  # bind to all interfaces so other hosts can connect
        "server_port": 7860,
        "share": False,  # set to True to obtain a public Gradio link
        "debug": True,
    }
    demo.launch(**launch_options)


if __name__ == "__main__":
    main()
|
config.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
|
| 4 |
+
load_dotenv()
|
| 5 |
+
|
| 6 |
+
# API credentials read from the process environment; each one is None when
# the corresponding variable is not set.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
BFL_API_KEY = os.getenv("BFL_API_KEY")

# Image style name -> description text for that style.
IMAGE_STYLE_INFO = {
    "Comic Book Style": "Professional comic book art with bold outlines, vibrant colors, and dynamic action poses. Perfect for multi-panel consistency with uniform character designs across panels.",
    "Manga Style": "Japanese manga aesthetic with distinctive toning, speed lines, and expressive character eyes. Excellent consistency for sequential storytelling with consistent character designs.",
    "Cartoon Style": "Animated cartoon style with exaggerated features, bright colors, and simplified designs. Ideal for maintaining character consistency across multiple panels in one image.",
    "Photorealistic": "Highly detailed photographic quality with accurate lighting, textures, and proportions. Consistent realistic style perfect for single-image multi-panel storytelling.",
    "Cinematic Realism": "Movie-like visuals with dramatic lighting, depth of field, and realistic composition. Cinematic consistency across panels with unified lighting and color grading.",
    "Digital Painting": "Digital art with realistic elements but visible brushwork and artistic interpretation. Consistent artistic style throughout multi-panel compositions.",
}

# Style names in declaration order.
IMAGE_STYLES = list(IMAGE_STYLE_INFO)

# Default values for comic generation.
DEFAULT_COMIC_SETTINGS = {
    "num_panels": 12,
    "max_scenes": 12,
    "image_style": "Comic Book Style",
}

# Age group label -> description of the writing register for that audience.
AGE_GROUP_INFO = {
    "3-5 (Pre-school)": "Very simple vocabulary, short sentences, playful tone suitable for pre-school children.",
    "6-8 (Kids)": "Simple vocabulary and sentences with lively, descriptive language for early readers.",
    "9-12 (Pre-teen)": "Balanced vocabulary with engaging narrative style appropriate for pre-teens.",
    "13-18 (Teen)": "Richer vocabulary, deeper themes and emotions suitable for teenagers.",
    "18+ (Adult)": "Full vocabulary, complex themes, and immersive detail suitable for adults.",
}

# Age group labels in declaration order.
AGE_GROUPS = list(AGE_GROUP_INFO)
|
content/log.txt
ADDED
|
File without changes
|
content/logs.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
handover.md
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Comic Story Generator: Code Handover Document
|
| 2 |
+
|
| 3 |
+
**Date:** 2025-07-22
|
| 4 |
+
**Document Purpose:** This document provides a comprehensive technical handover for the Comic Story Generator project. It is intended for developers and future maintainers responsible for the deployment, maintenance, and extension of the application.
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## 1. Project Overview
|
| 9 |
+
|
| 10 |
+
The Comic Story Generator is a web application that automatically creates multi-page, textless comic stories from a user-provided description. The application leverages generative AI to produce visually coherent narratives, focusing on character consistency, expressive emotion, and logical panel sequencing.
|
| 11 |
+
|
| 12 |
+
### 1.1. Core Functionality
|
| 13 |
+
|
| 14 |
+
The application is designed to translate a textual story concept into a purely visual comic strip. Key characteristics include:
|
| 15 |
+
|
| 16 |
+
* **AI-Powered Narrative:** Utilizes Google's Gemini to interpret the user's concept and break it down into a structured, panel-by-panel narrative.
|
| 17 |
+
* **Visual Generation:** Employs a GPT-based image model to render complete comic pages based on the AI-generated narrative structure.
|
| 18 |
+
* **Intelligent Panel Detection:** Uses Gemini Vision to analyze the generated full-page image and accurately detect the boundaries of each panel, ensuring precise splitting.
|
| 19 |
+
* **Customization:** Offers users control over the output, including:
|
| 20 |
+
* **Layout:** Choice of panel count (from 4 to 24).
|
| 21 |
+
* **Length:** Generation of 1 to 10 pages.
|
| 22 |
+
* **Art Style:** A selection of visual styles, including "Classic Comic," "Manga," "Cartoon," "Digital Paint," and a high-contrast "Accessible" style designed for users with special needs.
|
| 23 |
+
|
| 24 |
+
### 1.2. High-Level Workflow
|
| 25 |
+
|
| 26 |
+
The generation process follows a clear, multi-step pipeline:
|
| 27 |
+
|
| 28 |
+
1. **User Input:** The user submits a short description of the desired story.
|
| 29 |
+
2. **Story Generation:** The `StoryGenerator` component uses Gemini to create a detailed, scene-by-scene description for each comic panel.
|
| 30 |
+
3. **Page Generation:** The `ComicGenerator` takes the panel descriptions and instructs the GPT-Image model to generate a single, composite image representing a full comic page with panels arranged in a grid.
|
| 31 |
+
4. **Layout Analysis:** The generated page is passed to the `GeminiVision` component, which analyzes the image to identify the precise coordinates and boundaries of each panel.
|
| 32 |
+
5. **Panel Splitting:** The application uses the coordinates from the vision analysis to accurately split the composite image into individual panel images.
|
| 33 |
+
6. **Final Output:** The processed panels are presented to the user as a complete, multi-page visual story.
|
| 34 |
+
|
| 35 |
+
---
|
| 36 |
+
|
| 37 |
+
## 2. System Architecture
|
| 38 |
+
|
| 39 |
+
The application is built on a modular architecture composed of three primary classes, each responsible for a distinct part of the generation pipeline.
|
| 40 |
+
|
| 41 |
+
### 2.1. System Diagram
|
| 42 |
+
|
| 43 |
+
```mermaid
|
| 44 |
+
classDiagram
|
| 45 |
+
class StoryGenerator{
|
| 46 |
+
+generate_story(description: string) : list[string]
|
| 47 |
+
+enhance_visuals(panel_descriptions: list) : list[string]
|
| 48 |
+
}
|
| 49 |
+
class ComicGenerator{
|
| 50 |
+
+generate_page(panel_descriptions: list) : Image
|
| 51 |
+
+split_panels(page_image: Image, grid_layout: dict) : list[Image]
|
| 52 |
+
}
|
| 53 |
+
class GeminiVision{
|
| 54 |
+
+analyze_layout(page_image: Image) : dict
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
StoryGenerator "1" -- "1" ComicGenerator : Provides panel descriptions
|
| 58 |
+
ComicGenerator "1" -- "1" GeminiVision : Uses for layout analysis
|
| 59 |
+
```
|
| 60 |
+
|
| 61 |
+
### 2.2. Data Flow
|
| 62 |
+
|
| 63 |
+
The end-to-end data flow illustrates the interaction between the user, the application, and the underlying AI models.
|
| 64 |
+
|
| 65 |
+
```mermaid
|
| 66 |
+
sequenceDiagram
|
| 67 |
+
participant User
|
| 68 |
+
participant App
|
| 69 |
+
participant Gemini as Gemini (Text/Story)
|
| 70 |
+
participant GPTImage as GPT-Image (Visuals)
|
| 71 |
+
participant GeminiVision as Gemini Vision (Analysis)
|
| 72 |
+
|
| 73 |
+
User->>+App: Submits story description
|
| 74 |
+
App->>+Gemini: Requests story structure from description
|
| 75 |
+
Gemini-->>-App: Returns panel-by-panel text descriptions
|
| 76 |
+
App->>+GPTImage: Requests comic page generation from descriptions
|
| 77 |
+
GPTImage-->>-App: Returns single full-page image
|
| 78 |
+
App->>+GeminiVision: Requests layout analysis of the image
|
| 79 |
+
GeminiVision-->>-App: Returns coordinates of each panel
|
| 80 |
+
App->>User: Displays final, split-panel comic
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
---
|
| 84 |
+
|
| 85 |
+
## 3. Setup & Installation
|
| 86 |
+
|
| 87 |
+
### 3.1. Prerequisites
|
| 88 |
+
|
| 89 |
+
* **Python:** Version 3.9 or higher.
|
| 90 |
+
* **API Keys:**
|
| 91 |
+
* An active OpenAI API key.
|
| 92 |
+
* An active Google API key with access to the Gemini family of models.
|
| 93 |
+
|
| 94 |
+
### 3.2. Installation Steps
|
| 95 |
+
|
| 96 |
+
1. **Clone the Repository:**
|
| 97 |
+
```bash
|
| 98 |
+
git clone https://github.com/yourusername/Comic-Story-Generator.git
|
| 99 |
+
cd Comic-Story-Generator
|
| 100 |
+
```
|
| 101 |
+
|
| 102 |
+
2. **Create and Activate a Virtual Environment:**
|
| 103 |
+
```bash
|
| 104 |
+
# Create the environment
|
| 105 |
+
python -m venv venv
|
| 106 |
+
|
| 107 |
+
# Activate the environment (macOS/Linux)
|
| 108 |
+
source venv/bin/activate
|
| 109 |
+
|
| 110 |
+
# Or, activate on Windows
|
| 111 |
+
# venv\Scripts\activate
|
| 112 |
+
```
|
| 113 |
+
|
| 114 |
+
3. **Install Dependencies:**
|
| 115 |
+
```bash
|
| 116 |
+
pip install -r requirements.txt
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
4. **Configure Environment Variables:**
|
| 120 |
+
Create a `.env` file in the project root and add your API keys.
|
| 121 |
+
```bash
|
| 122 |
+
echo "OPENAI_API_KEY=your_openai_key" > .env
|
| 123 |
+
echo "GOOGLE_API_KEY=your_google_key" >> .env
|
| 124 |
+
```
|
| 125 |
+
*Note: Ensure the `.env` file is added to your `.gitignore` file to prevent committing secrets.*
|
| 126 |
+
|
| 127 |
+
---
|
| 128 |
+
|
| 129 |
+
## 4. Environment Variables / Secrets
|
| 130 |
+
|
| 131 |
+
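The application requires the following environment variables to be set in a `.env` file at the project's root. Note that `config.py` additionally reads `TOGETHER_API_KEY` and `BFL_API_KEY` from the environment; they are not documented in the table below, so confirm whether your setup needs them.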
|
| 132 |
+
|
| 133 |
+
| Variable | Description | Required | Example |
|
| 134 |
+
| :--- | :--- | :--- | :--- |
|
| 135 |
+
| `OPENAI_API_KEY` | API key for the OpenAI service, used for GPT-Image generation. | Yes | `sk-xxxxxxxxxxxxxxxxxxxxxxxx` |
|
| 136 |
+
| `GOOGLE_API_KEY` | API key for Google AI services, used for Gemini (story structure) and Gemini Vision (layout analysis). | Yes | `AIzaSyxxxxxxxxxxxxxxxxxxxxx` |
|
| 137 |
+
|
| 138 |
+
---
|
| 139 |
+
|
| 140 |
+
## 5. How to Run
|
| 141 |
+
|
| 142 |
+
After completing the setup and installation steps, launch the application with the following command from the project's root directory:
|
| 143 |
+
|
| 144 |
+
```bash
|
| 145 |
+
python app.py
|
| 146 |
+
```
|
| 147 |
+
|
| 148 |
+
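The application will start a web server bound to `0.0.0.0` on port 7860 (see `demo.launch(...)` in `app.py`). The interface is therefore accessible locally at `http://127.0.0.1:7860` and, because the server listens on all interfaces, from other hosts that can reach this machine on that port.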
|
| 149 |
+
|
| 150 |
+
---
|
| 151 |
+
|
| 152 |
+
## 6. Deployment Instructions
|
| 153 |
+
|
| 154 |
+
[TODO] This section requires documentation for deploying the application to a production environment. Steps should include:
|
| 155 |
+
* Recommended hosting provider (e.g., AWS, Heroku, DigitalOcean).
|
| 156 |
+
* Instructions for setting up a production-grade web server (e.g., Gunicorn).
|
| 157 |
+
* Configuration of a reverse proxy (e.g., Nginx).
|
| 158 |
+
* Management of production environment variables/secrets.
|
| 159 |
+
* Process management (e.g., using `systemd`).
|
| 160 |
+
|
| 161 |
+
---
|
| 162 |
+
|
| 163 |
+
## 7. Core Components & Logic
|
| 164 |
+
|
| 165 |
+
The application logic is encapsulated in three main classes.
|
| 166 |
+
|
| 167 |
+
### 7.1. `StoryGenerator`
|
| 168 |
+
|
| 169 |
+
* **Responsibility:** Handles the narrative creation phase.
|
| 170 |
+
* **`generate_story()`:** Takes the raw user description as input. It constructs a prompt for the Gemini model to elicit a structured response containing a list of detailed text descriptions, one for each comic panel.
|
| 171 |
+
* **`enhance_visuals()`:** Processes the panel descriptions to add specific visual cues and optimizations, particularly for the "Accessible" style, ensuring high contrast and simplified object representation.
|
| 172 |
+
|
| 173 |
+
### 7.2. `ComicGenerator`
|
| 174 |
+
|
| 175 |
+
* **Responsibility:** Manages the visual generation and processing of the comic page.
|
| 176 |
+
* **`generate_page()`:** Aggregates the panel descriptions from `StoryGenerator` into a single, complex prompt for the GPT-Image model. This prompt instructs the AI to create one composite image with all panels laid out in a grid.
|
| 177 |
+
* **`split_panels()`:** Receives the generated page image and the layout data from `GeminiVision`. It uses this data to crop the page into individual panel images with high precision.
|
| 178 |
+
|
| 179 |
+
### 7.3. `GeminiVision`
|
| 180 |
+
|
| 181 |
+
* **Responsibility:** Performs visual analysis on the generated comic page.
|
| 182 |
+
* **`analyze_layout()`:** This is the core of the intelligent panel-splitting feature. It takes the full-page image as input and uses the Gemini Vision model to visually identify the boundaries of each panel. It returns a dictionary containing the coordinates and dimensions of the detected grid, which is more robust than assuming a fixed grid layout.
|
| 183 |
+
|
| 184 |
+
---
|
| 185 |
+
|
| 186 |
+
## 8. Third-party Dependencies
|
| 187 |
+
|
| 188 |
+
The complete list of Python packages is specified in `requirements.txt`. Key dependencies include:
|
| 189 |
+
|
| 190 |
+
* **`openai`**: Python client for the OpenAI API.
|
| 191 |
+
* **`google-generativeai`**: Python client for the Google AI (Gemini) API.
|
| 192 |
+
* **`python-dotenv`**: For loading environment variables from the `.env` file.
|
| 193 |
+
* **`Pillow`**: For image manipulation (cropping and saving).
|
| 194 |
+
* **`gradio`**: Web UI framework used to build `app.py` (the interface is created with `gr.Blocks` and served via `demo.launch`).
|
| 195 |
+
|
| 196 |
+
---
|
| 197 |
+
|
| 198 |
+
## 9. Testing Instructions
|
| 199 |
+
|
| 200 |
+
[TODO] A testing framework has not been established for this project. Future work should include:
|
| 201 |
+
* **Test Suite Setup:** Choose and configure a testing framework (e.g., `pytest`).
|
| 202 |
+
* **Unit Tests:** Create unit tests for individual methods in `StoryGenerator`, `ComicGenerator`, and `GeminiVision`. This should involve mocking the API calls to AI services to test the data processing logic in isolation.
|
| 203 |
+
* **Integration Tests:** Develop tests for the entire generation pipeline, from user input to final split panels.
|
| 204 |
+
* **Continuous Integration:** Set up a CI pipeline (e.g., using GitHub Actions) to run tests automatically on pull requests.
|
| 205 |
+
|
| 206 |
+
---
|
| 207 |
+
|
| 208 |
+
## 10. Troubleshooting & Common Issues
|
| 209 |
+
|
| 210 |
+
[TODO] This section should be populated as common issues are identified. Potential areas to document include:
|
| 211 |
+
* **API Key Errors:** Steps to verify that API keys are correctly configured and have the necessary permissions.
|
| 212 |
+
* **Incoherent Stories:** Guidance on how to write effective initial descriptions to improve narrative quality.
|
| 213 |
+
* **Poor Panel Splitting:** Troubleshooting steps for when Gemini Vision fails to detect the layout correctly (e.g., checking image complexity, trying a different art style).
|
| 214 |
+
* **Long Generation Times:** Explanation of typical performance and factors that can cause delays (e.g., API provider latency, number of panels).
|
| 215 |
+
|
| 216 |
+
---
|
| 217 |
+
|
| 218 |
+
## 11. TODOs / Future Work
|
| 219 |
+
|
| 220 |
+
Based on the project's focus areas, the following are key areas for future development and contribution:
|
| 221 |
+
|
| 222 |
+
* **Core Generation Logic:**
|
| 223 |
+
* Improve character consistency across multiple pages.
|
| 224 |
+
* Experiment with different AI models for potentially better visual or narrative results.
|
| 225 |
+
* Add support for including text (dialogue, captions) as an optional feature.
|
| 226 |
+
* **UI/UX Enhancements:**
|
| 227 |
+
* Develop a more interactive interface for viewing and arranging panels.
|
| 228 |
+
* Allow users to regenerate individual panels without restarting the entire process.
|
| 229 |
+
* Add an option to export the final comic as a PDF or other formats.
|
| 230 |
+
* **Accessibility Improvements:**
|
| 231 |
+
* Further refine the "Accessible" art style based on user feedback.
|
| 232 |
+
* Implement ARIA attributes and ensure full keyboard navigability for the web interface.
|
| 233 |
+
* Add an "image description" feature where a text-to-speech engine can describe the generated panels.
|
| 234 |
+
* **Documentation:**
|
| 235 |
+
* Create a detailed API reference for developers looking to build on the platform.
|
| 236 |
+
* Write user-facing guides on how to get the best results from the generator.
|
| 237 |
+
|
| 238 |
+
---
|
| 239 |
+
|
| 240 |
+
## 12. Contact / Ownership Info
|
| 241 |
+
|
| 242 |
+
* **Source Code:** [https://github.com/yourusername/Comic-Story-Generator](https://github.com/yourusername/Comic-Story-Generator)
|
| 243 |
+
* **License:** This project is licensed under the **MIT License**. For full details, see the `LICENSE` file in the repository.
|
| 244 |
+
* **Primary Contact:** [Info Needed: Add primary maintainer's name and contact information (e.g., GitHub handle or email).]
|
image.jpg
ADDED
|
kill.sh
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash

# Terminate every process whose command line contains "python".
# NOTE: pkill -f matches the pattern against the full command line, so this
# is not limited to this application; unrelated Python processes are
# signalled as well.
printf '%s\n' "Killing all Python processes..."
pkill -f python
|
models/__init__.py
ADDED
|
File without changes
|
models/comic_image_generator.py
ADDED
|
@@ -0,0 +1,1292 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import base64
|
| 3 |
+
import os
|
| 4 |
+
from PIL import Image, ImageDraw, ImageFont
|
| 5 |
+
import config
|
| 6 |
+
import warnings
|
| 7 |
+
import textwrap
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
import time
|
| 10 |
+
from models.image_generation import generate_image_fn
|
| 11 |
+
from google.generativeai import GenerativeModel
|
| 12 |
+
import json
|
| 13 |
+
import re
|
| 14 |
+
import tempfile
|
| 15 |
+
import shutil
|
| 16 |
+
from google.generativeai.types import GenerationConfig
|
| 17 |
+
from utils.comic_panel_splitter import split_comic_panels
|
| 18 |
+
import cv2
|
| 19 |
+
import numpy as np
|
| 20 |
+
from datetime import datetime
|
| 21 |
+
warnings.filterwarnings("ignore", message="IMAGE_SAFETY is not a valid FinishReason")
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def log_execution(func):
    """Decorate *func* so that every call is timed and logged.

    After each call, one line containing the function name, the start and
    end timestamps and the elapsed seconds is appended to
    ``content/logs.txt`` (relative to the current working directory) and
    printed to stdout.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        *func* and returns its result unchanged. The wrapper keeps the
        name and docstring of *func*.
    """
    import functools  # local import so the decorator does not rely on a new file-level import

    log_path = 'content/logs.txt'
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    @functools.wraps(func)  # preserve __name__/__doc__ of the decorated function
    def wrapper(*args, **kwargs):
        start_time = time.time()
        start_str = datetime.fromtimestamp(start_time).strftime(timestamp_format)

        result = func(*args, **kwargs)

        end_time = time.time()
        end_str = datetime.fromtimestamp(end_time).strftime(timestamp_format)
        duration = end_time - start_time

        entry = f"{func.__name__}, start: {start_str}, end: {end_str}, duration: {duration:.4f}s"

        # Create the log directory on demand. Without this, a missing
        # directory raises FileNotFoundError *after* func has completed,
        # which discards the result of the wrapped call.
        os.makedirs(os.path.dirname(log_path), exist_ok=True)
        with open(log_path, 'a', encoding='utf-8') as f:
            f.write(entry + "\n")

        # Also print so the timing is visible immediately.
        print(entry)

        return result
    return wrapper
|
| 44 |
+
|
| 45 |
+
class ComicImageGenerator:
|
| 46 |
+
"""
|
| 47 |
+
Generates a comic-style image.
|
| 48 |
+
"""
|
| 49 |
+
|
| 50 |
+
def __init__(self):
|
| 51 |
+
pass
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
@log_execution
|
| 55 |
+
def generate_comic(self, story_data, output_path=None, style=None):
    """Generate a comic image for ``story_data`` and return it with a data URL.

    A prompt is built from the story's title, description, characters and
    settings and handed to ``generate_image_fn``. If that call returns
    ``None`` or anything in the generation/saving step raises, a placeholder
    from ``_create_placeholder_comic`` is used instead.

    Args:
        story_data: Mapping read via ``.get`` for the keys ``"title"``,
            ``"description"``, ``"characters"`` and ``"settings"``.
        output_path: Optional path to save the resulting image to. Missing
            parent directories are created.
        style: Optional comic style name forwarded to the prompt builder.

    Returns:
        tuple: ``(image, data_url)`` where ``image`` is the generated or
        placeholder image object (presumably a ``PIL.Image.Image`` - confirm
        against ``generate_image_fn``) and ``data_url`` is a
        ``data:image/png;base64,...`` string of its PNG encoding.
    """
    title = story_data.get("title", "My Story")
    description = story_data.get("description", "")
    characters = story_data.get("characters", [])
    settings = story_data.get("settings", [])
    # The panel count is currently fixed; it is not read from story_data.
    num_scenes = 9

    def _persist_and_encode(image):
        """Save ``image`` to ``output_path`` (if given) and build its PNG data URL."""
        if output_path:
            directory = os.path.dirname(output_path)
            if directory:
                # exist_ok avoids the check-then-create race of the
                # os.path.exists()/os.makedirs() pair.
                os.makedirs(directory, exist_ok=True)
            image.save(output_path)

        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
        return image, f"data:image/png;base64,{img_b64}"

    prompt = self._create_comic_prompt(title, description, characters, settings, style, num_scenes)

    try:
        print(f"Generating comic with {num_scenes} scenes...")

        comic_image = generate_image_fn(
            selected_prompt=prompt,
            output_path=output_path
        )

        if comic_image is None:
            comic_image = self._create_placeholder_comic(title, description)

        return _persist_and_encode(comic_image)

    except Exception as e:
        # Best-effort fallback: any failure above (generation, saving or
        # encoding) yields a placeholder rather than an exception.
        print(f"Error generating comic: {str(e)}")
        placeholder = self._create_placeholder_comic(title, description)
        return _persist_and_encode(placeholder)
|
| 118 |
+
|
| 119 |
+
@log_execution
|
| 120 |
+
def _create_comic_prompt(self, title, description, characters=None, settings=None, style=None, num_scenes=1):
|
| 121 |
+
"""
|
| 122 |
+
Create a sophisticated, optimized prompt for comic generation with advanced visual consistency techniques.
|
| 123 |
+
Specialized for high-quality multi-panel storytelling with perfect character continuity.
|
| 124 |
+
|
| 125 |
+
Args:
|
| 126 |
+
title: Title of the story
|
| 127 |
+
description: Visual description of the story
|
| 128 |
+
characters: List of character data
|
| 129 |
+
settings: List of setting data
|
| 130 |
+
style: Optional visual style
|
| 131 |
+
num_scenes: Number of scenes to include (1-24)
|
| 132 |
+
|
| 133 |
+
Returns:
|
| 134 |
+
str: Advanced prompt optimized for professional comic generation with smart detail preservation
|
| 135 |
+
"""
|
| 136 |
+
|
| 137 |
+
priority_sections = []
|
| 138 |
+
|
| 139 |
+
layout_specs = self._get_optimal_layout_description(num_scenes)
|
| 140 |
+
priority_sections.append(f"CRITICAL LAYOUT: {layout_specs}")
|
| 141 |
+
|
| 142 |
+
if num_scenes >= 20:
|
| 143 |
+
compact_instructions = [
|
| 144 |
+
"🎯 COMPACT SCENE MASTERY FOR 20 PANELS:",
|
| 145 |
+
"SMALL EFFICIENT SCENES: Each panel must tell its story moment with maximum visual economy - focus on ONE key action, emotion, or story beat per panel",
|
| 146 |
+
"CLEAR FOCAL POINTS: Every panel needs ONE main subject in sharp focus with minimal background distractions",
|
| 147 |
+
"ESSENTIAL ELEMENTS ONLY: Include only the most crucial visual elements needed to advance the story - remove ALL unnecessary details",
|
| 148 |
+
"READABLE AT SMALL SIZE: No text text, expressions, and actions must be clearly visible even when the panel is small - use bold, simple compositions"
|
| 149 |
+
]
|
| 150 |
+
priority_sections.extend(compact_instructions)
|
| 151 |
+
|
| 152 |
+
if characters:
|
| 153 |
+
character_details = self._create_detailed_character_specifications(characters, num_scenes)
|
| 154 |
+
priority_sections.extend(character_details)
|
| 155 |
+
|
| 156 |
+
enhanced_story = self._create_detailed_story_description(description, title)
|
| 157 |
+
priority_sections.append(enhanced_story)
|
| 158 |
+
|
| 159 |
+
if settings:
|
| 160 |
+
environment_details = self._create_detailed_environment_specifications(settings, num_scenes)
|
| 161 |
+
priority_sections.extend(environment_details)
|
| 162 |
+
|
| 163 |
+
technical_specs = self._create_comprehensive_technical_specifications(style, num_scenes)
|
| 164 |
+
priority_sections.extend(technical_specs)
|
| 165 |
+
|
| 166 |
+
quality_flow = self._create_advanced_quality_and_flow_instructions(num_scenes)
|
| 167 |
+
priority_sections.extend(quality_flow)
|
| 168 |
+
|
| 169 |
+
assembled_prompt = self._assemble_prompt_with_smart_truncation(priority_sections)
|
| 170 |
+
|
| 171 |
+
if self.generate_panel_descriptions(assembled_prompt):
|
| 172 |
+
final_prompt = self.generate_panel_descriptions(assembled_prompt)
|
| 173 |
+
else :
|
| 174 |
+
final_prompt = assembled_prompt
|
| 175 |
+
|
| 176 |
+
print(f"\n XXXXXX {final_prompt} XXXXXX \n")
|
| 177 |
+
|
| 178 |
+
return final_prompt
|
| 179 |
+
|
| 180 |
+
@log_execution
|
| 181 |
+
def _create_detailed_character_specifications(self, characters, num_scenes):
|
| 182 |
+
"""Create extremely detailed character specifications prioritizing visual consistency."""
|
| 183 |
+
char_specs = []
|
| 184 |
+
|
| 185 |
+
char_specs.append("🎭 CRITICAL CHARACTER CONSISTENCY PROTOCOL:")
|
| 186 |
+
char_specs.append("ABSOLUTE REQUIREMENT: Characters MUST look identical in every single panel - same face, hair, clothes, proportions, expressions style")
|
| 187 |
+
|
| 188 |
+
for i, character in enumerate(characters[:3]):
|
| 189 |
+
if isinstance(character, dict) and "visual_description" in character:
|
| 190 |
+
char_name = character.get("name", f"Character_{i+1}")
|
| 191 |
+
char_desc = character["visual_description"]
|
| 192 |
+
|
| 193 |
+
char_spec = f"CHARACTER {i+1} - {char_name}: {char_desc}"
|
| 194 |
+
|
| 195 |
+
if "traits" in character and character["traits"]:
|
| 196 |
+
traits = character["traits"][:5]
|
| 197 |
+
char_spec += f" | DISTINCTIVE FEATURES: {', '.join(traits)}"
|
| 198 |
+
|
| 199 |
+
char_spec += f" | CONSISTENCY RULE: This exact appearance must be maintained across all {num_scenes} panels with zero variation in facial features, hair, clothing, or body proportions"
|
| 200 |
+
|
| 201 |
+
char_specs.append(char_spec)
|
| 202 |
+
|
| 203 |
+
if len([c for c in characters[:3] if isinstance(c, dict) and 'visual_description' in c]) > 1:
|
| 204 |
+
char_specs.append(f"MULTI-CHARACTER RULE: All characters must maintain their exact individual appearances simultaneously across all {num_scenes} panels - no character design drift allowed")
|
| 205 |
+
|
| 206 |
+
return char_specs
|
| 207 |
+
|
| 208 |
+
@log_execution
|
| 209 |
+
def _create_detailed_story_description(self, description, title):
|
| 210 |
+
"""Create enhanced story description with preserved important details."""
|
| 211 |
+
story_elements = []
|
| 212 |
+
|
| 213 |
+
enhanced_desc = f"STORY CONTENT: {title} - {description}"
|
| 214 |
+
|
| 215 |
+
enhanced_desc += " | VISUAL NARRATIVE FOCUS: Every detail must be clearly visible and contribute to story comprehension through imagery alone"
|
| 216 |
+
|
| 217 |
+
enhanced_desc += " | ATMOSPHERIC DETAILS: Include specific lighting, weather, time of day, and environmental mood indicators that enhance the narrative"
|
| 218 |
+
|
| 219 |
+
enhanced_desc += " | CHARACTER EXPRESSION CLARITY: All emotions, reactions, and character intentions must be immediately readable through facial expressions, body language, and positioning"
|
| 220 |
+
|
| 221 |
+
return enhanced_desc
|
| 222 |
+
@log_execution
|
| 223 |
+
def _create_detailed_environment_specifications(self, settings, num_scenes):
|
| 224 |
+
"""Create detailed environment specifications with consistency focus."""
|
| 225 |
+
env_specs = []
|
| 226 |
+
|
| 227 |
+
env_specs.append(" ENVIRONMENTAL CONSISTENCY PROTOCOL:")
|
| 228 |
+
|
| 229 |
+
for i, setting in enumerate(settings[:3]):
|
| 230 |
+
if isinstance(setting, dict) and "description" in setting:
|
| 231 |
+
setting_name = setting.get("name", f"Location_{i+1}")
|
| 232 |
+
setting_desc = setting["description"]
|
| 233 |
+
|
| 234 |
+
env_spec = f"LOCATION {i+1} - {setting_name}: {setting_desc}"
|
| 235 |
+
|
| 236 |
+
if "visual_elements" in setting and setting["visual_elements"]:
|
| 237 |
+
elements = setting["visual_elements"][:5]
|
| 238 |
+
env_spec += f" | KEY VISUAL MARKERS: {', '.join(elements)}"
|
| 239 |
+
|
| 240 |
+
if "mood" in setting:
|
| 241 |
+
env_spec += f" | ATMOSPHERE: {setting['mood']}"
|
| 242 |
+
|
| 243 |
+
env_spec += f" | LOCATION CONSISTENCY: When this location appears across multiple panels, all architectural details, lighting, and distinctive features must remain identical"
|
| 244 |
+
|
| 245 |
+
env_specs.append(env_spec)
|
| 246 |
+
|
| 247 |
+
return env_specs
|
| 248 |
+
@log_execution
|
| 249 |
+
def _create_comprehensive_technical_specifications(self, style, num_scenes):
|
| 250 |
+
"""Create comprehensive technical specifications with detail preservation."""
|
| 251 |
+
tech_specs = []
|
| 252 |
+
|
| 253 |
+
style_details = self._get_enhanced_style_specifications(style)
|
| 254 |
+
tech_specs.extend(style_details)
|
| 255 |
+
|
| 256 |
+
composition_specs = [
|
| 257 |
+
" PANEL COMPOSITION MASTERY:",
|
| 258 |
+
f"Grid Layout: Precisely arranged {self._calculate_optimal_grid_layout(num_scenes)} grid with professional comic book spacing and clear panel borders",
|
| 259 |
+
"Visual Hierarchy: Each panel must have a clear focal point with supporting details that enhance rather than distract from the main action",
|
| 260 |
+
"Depth and Perspective: Use foreground, midground, and background elements to create visual depth and spatial relationships",
|
| 261 |
+
"Color Harmony: Maintain consistent color palette across all panels while using color psychology to enhance mood and narrative flow"
|
| 262 |
+
]
|
| 263 |
+
|
| 264 |
+
if num_scenes >= 20:
|
| 265 |
+
composition_specs.extend([
|
| 266 |
+
"COMPACT PANEL OPTIMIZATION: Design each panel for MAXIMUM visual impact in minimal space",
|
| 267 |
+
"SIMPLE BACKGROUNDS: Use minimal, clean backgrounds that don't compete with main subjects",
|
| 268 |
+
"BOLD CHARACTER POSES: Use clear, distinctive poses and gestures that read well at small sizes",
|
| 269 |
+
"HIGH CONTRAST: Ensure strong contrast between characters and backgrounds for clarity"
|
| 270 |
+
])
|
| 271 |
+
|
| 272 |
+
tech_specs.extend(composition_specs)
|
| 273 |
+
|
| 274 |
+
detail_specs = [
|
| 275 |
+
" DETAIL PRESERVATION PROTOCOL:",
|
| 276 |
+
"Facial Detail Consistency: All character faces must maintain identical features - eye shape, nose structure, mouth proportions, facial hair, scars, or distinctive marks",
|
| 277 |
+
"Clothing and Accessory Continuity: Every piece of clothing, jewelry, weapons, or accessories must appear identical across panels",
|
| 278 |
+
"Environmental Detail Tracking: Background objects, architectural elements, vegetation, and atmospheric effects must remain consistent when locations reappear",
|
| 279 |
+
"Lighting Continuity: Maintain logical light sources and shadow patterns that reflect time of day and weather conditions consistently"
|
| 280 |
+
]
|
| 281 |
+
tech_specs.extend(detail_specs)
|
| 282 |
+
|
| 283 |
+
return tech_specs
|
| 284 |
+
@log_execution
|
| 285 |
+
def _get_enhanced_style_specifications(self, style):
|
| 286 |
+
"""Get enhanced style specifications with technical details."""
|
| 287 |
+
enhanced_styles = {
|
| 288 |
+
"Comic Book Style": [
|
| 289 |
+
" MODERN DIGITAL COMIC BOOK STYLE (NO SKETCH LINES, NO DEFORMITIES):",
|
| 290 |
+
"Line Art: Bold, ultra-clean digital inking with consistent stroke weight—absolutely no rough sketch lines or unfinished strokes",
|
| 291 |
+
"Color Treatment: Vibrant, saturated colors with polished cel-shading and sharp highlights for a glossy modern finish",
|
| 292 |
+
"Shading: Precise digital shadows and highlights—avoid gradient banding or painterly strokes associated with traditional sketches",
|
| 293 |
+
"Panel Borders: Clean, geometric panel borders with consistent gutters and professional comic book page layout standards"
|
| 294 |
+
],
|
| 295 |
+
"Manga Style": [
|
| 296 |
+
" MODERN DIGITAL MANGA STYLE (NO SKETCH LINES, NO DEFORMITIES):",
|
| 297 |
+
"Line Quality: Razor-sharp digital line work with deliberate varying weights—completely free of rough sketches",
|
| 298 |
+
"Character Design: Classic manga proportions rendered crisply with expressive eyes and flawless facial details—no distortions",
|
| 299 |
+
"Tone Work: High-resolution screentones and digitally applied hatching for a refined finish",
|
| 300 |
+
"Panel Layout: Dynamic panel flow with polished angles that enhance narrative pacing"
|
| 301 |
+
],
|
| 302 |
+
"Photorealistic": [
|
| 303 |
+
" MODERN DIGITAL PHOTOREALISM (NO SKETCH LINES, NO DEFORMITIES):",
|
| 304 |
+
"Rendering Quality: Cinema-quality realistic rendering with accurate lighting physics and atmospheric effects—faces and limbs must appear intact and natural",
|
| 305 |
+
"Detail Level: Ultra-high detail textures with crisp edges—no painterly or sketch artefacts",
|
| 306 |
+
"Color Accuracy: Natural color grading with realistic skin tones, environmental colors, and accurate material reflectance",
|
| 307 |
+
"Depth of Field: Professional photography-style focus effects with realistic camera perspective and depth relationships"
|
| 308 |
+
],
|
| 309 |
+
"Cinematic Realism": [
|
| 310 |
+
" MODERN DIGITAL CINEMATIC REALISM (NO SKETCH LINES, NO DEFORMITIES):",
|
| 311 |
+
"Film Quality: Movie-grade digital rendering with crisp edges and zero sketch artefacts",
|
| 312 |
+
"Color Grading: Cinematic color treatment with cohesive palette—maintain realistic skin and material fidelity",
|
| 313 |
+
"Camera Work: Dynamic camera angles translated into polished panel compositions",
|
| 314 |
+
"Lighting Design: Professional film lighting with atmospheric effects—ensure characters remain fully detailed, no distortions"
|
| 315 |
+
]
|
| 316 |
+
}
|
| 317 |
+
|
| 318 |
+
return enhanced_styles.get(style, [
|
| 319 |
+
" MODERN DIGITAL COMIC ILLUSTRATION (NO SKETCH LINES, NO DEFORMITIES):",
|
| 320 |
+
"Professional Art: Gallery-quality digital illustration with masterful composition, color theory, and technical execution",
|
| 321 |
+
"Visual Clarity: Crystal-clear details with optimal contrast and saturation for maximum visual impact and readability",
|
| 322 |
+
"Artistic Consistency: Unified artistic approach across all panels maintaining consistent quality and style treatment—absolutely no sketch artefacts"
|
| 323 |
+
])
|
| 324 |
+
@log_execution
|
| 325 |
+
def _create_advanced_quality_and_flow_instructions(self, num_scenes):
|
| 326 |
+
"""Create advanced quality and flow instructions."""
|
| 327 |
+
quality_instructions = [
|
| 328 |
+
" ADVANCED QUALITY REQUIREMENTS:",
|
| 329 |
+
"Technical Excellence: Ultra-high resolution output with crisp details, optimal contrast, and professional-grade visual quality",
|
| 330 |
+
"Narrative Clarity: Every panel must advance the story visibly - clear cause and effect relationships between sequential panels",
|
| 331 |
+
# "Visual Flow: Smooth eye movement guidance from panel to panel using composition, character positioning, and visual elements",
|
| 332 |
+
"Line Art: Sharp digital lines, clean and precise, emphasizing dynamic movement and emotional clarity.",
|
| 333 |
+
"Emotional Impact: Each panel must convey specific emotions through character expressions, body language, and environmental mood"
|
| 334 |
+
]
|
| 335 |
+
|
| 336 |
+
if num_scenes > 1:
|
| 337 |
+
flow_instructions = [
|
| 338 |
+
f" {num_scenes}-PANEL FLOW MASTERY:",
|
| 339 |
+
# "Sequential Continuity: Logical progression from panel to panel with clear temporal and spatial relationships",
|
| 340 |
+
"Action Sequences: Break complex actions into clear, understandable steps across multiple panels",
|
| 341 |
+
"Character Tracking: Maintain character positions and movements logically across panel transitions",
|
| 342 |
+
"Pacing Control: Balance action panels with character moments and environmental establishing shots for optimal narrative rhythm",
|
| 343 |
+
"EACH PANEL IS A DISTINCT SCENE : Each panel must depict a unique, self-contained moment or tableau from the story. No visual elements or action should flow directly from one panel to another"
|
| 344 |
+
]
|
| 345 |
+
|
| 346 |
+
if num_scenes >= 20:
|
| 347 |
+
flow_instructions.extend([
|
| 348 |
+
"STORY ARC FOR 24 PANELS: Create a complete story with beginning (panels 1-6), rising action (panels 7-12), climax (panels 13-18), and resolution (panels 19-24)",
|
| 349 |
+
"MICRO-MOMENTS: Each panel captures a single decisive moment - one expression change, one action beat, one story revelation",
|
| 350 |
+
"VISUAL ECONOMY: Every element in each panel must serve the story - no decorative details that don't advance narrative",
|
| 351 |
+
"READER ENGAGEMENT: Design panel flow to maintain interest across all 24 panels with strategic use of close-ups, wide shots, and dynamic angles"
|
| 352 |
+
])
|
| 353 |
+
|
| 354 |
+
quality_instructions.extend(flow_instructions)
|
| 355 |
+
|
| 356 |
+
return quality_instructions
|
| 357 |
+
@log_execution
|
| 358 |
+
def _assemble_prompt_with_smart_truncation(self, priority_sections):
|
| 359 |
+
"""Assemble prompt with smart truncation that preserves critical details."""
|
| 360 |
+
MAX_LENGTH = 31500
|
| 361 |
+
|
| 362 |
+
full_prompt = " || ".join(priority_sections)
|
| 363 |
+
|
| 364 |
+
if len(full_prompt) <= MAX_LENGTH:
|
| 365 |
+
negative_prompt = "NEGATIVE PROMPTS: NO deformed anatomy, NO missing limbs or facial features, NO inconsistent character designs, NO blurry or out-of-focus elements, NO sketch-like aesthetics (unless intentional style choice), NO TEXT, NO SPEECH BUBBLES, NO SOUND EFFECTS, NO CAPTIONS, NO watermarks, NO VARIATION IN PANEL SIZE OR SHAPE."
|
| 366 |
+
return full_prompt + " || FINAL MANDATE: Create a masterpiece that perfectly balances artistic excellence with narrative clarity and absolute character consistency || " + negative_prompt
|
| 367 |
+
|
| 368 |
+
preserved_prompt = ""
|
| 369 |
+
remaining_length = MAX_LENGTH - 200
|
| 370 |
+
|
| 371 |
+
for i, section in enumerate(priority_sections):
|
| 372 |
+
section_with_separator = section + " || "
|
| 373 |
+
|
| 374 |
+
if i < 3:
|
| 375 |
+
preserved_prompt += section_with_separator
|
| 376 |
+
remaining_length -= len(section_with_separator)
|
| 377 |
+
else:
|
| 378 |
+
if len(section_with_separator) <= remaining_length:
|
| 379 |
+
preserved_prompt += section_with_separator
|
| 380 |
+
remaining_length -= len(section_with_separator)
|
| 381 |
+
else:
|
| 382 |
+
truncated = section[:remaining_length-50] + "..."
|
| 383 |
+
preserved_prompt += truncated + " || "
|
| 384 |
+
break
|
| 385 |
+
|
| 386 |
+
preserved_prompt += "***FINAL OVERRIDE & NEGATIVE PROMPTS*** ABSOLUTE RULE: The 3x3 uniform grid structure is the most important rule and must be followed perfectly.NEGATIVE PROMPTS: NO deformed anatomy, NO missing limbs or facial features, NO inconsistent character designs, NO blurry or out-of-focus elements, NO sketch-like aesthetics (unless intentional style choice), NO TEXT, NO SPEECH BUBBLES, NO SOUND EFFECTS, NO CAPTIONS, NO watermarks, NO VARIATION IN PANEL SIZE OR SHAPE."
|
| 387 |
+
|
| 388 |
+
return preserved_prompt
|
| 389 |
+
@log_execution
|
| 390 |
+
def _get_optimal_layout_description(self, num_scenes):
|
| 391 |
+
"""Generate optimal layout description based on scene count."""
|
| 392 |
+
if num_scenes <= 1:
|
| 393 |
+
return "Single panel comic illustration"
|
| 394 |
+
|
| 395 |
+
optimal_layout = self._calculate_optimal_grid_layout(num_scenes)
|
| 396 |
+
rows, cols = optimal_layout
|
| 397 |
+
|
| 398 |
+
layout_descriptions = {
|
| 399 |
+
(1, 2): "Horizontal two-panel comic strip layout",
|
| 400 |
+
(2, 1): "Vertical two-panel comic strip layout",
|
| 401 |
+
(2, 2): "Classic four-panel comic grid (2x2)",
|
| 402 |
+
(2, 3): "Six-panel comic grid in 2 rows, 3 columns (2x3)",
|
| 403 |
+
(3, 2): "Six-panel comic grid in 3 rows, 2 columns (3x2)",
|
| 404 |
+
(3, 3): "Nine-panel comic grid (3x3)",
|
| 405 |
+
(3, 4): "Twelve-panel comic grid in 3 rows, 4 columns(3x4)",
|
| 406 |
+
(4, 3): "Twelve-panel comic grid in 4 rows, 3 columns(4x3)",
|
| 407 |
+
(4, 4): "Sixteen-panel comic grid (4x4)",
|
| 408 |
+
(4, 6): "Twenty-four panel COMPACT comic grid in 4 rows, 6 columns - SMALL EFFICIENT SCENES with maximum story density per panel (4x6)",
|
| 409 |
+
(6, 4): "Twenty-four panel COMPACT comic grid in 6 rows, 4 columns - SMALL EFFICIENT SCENES with vertical storytelling format (6x4)",
|
| 410 |
+
(3, 8): "Twenty-four panel COMPACT comic grid in 3 rows, 8 columns - SMALL EFFICIENT SCENES with cinematic widescreen format(3x8)",
|
| 411 |
+
(8, 3): "Twenty-four panel comic grid in 8 rows, 3 columns - vertical scroll format (8x3)"
|
| 412 |
+
}
|
| 413 |
+
|
| 414 |
+
layout_desc = layout_descriptions.get((rows, cols), f"{rows}x{cols} comic panel grid layout")
|
| 415 |
+
|
| 416 |
+
return f"COMIC LAYOUT: {layout_desc} with clear panel borders, consistent gutters, and professional comic book formatting"
|
| 417 |
+
@log_execution
|
| 418 |
+
def _enhance_description_for_visual_consistency(self, description): # No Use?
|
| 419 |
+
"""Enhance the core description with visual consistency keywords."""
|
| 420 |
+
consistency_enhancers = [
|
| 421 |
+
"maintaining perfect visual consistency throughout all panels",
|
| 422 |
+
"identical character appearances across every scene",
|
| 423 |
+
"unified lighting and color palette",
|
| 424 |
+
"consistent artistic style and perspective"
|
| 425 |
+
]
|
| 426 |
+
|
| 427 |
+
enhanced = f"STORY CONTENT: {description}. "
|
| 428 |
+
enhanced += "VISUAL CONSISTENCY REQUIREMENTS: " + ", ".join(consistency_enhancers)
|
| 429 |
+
|
| 430 |
+
return enhanced
|
| 431 |
+
@log_execution
|
| 432 |
+
def _create_character_consistency_anchors(self, characters, num_scenes): # No Use?
|
| 433 |
+
"""Create sophisticated character consistency instructions."""
|
| 434 |
+
anchors = []
|
| 435 |
+
|
| 436 |
+
if characters:
|
| 437 |
+
anchors.append("CHARACTER CONSISTENCY ANCHORS:")
|
| 438 |
+
|
| 439 |
+
for i, character in enumerate(characters[:2]):
|
| 440 |
+
if isinstance(character, dict) and "visual_description" in character:
|
| 441 |
+
char_desc = character["visual_description"]
|
| 442 |
+
|
| 443 |
+
anchor = f"Character {i+1}: {char_desc} - MUST appear IDENTICAL in every single panel with exact same: facial features, hair style, clothing, proportions, and distinctive visual elements"
|
| 444 |
+
anchors.append(anchor)
|
| 445 |
+
|
| 446 |
+
if num_scenes > 1:
|
| 447 |
+
anchors.append(f"CRITICAL: All {len([c for c in characters[:2] if isinstance(c, dict) and 'visual_description' in c])} characters must look exactly the same across all {num_scenes} panels - same faces, same outfits, same proportions, same artistic rendering")
|
| 448 |
+
|
| 449 |
+
return anchors
|
| 450 |
+
@log_execution
|
| 451 |
+
def _create_environment_consistency_anchors(self, settings, num_scenes): # No Use?
|
| 452 |
+
"""Create environmental consistency instructions."""
|
| 453 |
+
anchors = []
|
| 454 |
+
|
| 455 |
+
if settings:
|
| 456 |
+
anchors.append("ENVIRONMENTAL CONSISTENCY:")
|
| 457 |
+
|
| 458 |
+
for setting in settings:
|
| 459 |
+
if isinstance(setting, dict) and "description" in setting:
|
| 460 |
+
setting_desc = setting["description"]
|
| 461 |
+
anchors.append(f"Setting: {setting_desc} - maintain consistent architectural details, lighting, and atmospheric elements when this location appears")
|
| 462 |
+
|
| 463 |
+
if num_scenes > 1:
|
| 464 |
+
anchors.append(f"Ensure environmental continuity across all {num_scenes} panels with logical spatial relationships and consistent time-of-day lighting")
|
| 465 |
+
|
| 466 |
+
return anchors
|
| 467 |
+
@log_execution
|
| 468 |
+
def _create_advanced_style_instructions(self, style, num_scenes):
|
| 469 |
+
"""Create advanced style instructions with technical specifications."""
|
| 470 |
+
instructions = []
|
| 471 |
+
|
| 472 |
+
advanced_style_map = {
|
| 473 |
+
"Comic Book Style": [
|
| 474 |
+
"modern digital comic book illustration style (no sketch-like strokes, no deformities)",
|
| 475 |
+
"bold ultra-clean line art with consistent stroke weight",
|
| 476 |
+
"vibrant saturated colors with polished highlights and shadows",
|
| 477 |
+
"dynamic panel compositions with varied camera angles",
|
| 478 |
+
"classic comic book rendering techniques executed with a contemporary digital finish"
|
| 479 |
+
],
|
| 480 |
+
"Manga Style": [
|
| 481 |
+
"modern digital manga illustration style (no sketch artefacts, no deformities)",
|
| 482 |
+
"razor-sharp line work with deliberate varying weights",
|
| 483 |
+
"subtle color palette with high-resolution screentone effects",
|
| 484 |
+
"expressive character designs with flawless facial details",
|
| 485 |
+
"dynamic manga panel composition and flow"
|
| 486 |
+
],
|
| 487 |
+
"Cartoon Style": [
|
| 488 |
+
"polished digital cartoon style (clean vectors, no sketch lines, no deformities)",
|
| 489 |
+
"smooth rounded character designs with appealing proportions",
|
| 490 |
+
"bright harmonious color schemes with soft lighting",
|
| 491 |
+
"clear readable expressions and body language",
|
| 492 |
+
"family-friendly visual appeal with consistent character models"
|
| 493 |
+
],
|
| 494 |
+
"Photorealistic": [
|
| 495 |
+
"high-quality digital photorealism (no sketch artefacts, no deformities)",
|
| 496 |
+
"detailed realistic lighting and shadows",
|
| 497 |
+
"natural color grading with realistic materials and textures",
|
| 498 |
+
"cinematic composition with depth of field effects",
|
| 499 |
+
"professional photography-inspired visual quality"
|
| 500 |
+
],
|
| 501 |
+
"Cinematic Realism": [
|
| 502 |
+
"digital cinematic realism (crisp, no sketch lines, no deformities)",
|
| 503 |
+
"dramatic lighting with atmospheric effects",
|
| 504 |
+
"rich color grading with cinematic color palette",
|
| 505 |
+
"dynamic camera angles and professional composition",
|
| 506 |
+
"film-quality character rendering and environmental detail"
|
| 507 |
+
],
|
| 508 |
+
"Digital Painting": [
|
| 509 |
+
"masterful digital painting technique with a polished finish (no sketch lines, no deformities)",
|
| 510 |
+
"controlled painterly brushwork with intentional texture and depth",
|
| 511 |
+
"rich color harmony with sophisticated lighting",
|
| 512 |
+
"artistic composition with traditional painting principles",
|
| 513 |
+
"high-end digital art gallery quality"
|
| 514 |
+
]
|
| 515 |
+
}
|
| 516 |
+
|
| 517 |
+
if style and style in advanced_style_map:
|
| 518 |
+
instructions.append("ARTISTIC STYLE SPECIFICATIONS:")
|
| 519 |
+
instructions.extend(advanced_style_map[style])
|
| 520 |
+
else:
|
| 521 |
+
instructions.extend([
|
| 522 |
+
"ARTISTIC STYLE: High-quality illustration with professional comic book aesthetics",
|
| 523 |
+
"clean precise line work with consistent artistic rendering",
|
| 524 |
+
"harmonious color palette with strategic lighting effects",
|
| 525 |
+
"polished visual presentation with attention to detail"
|
| 526 |
+
])
|
| 527 |
+
|
| 528 |
+
if num_scenes > 1:
|
| 529 |
+
instructions.append(f"STYLE CONSISTENCY: Maintain identical artistic style, line weight, color saturation, and rendering quality across all {num_scenes} panels")
|
| 530 |
+
|
| 531 |
+
return instructions
|
| 532 |
+
@log_execution
|
| 533 |
+
def _create_panel_flow_instructions(self, num_scenes):
|
| 534 |
+
"""Create instructions for optimal panel flow and transitions."""
|
| 535 |
+
flow_instructions = []
|
| 536 |
+
|
| 537 |
+
if num_scenes > 1:
|
| 538 |
+
flow_instructions.extend([
|
| 539 |
+
"PANEL FLOW AND TRANSITIONS:",
|
| 540 |
+
"create smooth visual flow from panel to panel following standard left-to-right, top-to-bottom reading order",
|
| 541 |
+
"design panel compositions that guide the eye naturally through the sequence",
|
| 542 |
+
"establish clear visual relationships between consecutive panels",
|
| 543 |
+
"use consistent perspective and scale to maintain spatial continuity",
|
| 544 |
+
"create visual rhythm through varied but harmonious panel compositions"
|
| 545 |
+
])
|
| 546 |
+
|
| 547 |
+
if num_scenes >= 10:
|
| 548 |
+
flow_instructions.extend([
|
| 549 |
+
"COMPREHENSIVE STORYTELLING FLOW: Design a compelling visual narrative that maintains engagement across all 12 panels",
|
| 550 |
+
"balance action panels with character moments and environmental establishing shots",
|
| 551 |
+
"create visual crescendos and quiet beats for optimal pacing",
|
| 552 |
+
"ensure each panel contributes meaningfully to the overall story progression"
|
| 553 |
+
])
|
| 554 |
+
|
| 555 |
+
return flow_instructions
|
| 556 |
+
@log_execution
|
| 557 |
+
def _create_quality_specifications(self, num_scenes):
|
| 558 |
+
"""Create technical quality specifications."""
|
| 559 |
+
quality_specs = [
|
| 560 |
+
"TECHNICAL QUALITY REQUIREMENTS:",
|
| 561 |
+
"ultra-high resolution with crisp clean details",
|
| 562 |
+
"professional comic book production quality",
|
| 563 |
+
"optimal contrast and saturation for visual clarity",
|
| 564 |
+
"balanced composition with clear focal points in each panel",
|
| 565 |
+
"masterful use of negative space and visual hierarchy"
|
| 566 |
+
]
|
| 567 |
+
|
| 568 |
+
if num_scenes > 1:
|
| 569 |
+
quality_specs.extend([
|
| 570 |
+
f"perfect grid alignment with consistent panel spacing across all {num_scenes} panels",
|
| 571 |
+
"clear panel borders with professional gutters and margins",
|
| 572 |
+
"unified visual presentation suitable for professional comic publication"
|
| 573 |
+
])
|
| 574 |
+
|
| 575 |
+
return quality_specs
|
| 576 |
+
@log_execution
|
| 577 |
+
def _optimize_prompt_structure(self, prompt_parts):
|
| 578 |
+
"""Optimize the prompt structure for maximum AI comprehension."""
|
| 579 |
+
structured_prompt = []
|
| 580 |
+
|
| 581 |
+
for i, part in enumerate(prompt_parts):
|
| 582 |
+
if isinstance(part, list):
|
| 583 |
+
structured_prompt.append(" | ".join(part))
|
| 584 |
+
else:
|
| 585 |
+
structured_prompt.append(part)
|
| 586 |
+
|
| 587 |
+
final_prompt = " || ".join(structured_prompt)
|
| 588 |
+
|
| 589 |
+
final_prompt += " || FINAL REQUIREMENT: Create a masterpiece-quality comic that perfectly balances artistic excellence with clear storytelling"
|
| 590 |
+
|
| 591 |
+
return final_prompt
|
| 592 |
+
@log_execution
|
| 593 |
+
def _calculate_optimal_grid_layout(self, num_scenes):
|
| 594 |
+
"""Calculate the most visually appealing grid layout for the given number of scenes."""
|
| 595 |
+
optimal_layouts = {
|
| 596 |
+
1: (1, 1),
|
| 597 |
+
2: (1, 2),
|
| 598 |
+
3: (1, 3),
|
| 599 |
+
4: (2, 2),
|
| 600 |
+
5: (1, 5),
|
| 601 |
+
6: (2, 3),
|
| 602 |
+
7: (1, 7),
|
| 603 |
+
8: (2, 4),
|
| 604 |
+
9: (3, 3),
|
| 605 |
+
10: (2, 5),
|
| 606 |
+
11: (1, 11),
|
| 607 |
+
12: (3, 4),
|
| 608 |
+
13: (1, 13),
|
| 609 |
+
14: (2, 7),
|
| 610 |
+
15: (3, 5),
|
| 611 |
+
16: (4, 4),
|
| 612 |
+
17: (1, 17),
|
| 613 |
+
18: (3, 6),
|
| 614 |
+
19: (1, 19),
|
| 615 |
+
20: (4, 5),
|
| 616 |
+
21: (3, 7),
|
| 617 |
+
22: (2, 11),
|
| 618 |
+
23: (1, 23),
|
| 619 |
+
24: (4, 6),
|
| 620 |
+
}
|
| 621 |
+
|
| 622 |
+
return optimal_layouts.get(num_scenes, self._calculate_optimal_layout(num_scenes, 1024, 768))
|
| 623 |
+
|
| 624 |
+
def _create_placeholder_comic(self, title, description):
    """Build a simple 800x600 stand-in image for a failed generation.

    The image shows the title, a grey framed box and (when given) the
    description, truncated to 300 characters and wrapped at 70 columns.

    Args:
        title: Title of the comic
        description: Visual description of the comic

    Returns:
        PIL.Image.Image: Placeholder comic image
    """
    canvas_size = (800, 600)
    width, height = canvas_size
    black = (0, 0, 0)

    comic = Image.new("RGB", canvas_size, (255, 255, 255))
    draw = ImageDraw.Draw(comic)

    # Fall back to Pillow's built-in font when Arial cannot be loaded.
    try:
        title_font = ImageFont.truetype("Arial.ttf", 36)
        desc_font = ImageFont.truetype("Arial.ttf", 18)
    except IOError:
        fallback_font = ImageFont.load_default()
        title_font = fallback_font
        desc_font = fallback_font

    draw.text((20, 20), title, fill=black, font=title_font)
    draw.rectangle([50, 80, width - 50, height - 50], outline=black, fill=(220, 220, 220))

    if description:
        max_chars = 300
        if len(description) > max_chars:
            short_desc = description[:max_chars] + "..."
        else:
            short_desc = description
        draw.text((60, 100), textwrap.fill(short_desc, width=70), fill=black, font=desc_font)

    return comic
|
| 657 |
+
@log_execution
|
| 658 |
+
def split_comic_into_scenes(self, comic_image, num_scenes, preferred_layout=None, use_gemini_analysis=True):  # No Use?
    """Split a comic image into individual scene images.

    Two strategies are available:

    * Gemini (default): a (rows, cols) grid is taken from
      ``preferred_layout`` or detected via
      ``analyze_comic_layout_with_enhanced_gemini``, validated, and the
      image is cropped cell by cell.
    * OpenCV: the image is written to a temporary directory and handed to
      ``split_comic_panels``; every ``panel_*.png`` it produces is loaded.

    Args:
        comic_image: PIL.Image.Image object of the comic
        num_scenes: Expected number of scenes. Caps the number of crops on
            the Gemini path; on the OpenCV path it only controls the
            single-panel shortcut.
        preferred_layout: Optional (rows, cols) tuple that replaces Gemini
            detection. Ignored on the OpenCV path.
        use_gemini_analysis: True for Gemini (default), False for OpenCV.

    Returns:
        list: PIL.Image.Image objects, one per extracted scene. On the
        OpenCV path, falls back to ``[comic_image]`` when splitting fails
        or yields nothing.

    Raises:
        ValueError: If ``comic_image`` is not a PIL image.
    """
    if not isinstance(comic_image, Image.Image):
        raise ValueError("comic_image must be a PIL.Image.Image object")

    # The single-panel shortcut only applies to the OpenCV path; the Gemini
    # path still runs its analysis for num_scenes <= 1 (original behaviour).
    if num_scenes <= 1 and not use_gemini_analysis:
        return [comic_image]

    width, height = comic_image.size
    print(f"🎯 Splitting {width}x{height} comic into scenes (Target: {num_scenes} scenes if using grid, auto-detect if OpenCV)...")

    if use_gemini_analysis:
        print("🔍 Analyzing comic layout with enhanced Gemini Vision...")
        if preferred_layout:
            rows, cols = preferred_layout
            print(f"🎯 Using manual override for Gemini: {rows}×{cols} layout")
        else:
            rows, cols = self.analyze_comic_layout_with_enhanced_gemini(comic_image, num_scenes)

        rows, cols = self._validate_and_optimize_layout(rows, cols, num_scenes, width, height)

        actual_panels = rows * cols
        print(f"✅ Using Gemini-derived {rows}×{cols} grid layout - will extract {min(actual_panels, num_scenes)} panels")

        return self._extract_scenes_with_quality_check(comic_image, rows, cols, num_scenes)

    def _panel_order(file_name):
        # Numeric-aware key so "panel_10" sorts after "panel_2".
        # NOTE(review): the naming scheme of split_comic_panels is not
        # visible here; zero-padded names sort the same as before.
        return [int(tok) if tok.isdigit() else tok for tok in re.split(r"(\d+)", file_name)]

    print("🔩 Using OpenCV for panel splitting...")
    temp_dir = tempfile.mkdtemp()
    temp_image_path = os.path.join(temp_dir, "source_comic.png")
    panels_output_dir = os.path.join(temp_dir, "output_panels")

    try:
        comic_image.save(temp_image_path, "PNG")

        split_comic_panels(temp_image_path, panels_output_dir)

        extracted_scenes = []
        if os.path.exists(panels_output_dir):
            panel_files = sorted(
                (f for f in os.listdir(panels_output_dir) if f.startswith("panel_") and f.endswith(".png")),
                key=_panel_order,
            )
            for panel_file in panel_files:
                try:
                    panel_image_path = os.path.join(panels_output_dir, panel_file)
                    # Image.open is lazy and keeps the file handle open.
                    # The temp directory is deleted below, so copy the
                    # pixels into memory and close the file right away.
                    with Image.open(panel_image_path) as img:
                        extracted_scenes.append(img.copy())
                except Exception as e:
                    print(f"Error loading panel image {panel_file}: {e}")

        if not extracted_scenes:
            print("⚠️ OpenCV panel splitter did not return any panels. Returning original image.")
            return [comic_image]

        print(f"✅ OpenCV successfully extracted {len(extracted_scenes)} panels.")
        return extracted_scenes

    except Exception as e:
        print(f"❌ Error during OpenCV panel splitting: {e}")
        return [comic_image]
    finally:
        # Best-effort cleanup: a failure here must not mask the result.
        shutil.rmtree(temp_dir, ignore_errors=True)
|
| 734 |
+
@log_execution
|
| 735 |
+
def _validate_and_optimize_layout(self, rows, cols, num_scenes, image_width, image_height):
|
| 736 |
+
"""Validate and optimize the layout based on image properties and panel count."""
|
| 737 |
+
panel_width = image_width / cols
|
| 738 |
+
panel_height = image_height / rows
|
| 739 |
+
panel_aspect_ratio = panel_width / panel_height
|
| 740 |
+
|
| 741 |
+
if panel_width < 50 or panel_height < 50:
|
| 742 |
+
print(f"⚠️ Panels too small ({panel_width:.0f}x{panel_height:.0f}). Recalculating layout...")
|
| 743 |
+
return self._calculate_optimal_grid_layout(num_scenes)
|
| 744 |
+
|
| 745 |
+
if panel_aspect_ratio < 0.2 or panel_aspect_ratio > 5.0:
|
| 746 |
+
print(f"⚠️ Panel aspect ratio {panel_aspect_ratio:.2f} is extreme. Optimizing layout...")
|
| 747 |
+
return self._calculate_optimal_grid_layout(num_scenes)
|
| 748 |
+
|
| 749 |
+
if num_scenes == 12:
|
| 750 |
+
optimal_12_layouts = [(3, 4), (4, 3), (2, 6), (6, 2)]
|
| 751 |
+
current_layout = (rows, cols)
|
| 752 |
+
|
| 753 |
+
if current_layout not in optimal_12_layouts:
|
| 754 |
+
image_aspect = image_width / image_height
|
| 755 |
+
best_layout = (3, 4)
|
| 756 |
+
best_score = float('inf')
|
| 757 |
+
|
| 758 |
+
for opt_rows, opt_cols in optimal_12_layouts:
|
| 759 |
+
layout_aspect = opt_cols / opt_rows
|
| 760 |
+
score = abs(layout_aspect - image_aspect)
|
| 761 |
+
if score < best_score:
|
| 762 |
+
best_score = score
|
| 763 |
+
best_layout = (opt_rows, opt_cols)
|
| 764 |
+
|
| 765 |
+
print(f"📋 Optimizing 12-panel layout from {rows}×{cols} to {best_layout[0]}×{best_layout[1]}")
|
| 766 |
+
return best_layout
|
| 767 |
+
|
| 768 |
+
if num_scenes == 24:
|
| 769 |
+
optimal_24_layouts = [(4, 6), (6, 4), (3, 8), (8, 3)]
|
| 770 |
+
current_layout = (rows, cols)
|
| 771 |
+
|
| 772 |
+
if current_layout not in optimal_24_layouts:
|
| 773 |
+
image_aspect = image_width / image_height
|
| 774 |
+
best_layout = (4, 6)
|
| 775 |
+
best_score = float('inf')
|
| 776 |
+
|
| 777 |
+
for opt_rows, opt_cols in optimal_24_layouts:
|
| 778 |
+
layout_aspect = opt_cols / opt_rows
|
| 779 |
+
score = abs(layout_aspect - image_aspect)
|
| 780 |
+
if score < best_score:
|
| 781 |
+
best_score = score
|
| 782 |
+
best_layout = (opt_rows, opt_cols)
|
| 783 |
+
|
| 784 |
+
print(f"📋 Optimizing 24-panel layout from {rows}×{cols} to {best_layout[0]}×{best_layout[1]} for compact scenes")
|
| 785 |
+
return best_layout
|
| 786 |
+
|
| 787 |
+
return (rows, cols)
|
| 788 |
+
@log_execution
|
| 789 |
+
def _extract_scenes_with_quality_check(self, comic_image, rows, cols, num_scenes):
    """Crop the comic into grid cells, row by row, up to ``num_scenes``.

    Each cell is widened by a 2 px margin on every side (clamped to the
    image bounds). Cells that fail ``_validate_scene_quality`` are
    reported but still kept.

    Args:
        comic_image: Image to crop; must provide ``size`` and ``crop``.
        rows: Number of grid rows.
        cols: Number of grid columns.
        num_scenes: Maximum number of cells to extract.

    Returns:
        list: Cropped scene images in reading order.
    """
    width, height = comic_image.size
    cell_width = width // cols
    cell_height = height // rows
    bleed = 2

    scenes = []
    reading_order = ((r, c) for r in range(rows) for c in range(cols))
    for index, (grid_row, grid_col) in enumerate(reading_order):
        if index >= num_scenes:
            break

        left = max(0, grid_col * cell_width - bleed)
        top = max(0, grid_row * cell_height - bleed)
        right = min(width, (grid_col + 1) * cell_width + bleed)
        bottom = min(height, (grid_row + 1) * cell_height + bleed)
        panel = comic_image.crop((left, top, right, bottom))

        # A failed check is only reported; the panel is kept either way.
        if not self._validate_scene_quality(panel):
            print(f"⚠️ Scene {index + 1} failed quality check, keeping anyway")
        scenes.append(panel)

    print(f"✅ Successfully extracted {len(scenes)} scenes")
    return scenes
|
| 826 |
+
@log_execution
|
| 827 |
+
def _validate_scene_quality(self, scene):
|
| 828 |
+
"""Validate that a scene contains meaningful content."""
|
| 829 |
+
try:
|
| 830 |
+
import numpy as np
|
| 831 |
+
|
| 832 |
+
scene_array = np.array(scene)
|
| 833 |
+
|
| 834 |
+
if len(scene_array.shape) == 3:
|
| 835 |
+
variance = np.var(scene_array)
|
| 836 |
+
if variance < 10:
|
| 837 |
+
return False
|
| 838 |
+
|
| 839 |
+
if scene.width < 20 or scene.height < 20:
|
| 840 |
+
return False
|
| 841 |
+
|
| 842 |
+
return True
|
| 843 |
+
|
| 844 |
+
except Exception as e:
|
| 845 |
+
print(f"Scene quality check failed: {e}")
|
| 846 |
+
return True
|
| 847 |
+
@log_execution
|
| 848 |
+
def analyze_comic_layout_with_enhanced_gemini(self, comic_image, num_scenes):
    """Ask Gemini Vision for the comic's panel grid, with a computed fallback.

    The model is prompted (up to two attempts) to answer with a JSON object
    holding ``detected_rows``, ``detected_cols`` and
    ``total_panels_detected``. An answer is accepted when both dimensions
    are positive and their product matches the reported total. For
    12-panel comics the total must additionally lie in 10..18; otherwise
    the table layout is used. Every failure path ends in
    ``_calculate_optimal_grid_layout(num_scenes)``.

    Args:
        comic_image: PIL.Image.Image object of the comic
        num_scenes: Expected number of scenes (used for context and validation)

    Returns:
        tuple: (rows, cols) representing the detected grid layout
    """
    try:
        model = GenerativeModel('gemini-2.5-flash')

        # The image is passed to the SDK directly, so the PNG re-encoding
        # the original performed here (and never used) has been dropped.
        # Prompt lines are kept flush-left so no indentation leaks into it.
        analysis_prompt = f"""
You are a professional comic book layout analyst. Examine this comic image carefully to determine its precise panel grid structure.

ANALYSIS TASK:
- Count the exact number of ROWS (horizontal divisions)
- Count the exact number of COLUMNS (vertical divisions)
- Expected panels: {num_scenes} (use as context, but trust what you see)

DETECTION GUIDELINES:
1. Look for panel borders, gutters, or visual separations
2. Identify consistent grid patterns
3. Count horizontal lines that divide rows
4. Count vertical lines that divide columns
5. For 12 panels, common layouts are: 3×4, 4×3, 2×6, or 6×2
6. Trust visual evidence over expected numbers

VISUAL INDICATORS TO LOOK FOR:
- Black border lines between panels
- White gutters or spacing between sections
- Consistent rectangular divisions
- Grid-like organization of content
- Clear separation of distinct visual areas

IMPORTANT: Be precise about what you actually observe. If you see a clear grid pattern, report it exactly.

Respond with ONLY this JSON format:
{{
"detected_rows": [number of rows you count],
"detected_cols": [number of columns you count],
"total_panels_detected": [rows × cols],
"confidence": "high/medium/low",
"layout_description": "detailed description of the grid structure you observe",
"visual_evidence": "description of the visual cues that led to this conclusion"
}}

Be extremely precise in your counting.
"""

        max_retries = 2
        for attempt in range(max_retries):
            try:
                response = model.generate_content([analysis_prompt, comic_image])
                response_text = response.text.strip()

                print(f"Gemini Vision analysis (attempt {attempt + 1}): {response_text[:200]}...")

                json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                if not json_match:
                    continue

                analysis_result = json.loads(json_match.group())

                # Models sometimes return numbers as strings or floats;
                # normalise so the grid can be used in range() downstream.
                # Unusable values raise and are handled as a failed attempt.
                rows = int(analysis_result.get("detected_rows", 0))
                cols = int(analysis_result.get("detected_cols", 0))
                total_detected = int(analysis_result.get("total_panels_detected", 0))
                confidence = analysis_result.get("confidence", "unknown")
                evidence = analysis_result.get("visual_evidence", "")

                if rows <= 0 or cols <= 0:
                    print(f"❌ Invalid dimensions: {rows}×{cols}")
                    continue

                if total_detected != rows * cols:
                    print(f"❌ Math inconsistency: {rows}×{cols} ≠ {total_detected}")
                    continue

                print(f"✅ Gemini detected {rows}×{cols} layout ({total_detected} panels) with {confidence} confidence")
                print(f"Evidence: {evidence}")

                if num_scenes != 12:
                    return (rows, cols)

                if 10 <= total_detected <= 18:
                    print("📋 Layout reasonable for 12-panel comic")
                    return (rows, cols)

                print(f"⚠️ Detected {total_detected} panels for 12-panel comic. Using optimized layout.")
                return self._calculate_optimal_grid_layout(num_scenes)

            except json.JSONDecodeError as e:
                print(f"❌ JSON parsing error on attempt {attempt + 1}: {e}")

            except Exception as e:
                print(f"❌ Analysis error on attempt {attempt + 1}: {e}")

    except Exception as e:
        print(f"❌ Gemini Vision analysis completely failed: {e}")

    print("⚠️ Using optimized grid calculation as fallback")
    return self._calculate_optimal_grid_layout(num_scenes)
|
| 959 |
+
@log_execution
|
| 960 |
+
def _find_all_factorizations(self, n):
|
| 961 |
+
"""
|
| 962 |
+
Find all possible factorizations of a number into rows × columns.
|
| 963 |
+
Enhanced with better algorithm for large numbers like 24.
|
| 964 |
+
|
| 965 |
+
Args:
|
| 966 |
+
n: Number to factorize
|
| 967 |
+
|
| 968 |
+
Returns:
|
| 969 |
+
list: List of tuples (rows, cols) where rows * cols = n, sorted by preference
|
| 970 |
+
"""
|
| 971 |
+
factorizations = []
|
| 972 |
+
for i in range(1, int(n**0.5) + 1):
|
| 973 |
+
if n % i == 0:
|
| 974 |
+
rows, cols = i, n // i
|
| 975 |
+
factorizations.append((rows, cols))
|
| 976 |
+
if rows != cols:
|
| 977 |
+
factorizations.append((cols, rows))
|
| 978 |
+
|
| 979 |
+
factorizations.sort(key=lambda x: (abs(x[0] - x[1]), max(x[0], x[1])))
|
| 980 |
+
return factorizations
|
| 981 |
+
@log_execution
|
| 982 |
+
def _calculate_optimal_layout(self, num_scenes, image_width, image_height):
|
| 983 |
+
"""
|
| 984 |
+
Calculate the optimal grid layout based on image aspect ratio and scene count.
|
| 985 |
+
Enhanced algorithm with better preferences for different panel counts.
|
| 986 |
+
|
| 987 |
+
Args:
|
| 988 |
+
num_scenes: Number of scenes to arrange
|
| 989 |
+
image_width: Width of the comic image
|
| 990 |
+
image_height: Height of the comic image
|
| 991 |
+
|
| 992 |
+
Returns:
|
| 993 |
+
tuple: (rows, cols) representing the optimal grid layout
|
| 994 |
+
"""
|
| 995 |
+
image_aspect_ratio = image_width / image_height
|
| 996 |
+
|
| 997 |
+
factorizations = self._find_all_factorizations(num_scenes)
|
| 998 |
+
|
| 999 |
+
if not factorizations:
|
| 1000 |
+
import math
|
| 1001 |
+
sqrt_scenes = math.sqrt(num_scenes)
|
| 1002 |
+
rows = int(sqrt_scenes)
|
| 1003 |
+
cols = math.ceil(num_scenes / rows)
|
| 1004 |
+
return (rows, cols)
|
| 1005 |
+
|
| 1006 |
+
best_layout = factorizations[0]
|
| 1007 |
+
best_score = float('inf')
|
| 1008 |
+
|
| 1009 |
+
for rows, cols in factorizations:
|
| 1010 |
+
layout_aspect_ratio = cols / rows
|
| 1011 |
+
|
| 1012 |
+
aspect_diff = abs(layout_aspect_ratio - image_aspect_ratio)
|
| 1013 |
+
|
| 1014 |
+
panel_aspect = (image_width / cols) / (image_height / rows)
|
| 1015 |
+
extremeness_penalty = 0
|
| 1016 |
+
if panel_aspect < 0.3 or panel_aspect > 3.0:
|
| 1017 |
+
extremeness_penalty = 2.0
|
| 1018 |
+
|
| 1019 |
+
total_score = aspect_diff + extremeness_penalty
|
| 1020 |
+
|
| 1021 |
+
if total_score < best_score:
|
| 1022 |
+
best_score = total_score
|
| 1023 |
+
best_layout = (rows, cols)
|
| 1024 |
+
|
| 1025 |
+
return best_layout
|
| 1026 |
+
@log_execution
|
| 1027 |
+
def get_possible_layouts(self, num_scenes):
    """List candidate grid layouts for ``num_scenes``, best first.

    For the curated panel counts the layout from
    ``_calculate_optimal_grid_layout`` is placed first, followed by the
    remaining factorizations. For any other count the factorizations are
    returned unchanged.

    Args:
        num_scenes: Number of scenes

    Returns:
        list: List of tuples (rows, cols) representing possible layouts, sorted by preference
    """
    curated_counts = (1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 18, 20, 21, 24)
    if num_scenes not in curated_counts:
        return self._find_all_factorizations(num_scenes)

    preferred = self._calculate_optimal_grid_layout(num_scenes)
    ranked = [preferred]
    for candidate in self._find_all_factorizations(num_scenes):
        if candidate != preferred:
            ranked.append(candidate)
    return ranked
|
| 1047 |
+
@log_execution
|
| 1048 |
+
def generate_comic_with_quality_metrics(self, story_data, output_path=None, style=None):
    """Generate a comic image and report metrics about the run.

    The panel count is fixed at 9 inside this method. When the image
    generator returns None, or anything raises, an enhanced placeholder
    image is returned instead and ``generation_status`` records which
    case occurred.

    Args:
        story_data: Dictionary containing the story information
        output_path: Optional path to save the resulting image
        style: Optional comic style to use

    Returns:
        tuple: (comic_image, data_url, quality_metrics)
    """
    def _as_png_data_url(image):
        # Encode a PIL image as a base64 PNG data URL.
        png_buffer = io.BytesIO()
        image.save(png_buffer, format="PNG")
        encoded = base64.b64encode(png_buffer.getvalue()).decode("utf-8")
        return f"data:image/png;base64,{encoded}"

    start_time = time.time()

    title = story_data.get("title", "Enhanced Comic")
    description = story_data.get("description", "")
    characters = story_data.get("characters", [])
    settings = story_data.get("settings", [])
    num_scenes = 9

    described_characters = [c for c in characters if isinstance(c, dict) and "visual_description" in c]
    described_settings = [s for s in settings if isinstance(s, dict) and "description" in s]

    if num_scenes >= 20:
        complexity = "high"
    elif num_scenes >= 10:
        complexity = "medium"
    else:
        complexity = "low"

    quality_metrics = {
        "character_count": len(described_characters),
        "setting_count": len(described_settings),
        "description_length": len(description),
        "optimal_layout": self._calculate_optimal_grid_layout(num_scenes),
        "generation_complexity": complexity,
    }

    try:
        prompt = self._create_comic_prompt(title, description, characters, settings, style, num_scenes)

        print(f"🎨 Generating {num_scenes}-panel comic with enhanced prompt ({len(prompt)} characters)")

        comic_image = generate_image_fn(
            selected_prompt=prompt,
            output_path=output_path
        )

        if comic_image is not None:
            quality_metrics["generation_status"] = "success"
        else:
            comic_image = self._create_enhanced_placeholder_comic(title, description, num_scenes)
            quality_metrics["generation_status"] = "placeholder"

        if output_path:
            directory = os.path.dirname(output_path)
            if directory and not os.path.exists(directory):
                os.makedirs(directory)
            comic_image.save(output_path)

        data_url = _as_png_data_url(comic_image)

        quality_metrics["generation_time"] = time.time() - start_time
        quality_metrics["image_size"] = (comic_image.width, comic_image.height)
        quality_metrics["prompt_complexity"] = len(prompt.split())

        return comic_image, data_url, quality_metrics

    except Exception as e:
        print(f"Error in enhanced generation: {str(e)}")
        placeholder = self._create_enhanced_placeholder_comic(title, description, num_scenes)
        data_url = _as_png_data_url(placeholder)

        quality_metrics["generation_status"] = "error"
        quality_metrics["error_message"] = str(e)

        return placeholder, data_url, quality_metrics
|
| 1126 |
+
@log_execution
|
| 1127 |
+
def _create_enhanced_placeholder_comic(self, title, description, num_scenes):
    """Draw a placeholder image that previews the intended panel grid.

    The canvas size grows with the panel count (800x600 up to 4 panels,
    1200x900 up to 12, 1600x1200 beyond). Numbered empty panels are laid
    out using ``_calculate_optimal_grid_layout`` and the description,
    truncated to 200 characters, is written near the bottom edge.

    Args:
        title: Title of the comic
        description: Description of the comic
        num_scenes: Number of scenes the comic should have

    Returns:
        PIL.Image.Image: Enhanced placeholder comic image
    """
    if num_scenes <= 4:
        canvas_size = (800, 600)
    elif num_scenes <= 12:
        canvas_size = (1200, 900)
    else:
        canvas_size = (1600, 1200)
    width, height = canvas_size

    comic = Image.new("RGB", canvas_size, (248, 248, 248))
    draw = ImageDraw.Draw(comic)

    # Font sizes scale with the canvas; fall back to Pillow's default
    # font when Arial cannot be loaded.
    try:
        title_font = ImageFont.truetype("Arial.ttf", max(24, width // 40))
        panel_font = ImageFont.truetype("Arial.ttf", max(12, width // 80))
        desc_font = ImageFont.truetype("Arial.ttf", max(10, width // 100))
    except IOError:
        default_font = ImageFont.load_default()
        title_font = default_font
        panel_font = default_font
        desc_font = default_font

    draw.text((20, 20), f"{title} - {num_scenes} Panel Layout Preview", fill=(50, 50, 50), font=title_font)

    rows, cols = self._calculate_optimal_grid_layout(num_scenes)
    draw.text((20, 60), f"Layout: {rows}×{cols} grid ({rows * cols} panels)", fill=(100, 100, 100), font=panel_font)

    grid_top = 100
    grid_height = height - grid_top - 60
    cell_width = (width - 60) // cols
    cell_height = grid_height // rows

    reading_order = ((r, c) for r in range(rows) for c in range(cols))
    for panel_index, (grid_row, grid_col) in enumerate(reading_order):
        if panel_index >= num_scenes:
            break

        left = 30 + grid_col * cell_width
        top = grid_top + grid_row * cell_height

        # The 10 px inset leaves a visible gutter between panels.
        draw.rectangle([left, top, left + cell_width - 10, top + cell_height - 10],
                       outline=(150, 150, 150), fill=(255, 255, 255))
        draw.text((left + 10, top + 10), f"Panel {panel_index + 1}", fill=(100, 100, 100), font=panel_font)

    if description:
        if len(description) > 200:
            footer_text = description[:200] + "..."
        else:
            footer_text = description
        draw.text((30, height - 50), textwrap.fill(footer_text, width=80), fill=(80, 80, 80), font=desc_font)

    return comic
|
| 1196 |
+
|
| 1197 |
+
|
| 1198 |
+
@log_execution
|
| 1199 |
+
|
| 1200 |
+
def generate_panel_descriptions(self, final_prompt, num_scenes=9):
    """Generate panel descriptions and embed them in the image-generation prompt.

    A Gemini text model is asked to break the story into ``num_scenes``
    distinct panels; its answer is then inserted into a fixed template
    that pins the grid layout, art style and negative prompts.

    Args:
        final_prompt: The complete story/prompt text
        num_scenes: Number of panels (default: 9)

    Returns:
        str: Complete formatted prompt ready for image generation, or
        None when the model call fails or returns no text.
    """
    try:
        model = GenerativeModel('gemini-2.0-flash-exp')

        # First, generate the panel descriptions.
        # Prompt lines are kept flush-left so no indentation leaks into it.
        analysis_prompt = f"""You are a master comic book storyteller. Break down this story into {num_scenes} COMPLETELY DIFFERENT panels.

STORY:
{final_prompt}

ABSOLUTE REQUIREMENTS FOR UNIQUENESS:

1. STORY STRUCTURE - Divide the story into {num_scenes} distinct narrative beats:
- Each panel = ONE specific story moment that happens at a DIFFERENT time
- Panel 1 happens BEFORE Panel 2, Panel 2 BEFORE Panel 3, etc.
- NO panel should show the same moment or similar action
- Think of it like a movie: each panel is a different scene

2. VISUAL VARIETY - Each panel MUST have:
- DIFFERENT location or setting (if story allows)
- DIFFERENT character positions and poses
- DIFFERENT camera angle/shot type
- DIFFERENT action or emotional beat
- DIFFERENT time of day or lighting (if applicable)

3. SHOT TYPES - Use variety:
- Extreme Wide Shot, Wide Shot, Medium Shot, Close-Up, Extreme Close-Up, Over-the-Shoulder, Low Angle, High Angle, Bird's Eye View

FORMAT EXACTLY LIKE THIS:
Panel 1: [Title]
Shot Type: [Type]
Content: [Detailed description]

Panel 2: [Different title]
Shot Type: [Different type]
Content: [Completely different scene]

Generate all {num_scenes} panels now:"""

        generation_config = GenerationConfig(
            temperature=0.9,
            top_p=0.95,
        )

        response = model.generate_content(analysis_prompt, generation_config=generation_config)
        panel_descriptions = response.text.strip()

        # An empty answer would yield a prompt with no storyboard at all;
        # report it like any other failure instead of returning that prompt.
        if not panel_descriptions:
            print("Error generating complete prompt: empty panel description response")
            return None

        # Now format into the complete prompt structure.
        # NOTE(review): for counts other than 9 the layout phrase is just
        # "<n> panels", which reads awkwardly inside the template
        # ("in a 12 panels") — confirm whether a rows x cols phrase is wanted.
        grid_layout = "3x3 grid (3 rows, 3 columns)" if num_scenes == 9 else f"{num_scenes} panels"

        # NOTE(review): the template text itself begins and ends with a
        # literal triple double-quote; kept as-is — confirm it is intentional.
        complete_prompt = f'''"""CRITICAL COMMAND: UNIFORM {grid_layout.upper()} (NON-NEGOTIABLE)

Layout: Generate exactly {num_scenes} panels in a {grid_layout}.
Panel Integrity: Every panel MUST be identical in size and shape. Do not change panel dimensions for any reason.
Formatting: Use clean, equal-width white gutters between all panels and a uniform thin black border around each panel.
CRITICAL RULE: SILENT COMIC - NO TEXT, NO SPEECH BUBBLES, NO SOUND EFFECTS, NO CAPTIONS EVER.
CRITICAL RULE: EACH PANEL IS A DISTINCT SCENE.
Each panel must depict a unique, self-contained moment or tableau from the story.
PANEL-BY-PANEL STORYBOARD (READ LEFT-TO-RIGHT, TOP-TO-BOTTOM)
{panel_descriptions}

GLOBAL STYLE & CONSISTENCY MANDATES
Art Style: Modern Digital Manga

Line Art: Sharp digital lines, clean and precise, emphasizing dynamic movement and emotional clarity.
Tones & Shading: Cel shading with clear, distinct shadows and highlights, giving a vibrant yet defined look.
Composition: Every panel must have a clear focal point and excellent use of foreground, midground, and background elements.
Character Consistency: Characters must maintain consistent facial features, hair, and design throughout all panels while showing progression in age, clothing, or emotional state as the story requires.
Environmental & Lighting Continuity: Lighting and atmosphere should support the narrative progression and emotional tone of each scene.
Color Palette: A vibrant and saturated palette that enhances the story's emotional journey.

FINAL OVERRIDE & NEGATIVE PROMPTS
ABSOLUTE RULE: The {grid_layout} uniform grid structure is the most important rule and must be followed perfectly.
NEGATIVE PROMPTS: NO deformed anatomy, NO missing limbs or facial features, NO inconsistent character designs, NO blurry or out-of-focus elements, NO sketch-like aesthetics (unless intentional style choice), NO TEXT, NO SPEECH BUBBLES, NO SOUND EFFECTS, NO CAPTIONS, NO watermarks, NO VARIATION IN PANEL SIZE OR SHAPE.
"""'''

        print(f"Generated complete prompt with {num_scenes} panels")

        return complete_prompt

    except Exception as e:
        print(f"Error generating complete prompt: {e}")
        return None
|
models/content/log.txt
ADDED
|
File without changes
|
models/image_generation.py
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import base64
|
| 3 |
+
import os
|
| 4 |
+
from PIL import Image
|
| 5 |
+
import config
|
| 6 |
+
from openai import OpenAI
|
| 7 |
+
import warnings
|
| 8 |
+
import time
|
| 9 |
+
from google.generativeai import GenerativeModel
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
warnings.filterwarnings("ignore", message="IMAGE_SAFETY is not a valid FinishReason")
|
| 12 |
+
|
| 13 |
+
# Module-level state shared by the image generation functions below.
# Data URL ("data:image/png;base64,...") of the most recently generated image;
# assigned by the generate_image_fn* functions after a successful generation.
global_image_data_url = None
# Prompt text (after any truncation) of the most recent generation request;
# assigned before the API call is made.
global_image_prompt = None
# Not assigned by any function visible in this part of the module.
global_image_description = None
|
| 16 |
+
|
| 17 |
+
def log_execution(func):
    """Decorator that records when each call started, ended and how long it took.

    One line per call is appended to ``content/logs.txt`` (relative to the
    current working directory) and the same line is printed to stdout.
    Only calls that return normally are logged; an exception raised by
    ``func`` propagates before anything is written.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that forwards all arguments to ``func`` and returns its result.
    """
    import functools  # local import: not among this module's top-level imports

    log_path = 'content/logs.txt'
    time_format = '%Y-%m-%d %H:%M:%S'

    @functools.wraps(func)  # keep __name__/__doc__ of the decorated callable
    def wrapper(*args, **kwargs):
        start_time = time.time()
        start_str = datetime.fromtimestamp(start_time).strftime(time_format)

        result = func(*args, **kwargs)

        end_time = time.time()
        end_str = datetime.fromtimestamp(end_time).strftime(time_format)
        duration = end_time - start_time
        entry = f"{func.__name__}, start: {start_str}, end: {end_str}, duration: {duration:.4f}s"

        # Logging is best-effort: a missing or unwritable log directory must
        # not throw away the result the wrapped function already produced.
        try:
            os.makedirs(os.path.dirname(log_path), exist_ok=True)
            with open(log_path, 'a') as f:
                f.write(entry + "\n")
        except OSError as log_error:
            print(f"Warning: could not write execution log to {log_path}: {log_error}")

        # Also print to see output immediately
        print(entry)

        return result
    return wrapper
|
| 37 |
+
|
| 38 |
+
@log_execution
|
| 39 |
+
def generate_image_fn_deprecated(selected_prompt, model="gpt-image-1", output_path="models/benchmark"):
    """
    Generate an image from the prompt via the OpenAI API using gpt-image-1.
    Convert the image to a data URL and optionally save it to a file.

    Side effects: stores the (possibly truncated) prompt in
    ``global_image_prompt`` and, on success, the PNG data URL in
    ``global_image_data_url``.

    Args:
        selected_prompt (str): The prompt to generate the image from. Prompts
            longer than 32000 characters are shortened with smart_truncate_prompt.
        model (str): Ignored; "gpt-image-1" is always used. Parameter kept for compatibility.
        output_path (str, optional): If truthy, the raw image bytes are written to
            this path. Defaults to "models/benchmark".

    Returns:
        PIL.Image.Image or None: The generated image as a PIL Image object, or None on error.
    """
    global global_image_data_url, global_image_prompt

    MAX_PROMPT_LENGTH = 32000
    if len(selected_prompt) > MAX_PROMPT_LENGTH:
        selected_prompt = smart_truncate_prompt(selected_prompt, MAX_PROMPT_LENGTH)
        print(f"Warning: Prompt was smartly truncated to {len(selected_prompt)} characters while preserving critical details")

    global_image_prompt = selected_prompt

    # The caller-supplied model is deliberately overridden.
    model = "gpt-image-1"

    try:
        client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", config.OPENAI_API_KEY))

        api_params = {
            "model": model,
            "prompt": selected_prompt,
            "size": "1024x1536",
            "quality": "high",
            "moderation": "low",
        }

        result = client.images.generate(**api_params)

        # gpt-image-1 returns the image as base64 in the first data entry.
        # (Previously ``image_base64`` was never assigned, so every call
        # failed with NameError and returned None.)
        image_base64 = result.data[0].b64_json
        image_bytes = base64.b64decode(image_base64)

        image = Image.open(io.BytesIO(image_bytes))

        if output_path:
            try:
                # os.makedirs("") raises, so only create a directory when the
                # path actually has a directory component.
                output_dir = os.path.dirname(output_path)
                if output_dir:
                    os.makedirs(output_dir, exist_ok=True)
                with open(output_path, "wb") as f:
                    f.write(image_bytes)
                print(f"Successfully saved image to {output_path}")
            except Exception as e:
                # Saving is best-effort; the image is still returned.
                print(f"Error saving image to {output_path}: {str(e)}")

        # Re-encode as PNG for the data URL regardless of the source format.
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_bytes = buffered.getvalue()
        img_b64 = base64.b64encode(img_bytes).decode("utf-8")
        global_image_data_url = f"data:image/png;base64,{img_b64}"

        print(f"Successfully generated image with prompt: {selected_prompt[:50]}...")
        return image
    except Exception as e:
        print(f"Error generating image: {str(e)}")
        return None
|
| 102 |
+
@log_execution
|
| 103 |
+
def generate_image_fn(selected_prompt, model="gemini-2.5-flash-image-preview", output_path="models/benchmark"):
    """
    Generate an image from the prompt via the Google Gemini API (google.generativeai).
    Convert the image to a data URL and optionally save it to a file.

    Side effects: stores the (possibly truncated) prompt in
    ``global_image_prompt`` and, on success, the PNG data URL in
    ``global_image_data_url``.

    Args:
        selected_prompt (str): The prompt to generate the image from. Prompts
            longer than 32000 characters are shortened with smart_truncate_prompt.
        model (str): The Gemini model to use. Defaults to "gemini-2.5-flash-image-preview".
        output_path (str, optional): If truthy, saves the image to this path; ".png" is
            appended when the path has no .png/.jpg/.jpeg extension.
            Defaults to "models/benchmark".

    Returns:
        PIL.Image.Image or None: The generated image as a PIL Image object, or None
        when the response carries no usable image or an error occurs.
    """
    global global_image_data_url, global_image_prompt

    MAX_PROMPT_LENGTH = 32000
    if len(selected_prompt) > MAX_PROMPT_LENGTH:
        selected_prompt = smart_truncate_prompt(selected_prompt, MAX_PROMPT_LENGTH)
        print(f"Warning: Prompt was smartly truncated to {len(selected_prompt)} characters while preserving critical details")

    global_image_prompt = selected_prompt

    try:
        # Initialize the Gemini model
        gemini_model = GenerativeModel(model)

        # Generate content with the prompt
        response = gemini_model.generate_content([selected_prompt])

        # Extract the generated image from the response
        image = None
        has_text_response = False

        # A response without candidates is treated like one without image
        # data instead of failing on candidates[0].
        candidates = response.candidates
        parts = candidates[0].content.parts if candidates else []

        for part in parts:
            # Check for text responses (ignore these)
            if hasattr(part, 'text') and part.text:
                has_text_response = True
                print(f"Ignoring text response from API: {part.text[:100]}...")
                continue

            # Look for image data
            if hasattr(part, 'inline_data') and part.inline_data is not None:
                image_bytes = part.inline_data.data

                # Verify we have valid data
                if not image_bytes:
                    print("Warning: inline_data.data is empty, skipping...")
                    continue

                # Try to parse the image
                try:
                    candidate_image = Image.open(io.BytesIO(image_bytes))
                    candidate_image.load()  # Force load to verify it's valid
                except Exception as img_error:
                    print(f"Invalid image data received, skipping: {img_error}")
                    continue

                image = candidate_image
                print(f"Successfully loaded image: {len(image_bytes)} bytes")
                break

        # If we only got text and no image, return None
        if image is None:
            if has_text_response:
                print("API returned text instead of image - skipping this response")
            else:
                print("No image data found in response")
            return None

        # Save image to file if output_path is provided
        if output_path:
            try:
                # os.makedirs("") raises, so only create a directory when the
                # path actually has a directory component.
                output_dir = os.path.dirname(output_path)
                if output_dir:
                    os.makedirs(output_dir, exist_ok=True)
                # Ensure output_path has an image extension
                if not output_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                    output_path = f"{output_path}.png"

                image.save(output_path)
                print(f"Successfully saved image to {output_path}")
            except Exception as e:
                # Saving is best-effort; the image is still returned.
                print(f"Error saving image to {output_path}: {str(e)}")

        # Create data URL for the image
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_bytes = buffered.getvalue()
        img_b64 = base64.b64encode(img_bytes).decode("utf-8")
        global_image_data_url = f"data:image/png;base64,{img_b64}"

        print(f"Successfully generated image with prompt: {selected_prompt[:50]}...")
        return image

    except Exception as e:
        print(f"Error generating image: {str(e)}")
        import traceback
        traceback.print_exc()
        return None
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
@log_execution
|
| 209 |
+
def smart_truncate_prompt(prompt, max_length):
    """
    Shorten ``prompt`` to at most ``max_length`` characters, favouring critical sections.

    The prompt is treated as sections separated by " || ". Sections containing
    one of the first eight markers in ``critical_sections`` are always kept;
    other sections are kept while they fit in the budget (``max_length`` minus
    a 200-character reserve). The first section that does not fit may be cut
    short, after which processing stops. A closing "FINAL MANDATE" section is
    appended when there is room for it.

    Args:
        prompt (str): The full prompt text.
        max_length (int): Maximum number of characters allowed in the result.

    Returns:
        str: ``prompt`` unchanged if it already fits, otherwise a shortened
        prompt that is never longer than ``max_length``.
    """
    if len(prompt) <= max_length:
        return prompt

    critical_sections = [
        "CRITICAL LAYOUT:",
        "🎭 CRITICAL CHARACTER CONSISTENCY PROTOCOL:",
        "CHARACTER 1",
        "CHARACTER 2",
        "CHARACTER 3",
        "STORY CONTENT:",
        "🏗️ ENVIRONMENTAL CONSISTENCY PROTOCOL:",
        "🎨 COMIC BOOK STYLE MASTERY:",
        "🎨 AUTHENTIC MANGA STYLE:",
        "🎨 PHOTOREALISTIC EXCELLENCE:",
        "🎨 CINEMATIC VISUAL MASTERY:",
        "🎨 HIGH-QUALITY ILLUSTRATION:",
        "📐 PANEL COMPOSITION MASTERY:",
        "🔍 DETAIL PRESERVATION PROTOCOL:",
        "⚡ ADVANCED QUALITY REQUIREMENTS:"
    ]
    # Only the first eight markers force a section to be kept; the remaining
    # style/quality markers are listed but not treated as critical.
    critical_markers = critical_sections[:8]

    preserved_sections = []
    preserved_length = 0

    for section in prompt.split(" || "):
        section_trimmed = section.strip()
        if not section_trimmed:
            continue

        is_critical = any(marker in section_trimmed for marker in critical_markers)
        # +4 accounts for the " || " separator that joins sections.
        fits_budget = preserved_length + len(section_trimmed) + 4 < max_length - 200

        if is_critical or fits_budget:
            preserved_sections.append(section_trimmed)
            preserved_length += len(section_trimmed) + 4
        elif preserved_length < max_length * 0.7:
            # Plenty of room left: keep a shortened piece of this section,
            # then stop processing further sections.
            available_space = max_length - preserved_length - 200
            if available_space > 100:
                preserved_sections.append(section_trimmed[:available_space - 20] + "...")
            break

    preserved_prompt = " || ".join(preserved_sections)

    final_mandate = " || FINAL MANDATE: Create a masterpiece with perfect character consistency and narrative clarity"
    if len(preserved_prompt) + len(final_mandate) <= max_length:
        preserved_prompt += final_mandate

    # Critical sections are kept regardless of the budget, so the joined text
    # can still be too long; enforce the limit the caller asked for.
    if len(preserved_prompt) > max_length:
        preserved_prompt = preserved_prompt[:max(max_length, 0)]

    return preserved_prompt
|
| 264 |
+
|
models/story_generator.py
ADDED
|
@@ -0,0 +1,562 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from google.generativeai import GenerativeModel
|
| 2 |
+
import json
|
| 3 |
+
import re
|
| 4 |
+
import os
|
| 5 |
+
import datetime
|
| 6 |
+
import openai
|
| 7 |
+
import config
|
| 8 |
+
import time
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def log_execution(func):
    """Decorator that records when each call started, ended and how long it took.

    One line per call is appended to ``content/logs.txt`` (relative to the
    current working directory) and the same line is printed to stdout.
    Only calls that return normally are logged; an exception raised by
    ``func`` propagates before anything is written.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that forwards all arguments to ``func`` and returns its result.
    """
    import functools  # local import: not among this module's top-level imports

    log_path = 'content/logs.txt'
    time_format = '%Y-%m-%d %H:%M:%S'

    @functools.wraps(func)  # keep __name__/__doc__ of the decorated callable
    def wrapper(*args, **kwargs):
        start_time = time.time()
        start_str = datetime.fromtimestamp(start_time).strftime(time_format)

        result = func(*args, **kwargs)

        end_time = time.time()
        end_str = datetime.fromtimestamp(end_time).strftime(time_format)
        duration = end_time - start_time
        entry = f"{func.__name__}, start: {start_str}, end: {end_str}, duration: {duration:.4f}s"

        # Logging is best-effort: a missing or unwritable log directory must
        # not throw away the result the wrapped function already produced.
        try:
            os.makedirs(os.path.dirname(log_path), exist_ok=True)
            with open(log_path, 'a') as f:
                f.write(entry + "\n")
        except OSError as log_error:
            print(f"Warning: could not write execution log to {log_path}: {log_error}")

        # Also print to see output immediately
        print(entry)

        return result
    return wrapper
|
| 32 |
+
|
| 33 |
+
class StoryGenerator:
|
| 34 |
+
"""
|
| 35 |
+
Direct story generator that creates comic panel style stories from user input.
|
| 36 |
+
"""
|
| 37 |
+
|
| 38 |
+
def __init__(self):
    """Create the generator and bind ``self.model`` to the 'gemini-2.5-flash' model."""
    self.model = GenerativeModel('gemini-2.5-flash')
|
| 40 |
+
@log_execution
|
| 41 |
+
def log_prompt(self, prompt, log_file="story_prompt_logs.jsonl"):
    """Log the prompt to a file for debugging and improvement purposes.

    Appends one JSON object per call (JSON-lines format) with the keys
    "timestamp" (ISO-8601 local time) and "prompt".

    Args:
        prompt: The prompt text to record.
        log_file: Path of the JSON-lines file to append to.
    """
    # The module runs both ``import datetime`` and ``from datetime import
    # datetime``; the later one wins, so the name ``datetime`` is the class
    # and ``datetime.datetime.now()`` raises AttributeError. Importing the
    # class under a private alias works whichever import is in effect.
    from datetime import datetime as _datetime

    log_entry = {
        "timestamp": _datetime.now().isoformat(),
        "prompt": prompt,
    }
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")
|
| 49 |
+
@log_execution
|
| 50 |
+
def enhance_user_story(self, user_description, max_retries=3, current_retry=0):
    """
    Ask the model for a richer, more visual version of the user's story.

    The enhancement prompt is built and logged, then sent to the model. A
    reply that passes the quality check is returned as-is; a reply that fails
    it is replaced by the minimal template enhancement. If the model call
    fails, the simplified enhancement is tried while retries remain. Any
    other failure triggers a delayed recursive retry, and once the retry
    budget is spent the original description is returned.

    Args:
        user_description: The user's original story idea or prompt
        max_retries: Maximum number of retry attempts (default: 3)
        current_retry: Current retry attempt number (default: 0)

    Returns:
        The enhanced story text, or the original description when every
        attempt fails.
    """
    print(f"[StoryGenerator] Enhancing user story (attempt {current_retry + 1}/{max_retries}): {user_description[:100]}...")

    # Guard: nothing left in the retry budget.
    if current_retry >= max_retries:
        print("[StoryGenerator] Max retries reached, returning original description")
        return user_description

    retries_remain = current_retry < max_retries - 1

    try:
        prompt_text = self._create_detail_focused_enhancement_prompt(user_description)
        self.log_prompt(prompt_text)

        try:
            reply = self.model.generate_content(prompt_text)
            candidate = reply.text.strip()

            if not self._validate_enhancement_quality(candidate, user_description):
                print("[StoryGenerator] Enhancement quality insufficient, using original with minimal enhancement")
                return self._create_minimal_enhancement(user_description)

            print("[StoryGenerator] Story successfully enhanced with detail preservation")
            return candidate

        except Exception as api_error:
            print(f"[StoryGenerator] Gemini API error: {api_error}")
            if not retries_remain:
                # Hand the failure to the outer handler, which gives up.
                raise
            print("[StoryGenerator] Retrying with simplified approach...")
            return self._simplified_enhancement(user_description)

    except Exception as err:
        print(f"[StoryGenerator] Enhancement error: {err}")
        if not retries_remain:
            print("[StoryGenerator] All enhancement attempts failed, returning original")
            return user_description
        # Linear back-off before the next attempt.
        time.sleep(1 * (current_retry + 1))
        return self.enhance_user_story(user_description, max_retries, current_retry + 1)
|
| 102 |
+
@log_execution
|
| 103 |
+
|
| 104 |
+
def _create_detail_focused_enhancement_prompt(self, user_description):
    """Build the enhancement prompt for the given story idea.

    Args:
        user_description: The user's original story text; it is inserted
            verbatim, in double quotes, after "ORIGINAL STORY:".

    Returns:
        str: The prompt text asking for a 2-3 paragraph enhanced story.
    """
    return f"""
You are an expert visual storytelling assistant. Enhance the user's story concept to create a rich visual narrative.

ORIGINAL STORY: "{user_description}"

ENHANCEMENT GOALS:
• Define key character appearances (visual features, clothing).
• Establish a clear setting and atmosphere.
• Outline a logical scene progression that can be broken down into multiple action-focused panels.
• Ensure visual consistency for characters and locations.
• Descriptions should be concise yet vivid, focusing on elements crucial for an action-oriented digital comic.

OUTPUT: Enhanced story description (2-3 paragraphs maximum) that provides a strong foundation for a multi-panel, action-focused visual story. Ensure the tone is suitable for a modern digital comic.
"""
|
| 120 |
+
|
| 121 |
+
def _validate_enhancement_quality(self, enhanced_story, original_story):
|
| 122 |
+
"""Validate that the enhancement adds coherence and appropriate detail."""
|
| 123 |
+
if not enhanced_story or len(enhanced_story) < 50:
|
| 124 |
+
return False
|
| 125 |
+
|
| 126 |
+
enhanced_words = len(enhanced_story.split())
|
| 127 |
+
original_words = len(original_story.split())
|
| 128 |
+
|
| 129 |
+
if enhanced_words < original_words or enhanced_words > original_words * 5:
|
| 130 |
+
return False
|
| 131 |
+
|
| 132 |
+
story_elements = ['character', 'scene', 'story', 'visual', 'setting', 'action']
|
| 133 |
+
has_story_elements = sum(1 for element in story_elements if element.lower() in enhanced_story.lower())
|
| 134 |
+
|
| 135 |
+
if has_story_elements < 2:
|
| 136 |
+
return False
|
| 137 |
+
|
| 138 |
+
similarity_threshold = 0.8
|
| 139 |
+
original_lower = original_story.lower()
|
| 140 |
+
enhanced_lower = enhanced_story.lower()
|
| 141 |
+
|
| 142 |
+
common_words = set(original_lower.split()) & set(enhanced_lower.split())
|
| 143 |
+
original_unique = len(set(original_lower.split()))
|
| 144 |
+
|
| 145 |
+
if original_unique > 0:
|
| 146 |
+
similarity = len(common_words) / original_unique
|
| 147 |
+
if similarity > similarity_threshold and enhanced_words < original_words * 1.5:
|
| 148 |
+
return False
|
| 149 |
+
|
| 150 |
+
return True
|
| 151 |
+
@log_execution
|
| 152 |
+
|
| 153 |
+
def _create_minimal_enhancement(self, user_description):
|
| 154 |
+
"""Create minimal enhancement that preserves original while adding basic coherence for the required number of scenes."""
|
| 155 |
+
|
| 156 |
+
enhanced = f"""
|
| 157 |
+
Enhanced Story: {user_description}
|
| 158 |
+
|
| 159 |
+
Visual Coherence Elements:
|
| 160 |
+
- Main character with consistent appearance throughout all scenes
|
| 161 |
+
- Clear setting that remains visually consistent
|
| 162 |
+
- Logical progression suitable for the required number of sequential panels
|
| 163 |
+
- Simple but complete story arc with beginning, middle, and end
|
| 164 |
+
|
| 165 |
+
This story will unfold across the required number of scenes showing the character's journey with visual consistency and narrative coherence.
|
| 166 |
+
"""
|
| 167 |
+
|
| 168 |
+
return enhanced.strip()
|
| 169 |
+
@log_execution
|
| 170 |
+
|
| 171 |
+
def _simplified_enhancement(self, user_description):
|
| 172 |
+
"""
|
| 173 |
+
Simplified enhancement fallback when the main enhancement fails.
|
| 174 |
+
|
| 175 |
+
Args:
|
| 176 |
+
user_description: Original user story description
|
| 177 |
+
|
| 178 |
+
Returns:
|
| 179 |
+
str: Simplified enhanced description focused on coherence for the required number of scenes.
|
| 180 |
+
"""
|
| 181 |
+
try:
|
| 182 |
+
simplified_prompt = f"""
|
| 183 |
+
Briefly enhance this story for an action-focused visual narrative. Keep it concise and coherent.
|
| 184 |
+
|
| 185 |
+
Original: "{user_description}"
|
| 186 |
+
|
| 187 |
+
Focus on:
|
| 188 |
+
- Core character appearance notes.
|
| 189 |
+
- Main setting description.
|
| 190 |
+
- Basic story flow suitable for action scenes.
|
| 191 |
+
- Visual consistency hints.
|
| 192 |
+
|
| 193 |
+
Enhanced story (1-2 sentences):
|
| 194 |
+
"""
|
| 195 |
+
|
| 196 |
+
response = self.model.generate_content(simplified_prompt)
|
| 197 |
+
enhanced_story = response.text.strip()
|
| 198 |
+
|
| 199 |
+
if enhanced_story and len(enhanced_story) > 20:
|
| 200 |
+
print(f"[StoryGenerator] Used simplified enhancement successfully")
|
| 201 |
+
return enhanced_story
|
| 202 |
+
else:
|
| 203 |
+
return user_description
|
| 204 |
+
|
| 205 |
+
except Exception as e:
|
| 206 |
+
print(f"[StoryGenerator] Simplified enhancement also failed: {e}")
|
| 207 |
+
return user_description
|
| 208 |
+
@log_execution
|
| 209 |
+
def generate_story(self, user_description, panels_per_page=9, num_pages=1):
    """
    Generate a comic panel style story directly from user input.

    The description is first passed through enhance_user_story, then the
    model is asked for a JSON story. The JSON object is extracted from the
    reply, repaired with _fix_json, parsed, and normalised with
    _validate_and_fix_structure. If extraction or parsing fails, the result
    of _create_fallback_story is returned instead.

    Args:
        user_description: The user's story idea or prompt
        panels_per_page: Number of panels per comic page (default is 9).
            NOTE: the value passed in is currently overwritten with 9
            inside the function, so callers cannot change it.
        num_pages: Number of pages to generate (default is 1)

    Returns:
        story_data: Structured data for the story with panels organized by pages
    """
    enhanced_story = self.enhance_user_story(user_description)

    # Hard override: the caller-supplied panels_per_page is ignored from here on.
    panels_per_page = 9
    total_panels = panels_per_page * num_pages
    print(f"[StoryGenerator] Generating comic story with {num_pages} pages, {panels_per_page} panels per page ({total_panels} total panels) from enhanced story...")

    # Doubled braces in the template below are literal braces in the f-string.
    query = f"""
You are a world-class comic book writer and visual storyteller. Your task is to create a SINGLE CONTINUOUS STORY.
The story will span exactly {num_pages} pages. Each page must contain exactly {panels_per_page} sequential action-focused panels (total of {total_panels} panels).
The final output must be a modern, digital-style comic with high quality and resolution, suitable for a 1024x1536 image size. **All {panels_per_page} panels must fit entirely within the page with clear gutters—no panel content may be cropped or cut off.**
Avoid any deformities, missing limbs, distorted or missing facial features, blurry visuals, or sketch styles. Ensure all panels are exactly the same size.

STORY CONCEPT:
"{enhanced_story}"

KEY REQUIREMENTS:
1. **Panel Count & Style**: Strictly {panels_per_page} action scenes per page. No filler. All scenes must be dynamic and contribute to the story's momentum.
2. **Visual Quality**: Generate ultra-high quality, modern digital comic art. Ensure no visual defects (deformities, missing limbs, distorted faces). All panels must be suitable for a combined 1024x1536 page layout.
3. **Continuity**:
* Story must flow seamlessly page-to-page and panel-to-panel.
* Maintain consistent character appearances (detailed in a character sheet you will generate) and settings (detailed in a setting guide you will generate).
* Logical plot progression: actions have clear causes and effects.
* Show passage of time clearly (e.g., "later," "next day").
4. **Narrative Structure**:
* Complete arc: beginning, rising action, climax, resolution.
* Meaningful character development and motivations.
5. **Visual Storytelling Focus**:
* Descriptions should emphasize actions, expressions, and settings to make the story understandable through visuals alone.
* Each panel description needs: camera angle, character positions, expressions, environment details, color palette, and mood.
* Focus on clear, dynamic action sequences.

JSON OUTPUT STRUCTURE:
{{
"title": "Overall Story Title",
"premise": "Brief story overview, themes, and setting.",
"characters": [
{{
"name": "Character Name",
"visual_description": "DETAILED visual description: height, build, face, hair, clothing. CRITICAL for consistency.",
"traits": ["Key visual trait 1", "Key visual trait 2"],
"background": "Brief backstory.",
"arc": "Character's journey/change."
}}
// ... (add more characters as needed)
],
"settings": [
{{
"name": "Setting Name",
"description": "DETAILED visual description of the location, including key elements for consistency.",
"visual_elements": ["Notable visual element 1", "Notable visual trait 2"],
"mood": "Atmosphere of the location."
}}
// ... (add more settings as needed)
],
"pages": [
{{
"page_number": 1,
"panels": [ // Exactly {panels_per_page} panels
{{
"panel_number": 1,
"title": "Action-Oriented Panel Title",
"visual_description": "ACTION-FOCUSED, extremely detailed description: character actions, expressions, positions, environment, lighting, colors, camera angle. Ensure it fits 1024x1536 page context. NO FILLER.",
"text": "Dialogue/narration (context only, not for image)",
"purpose": "How this ACTION panel drives the story.",
"symbolism": "Any visual symbols."
}}
// ... (repeat for all {panels_per_page} panels on page 1)
]
}}
// ... (repeat for all {num_pages} pages)
]
}}

REMEMBER:
- Focus on ACTION scenes. Eliminate all filler.
- Visuals are paramount. Descriptions must be rich and allow for image generation that tells the story without text.
- Adhere strictly to {panels_per_page} panels per page.
- Ensure top-tier digital art quality with no visual errors.
- All panels on a page contribute to a single 1024x1536 image.
"""

    # NOTE(review): logging and the model call sit outside the try block, so
    # a failure in either propagates to the caller instead of using the fallback.
    self.log_prompt(query)
    response = self.model.generate_content(query)

    try:
        # Greedy match from the first "{" to the last "}" in the reply.
        json_match = re.search(r'\{[\s\S]*\}', response.text, re.DOTALL)
        if json_match:
            json_str = json_match.group(0)

            json_str = self._fix_json(json_str)

            story_data = json.loads(json_str)

            story_data = self._validate_and_fix_structure(story_data, panels_per_page, num_pages)

            print(f"[StoryGenerator] Successfully generated story: {story_data.get('title', 'Untitled')}")
            return story_data
        else:
            print("[StoryGenerator] No valid JSON found in response.")
            raise ValueError("No valid JSON found in response")
    except Exception as e:
        # Any extraction/parsing/validation failure yields the fallback story.
        print(f"Error in StoryGenerator: {e}")
        return self._create_fallback_story(user_description, panels_per_page, num_pages)
|
| 324 |
+
@log_execution
|
| 325 |
+
def _validate_and_fix_structure(self, story_data, panels_per_page, num_pages):
|
| 326 |
+
"""Validate and fix the story structure if needed."""
|
| 327 |
+
if "title" not in story_data:
|
| 328 |
+
story_data["title"] = "Untitled Comic"
|
| 329 |
+
|
| 330 |
+
if "premise" not in story_data:
|
| 331 |
+
story_data["premise"] = "A visual story."
|
| 332 |
+
|
| 333 |
+
if "characters" not in story_data:
|
| 334 |
+
story_data["characters"] = []
|
| 335 |
+
|
| 336 |
+
for character in story_data.get("characters", []):
|
| 337 |
+
if "visual_description" not in character:
|
| 338 |
+
character["visual_description"] = "A character in the story."
|
| 339 |
+
if "traits" not in character:
|
| 340 |
+
character["traits"] = []
|
| 341 |
+
if "background" not in character:
|
| 342 |
+
character["background"] = "Unknown background."
|
| 343 |
+
if "arc" not in character:
|
| 344 |
+
character["arc"] = "Experiences events throughout the story."
|
| 345 |
+
|
| 346 |
+
if "settings" not in story_data:
|
| 347 |
+
story_data["settings"] = []
|
| 348 |
+
|
| 349 |
+
for setting in story_data.get("settings", []):
|
| 350 |
+
if "description" not in setting:
|
| 351 |
+
setting["description"] = "A location in the story."
|
| 352 |
+
if "visual_elements" not in setting:
|
| 353 |
+
setting["visual_elements"] = []
|
| 354 |
+
if "mood" not in setting:
|
| 355 |
+
setting["mood"] = "Neutral."
|
| 356 |
+
|
| 357 |
+
if "pages" not in story_data:
|
| 358 |
+
if "panels" in story_data:
|
| 359 |
+
panels = story_data.pop("panels")
|
| 360 |
+
story_data["pages"] = []
|
| 361 |
+
|
| 362 |
+
for i in range(num_pages):
|
| 363 |
+
start_idx = i * panels_per_page
|
| 364 |
+
end_idx = start_idx + panels_per_page
|
| 365 |
+
page_panels = panels[start_idx:end_idx] if start_idx < len(panels) else []
|
| 366 |
+
|
| 367 |
+
while len(page_panels) < panels_per_page:
|
| 368 |
+
panel_num = len(page_panels) + 1 + (i * panels_per_page)
|
| 369 |
+
page_panels.append({
|
| 370 |
+
"panel_number": panel_num,
|
| 371 |
+
"title": f"Panel {panel_num}",
|
| 372 |
+
"visual_description": "A placeholder panel",
|
| 373 |
+
"text": "",
|
| 374 |
+
"purpose": "Continuation of the story",
|
| 375 |
+
"symbolism": ""
|
| 376 |
+
})
|
| 377 |
+
|
| 378 |
+
story_data["pages"].append({
|
| 379 |
+
"page_number": i + 1,
|
| 380 |
+
"panels": page_panels
|
| 381 |
+
})
|
| 382 |
+
else:
|
| 383 |
+
story_data["pages"] = []
|
| 384 |
+
for i in range(num_pages):
|
| 385 |
+
page_panels = []
|
| 386 |
+
for j in range(panels_per_page):
|
| 387 |
+
panel_num = j + 1 + (i * panels_per_page)
|
| 388 |
+
page_panels.append({
|
| 389 |
+
"panel_number": panel_num,
|
| 390 |
+
"title": f"Panel {panel_num}",
|
| 391 |
+
"visual_description": "A placeholder panel",
|
| 392 |
+
"text": "",
|
| 393 |
+
"purpose": "Continuation of the story",
|
| 394 |
+
"symbolism": ""
|
| 395 |
+
})
|
| 396 |
+
|
| 397 |
+
story_data["pages"].append({
|
| 398 |
+
"page_number": i + 1,
|
| 399 |
+
"panels": page_panels
|
| 400 |
+
})
|
| 401 |
+
|
| 402 |
+
for i in range(len(story_data["pages"]) - 1):
|
| 403 |
+
current_page = story_data["pages"][i]
|
| 404 |
+
next_page = story_data["pages"][i + 1]
|
| 405 |
+
|
| 406 |
+
if "panels" in current_page and "panels" in next_page and current_page["panels"] and next_page["panels"]:
|
| 407 |
+
last_panel = current_page["panels"][-1]
|
| 408 |
+
first_panel = next_page["panels"][0]
|
| 409 |
+
|
| 410 |
+
last_panel_desc = last_panel.get("visual_description", "")
|
| 411 |
+
last_panel_action = last_panel.get("text", "")
|
| 412 |
+
|
| 413 |
+
continuity_note = f"Continues directly from page {current_page.get('page_number', i+1)}, panel {last_panel.get('panel_number', len(current_page['panels']))}: {last_panel_desc[:100]}..."
|
| 414 |
+
|
| 415 |
+
first_panel["continuity_note"] = continuity_note
|
| 416 |
+
|
| 417 |
+
if "visual_description" in first_panel:
|
| 418 |
+
if not first_panel["visual_description"].startswith("CONTINUING DIRECTLY"):
|
| 419 |
+
first_panel["visual_description"] = "CONTINUING DIRECTLY from previous page: " + first_panel["visual_description"]
|
| 420 |
+
|
| 421 |
+
for i, page in enumerate(story_data["pages"]):
|
| 422 |
+
if "page_number" not in page:
|
| 423 |
+
page["page_number"] = i + 1
|
| 424 |
+
|
| 425 |
+
if "panels" not in page:
|
| 426 |
+
page["panels"] = []
|
| 427 |
+
|
| 428 |
+
if len(page["panels"]) > panels_per_page:
|
| 429 |
+
page["panels"] = page["panels"][:panels_per_page]
|
| 430 |
+
|
| 431 |
+
while len(page["panels"]) < panels_per_page:
|
| 432 |
+
panel_num = len(page["panels"]) + 1 + (i * panels_per_page)
|
| 433 |
+
|
| 434 |
+
context_desc = ""
|
| 435 |
+
if page["panels"]:
|
| 436 |
+
prev_panel = page["panels"][-1]
|
| 437 |
+
prev_desc = prev_panel.get("visual_description", "")
|
| 438 |
+
context_desc = f"Continuing from previous panel: {prev_desc[:50]}... "
|
| 439 |
+
|
| 440 |
+
page["panels"].append({
|
| 441 |
+
"panel_number": panel_num,
|
| 442 |
+
"title": f"Panel {panel_num}",
|
| 443 |
+
"visual_description": f"{context_desc}A scene related to the story, moving the narrative forward.",
|
| 444 |
+
"text": "",
|
| 445 |
+
"purpose": "Continuation of the story progression",
|
| 446 |
+
"symbolism": ""
|
| 447 |
+
})
|
| 448 |
+
|
| 449 |
+
for j, panel in enumerate(page["panels"]):
|
| 450 |
+
panel_num = j + 1 + (i * panels_per_page)
|
| 451 |
+
|
| 452 |
+
if "panel_number" not in panel:
|
| 453 |
+
panel["panel_number"] = panel_num
|
| 454 |
+
|
| 455 |
+
if "title" not in panel or not panel["title"]:
|
| 456 |
+
panel["title"] = f"Panel {panel_num}"
|
| 457 |
+
|
| 458 |
+
if "visual_description" not in panel or not panel["visual_description"]:
|
| 459 |
+
context_desc = ""
|
| 460 |
+
if j > 0:
|
| 461 |
+
prev_panel = page["panels"][j-1]
|
| 462 |
+
prev_desc = prev_panel.get("visual_description", "")
|
| 463 |
+
context_desc = f"Following from previous panel: {prev_desc[:50]}... "
|
| 464 |
+
|
| 465 |
+
panel["visual_description"] = f"{context_desc}A scene that advances the story narrative."
|
| 466 |
+
|
| 467 |
+
if "text" not in panel:
|
| 468 |
+
panel["text"] = ""
|
| 469 |
+
|
| 470 |
+
if "purpose" not in panel:
|
| 471 |
+
panel["purpose"] = "Advancing the story progression"
|
| 472 |
+
|
| 473 |
+
if "symbolism" not in panel:
|
| 474 |
+
panel["symbolism"] = ""
|
| 475 |
+
|
| 476 |
+
while len(story_data["pages"]) < num_pages:
|
| 477 |
+
page_num = len(story_data["pages"]) + 1
|
| 478 |
+
page_panels = []
|
| 479 |
+
|
| 480 |
+
context_from_prev_page = ""
|
| 481 |
+
if story_data["pages"]:
|
| 482 |
+
prev_page = story_data["pages"][-1]
|
| 483 |
+
if prev_page.get("panels"):
|
| 484 |
+
last_panel = prev_page["panels"][-1]
|
| 485 |
+
last_desc = last_panel.get("visual_description", "")
|
| 486 |
+
context_from_prev_page = f"Continuing directly from the previous page: {last_desc[:100]}... "
|
| 487 |
+
|
| 488 |
+
for j in range(panels_per_page):
|
| 489 |
+
panel_num = j + 1 + ((page_num - 1) * panels_per_page)
|
| 490 |
+
|
| 491 |
+
panel_desc = "A scene that advances the story narrative."
|
| 492 |
+
if j == 0 and context_from_prev_page:
|
| 493 |
+
panel_desc = context_from_prev_page + panel_desc
|
| 494 |
+
elif j > 0 and page_panels:
|
| 495 |
+
prev_panel = page_panels[j-1]
|
| 496 |
+
prev_desc = prev_panel.get("visual_description", "")
|
| 497 |
+
panel_desc = f"Following from previous panel: {prev_desc[:50]}... " + panel_desc
|
| 498 |
+
|
| 499 |
+
page_panels.append({
|
| 500 |
+
"panel_number": panel_num,
|
| 501 |
+
"title": f"Panel {panel_num}",
|
| 502 |
+
"visual_description": panel_desc,
|
| 503 |
+
"text": "",
|
| 504 |
+
"purpose": "Advancing the story progression",
|
| 505 |
+
"symbolism": ""
|
| 506 |
+
})
|
| 507 |
+
|
| 508 |
+
story_data["pages"].append({
|
| 509 |
+
"page_number": page_num,
|
| 510 |
+
"panels": page_panels
|
| 511 |
+
})
|
| 512 |
+
|
| 513 |
+
return story_data
|
| 514 |
+
@log_execution
|
| 515 |
+
def _create_fallback_story(self, user_description, panels_per_page, num_pages):
|
| 516 |
+
"""Create a basic fallback story structure if generation fails."""
|
| 517 |
+
pages = []
|
| 518 |
+
|
| 519 |
+
for i in range(num_pages):
|
| 520 |
+
page_panels = []
|
| 521 |
+
for j in range(panels_per_page):
|
| 522 |
+
panel_num = j + 1 + (i * panels_per_page)
|
| 523 |
+
page_panels.append({
|
| 524 |
+
"panel_number": panel_num,
|
| 525 |
+
"title": f"Panel {panel_num}",
|
| 526 |
+
"visual_description": f"A scene related to {user_description[:30]}...",
|
| 527 |
+
"text": f"Text for panel {panel_num}",
|
| 528 |
+
"purpose": f"Part of the story progression",
|
| 529 |
+
"symbolism": ""
|
| 530 |
+
})
|
| 531 |
+
|
| 532 |
+
pages.append({
|
| 533 |
+
"page_number": i + 1,
|
| 534 |
+
"panels": page_panels
|
| 535 |
+
})
|
| 536 |
+
|
| 537 |
+
return {
|
| 538 |
+
"title": f"A Story About {user_description[:30]}...",
|
| 539 |
+
"premise": f"A comic story about {user_description[:50]}...",
|
| 540 |
+
"pages": pages
|
| 541 |
+
}
|
| 542 |
+
@log_execution
|
| 543 |
+
|
| 544 |
+
def _fix_json(self, json_str):
|
| 545 |
+
"""Attempt to fix common JSON issues from LLM responses."""
|
| 546 |
+
json_str = re.sub(r'//.*?', '', json_str)
|
| 547 |
+
json_str = re.sub(r'/\*[\s\S]*?\*/', '', json_str, flags=re.DOTALL)
|
| 548 |
+
|
| 549 |
+
json_str = re.sub(r'([{, ]\s*)([a-zA-Z_][a-zA-Z0-9_]*)(\s*:)', r'\1"\2"\3', json_str)
|
| 550 |
+
|
| 551 |
+
json_str = re.sub(r',(\s*[}\\]])', r'\1', json_str)
|
| 552 |
+
return json_str
|
| 553 |
+
@log_execution
|
| 554 |
+
|
| 555 |
+
def generate_panel_image_prompt(self, panel_data, style=None):
    """Build the image-generation prompt for a single panel.

    The prompt always contains the panel's "visual_description" (a missing key
    raises ``KeyError``). The style phrase is included only when ``style`` is
    truthy, and the dialogue sentence only when "text" is present and non-empty.
    """
    style_text = ""
    if style:
        style_text = f" in {style} style"

    prompt = f"Create a comic book panel{style_text} showing: {panel_data['visual_description']}. "

    has_dialogue = 'text' in panel_data and panel_data['text']
    if not has_dialogue:
        return prompt

    return prompt + f"The panel includes the dialogue: '{panel_data['text']}'. "
|
new_image_splitting.py
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import numpy as np
|
| 3 |
+
import os
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from PIL import Image
|
| 6 |
+
|
| 7 |
+
class AutomatedCollageSplitter:
    """Split a multi-panel comic page ("collage") into individual panel images.

    ``split_collage`` drives the pipeline: adaptive threshold + morphological
    close, external-contour detection filtered by area / aspect ratio /
    solidity, IoU-based non-maximum suppression, removal of short panels that
    touch the bottom edge, then cropping, optional x4 upscaling and saving.
    """

    def __init__(self):
        # Contour filters. Area ratios are relative to the whole image area.
        self.min_segment_area_ratio = 0.01
        self.max_segment_area_ratio = 0.95
        self.min_aspect_ratio = 0.2   # width / height
        self.max_aspect_ratio = 5.0
        self.min_solidity = 0.9       # contour area / convex hull area
        self.nms_threshold = 0.3      # boxes with IoU >= this are suppressed

        self.upscaler = None
        self._initialize_upscaler()

    def _initialize_upscaler(self):
        """Load the FSRCNN x4 super-resolution model if it is not loaded yet.

        Any exception raised while loading (including a missing weights file)
        is reported on stdout and leaves ``self.upscaler`` as ``None``, so the
        splitter keeps working without upscaling.
        """
        if self.upscaler is not None:
            return
        try:
            model_name = 'fsrcnn'
            # Relative path: resolved against the current working directory.
            model_path = os.path.join('models', 'weights', 'FSRCNN-small_x4.pb')
            scale = 4
            if not os.path.isfile(model_path):
                raise FileNotFoundError(f"Model file not found at {model_path}")
            self.upscaler = cv2.dnn_superres.DnnSuperResImpl_create()
            self.upscaler.readModel(model_path)
            self.upscaler.setModel(model_name, scale)
            print(f"✓ OpenCV DNN upscaler model loaded: {model_name} with scale x{scale}")
        except Exception as e:
            print(f"⚠ Could not initialize OpenCV DNN upscaler: {e}. Proceeding without upscaling.")
            self.upscaler = None

    def _upscale_image(self, image_array):
        """Return the upscaled image, or ``image_array`` unchanged on any failure."""
        if not self.upscaler:
            print("❌ Upscaling skipped because the upscaler is not available.")
            return image_array
        try:
            return self.upscaler.upsample(image_array)
        except Exception as e:
            print(f"❌ Error during image upscaling: {e}")
            return image_array

    def preprocess_for_contours(self, image):
        """Pre-process the image to make panel borders stand out for contour detection.

        Converts the BGR image to grayscale, applies an inverted mean adaptive
        threshold (block size 21, constant 8) and closes small gaps with a 5x5
        kernel over two iterations. Returns the resulting binary image.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        binary = cv2.adaptiveThreshold(
            gray, 255,
            cv2.ADAPTIVE_THRESH_MEAN_C,
            cv2.THRESH_BINARY_INV,
            21,
            8
        )

        kernel = np.ones((5, 5), np.uint8)
        closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=2)

        return closed

    def find_panel_contours(self, processed_image, original_shape):
        """Find and filter contours that are likely to be comic panels.

        Args:
            processed_image: Binary image from ``preprocess_for_contours``.
            original_shape: Shape of the source image; only ``[:2]`` is used.

        Returns:
            ``np.ndarray`` with one row ``[x1, y1, x2, y2, area]`` per accepted
            contour (an empty array when none pass the filters).
        """
        contours, _ = cv2.findContours(processed_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        height, width = original_shape[:2]
        total_area = height * width
        min_area = total_area * self.min_segment_area_ratio
        max_area = total_area * self.max_segment_area_ratio

        potential_panels = []
        for contour in contours:
            area = cv2.contourArea(contour)

            # Reject specks and shapes that cover almost the entire page.
            if not (min_area < area < max_area):
                continue

            x, y, w, h = cv2.boundingRect(contour)

            if h == 0:
                continue
            aspect_ratio = w / h
            if not (self.min_aspect_ratio < aspect_ratio < self.max_aspect_ratio):
                continue

            # Solidity near 1 means the contour nearly fills its convex hull.
            hull = cv2.convexHull(contour)
            hull_area = cv2.contourArea(hull)
            if hull_area == 0:
                continue
            solidity = float(area) / hull_area
            if solidity < self.min_solidity:
                continue

            potential_panels.append([x, y, x + w, y + h, area])

        return np.array(potential_panels)

    def apply_non_maximum_suppression(self, boxes):
        """Apply NMS to discard boxes that overlap a larger accepted box.

        Boxes are visited in order of decreasing contour area (column 4). Each
        picked box removes every remaining box whose IoU with it is not below
        ``self.nms_threshold``.

        Args:
            boxes: Array of ``[x1, y1, x2, y2, area]`` rows.

        Returns:
            ``[]`` when ``boxes`` is empty, otherwise an ``np.ndarray`` of the
            kept rows, largest area first.
        """
        if len(boxes) == 0:
            return []

        boxes = boxes[boxes[:, 4].argsort()[::-1]]

        picked_boxes = []
        while len(boxes) > 0:
            best_box = boxes[0]
            picked_boxes.append(best_box)

            remaining_boxes = boxes[1:]

            # Intersection rectangle of the picked box with every other box.
            x1 = np.maximum(best_box[0], remaining_boxes[:, 0])
            y1 = np.maximum(best_box[1], remaining_boxes[:, 1])
            x2 = np.minimum(best_box[2], remaining_boxes[:, 2])
            y2 = np.minimum(best_box[3], remaining_boxes[:, 3])

            inter_w = np.maximum(0, x2 - x1)
            inter_h = np.maximum(0, y2 - y1)
            intersection_area = inter_w * inter_h

            best_box_area = (best_box[2] - best_box[0]) * (best_box[3] - best_box[1])
            remaining_boxes_area = (remaining_boxes[:, 2] - remaining_boxes[:, 0]) * (remaining_boxes[:, 3] - remaining_boxes[:, 1])

            union_area = best_box_area + remaining_boxes_area - intersection_area
            iou = intersection_area / union_area

            boxes = remaining_boxes[iou < self.nms_threshold]

        return np.array(picked_boxes)

    def split_collage(self, image_path, output_dir=None, debug=False):
        """Main function to automatically split collage using contour detection and NMS.

        Args:
            image_path: Path of the collage image to split.
            output_dir: Directory for the segments. Defaults to
                ``<image stem>_segments`` next to the image. Missing parent
                directories are created.
            debug: When true, also write the three debug images produced by
                ``create_debug_images`` into ``output_dir``.

        Returns:
            A list of ``{"path": str, "caption": str}`` dicts, one per saved
            segment, ordered top-to-bottom then left-to-right.

        Raises:
            ValueError: If the image cannot be loaded.
        """
        img = cv2.imread(str(image_path))
        if img is None:
            raise ValueError(f"Could not load image from {image_path}")

        print(f"Processing image: {image_path}")
        print(f"Image dimensions: {img.shape[1]}x{img.shape[0]}")

        processed_image = self.preprocess_for_contours(img)
        print("✓ Preprocessed image for contour detection")

        potential_panels = self.find_panel_contours(processed_image, img.shape)
        print(f"✓ Found {len(potential_panels)} potential panel contours")

        final_panels = self.apply_non_maximum_suppression(potential_panels)
        print(f"✓ Refined to {len(final_panels)} panels after Non-Maximum Suppression")

        # Drop panels that touch the bottom edge and are clearly shorter than
        # the tallest panel: they are treated as cut-off, incomplete panels.
        if len(final_panels) > 0:
            img_height = img.shape[0]
            panel_heights = [box[3] - box[1] for box in final_panels]
            if panel_heights:
                max_panel_height = max(panel_heights)

                if max_panel_height > 0:
                    bottom_margin = 10
                    height_threshold_ratio = 0.8

                    truly_final_panels = []
                    for box in final_panels:
                        h = box[3] - box[1]
                        y2 = box[3]

                        is_at_bottom = y2 >= (img_height - bottom_margin)
                        is_too_short = h < (max_panel_height * height_threshold_ratio)

                        if is_at_bottom and is_too_short:
                            print(f"Skipping potentially incomplete panel at the bottom (h={h} vs max_h={max_panel_height})")
                            continue
                        truly_final_panels.append(box)

                    final_panels = truly_final_panels

        # Detection is considered failed below four panels; fall back to
        # cutting the page into equal quadrants.
        if len(final_panels) < 4:
            print("⚠ Contour detection found too few panels. Creating fallback 2x2 grid...")
            h, w = img.shape[:2]
            final_panels = np.array([
                [0, 0, w//2, h//2, 0],
                [w//2, 0, w, h//2, 0],
                [0, h//2, w//2, h, 0],
                [w//2, h//2, w, h, 0]
            ])

        # Reading order: by top edge, then by left edge.
        final_panels = sorted(final_panels, key=lambda b: (b[1], b[0]))

        if output_dir is None:
            output_dir = Path(image_path).parent / f"{Path(image_path).stem}_segments"
        output_dir = Path(output_dir)
        # parents=True: a caller-supplied nested directory must not fail just
        # because an intermediate directory does not exist yet.
        output_dir.mkdir(parents=True, exist_ok=True)

        saved_segments_info = []
        for i, box in enumerate(final_panels):
            x1, y1, x2, y2, _ = map(int, box)

            # Keep a few pixels of border around the panel, clamped to the image.
            padding = 3
            x1 = max(0, x1 - padding)
            y1 = max(0, y1 - padding)
            x2 = min(img.shape[1], x2 + padding)
            y2 = min(img.shape[0], y2 + padding)

            segment = img[y1:y2, x1:x2]
            original_dims = (segment.shape[1], segment.shape[0])

            upscaled_segment_np = self._upscale_image(segment)

            # OpenCV arrays are BGR; PIL expects RGB.
            final_image = Image.fromarray(cv2.cvtColor(upscaled_segment_np, cv2.COLOR_BGR2RGB))

            final_dims = (final_image.width, final_image.height)
            output_path = output_dir / f"segment_{i+1:02d}.png"
            final_image.save(str(output_path))

            caption = (
                f"Panel {i+1}<br>"
                f"Original: {original_dims[0]}x{original_dims[1]}<br>"
                f"Upscaled: {final_dims[0]}x{final_dims[1]}"
            )

            saved_segments_info.append({
                "path": str(output_path),
                "caption": caption
            })
            print(f" Saved segment {i+1}: {final_dims[0]}x{final_dims[1]} pixels to {output_path}")

        if debug:
            # Reuse the contours found above instead of detecting them again.
            self.create_debug_images(img, processed_image, potential_panels, final_panels, output_dir)

        print(f"\n🎉 Successfully split collage into {len(saved_segments_info)} segments!")
        print(f"📁 Segments saved in: {output_dir}")
        return saved_segments_info

    def create_debug_images(self, original, processed, potential_boxes, final_boxes, output_dir):
        """Create debug images showing the processing steps.

        Writes three PNG files into ``output_dir``: the preprocessed binary
        image, the candidate boxes drawn on a copy of the original, and the
        final boxes drawn on another copy.
        """
        cv2.imwrite(str(output_dir / "debug_01_binary_closed.png"), processed)

        potential_img = original.copy()
        if len(potential_boxes) > 0:
            for x1, y1, x2, y2, _ in potential_boxes:
                cv2.rectangle(potential_img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 255), 2)
        cv2.imwrite(str(output_dir / "debug_02_potential_boxes.png"), potential_img)

        final_img = original.copy()
        if len(final_boxes) > 0:
            for x1, y1, x2, y2, _ in final_boxes:
                cv2.rectangle(final_img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 3)
        cv2.imwrite(str(output_dir / "debug_03_final_panels.png"), final_img)

        print("🔍 Debug images saved:")
        print(" - debug_01_binary_closed.png (preprocessed)")
        print(" - debug_02_potential_boxes.png (before NMS)")
        print(" - debug_03_final_panels.png (after NMS)")
|
| 251 |
+
|
| 252 |
+
def main():
    """Demonstrate the splitter on a single, hard-coded image path.

    Prints a hint and does nothing else when the path does not exist. Any
    exception raised while checking or splitting is printed with its traceback
    instead of propagating.
    """
    splitter = AutomatedCollageSplitter()

    # Placeholder: edit this to point at a real collage image.
    image_path = "path/to/your/comic_image.png"

    try:
        if Path(image_path).exists():
            segments = splitter.split_collage(image_path=image_path, debug=True)

            print(f"\n📊 Processing complete!")
            print(f"Generated {len(segments)} separate images from the collage")
        else:
            print(f"❌ Image file not found: {image_path}")
            print("Please update the image_path variable with the correct path to your collage image.")
    except Exception as e:
        print(f"❌ Error processing image: {e}")
        import traceback
        traceback.print_exc()
|
| 276 |
+
|
| 277 |
+
if __name__ == "__main__":
|
| 278 |
+
main()
|
notes.md
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Image Processing Pipeline Notes
|
| 2 |
+
|
| 3 |
+
## Task Tracking
|
| 4 |
+
|
| 5 |
+
| Task | Status |
|
| 6 |
+
|------|--------|
|
| 7 |
+
| Split images correctly and discard half-generated images | ✅ |
|
| 8 |
+
| Increase split images sizes | ✅ |
|
| 9 |
+
| Upscale images and increase resolution | ✅ |
|
| 10 |
+
| Ensure everything is working correctly | ✅ |
|
| 11 |
+
| Review and adjust images | ✅ |
|
| 12 |
+
| Use better Image processing techniques and models | ⬜ |
|
| 13 |
+
|
| 14 |
+
## Original Task List
|
| 15 |
+
1. Split the images correctly and discard half-generated images.
|
| 16 |
+
2. Increase split images sizes.
|
| 17 |
+
3. Upscale images and increase resolution.
|
| 18 |
+
4. Ensure everything is working correctly.
|
| 19 |
+
|
| 20 |
+
## Process Flow
|
| 21 |
+
|
| 22 |
+
```mermaid
|
| 23 |
+
graph TD
|
| 24 |
+
A[Input Image] --> B[Split Images]
|
| 25 |
+
B --> C{Quality Check}
|
| 26 |
+
C -->|Good Quality| D[Size Increase]
|
| 27 |
+
C -->|Poor Quality| E[Discard]
|
| 28 |
+
D --> F[Upscale]
|
| 29 |
+
F --> G[Resolution Enhancement]
|
| 30 |
+
G --> H[Final Quality Check]
|
| 31 |
+
H -->|Pass| I[Final Image]
|
| 32 |
+
H -->|Fail| J[Review & Adjust]
|
| 33 |
+
J --> B
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
## Notes
|
| 37 |
+
- ⬜ = Not Started/In Progress
|
| 38 |
+
- ✅ = Completed
|
| 39 |
+
- Update checkboxes as tasks are completed
|
| 40 |
+
- Follow the process flow for each image processing task
|
| 41 |
+
- Ensure quality checks are performed at each stage
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==5.35.0
|
| 2 |
+
pillow==11.2.1
|
| 3 |
+
python-dotenv==1.1.0
|
| 4 |
+
google-generativeai==0.8.4
|
| 5 |
+
google-auth-oauthlib==1.2.1
|
| 6 |
+
google-api-python-client==2.166.0
|
| 7 |
+
requests==2.32.3
|
| 8 |
+
numpy==2.2.5
|
| 9 |
+
openai==1.78.0
|
| 10 |
+
opencv-contrib-python
|
| 11 |
+
matplotlib
|
start.sh
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash

# Start app.py in the background. nohup keeps it running after the launching
# shell exits; stdout and stderr are both redirected to output.log.
nohup python3 app.py > output.log 2>&1 &

# Printed immediately after launching; it does not confirm the app is up.
echo "starting server"
|
| 7 |
+
|
token.pickle
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d9f3572ff848d00c9fdface71ac47338c510ae286d1dfd7128625eb75a5956a
|
| 3 |
+
size 1002
|
ui/Compumacy-Logo-Trans2.png
ADDED
|
ui/__init__.py
ADDED
|
File without changes
|
ui/content/log.txt
ADDED
|
File without changes
|
ui/story_interface.py
ADDED
|
@@ -0,0 +1,261 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import time
|
| 3 |
+
from utils.story_management import (
|
| 4 |
+
generate_direct_comic,
|
| 5 |
+
extract_comic_scenes,
|
| 6 |
+
load_narration_from_file
|
| 7 |
+
)
|
| 8 |
+
from config import IMAGE_STYLES, IMAGE_STYLE_INFO, AGE_GROUPS
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
|
| 11 |
+
def log_execution(func):
    """Decorator that records when each call to ``func`` started and ended.

    After a successful call, one line with the function name, start time, end
    time and duration is appended to ``content/logs.txt`` (relative to the
    current working directory) and printed to stdout. Calls that raise are
    not logged; the exception propagates unchanged.

    Logging is best-effort: if the log file cannot be written, a notice is
    printed and the wrapped function's result is still returned.
    """
    import functools  # local import so this decorator needs no new file-level import

    # wraps() keeps the decorated function's __name__ and __doc__.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        start_str = datetime.fromtimestamp(start_time).strftime('%Y-%m-%d %H:%M:%S')

        result = func(*args, **kwargs)

        end_time = time.time()
        end_str = datetime.fromtimestamp(end_time).strftime('%Y-%m-%d %H:%M:%S')
        duration = end_time - start_time

        record = f"{func.__name__}, start: {start_str}, end: {end_str}, duration: {duration:.4f}s"

        # Write to file (works in Colab). A failure here must not discard the
        # result of a call that has already succeeded.
        try:
            with open('content/logs.txt', 'a', encoding='utf-8') as f:
                f.write(record + "\n")
        except OSError as exc:
            print(f"[log_execution] Could not write to content/logs.txt: {exc}")

        # Also print to see output immediately
        print(record)

        return result
    return wrapper
|
| 31 |
+
@log_execution
|
| 32 |
+
def create_story_interface(demo: gr.Blocks) -> gr.Blocks:
    """Build the comic-generation UI and hand back the given Blocks instance.

    All components are created by ``create_quick_comic_tab()``; ``demo`` itself
    is not touched here and is returned as-is.

    NOTE(review): ``demo`` is never passed to the tab builder, so this
    presumably has to be called inside an active ``with gr.Blocks() as demo:``
    context for the components to attach to it. Confirm against the caller.

    Args:
        demo (gr.Blocks): The Gradio Blocks instance the interface belongs to.

    Returns:
        gr.Blocks: The same ``demo`` object that was passed in.
    """
    create_quick_comic_tab()
    return demo
|
| 48 |
+
def create_quick_comic_tab() -> None:
|
| 49 |
+
"""Create a simple tab for direct prompt-to-image comic generation.
|
| 50 |
+
|
| 51 |
+
Sets up the main comic generation interface with the following components:
|
| 52 |
+
- Story prompt input field
|
| 53 |
+
- AI prompt enhancement option
|
| 54 |
+
- Visual style selection
|
| 55 |
+
- Number of scenes selector
|
| 56 |
+
- Generation controls
|
| 57 |
+
- Image display area
|
| 58 |
+
- Scene navigation system
|
| 59 |
+
|
| 60 |
+
The interface allows users to:
|
| 61 |
+
1. Input their story description
|
| 62 |
+
2. Configure generation parameters
|
| 63 |
+
3. Generate a multi-panel comic
|
| 64 |
+
4. View and navigate through individual scenes
|
| 65 |
+
"""
|
| 66 |
+
|
| 67 |
+
with gr.Column():
|
| 68 |
+
|
| 69 |
+
gr.Markdown("Welcome to Hekaya ")
|
| 70 |
+
|
| 71 |
+
with gr.Row():
|
| 72 |
+
with gr.Column(scale=3):
|
| 73 |
+
user_prompt = gr.Textbox(
|
| 74 |
+
label="What Hekaya story would you like to visualize?",
|
| 75 |
+
placeholder="Describe your story with main characters and settings... (e.g., 'A young wizard learning magic in an ancient castle')",
|
| 76 |
+
lines=4
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
+
enrich_prompt = gr.Checkbox(
|
| 80 |
+
label="Enhance prompt with AI for coherence",
|
| 81 |
+
value=True,
|
| 82 |
+
info="Use AI to add just enough detail and coherence for consistent visual storytelling across all scenes"
|
| 83 |
+
)
|
| 84 |
+
|
| 85 |
+
with gr.Column(scale=1):
|
| 86 |
+
comic_style = gr.Dropdown(
|
| 87 |
+
label="Visual Style",
|
| 88 |
+
choices=IMAGE_STYLES,
|
| 89 |
+
value="Comic Book Style"
|
| 90 |
+
)
|
| 91 |
+
|
| 92 |
+
style_description = gr.Markdown(
|
| 93 |
+
value=f"*{IMAGE_STYLE_INFO['Comic Book Style']}*",
|
| 94 |
+
label="Style Description"
|
| 95 |
+
)
|
| 96 |
+
|
| 97 |
+
age_group = gr.Dropdown(
|
| 98 |
+
label="Target Age Group",
|
| 99 |
+
choices=AGE_GROUPS,
|
| 100 |
+
value="9-12 (Pre-teen)",
|
| 101 |
+
info="Select the audience age group. Narration language, detail, and length will adapt automatically."
|
| 102 |
+
)
|
| 103 |
+
|
| 104 |
+
image_quality = gr.Dropdown(
|
| 105 |
+
label="Image Quality",
|
| 106 |
+
choices=["Low", "Medium", "High"],
|
| 107 |
+
value="Low",
|
| 108 |
+
info="Select the quality level for generated images. Higher quality may take longer to generate."
|
| 109 |
+
)
|
| 110 |
+
|
| 111 |
+
generate_btn = gr.Button("Generate Hekaya Story", variant="primary")
|
| 112 |
+
status_display = gr.Markdown("")
|
| 113 |
+
|
| 114 |
+
with gr.Row():
|
| 115 |
+
with gr.Column(scale=2):
|
| 116 |
+
comic_image = gr.Image(label="Generated Hekaya Story", type="filepath")
|
| 117 |
+
|
| 118 |
+
with gr.Column(scale=1, elem_id="save_info_container"):
|
| 119 |
+
|
| 120 |
+
gr.Markdown("Your generated story images are automatically saved locally.")
|
| 121 |
+
save_path_display = gr.Markdown("", elem_id="save_path_info")
|
| 122 |
+
|
| 123 |
+
narration_display = gr.Markdown(
|
| 124 |
+
|
| 125 |
+
visible=True,
|
| 126 |
+
elem_id="story_narration",
|
| 127 |
+
elem_classes=["story-narration-box"]
|
| 128 |
+
)
|
| 129 |
+
|
| 130 |
+
with gr.Column(visible=False) as scene_viewer_container:
|
| 131 |
+
|
| 132 |
+
gr.Markdown("Use the navigation buttons to view each upscaled scene individually.")
|
| 133 |
+
|
| 134 |
+
with gr.Row(equal_height=True):
|
| 135 |
+
prev_scene_btn = gr.Button("← Previous Scene", variant="secondary")
|
| 136 |
+
scene_counter = gr.Markdown("Scene 1 of 1", elem_id="scene_counter")
|
| 137 |
+
next_scene_btn = gr.Button("Next Scene →", variant="secondary")
|
| 138 |
+
|
| 139 |
+
scene_image = gr.Image(label="Current Scene", type="filepath", height=768)
|
| 140 |
+
scene_caption_display = gr.Markdown("", elem_id="scene_caption", elem_classes=["scene-caption-box"])
|
| 141 |
+
scene_save_path = gr.Markdown("", elem_id="scene_save_path_info")
|
| 142 |
+
|
| 143 |
+
scene_info = gr.State([])
|
| 144 |
+
current_scene_index = gr.State(0)
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def update_style_description(style: str) -> str:
    """Return the description of the selected style wrapped in ``*`` markers.

    The text is looked up in ``IMAGE_STYLE_INFO`` under ``style``; a missing
    key raises ``KeyError``.
    """
    description = IMAGE_STYLE_INFO[style]
    return "*{}*".format(description)
|
| 150 |
+
|
| 151 |
+
def show_generating_message() -> str:
    """Return the placeholder status text shown while generation is running."""
    loading_text = "🔄 Generating your story scenes... Please wait..."
    return loading_text
|
| 154 |
+
|
| 155 |
+
def generate_comic_with_length(user_prompt, comic_style, enrich_prompt, age_group, image_quality):
    """Generate the story with fixed scene settings and build the narration update.

    Args:
        user_prompt: Story prompt entered by the user.
        comic_style: Selected image style, forwarded as the image style.
        enrich_prompt: Whether the prompt should be enhanced before generation.
        age_group: Selected audience age group, forwarded unchanged.
        image_quality: Selected image quality. Accepted so the signature
            matches the click handler's input list, but not forwarded:
            ``generate_direct_comic`` is called without it.
            NOTE(review): confirm whether quality should reach the generator.

    Returns:
        tuple: ``(image_path, save_message, status_message, narration_update)``
        where the first three items come straight from
        ``generate_direct_comic`` and the last is a ``gr.update`` for the
        narration component.
    """
    image_path, save_message, status_message, narration = generate_direct_comic(
        user_prompt,
        comic_style,
        12,  # num_scenes
        enrich_prompt,
        3,  # narration_length
        age_group,
    )

    # Hand the narration text to the Markdown component; blank or
    # whitespace-only narration clears it instead.
    has_narration = bool(narration and narration.strip())
    narration_update = gr.update(visible=True, value=narration if has_narration else "")

    return image_path, save_message, status_message, narration_update
|
| 173 |
+
|
| 174 |
+
def init_scene_viewer(comic_path: str | None) -> tuple:
    """Populate the scene viewer from the scenes extracted out of ``comic_path``.

    Returns a 7-tuple: scene records, current index, a visibility update for
    the viewer container, first scene path, first scene caption, counter
    text, and the save message. When there is no image or no scene could be
    extracted, the viewer is hidden and the last item explains why.
    """
    def hidden_viewer(reason):
        # Shared shape for both "nothing to show" outcomes.
        return [], 0, gr.update(visible=False), None, "", "Scene 0 of 0", reason

    if not comic_path:
        return hidden_viewer("No story image generated")

    scenes, saved_note = extract_comic_scenes(comic_path, 0)
    if not scenes:
        return hidden_viewer("Failed to extract scenes")

    opening = scenes[0]
    container_update = gr.update(visible=True)
    opening_path = opening['path']
    opening_caption = opening['caption']
    counter_text = f"Scene 1 of {len(scenes)}"

    return scenes, 0, container_update, opening_path, opening_caption, counter_text, saved_note
|
| 195 |
+
|
| 196 |
+
def update_scene_display(scene_data: list, current_index: int) -> tuple:
    """Return ``(path, caption, counter_text)`` for the scene at ``current_index``.

    The index is clamped into the valid range; an empty ``scene_data`` yields
    ``(None, "", "Scene 0 of 0")``.
    """
    if scene_data:
        total = len(scene_data)
        position = current_index
        if position > total - 1:
            position = total - 1
        if position < 0:
            position = 0

        selected = scene_data[position]
        counter_text = f"Scene {position + 1} of {total}"
        return selected['path'], selected['caption'], counter_text

    return None, "", "Scene 0 of 0"
|
| 205 |
+
|
| 206 |
+
def navigate_to_previous_scene(idx: int) -> int:
    """Return the index one step back, never going below zero."""
    previous = idx - 1
    return previous if previous > 0 else 0
|
| 209 |
+
|
| 210 |
+
def navigate_to_next_scene(paths: list, idx: int) -> int:
    """Return the index one step forward, capped at the last entry of ``paths``.

    An empty ``paths`` always yields 0.
    """
    if not paths:
        return 0

    last_index = len(paths) - 1
    following = idx + 1
    return last_index if last_index <= following else following
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
# Refresh the style description whenever a different style is selected.
comic_style.change(
    fn=update_style_description,
    inputs=[comic_style],
    outputs=[style_description]
)

# Generate button: three chained steps.
#   1. show the "generating" status text,
#   2. run generation and fill image, save info, status and narration,
#   3. extract scenes from the generated image and set up the scene viewer.
generate_btn.click(
    fn=show_generating_message,
    inputs=None,
    outputs=status_display
).then(
    fn=generate_comic_with_length,
    inputs=[user_prompt, comic_style, enrich_prompt, age_group, image_quality],
    outputs=[comic_image, save_path_display, status_display, narration_display]
).then(
    fn=init_scene_viewer,
    inputs=[comic_image],
    # Order matches the 7-tuple returned by init_scene_viewer.
    outputs=[
        scene_info,
        current_scene_index,
        scene_viewer_container,
        scene_image,
        scene_caption_display,
        scene_counter,
        scene_save_path
    ]
)

# Previous button: update the stored index first, then redraw from it.
prev_scene_btn.click(
    fn=navigate_to_previous_scene,
    inputs=[current_scene_index],
    outputs=[current_scene_index]
).then(
    fn=update_scene_display,
    inputs=[scene_info, current_scene_index],
    outputs=[scene_image, scene_caption_display, scene_counter]
)

# Next button: same two-step pattern; the scene list is passed so the index
# can be capped at the last scene.
next_scene_btn.click(
    fn=navigate_to_next_scene,
    inputs=[scene_info, current_scene_index],
    outputs=[current_scene_index]
).then(
    fn=update_scene_display,
    inputs=[scene_info, current_scene_index],
    outputs=[scene_image, scene_caption_display, scene_counter]
)
|
utils/__init__.py
ADDED
|
File without changes
|
utils/comic_panel_splitter.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import numpy as np
|
| 3 |
+
import os
|
| 4 |
+
import time
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def log_execution(func):
    """Decorate ``func`` so every successful call is timed and logged.

    One line with the function name, start time, end time and duration is
    appended to ``content/logs.txt`` and printed. If ``func`` raises, the
    exception propagates and nothing is logged.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same call signature that returns ``func``'s result.
    """
    # Local import so this block does not depend on a file-level import.
    import functools

    log_path = 'content/logs.txt'

    # wraps() keeps the original __name__/__doc__ on the decorated function.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        start_str = datetime.fromtimestamp(start_time).strftime('%Y-%m-%d %H:%M:%S')

        result = func(*args, **kwargs)

        end_time = time.time()
        end_str = datetime.fromtimestamp(end_time).strftime('%Y-%m-%d %H:%M:%S')
        duration = end_time - start_time

        entry = f"{func.__name__}, start: {start_str}, end: {end_str}, duration: {duration:.4f}s"

        # Logging is best-effort: a missing or unwritable log directory must
        # not discard the result of a call that already succeeded.
        try:
            os.makedirs(os.path.dirname(log_path), exist_ok=True)
            with open(log_path, 'a') as f:
                f.write(entry + "\n")
        except OSError as log_error:
            print(f"Warning: could not write execution log: {log_error}")

        # Also print to see output immediately
        print(entry)

        return result
    return wrapper
|
| 28 |
+
|
| 29 |
+
@log_execution
|
| 30 |
+
def split_comic_panels(image_path, output_dir, min_area=10000, row_height=50):
    """Detect panel regions in a comic page and save each one as a PNG.

    The page is converted to grayscale, thresholded (inverted adaptive
    Gaussian threshold), morphologically closed, and its external contours
    are collected. The bounding rectangle of each sufficiently large contour
    is cropped from the original image and written to
    ``output_dir/panel_NN.png``, numbered from 1 in sort order.

    Args:
        image_path: Path of the page image to read.
        output_dir: Directory that receives the panel files; created if needed.
        min_area: Contours with an area below this value are ignored.
        row_height: Vertical bucket size used when ordering panels: boxes are
            sorted by ``(y // row_height, x)``. Must be a positive integer.

    Returns:
        None. If the image cannot be read, an error is printed and nothing
        is written.
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error: Could not read image from {image_path}")
        return

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    th = cv2.adaptiveThreshold(gray, 255,
                               cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,
                               blockSize=51, C=10)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 15))
    clean = cv2.morphologyEx(th, cv2.MORPH_CLOSE, kernel)

    cnts, _ = cv2.findContours(clean, cv2.RETR_EXTERNAL,
                               cv2.CHAIN_APPROX_SIMPLE)

    boxes = []
    for c in cnts:
        if cv2.contourArea(c) < min_area:
            continue
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)
        boxes.append(cv2.boundingRect(approx))

    def panel_key(box):
        # Plain key function on purpose: wrapping it in the execution logger
        # would append to the log file and print once per box while sorting.
        x, y, _w, _h = box
        return (y // row_height, x)

    boxes.sort(key=panel_key)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    for i, (x, y, w, h) in enumerate(boxes, 1):
        crop = img[y:y+h, x:x+w]
        cv2.imwrite(os.path.join(output_dir, f"panel_{i:02d}.png"), crop)
|
| 67 |
+
|
| 68 |
+
if __name__ == "__main__":
    # Demo run: create a synthetic page when none exists, then split it.
    sample_page = "comic_page.jpg"
    panels_dir = "output_panels"

    if not os.path.exists(sample_page):
        white = (255, 255, 255)
        canvas = np.zeros((600, 800, 3), dtype=np.uint8)
        cv2.putText(canvas, "Comic Page", (300, 300), cv2.FONT_HERSHEY_SIMPLEX, 1, white, 2, cv2.LINE_AA)

        # Three outlined rectangles stand in for panels.
        panel_corners = (
            ((50, 50), (350, 250)),
            ((400, 50), (750, 250)),
            ((50, 300), (750, 550)),
        )
        for top_left, bottom_right in panel_corners:
            cv2.rectangle(canvas, top_left, bottom_right, white, 5)

        cv2.imwrite(sample_page, canvas)

    if not os.path.exists(panels_dir):
        os.makedirs(panels_dir)
    split_comic_panels(sample_page, panels_dir)
    print("Comic panels split and saved to output_panels directory.")
|
utils/content/log.txt
ADDED
|
File without changes
|
utils/story_management.py
ADDED
|
@@ -0,0 +1,455 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import base64
|
| 4 |
+
import time
|
| 5 |
+
from models.story_generator import StoryGenerator
|
| 6 |
+
from models.comic_image_generator import ComicImageGenerator
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
from PIL import Image
|
| 9 |
+
import io
|
| 10 |
+
from google.generativeai import GenerativeModel, configure
|
| 11 |
+
import config
|
| 12 |
+
from new_image_splitting import AutomatedCollageSplitter
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def log_execution(func):
    """Decorate ``func`` so every successful call is timed and logged.

    One line with the function name, start time, end time and duration is
    appended to ``content/logs.txt`` and printed. If ``func`` raises, the
    exception propagates and nothing is logged.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same call signature that returns ``func``'s result.
    """
    # Local import so this block does not depend on a file-level import.
    import functools

    log_path = 'content/logs.txt'

    # wraps() keeps the original __name__/__doc__ on the decorated function.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        start_str = datetime.fromtimestamp(start_time).strftime('%Y-%m-%d %H:%M:%S')

        result = func(*args, **kwargs)

        end_time = time.time()
        end_str = datetime.fromtimestamp(end_time).strftime('%Y-%m-%d %H:%M:%S')
        duration = end_time - start_time

        entry = f"{func.__name__}, start: {start_str}, end: {end_str}, duration: {duration:.4f}s"

        # Logging is best-effort: a missing or unwritable log directory must
        # not discard the result of a call that already succeeded.
        try:
            os.makedirs(os.path.dirname(log_path), exist_ok=True)
            with open(log_path, 'a') as f:
                f.write(entry + "\n")
        except OSError as log_error:
            print(f"⚠️ Could not write execution log: {log_error}")

        # Also print to see output immediately
        print(entry)

        return result
    return wrapper
|
| 35 |
+
@log_execution
|
| 36 |
+
def save_image_from_data_url(data_url, filename):
    """Decode a base64 ``data:image...`` URL and write the bytes to ``filename``.

    Args:
        data_url: String starting with ``data:image``; everything after the
            first comma is decoded as base64.
        filename: Destination path. Its parent directory is created when the
            path has one.

    Returns:
        bool: True when the file was written; False for a missing or
        malformed data URL or when decoding or writing fails (the error is
        printed).
    """
    if not data_url or not data_url.startswith("data:image"):
        print(f"Invalid data URL: {data_url[:30] if data_url else None}")
        return False

    try:
        image_data = data_url.split(",")[1]
        image_bytes = base64.b64decode(image_data)

        # A bare file name has an empty dirname, and os.makedirs("") raises,
        # so only create a directory when there is one to create.
        target_dir = os.path.dirname(filename)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        with open(filename, "wb") as f:
            f.write(image_bytes)

        return True
    except Exception as e:
        print(f"Error saving image: {e}")
        return False
|
| 55 |
+
@log_execution
|
| 56 |
+
def generate_direct_comic(
    user_prompt,
    image_style=None,
    num_scenes: int = 12,
    enrich_prompt=True,
    narration_length=3,
    age_group: str | None = None,
):
    """
    Generate a comic directly from a user prompt with optional story enhancement.

    NOTE: The system is now standardized to generate exactly 12 scenes arranged in a 3×4 grid. Any value passed into
    `num_scenes` will be overridden to maintain this consistency.

    Args:
        user_prompt: The user's description of what they want in the comic.
            Must be non-empty; it is stripped and cut to 10000 characters
            (plus "...") when longer.
        image_style: Style to use for comic generation
        num_scenes: Ignored—kept for backward compatibility. Always forced to 12.
        enrich_prompt: Whether to enhance the prompt via StoryGenerator. If
            enhancement fails or returns nothing, the original prompt is used.
        narration_length: Length of narration (1=Brief, 2=Medium, 3=Detailed, 4=Very Detailed).
            Replaced by the age-group mapping below when `age_group` is one of its keys.
        age_group: Target age group that determines the language complexity (e.g., "6-8 (Kids)")

    Returns:
        A 4-tuple:
        comic_output_path: Path to the generated comic image
        save_path_display: Display message about where the image was saved
        status_display: Status message about generation success/failure
        narration: Generated story narration ("" when narration failed)

        On any error the tuple is (None, "", error message, "") and the
        traceback is printed; no exception escapes this function.
    """
    start_time = time.time()

    try:
        if not user_prompt or len(user_prompt.strip()) == 0:
            raise ValueError("User prompt cannot be empty")

        user_prompt = user_prompt.strip()
        if len(user_prompt) > 10000:
            user_prompt = user_prompt[:10000] + "..."
            print("⚠️ Prompt truncated to prevent processing issues")

        # Caller-supplied value is deliberately discarded (see docstring).
        num_scenes = 12

        # Step 1: choose the prompt text; every failure path falls back to
        # the user's own prompt.
        if enrich_prompt:
            try:
                story_generator = StoryGenerator()

                print(f"Enhancing user prompt: {user_prompt[:100]}...")
                enhanced_description = story_generator.enhance_user_story(user_prompt)

                if enhanced_description and len(enhanced_description.strip()) > 0:
                    print(f"Prompt successfully enhanced")
                    final_prompt = enhanced_description
                else:
                    print("⚠️ Enhancement failed, using original prompt")
                    final_prompt = user_prompt

            except Exception as enhancement_error:
                print(f"⚠️ Story enhancement failed: {enhancement_error}")
                print("Using original prompt without enhancement")
                final_prompt = user_prompt
        else:
            print(f"Using original prompt without enhancement: {user_prompt[:100]}...")
            final_prompt = user_prompt

        # Step 2: create the image generator; failure is logged and re-raised
        # to the outer handler.
        try:
            image_generator = ComicImageGenerator()
        except Exception as generator_error:
            print(f"❌ Failed to create image generator: {generator_error}")
            raise generator_error

        # Step 3: build an output directory name from the first 20 prompt
        # characters (anything other than alphanumerics, space, "_" and "-"
        # becomes "_") plus a timestamp.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        safe_title = "".join(c if c.isalnum() or c in [' ', '_', '-'] else '_' for c in user_prompt[:20])
        safe_title = safe_title.strip().replace(" ", "_")
        if not safe_title:
            safe_title = "story"

        image_dir = f"Story-Generation/generated/images/{safe_title}_{timestamp}"
        try:
            os.makedirs(image_dir, exist_ok=True)
        except Exception as dir_error:
            # Fall back to a directory relative to the working directory.
            print(f"❌ Failed to create directory: {dir_error}")
            image_dir = f"{safe_title}_{timestamp}"
            os.makedirs(image_dir, exist_ok=True)

        comic_output_path = os.path.join(image_dir, f"story_scenes.png")

        # Step 4: generate the comic image.
        try:
            comic_image, data_url = image_generator.generate_comic(
                {
                    "title": safe_title,
                    "description": final_prompt,
                    "num_scenes": num_scenes
                },
                output_path=comic_output_path,
                style=image_style
            )

            if comic_image is None and data_url is None:
                raise ValueError("Comic generation returned no results")

        except Exception as generation_error:
            print(f"❌ Comic generation failed: {generation_error}")
            raise generation_error

        # Step 5: a recognized age group overrides the narration_length
        # argument; unknown labels leave it unchanged.
        if age_group:
            age_to_length = {
                "3-5 (Pre-school)": 1,
                "6-8 (Kids)": 2,
                "9-12 (Pre-teen)": 3,
                "13-18 (Teen)": 3,
                "18+ (Adult)": 4,
            }

            mapped_length = age_to_length.get(age_group)
            if mapped_length is not None:
                narration_length = mapped_length

        # Step 6: narration is best-effort and only attempted when the image
        # file exists on disk; a failure leaves narration empty.
        narration = ""
        if comic_output_path and os.path.exists(comic_output_path):
            try:
                narration = generate_image_narration(
                    comic_output_path, narration_length, age_group=age_group
                )
                if narration:
                    narration_path = os.path.join(image_dir, "narration.txt")
                    with open(narration_path, "w", encoding="utf-8") as f:
                        f.write(narration)
                    print(f"💾 Narration saved to: {narration_path}")
            except Exception as narration_error:
                print(f"⚠️ Narration generation failed: {narration_error}")
                narration = ""

        end_time = time.time()
        generation_time = end_time - start_time

        try:
            absolute_path = os.path.abspath(comic_output_path)
        except Exception:
            absolute_path = comic_output_path

        enhancement_status = "with AI enhancement" if enrich_prompt else "using original prompt"
        # NOTE(review): the opening <div> tag in the message below has no
        # closing quote or ">" as written — confirm the intended markup.
        save_message = f"""
<div style="padding: 10px; border: 1px solid
<p><strong>💾 Story Scenes Saved Successfully</strong></p>
<p>Generated {enhancement_status}</p>
<p>Location: <code>{absolute_path}</code></p>
<p>You can find all your saved story images in the images directory.</p>
</div>
"""

        return (
            comic_output_path,
            save_message,
            f"✅ Story scenes generated successfully in {generation_time:.2f} seconds!",
            narration
        )

    except Exception as e:
        # Catch-all boundary: report the failure through the status string
        # instead of raising to the caller.
        import traceback
        error_details = traceback.format_exc()
        print(f"❌ Error generating story scenes: {e}")
        print(f"Full error details: {error_details}")

        return None, "", f"❌ Error generating story scenes: {str(e)}", ""
|
| 219 |
+
|
| 220 |
+
@log_execution
|
| 221 |
+
def extract_comic_scenes(comic_image_path, num_scenes):
    """
    Split a story image into individual scene files with AutomatedCollageSplitter.

    Scenes are written to a ``<image name>_segments`` directory next to the
    source image.

    Args:
        comic_image_path: Path to the story image.
        num_scenes: Unused; kept so existing callers keep working.

    Returns:
        tuple: ``(scene_info, message)``. ``scene_info`` is the value returned
        by ``split_collage`` (each entry is read by its ``'path'`` key here),
        or ``[]`` when the image is missing, nothing was found, or an error
        occurred. ``message`` is an HTML summary on success and a plain error
        string otherwise.
    """
    image_missing = not comic_image_path or not os.path.exists(comic_image_path)
    if image_missing:
        return [], "Error: Comic image not found."

    try:
        parent_dir, file_name = os.path.split(comic_image_path)
        stem, _extension = os.path.splitext(file_name)
        segments_dir = os.path.join(parent_dir, f"{stem}_segments")

        scene_info = AutomatedCollageSplitter().split_collage(
            image_path=comic_image_path,
            output_dir=segments_dir,
            debug=False
        )

        if not scene_info:
            return [], "Automated splitting failed to find any panels."

        # Reading 'path' from every record also surfaces malformed entries.
        scene_count = len([record['path'] for record in scene_info])

        save_message = f"""
<div style="padding: 10px; border: 1px solid
<p><strong>💾 Individual Scenes Saved Successfully</strong></p>
<p>Found and saved {scene_count} scenes.</p>
<p>Location: <code>{os.path.abspath(segments_dir)}</code></p>
</div>
"""

        return scene_info, save_message

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"❌ Error extracting scenes: {e}")
        print(f"Full error details: {error_details}")
        return [], f"Error extracting scenes: {e}"
|
| 272 |
+
@log_execution
|
| 273 |
+
def generate_image_narration(image_path, narration_length=3, age_group: str | None = None) -> str:
    """
    Generate story narration for the comic image by sending it, with a text
    prompt, to a Gemini model.

    The response is cleaned line by line (blank lines, instruction-like lines
    and "Scene N:" prefixes are removed). If the cleaned text is shorter than
    the minimum for the selected length, a second, text-only request asks the
    model to expand it.

    Args:
        image_path: Path to the generated comic image
        narration_length: Length of narration (1=Brief, 2=Medium, 3=Detailed, 4=Very Detailed).
            Values outside 1-4 fall back to the level-3 settings.
        age_group: Optional target age group string that informs the language difficulty.
            Unrecognized or missing values use generic audience wording.

    Returns:
        str: Generated narration text, or empty string if the model returned
        nothing or any error occurred (errors are printed, not raised).
    """
    try:
        configure(api_key=config.GOOGLE_API_KEY)

        model = GenerativeModel('gemini-2.5-flash')

        comic_image = Image.open(image_path)

        # Per-length settings: "description" is used in log output,
        # "min_chars" is the threshold that triggers the expansion request,
        # "style" is inserted into the prompts.
        length_specs = {
            1: {
                "description": "Quick scene summary",
                "min_chars": 100,
                "style": "concise, 2-3 lines per scene - capture the essence and emotion of each moment"
            },
            2: {
                "description": "Brief scene descriptions",
                "min_chars": 200,
                "style": "short and engaging, 2-3 lines per scene - focus on key actions and feelings"
            },
            3: {
                "description": "Quick narrative",
                "min_chars": 300,
                "style": "brief but vivid, 2-3 lines per scene - capture emotion, action, and atmosphere"
            },
            4: {
                "description": "Concise story flow",
                "min_chars": 400,
                "style": "quick and immersive, 2-3 lines per scene - paint rich pictures efficiently"
            }
        }

        spec = length_specs.get(narration_length, length_specs[3])

        # Audience wording keyed by the exact age-group label.
        age_group_specs = {
            "3-5 (Pre-school)": {
                "audience": "very young children ages 3-5",
                "language": "Narrate like a playful adult reading aloud. Use extremely simple words, cheerful tone, repetition, and lots of sound effects and emotions."
            },
            "6-8 (Kids)": {
                "audience": "children ages 6-8",
                "language": "Use an enthusiastic and descriptive tone. Keep vocabulary simple but expressive. Include action, feelings, and dialogue they can relate to."
            },
            "9-12 (Pre-teen)": {
                "audience": "pre-teens ages 9-12",
                "language": "Use a lively, adventurous tone with accessible vocabulary. Add humor, suspense, and vivid action to keep engagement high."
            },
            "13-18 (Teen)": {
                "audience": "teenagers ages 13-18",
                "language": "Use present-tense narration with emotional depth and realism. Include internal thoughts, intense scenes, and natural dialogue."
            },
            "18+ (Adult)": {
                "audience": "adults",
                "language": "Use mature, nuanced storytelling. Allow complex emotions, sensory detail, and deeper themes to emerge naturally."
            }
        }

        selected_age_spec = age_group_specs.get(age_group) if age_group else None

        audience_desc = selected_age_spec['audience'] if selected_age_spec else "a general audience"
        language_guideline = selected_age_spec['language'] if selected_age_spec else "Use vivid, engaging language appropriate to the story."

        narration_mode = "Provide quick, engaging narration with 2-3 lines per scene. Each scene gets 2-3 concise, standalone sentences that capture the key moment, emotion, and atmosphere. Do NOT prefix scenes with 'Scene'"

        narration_prompt = f"""
You are a concise narrator creating quick scene descriptions for {audience_desc}.

**NARRATION STYLE:**
{narration_mode}

**FORMAT REQUIREMENTS:**
• Identify each distinct scene and write 2-3 sentences (2 minimum, 3 maximum) **without** any numeric prefixes or headings.
• First sentence: What's happening (action/event)
• Second sentence: How it feels or what it means (emotion/impact)
• Optional third sentence: Additional detail or atmosphere
• Keep each sentence under 20 words
• Use present tense and active voice
• Separate scenes with a blank line for readability
• Make it flow naturally from scene to scene

**EXAMPLE FORMAT:**
The cat stretches lazily in the morning sunlight streaming through the window. A new day full of adventure awaits her curious spirit.

She bounds toward the garden gate with graceful excitement. Something magical calls to her curious heart.

**STYLE:** {spec['style']}
**LANGUAGE:** {language_guideline}

Now provide your quick, 2-line narration for each scene. Remember: NO headings or prefixes, just the narration lines separated by blank lines:
"""

        print(f"🔍 Generating {spec['description']} using Gemini Vision...")

        # First request: prompt text plus the image.
        response = model.generate_content([narration_prompt, comic_image])
        narration = response.text.strip()

        if narration:
            # Drop quote/backtick characters wrapping the whole response.
            narration = narration.strip('"\'`')

            lines = narration.split('\n')
            clean_lines = []
            import re
            for line in lines:
                line = line.strip()
                if not line:
                    continue

                # Skip lines that begin with one of these instruction-like
                # phrases.
                if line.startswith(('Note:', 'Remember:', 'Format:', 'Your narration should:', 'Think of this as')):
                    continue

                # Remove a leading "Scene <number>" label and its separator.
                line = re.sub(r'^Scene\s*\d+\s*[:.\-]\s*', '', line, flags=re.IGNORECASE)

                clean_lines.append(line)

            # Every kept line becomes its own paragraph.
            narration = '\n\n'.join(clean_lines)

            if len(narration) < spec['min_chars']:
                print(f"⚠️ Initial narration too short for selected length, requesting more detail...")

                detailed_prompt = f"""
The narration needs to be more detailed while keeping the concise 2-line-per-scene format.

CURRENT NARRATION:
{narration}

Expand this following the same format:
• **Style:** {narration_mode}
• **Format:** 2-3 lines per scene, but make them more vivid and detailed
• **Voice:** {spec['style']}
• **Language:** {language_guideline}
• **Focus:** Keep it concise but add more sensory details, emotion, and atmosphere

Provide the enhanced 2-3-lines-per-scene narration:
"""

                # Second request is text-only: the image is not sent again,
                # and the reply does not go through the line cleanup above.
                response = model.generate_content(detailed_prompt)
                expanded_narration = response.text.strip()
                # Accept the expansion only if it is actually longer.
                if expanded_narration and len(expanded_narration) > len(narration):
                    narration = expanded_narration.strip('"\'`')

            print(f"✅ Generated {spec['description']}: {len(narration)} characters")
            return narration

    except Exception as e:
        print(f"⚠️ Failed to generate narration: {e}")

    # Reached when the model returned empty text or an exception was caught.
    return ""
|
| 430 |
+
@log_execution
|
| 431 |
+
def load_narration_from_file(comic_image_path):
    """
    Read the ``narration.txt`` stored in the same directory as a comic image.

    Args:
        comic_image_path: Path to the comic image.

    Returns:
        str: The file's text with surrounding whitespace stripped, or an empty
        string when the path is empty, the file does not exist, or reading
        fails (the error is printed).
    """
    try:
        if comic_image_path:
            containing_dir = os.path.dirname(comic_image_path)
            candidate = os.path.join(containing_dir, "narration.txt")

            if os.path.exists(candidate):
                with open(candidate, "r", encoding="utf-8") as handle:
                    return handle.read().strip()
        else:
            return ""
    except Exception as e:
        print(f"⚠️ Could not load narration: {e}")

    return ""
|