yashgori20 committed on
Commit
dd14242
·
1 Parent(s): 1dc9cc3
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9
2
+
3
+ WORKDIR /code
4
+
5
+ COPY ./requirements.txt /code/requirements.txt
6
+
7
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
+
9
+ COPY . /code/
10
+
11
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -3,9 +3,8 @@ title: SEO Report Generator
3
  emoji: 🔍
4
  colorFrom: blue
5
  colorTo: green
6
- sdk: streamlit
7
- sdk_version: 1.28.1
8
- app_file: app.py
9
  pinned: false
10
  license: mit
11
  ---
 
3
  emoji: 🔍
4
  colorFrom: blue
5
  colorTo: green
6
+ sdk: docker
7
+ app_port: 7860
 
8
  pinned: false
9
  license: mit
10
  ---
requirements.txt CHANGED
@@ -7,4 +7,5 @@ jinja2
7
  validators
8
  urllib3
9
  lxml
10
- uuid
 
 
7
  validators
8
  urllib3
9
  lxml
10
+ uuid
11
+ reportlab
simple_pdf_generator.py CHANGED
@@ -17,7 +17,7 @@ class SimplePDFGenerator:
17
 
18
  def generate_pdf(self, html_content: str) -> bytes:
19
  """
20
- Generate PDF from HTML content using simple text-based approach
21
  """
22
  if not self.available:
23
  raise ImportError("PDF generation requires reportlab: pip install reportlab")
@@ -25,59 +25,78 @@ class SimplePDFGenerator:
25
  # Import reportlab components
26
  from reportlab.pdfgen import canvas
27
  from reportlab.lib.pagesizes import letter, A4
28
- from reportlab.lib.styles import getSampleStyleSheet
29
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
30
  from reportlab.lib.units import inch
 
31
  from bs4 import BeautifulSoup
 
32
 
33
- # Parse HTML and extract text content
34
  soup = BeautifulSoup(html_content, 'html.parser')
35
 
36
- # Remove style and script tags
37
- for tag in soup(["style", "script"]):
38
- tag.decompose()
39
-
40
  # Create PDF buffer
41
  buffer = io.BytesIO()
42
 
43
- # Create PDF document
44
- doc = SimpleDocTemplate(buffer, pagesize=A4)
 
 
 
 
 
 
 
 
 
45
  styles = getSampleStyleSheet()
46
- story = []
47
 
48
- # Extract title
49
- title_tag = soup.find('title')
50
- title = title_tag.text if title_tag else "SEO Report"
 
 
 
 
 
 
51
 
52
- # Add title
53
- story.append(Paragraph(title, styles['Title']))
54
- story.append(Spacer(1, 12))
 
 
 
 
 
55
 
56
- # Extract main content sections
57
- sections = soup.find_all(['h1', 'h2', 'h3', 'p', 'div'])
 
 
 
 
 
 
58
 
59
- for section in sections:
60
- if section.name in ['h1', 'h2', 'h3']:
61
- # Headers
62
- text = section.get_text().strip()
63
- if text:
64
- if section.name == 'h1':
65
- story.append(Paragraph(text, styles['Heading1']))
66
- elif section.name == 'h2':
67
- story.append(Paragraph(text, styles['Heading2']))
68
- else:
69
- story.append(Paragraph(text, styles['Heading3']))
70
- story.append(Spacer(1, 6))
71
-
72
- elif section.name in ['p', 'div']:
73
- # Paragraphs
74
- text = section.get_text().strip()
75
- if text and len(text) > 20: # Skip very short text
76
- try:
77
- story.append(Paragraph(text[:500], styles['Normal'])) # Limit length
78
- story.append(Spacer(1, 6))
79
- except:
80
- pass # Skip problematic content
81
 
82
  # Build PDF
83
  doc.build(story)
@@ -85,6 +104,67 @@ class SimplePDFGenerator:
85
  # Get PDF data
86
  buffer.seek(0)
87
  return buffer.getvalue()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  def create_browser_pdf_instructions() -> str:
90
  """
 
17
 
18
  def generate_pdf(self, html_content: str) -> bytes:
19
  """
20
+ Generate PDF from HTML content with better formatting
21
  """
22
  if not self.available:
23
  raise ImportError("PDF generation requires reportlab: pip install reportlab")
 
25
  # Import reportlab components
26
  from reportlab.pdfgen import canvas
27
  from reportlab.lib.pagesizes import letter, A4
28
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
29
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
30
  from reportlab.lib.units import inch
31
+ from reportlab.lib.colors import Color, black, blue, green, red
32
  from bs4 import BeautifulSoup
33
+ import re
34
 
35
+ # Parse HTML and extract content
36
  soup = BeautifulSoup(html_content, 'html.parser')
37
 
 
 
 
 
38
  # Create PDF buffer
39
  buffer = io.BytesIO()
40
 
41
+ # Create PDF document with margins
42
+ doc = SimpleDocTemplate(
43
+ buffer,
44
+ pagesize=A4,
45
+ topMargin=0.75*inch,
46
+ bottomMargin=0.75*inch,
47
+ leftMargin=0.75*inch,
48
+ rightMargin=0.75*inch
49
+ )
50
+
51
+ # Define custom styles
52
  styles = getSampleStyleSheet()
 
53
 
54
+ # Custom styles
55
+ title_style = ParagraphStyle(
56
+ 'CustomTitle',
57
+ parent=styles['Heading1'],
58
+ fontSize=24,
59
+ textColor=black,
60
+ spaceAfter=20,
61
+ alignment=1 # Center
62
+ )
63
 
64
+ header_style = ParagraphStyle(
65
+ 'CustomHeader',
66
+ parent=styles['Heading2'],
67
+ fontSize=16,
68
+ textColor=blue,
69
+ spaceBefore=15,
70
+ spaceAfter=10
71
+ )
72
 
73
+ subheader_style = ParagraphStyle(
74
+ 'CustomSubHeader',
75
+ parent=styles['Heading3'],
76
+ fontSize=14,
77
+ textColor=black,
78
+ spaceBefore=10,
79
+ spaceAfter=8
80
+ )
81
 
82
+ story = []
83
+
84
+ # Add report title
85
+ title = "SEO Analysis Report"
86
+ url_elem = soup.find(string=re.compile(r'https?://'))
87
+ if url_elem:
88
+ url = re.search(r'https?://[^\s]+', str(url_elem))
89
+ if url:
90
+ title = f"SEO Analysis Report - {url.group()}"
91
+
92
+ story.append(Paragraph(title, title_style))
93
+ story.append(Spacer(1, 20))
94
+
95
+ # Extract and format content systematically
96
+ self._extract_executive_summary(soup, story, header_style, styles['Normal'])
97
+ self._extract_technical_seo(soup, story, header_style, subheader_style, styles['Normal'])
98
+ self._extract_content_audit(soup, story, header_style, subheader_style, styles['Normal'])
99
+ self._extract_recommendations(soup, story, header_style, styles['Normal'])
 
 
 
 
100
 
101
  # Build PDF
102
  doc.build(story)
 
104
  # Get PDF data
105
  buffer.seek(0)
106
  return buffer.getvalue()
107
+
108
+ def _extract_executive_summary(self, soup, story, header_style, normal_style):
109
+ """Extract executive summary section"""
110
+ exec_section = soup.find(string=re.compile(r'Executive Summary', re.I))
111
+ if exec_section:
112
+ story.append(Paragraph("Executive Summary", header_style))
113
+
114
+ # Look for health score
115
+ health_text = soup.find(string=re.compile(r'Overall SEO Health', re.I))
116
+ if health_text:
117
+ parent = health_text.find_parent()
118
+ if parent:
119
+ text = parent.get_text().strip()
120
+ story.append(Paragraph(text, normal_style))
121
+ story.append(Spacer(1, 10))
122
+
123
+ def _extract_technical_seo(self, soup, story, header_style, subheader_style, normal_style):
124
+ """Extract technical SEO section"""
125
+ tech_section = soup.find(string=re.compile(r'Technical SEO', re.I))
126
+ if tech_section:
127
+ story.append(Paragraph("Technical SEO Analysis", header_style))
128
+
129
+ # Look for performance scores
130
+ perf_elements = soup.find_all(string=re.compile(r'Performance Score|Mobile|Desktop', re.I))
131
+ for elem in perf_elements[:3]: # Limit results
132
+ parent = elem.find_parent()
133
+ if parent:
134
+ text = parent.get_text().strip()
135
+ if len(text) > 10 and len(text) < 200:
136
+ story.append(Paragraph(text, normal_style))
137
+ story.append(Spacer(1, 10))
138
+
139
+ def _extract_content_audit(self, soup, story, header_style, subheader_style, normal_style):
140
+ """Extract content audit section"""
141
+ content_section = soup.find(string=re.compile(r'Content Audit', re.I))
142
+ if content_section:
143
+ story.append(Paragraph("Content Audit", header_style))
144
+
145
+ # Look for content metrics
146
+ content_elements = soup.find_all(string=re.compile(r'Pages Analyzed|Metadata|Word Count', re.I))
147
+ for elem in content_elements[:3]: # Limit results
148
+ parent = elem.find_parent()
149
+ if parent:
150
+ text = parent.get_text().strip()
151
+ if len(text) > 10 and len(text) < 200:
152
+ story.append(Paragraph(text, normal_style))
153
+ story.append(Spacer(1, 10))
154
+
155
+ def _extract_recommendations(self, soup, story, header_style, normal_style):
156
+ """Extract recommendations section"""
157
+ rec_section = soup.find(string=re.compile(r'Recommendation', re.I))
158
+ if rec_section:
159
+ story.append(Paragraph("Recommendations", header_style))
160
+
161
+ # Look for recommendation items
162
+ rec_elements = soup.find_all('li')
163
+ for elem in rec_elements[:5]: # Top 5 recommendations
164
+ text = elem.get_text().strip()
165
+ if len(text) > 15:
166
+ story.append(Paragraph(f"• {text}", normal_style))
167
+ story.append(Spacer(1, 10))
168
 
169
  def create_browser_pdf_instructions() -> str:
170
  """
templates/index.html CHANGED
@@ -1,5 +1,6 @@
1
  <!DOCTYPE html>
2
  <html lang="en">
 
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
@@ -17,7 +18,7 @@
17
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
18
  min-height: 100vh;
19
  background-image: radial-gradient(circle at 20% 20%, rgba(120, 119, 198, 0.3) 0%, transparent 50%),
20
- radial-gradient(circle at 80% 80%, rgba(255, 119, 198, 0.3) 0%, transparent 50%);
21
  }
22
 
23
  .container {
@@ -42,7 +43,7 @@
42
  justify-content: center;
43
  gap: 12px;
44
  }
45
-
46
  .logo-icon {
47
  background: rgba(255, 255, 255, 0.1);
48
  border-radius: 12px;
@@ -53,13 +54,13 @@
53
  backdrop-filter: blur(5px);
54
  border: 1px solid rgba(255, 255, 255, 0.2);
55
  }
56
-
57
  .logo-svg {
58
  width: 32px;
59
  height: 32px;
60
  filter: brightness(0) invert(1);
61
  }
62
-
63
  .logo-text {
64
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
65
  -webkit-background-clip: text;
@@ -107,7 +108,11 @@
107
  .form-input:focus {
108
  outline: none;
109
  border-color: #374151;
110
- background: rgba(255, 255, 255, 0.08);
 
 
 
 
111
  }
112
 
113
  .form-input::placeholder {
@@ -220,8 +225,13 @@
220
  }
221
 
222
  @keyframes spin {
223
- 0% { transform: rotate(0deg); }
224
- 100% { transform: rotate(360deg); }
 
 
 
 
 
225
  }
226
 
227
  .error-message {
@@ -238,17 +248,18 @@
238
  .container {
239
  padding: 20px 16px;
240
  }
241
-
242
  .main-card {
243
  padding: 24px;
244
  }
245
-
246
  .logo {
247
  font-size: 36px;
248
  }
249
  }
250
  </style>
251
  </head>
 
252
  <body>
253
  <div class="container">
254
  <div class="header">
@@ -256,7 +267,7 @@
256
  <span class="logo-icon">
257
  <img src="/static/logo.svg" alt="Thinkly Logo" class="logo-svg">
258
  </span>
259
- <span class="logo-text">Thinkly SEO</span>
260
  </h1>
261
  <p class="subtitle">Professional SEO Analysis & Reporting</p>
262
  </div>
@@ -288,14 +299,13 @@
288
  <form id="seoForm">
289
  <div class="form-group">
290
  <label class="form-label" for="url">Website URL</label>
291
- <input type="url" id="url" name="url" class="form-input"
292
- placeholder="https://example.com" required>
293
  </div>
294
 
295
  <div class="form-group">
296
  <label class="form-label" for="competitors">Competitor URLs (Optional)</label>
297
- <textarea id="competitors" name="competitors" class="form-input textarea"
298
- placeholder="https://competitor1.com&#10;https://competitor2.com&#10;One URL per line"></textarea>
299
  </div>
300
 
301
  <button type="submit" class="generate-btn" id="generateBtn">
@@ -314,30 +324,30 @@
314
  </div>
315
 
316
  <script>
317
- document.getElementById('seoForm').addEventListener('submit', async function(e) {
318
  e.preventDefault();
319
-
320
  const url = document.getElementById('url').value.trim();
321
  const competitors = document.getElementById('competitors').value
322
  .split('\n')
323
  .map(c => c.trim())
324
  .filter(c => c);
325
-
326
  const loadingOverlay = document.getElementById('loadingOverlay');
327
  const errorMessage = document.getElementById('errorMessage');
328
  const loadingText = document.getElementById('loadingText');
329
  const loadingSubtext = document.getElementById('loadingSubtext');
330
-
331
  errorMessage.style.display = 'none';
332
  loadingOverlay.style.display = 'flex';
333
-
334
  const loadingMessages = [
335
  { text: 'Analyzing technical SEO...', subtext: 'Checking PageSpeed insights' },
336
  { text: 'Performing content audit...', subtext: 'Crawling website content' },
337
  { text: 'Analyzing competitors...', subtext: 'Comparing performance metrics' },
338
  { text: 'Generating professional report...', subtext: 'Creating charts and visualizations' }
339
  ];
340
-
341
  let messageIndex = 0;
342
  const messageInterval = setInterval(() => {
343
  if (messageIndex < loadingMessages.length) {
@@ -346,7 +356,7 @@
346
  messageIndex++;
347
  }
348
  }, 3000);
349
-
350
  try {
351
  const response = await fetch('/generate', {
352
  method: 'POST',
@@ -358,12 +368,12 @@
358
  competitors: competitors
359
  })
360
  });
361
-
362
  const data = await response.json();
363
-
364
  clearInterval(messageInterval);
365
  loadingOverlay.style.display = 'none';
366
-
367
  if (data.success) {
368
  window.location.href = data.redirect_url;
369
  } else {
@@ -379,4 +389,5 @@
379
  });
380
  </script>
381
  </body>
 
382
  </html>
 
1
  <!DOCTYPE html>
2
  <html lang="en">
3
+
4
  <head>
5
  <meta charset="UTF-8">
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
 
18
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
19
  min-height: 100vh;
20
  background-image: radial-gradient(circle at 20% 20%, rgba(120, 119, 198, 0.3) 0%, transparent 50%),
21
+ radial-gradient(circle at 80% 80%, rgba(255, 119, 198, 0.3) 0%, transparent 50%);
22
  }
23
 
24
  .container {
 
43
  justify-content: center;
44
  gap: 12px;
45
  }
46
+
47
  .logo-icon {
48
  background: rgba(255, 255, 255, 0.1);
49
  border-radius: 12px;
 
54
  backdrop-filter: blur(5px);
55
  border: 1px solid rgba(255, 255, 255, 0.2);
56
  }
57
+
58
  .logo-svg {
59
  width: 32px;
60
  height: 32px;
61
  filter: brightness(0) invert(1);
62
  }
63
+
64
  .logo-text {
65
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
66
  -webkit-background-clip: text;
 
108
  .form-input:focus {
109
  outline: none;
110
  border-color: #374151;
111
+ background: rgba(255, 255, 255, 0.15);
112
+ }
113
+
114
+ .form-input:not(:placeholder-shown) {
115
+ background: rgba(255, 255, 255, 0.15);
116
  }
117
 
118
  .form-input::placeholder {
 
225
  }
226
 
227
  @keyframes spin {
228
+ 0% {
229
+ transform: rotate(0deg);
230
+ }
231
+
232
+ 100% {
233
+ transform: rotate(360deg);
234
+ }
235
  }
236
 
237
  .error-message {
 
248
  .container {
249
  padding: 20px 16px;
250
  }
251
+
252
  .main-card {
253
  padding: 24px;
254
  }
255
+
256
  .logo {
257
  font-size: 36px;
258
  }
259
  }
260
  </style>
261
  </head>
262
+
263
  <body>
264
  <div class="container">
265
  <div class="header">
 
267
  <span class="logo-icon">
268
  <img src="/static/logo.svg" alt="Thinkly Logo" class="logo-svg">
269
  </span>
270
+ <span class="logo-text">Thinkly Labs SEO</span>
271
  </h1>
272
  <p class="subtitle">Professional SEO Analysis & Reporting</p>
273
  </div>
 
299
  <form id="seoForm">
300
  <div class="form-group">
301
  <label class="form-label" for="url">Website URL</label>
302
+ <input type="url" id="url" name="url" class="form-input" placeholder="https://example.com" required>
 
303
  </div>
304
 
305
  <div class="form-group">
306
  <label class="form-label" for="competitors">Competitor URLs (Optional)</label>
307
+ <textarea id="competitors" name="competitors" class="form-input textarea"
308
+ placeholder="https://competitor1.com&#10;https://competitor2.com&#10;One URL per line"></textarea>
309
  </div>
310
 
311
  <button type="submit" class="generate-btn" id="generateBtn">
 
324
  </div>
325
 
326
  <script>
327
+ document.getElementById('seoForm').addEventListener('submit', async function (e) {
328
  e.preventDefault();
329
+
330
  const url = document.getElementById('url').value.trim();
331
  const competitors = document.getElementById('competitors').value
332
  .split('\n')
333
  .map(c => c.trim())
334
  .filter(c => c);
335
+
336
  const loadingOverlay = document.getElementById('loadingOverlay');
337
  const errorMessage = document.getElementById('errorMessage');
338
  const loadingText = document.getElementById('loadingText');
339
  const loadingSubtext = document.getElementById('loadingSubtext');
340
+
341
  errorMessage.style.display = 'none';
342
  loadingOverlay.style.display = 'flex';
343
+
344
  const loadingMessages = [
345
  { text: 'Analyzing technical SEO...', subtext: 'Checking PageSpeed insights' },
346
  { text: 'Performing content audit...', subtext: 'Crawling website content' },
347
  { text: 'Analyzing competitors...', subtext: 'Comparing performance metrics' },
348
  { text: 'Generating professional report...', subtext: 'Creating charts and visualizations' }
349
  ];
350
+
351
  let messageIndex = 0;
352
  const messageInterval = setInterval(() => {
353
  if (messageIndex < loadingMessages.length) {
 
356
  messageIndex++;
357
  }
358
  }, 3000);
359
+
360
  try {
361
  const response = await fetch('/generate', {
362
  method: 'POST',
 
368
  competitors: competitors
369
  })
370
  });
371
+
372
  const data = await response.json();
373
+
374
  clearInterval(messageInterval);
375
  loadingOverlay.style.display = 'none';
376
+
377
  if (data.success) {
378
  window.location.href = data.redirect_url;
379
  } else {
 
389
  });
390
  </script>
391
  </body>
392
+
393
  </html>
templates/report.html CHANGED
@@ -20,7 +20,7 @@
20
  }
21
 
22
  .header {
23
- background: rgba(10, 10, 10, 0.4);
24
  border-bottom: 1px solid rgba(255, 255, 255, 0.1);
25
  padding: 20px 0;
26
  position: sticky;
 
20
  }
21
 
22
  .header {
23
+ background: #00000066;
24
  border-bottom: 1px solid rgba(255, 255, 255, 0.1);
25
  padding: 20px 0;
26
  position: sticky;