Coverage for gws-app/gws/lib/vendor/dog/builder.py: 0%

638 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-16 23:09 +0200

1from typing import Optional 

2 

3import re 

4import os 

5import json 

6import fnmatch 

7import shutil 

8import mimetypes 

9 

10from . import util, template, markdown 

11from .options import Options 

12 

13 

class ParseNode(util.Data):
    """Common base type for the nodes that make up a section body."""

16 

17 

class MarkdownNode(ParseNode):
    """Section body node that wraps a single parsed markdown element."""

    # the markdown element carried by this node
    el: markdown.Element

20 

21 

class SectionNode(ParseNode):
    """Section body node that refers to another section by its sid."""

    # sid of the referenced section
    sid: str

24 

25 

class EmbedNode(ParseNode):
    """Placeholder for sections embedded by a (possibly wildcard) sid.

    ``Builder.make_tree`` replaces each embed node with one ``SectionNode``
    per section the sid resolves to.
    """

    # NOTE(review): not read anywhere in this module - confirm the intended use
    items: list[str]
    # sid (may contain '*') of the sections to embed
    sid: str

29 

30 

class TocNode(ParseNode):
    """Section body node that renders as a local table of contents."""

    # sids (may contain '*') as written in the source
    items: list[str]
    # concrete section sids, filled in by ``Builder.expand_toc_nodes``
    sids: list[str]
    # how many levels of sub-sections the rendered list includes
    depth: int

35 

36 

class RawHtmlNode(ParseNode):
    """Section body node whose text is copied to the output unchanged."""

    # markup emitted as is by the generators
    html: str

39 

40 

class Section(util.Data):
    """One documentation section: a heading plus the nodes that follow it."""

    # identity and position in the section tree
    sid: str
    level: int  # 0 for the root section, otherwise the number of '/' in the sid
    status: str  # tree walk marker used by ``Builder.make_tree``: '', 'walk' or 'ok'
    subSids: list[str]
    parentSid: str

    # file the section was parsed from
    sourcePath: str

    # the heading in its different forms
    headText: str
    headHtml: str
    headHtmlLink: str
    headNode: MarkdownNode
    headLevel: int

    # body nodes, starting with the heading node
    nodes: list[ParseNode]

    # output location, computed by ``Builder.add_url_and_path``
    filePath: str
    htmlUrl: str
    htmlBaseUrl: str
    htmlId: str

62 

63 

class FileBuffer(util.Data):
    """Accumulates the output destined for a single file."""

    # sids of the sections that contributed chunks, in order of arrival
    sids: list[str]
    # rendered fragments, in output order
    chunks: list[str]
    # final file content, produced by the generator's ``flush``
    content: str

68 

69 

70class Builder: 

71 options: Options 

72 markdownParser: markdown.Markdown 

73 htmlGenerator: 'HTMLGenerator' 

74 mardownGenerator: 'MarkdownGenerator' 

75 docPaths: set[str] 

76 assetPaths: set[str] 

77 sectionMap: dict[str, Section] 

78 sectionNotFound: set[str] 

79 assetMap: dict[str, str] 

80 

def __init__(self, opts: Options | dict):
    """Create a builder configured from an ``Options`` object or a plain dict.

    The given values are copied onto a fresh ``Options`` instance, the log
    level is derived from ``options.debug`` and the optional include template
    is read up front.
    """
    self.options = Options()
    overrides = vars(opts) if isinstance(opts, Options) else opts
    for name, value in overrides.items():
        setattr(self.options, name, value)

    log_level = 'DEBUG' if self.options.debug else 'INFO'
    util.log.set_level(log_level)

    # text prepended to every source file before template rendering
    template_path = self.options.includeTemplate
    self.includeTemplate = util.read_file(template_path) if template_path else ''

    self.cache = {}

95 

def collect_and_parse(self):
    """Reset the builder state, then collect the source files and parse them."""
    self.markdownParser = markdown.parser()

    # start from a clean slate on every build
    self.docPaths, self.assetPaths = set(), set()
    self.sectionMap = {}
    self.sectionNotFound = set()
    self.assetMap = {}

    self.collect_sources()
    self.parse_all()

107 

def build_html(self, write=False):
    """Collect and parse the sources, then render them as HTML.

    Args:
        write: when true, also write the generated files to disk.
    """
    self.collect_and_parse()

    if self.sectionMap:
        self.generate_html(write=write)
        if write:
            util.log.info(f'HTML created in {self.options.outputDir!r}')
        return

    util.log.error('no sections, skip build_html')

116 

def build_markdown(self, write=False):
    """Collect and parse the sources, then render them as Markdown.

    Args:
        write: when true, also write the generated files and assets to disk.
    """
    self.collect_and_parse()
    if not self.sectionMap:
        # this message used to name 'build_html' (copy/paste slip),
        # which sent readers of the log to the wrong build step
        util.log.error('no sections, skip build_markdown')
        return
    self.generate_markdown(write=write)
    if write:
        util.log.info(f'Markdown created in {self.options.outputDir!r}')

125 

def build_pdf(self):
    """Render the documentation as a single HTML page and convert it to PDF.

    The HTML is generated into a temporary directory using a copy of the
    options that forces everything into one file; the PDF is then written to
    ``<options.outputDir>/index.pdf``.
    """
    pdf_temp_dir = '/tmp/dog_pdf'
    shutil.rmtree(pdf_temp_dir, ignore_errors=True)

    pdf_opts = Options()
    vars(pdf_opts).update(vars(self.options))

    # a split level of 0 for the root puts all sections into one index.html
    pdf_opts.fileSplitLevel = {'/': 0}
    pdf_opts.outputDir = pdf_temp_dir
    pdf_opts.webRoot = '.'

    if self.options.pdfPageTemplate:
        pdf_opts.pageTemplate = self.options.pdfPageTemplate

    old_opts = self.options
    self.options = pdf_opts
    try:
        self.collect_and_parse()
        if not self.sectionMap:
            util.log.error('no sections, skip build_pdf')
            return
        self.generate_html(write=True)
    finally:
        # restore the caller's options on every exit path; previously the
        # early return above (and any exception) left the PDF options active
        self.options = old_opts

    out_path = self.options.outputDir + '/index.pdf'
    try:
        self.generate_pdf(pdf_temp_dir + '/index.html', out_path)
    finally:
        # do not leave the intermediate HTML behind if the conversion fails
        shutil.rmtree(pdf_temp_dir, ignore_errors=True)

    util.log.info(f'PDF created in {out_path!r}')

156 

def dump(self):
    """Collect and parse the sources and return the section map as a JSON string."""

    def _default(obj):
        # objects json cannot handle are dumped as their attribute dict,
        # tagged with the class name under the '$' key
        return {**vars(obj), '$': obj.__class__.__name__}

    self.collect_and_parse()
    return json.dumps(
        self.sectionMap,
        default=_default,
        ensure_ascii=False,
        indent=4,
        sort_keys=True,
    )

165 

166 ## 

167 

def collect_sources(self):
    """Scan every directory listed in ``options.docRoots`` for documents and assets."""
    for root_dir in self.options.docRoots:
        self.collect_sources_from_dir(root_dir)

171 

172 def collect_sources_from_dir(self, dirname): 

173 de: os.DirEntry 

174 ex = self.options.excludeRegex 

175 

176 for de in os.scandir(dirname): 

177 if de.name.startswith('.'): 

178 pass 

179 elif ex and re.search(ex, de.path): 

180 util.log.debug(f'exclude: {de.path!r}') 

181 elif de.is_dir(): 

182 self.collect_sources_from_dir(de.path) 

183 elif de.is_file() and any(fnmatch.fnmatch(de.name, p) for p in self.options.docPatterns): 

184 self.docPaths.add(de.path) 

185 elif de.is_file() and any(fnmatch.fnmatch(de.name, p) for p in self.options.assetPatterns): 

186 self.assetPaths.add(de.path) 

187 

188 ## 

189 

190 def get_section(self, sid: str) -> Optional[Section]: 

191 if sid in self.sectionNotFound: 

192 return 

193 if sid not in self.sectionMap: 

194 util.log.error(f'section not found: {sid!r}') 

195 self.sectionNotFound.add(sid) 

196 return 

197 return self.sectionMap.get(sid) 

198 

199 def section_from_url(self, url) -> Optional[Section]: 

200 for sec in self.sectionMap.values(): 

201 if sec.htmlBaseUrl == url: 

202 return sec 

203 

204 def section_from_element(self, el: markdown.Element) -> Optional[Section]: 

205 for sec in self.sectionMap.values(): 

206 if sec.headNode.el == el: 

207 return sec 

208 

def sections_from_wildcard_sid(self, sid, parent_sec) -> list[Section]:
    """Resolve a sid that may contain ``*`` wildcards to a list of sections.

    Args:
        sid: section id, absolute or relative to ``parent_sec``; each ``*``
            stands for exactly one path segment.
        parent_sec: section in which the reference occurs.

    Returns:
        For a plain sid, a list with the section (empty if it is unknown).
        For a wildcard sid, all matching sections sorted by heading text.
    """
    abs_sid = self.make_sid(sid, parent_sec.sid, '', '')

    if not abs_sid:
        util.log.error(f'invalid section id {sid!r} in {parent_sec.sourcePath!r}')
        return []

    if '*' not in abs_sid:
        sub = self.get_section(abs_sid)
        return [sub] if sub else []

    # escape the sid first so that only '*' acts as a pattern; previously any
    # regex metacharacter in a sid (e.g. '.') was interpreted by `re`
    pattern = re.compile(re.escape(abs_sid).replace(r'\*', '[^/]+') + '$')
    subs = [sec for sec in self.sectionMap.values() if pattern.match(sec.sid)]
    return sorted(subs, key=lambda sec: sec.headText)

225 

226 ## 

227 

def generate_html(self, write):
    """Render all parsed sections to HTML and optionally write the result.

    Args:
        write: when true, write the pages, the assets, and the global TOC and
            search index scripts to the output directory.
    """
    self.assetMap = {}
    for asset_path in self.options.extraAssets:
        self.add_asset(asset_path)

    generator = HTMLGenerator(self)
    self.htmlGenerator = generator
    generator.render_section_heads()
    generator.render_sections()
    generator.flush()

    if not write:
        return

    generator.write()
    self.write_assets()

    # both helper scripts live in the static directory of the output
    static_dir = os.path.join(self.options.outputDir, self.options.staticDir)
    scripts = (
        (self.GLOBAL_TOC_SCRIPT, self.generate_global_toc),
        (self.SEARCH_INDEX_SCRIPT, self.generate_search_index),
    )
    for script_name, generate in scripts:
        util.write_file(str(os.path.join(static_dir, script_name)), generate())

249 

def generate_markdown(self, write):
    """Render all parsed sections to Markdown and optionally write the result.

    Args:
        write: when true, write the generated files and the assets to disk.
    """
    self.assetMap = {}
    for asset_path in self.options.extraAssets:
        self.add_asset(asset_path)

    generator = MarkdownGenerator(self)
    self.mardownGenerator = generator
    generator.render_sections()
    generator.flush()

    if not write:
        return

    generator.write()
    self.write_assets()

262 

def generate_pdf(self, source: str, target: str):
    """Convert the HTML file ``source`` to the PDF file ``target`` with wkhtmltopdf.

    Entries of ``options.pdfOptions`` are passed as ``--key value``; an entry
    whose value is ``True`` is passed as a bare ``--key`` flag.
    """
    cmd = [
        'wkhtmltopdf',
        '--outline',
        '--enable-local-file-access',
        '--print-media-type',
        '--disable-javascript',
    ]

    extra = self.options.pdfOptions or {}
    for key, value in extra.items():
        cmd.append(f'--{key}')
        if value is not True:
            cmd.append(str(value))

    cmd += [source, target]

    util.run(cmd, pipe=True)

282 

283 ## 

284 

285 GLOBAL_TOC_SCRIPT = '_global_toc.js' 

286 SEARCH_INDEX_SCRIPT = '_search_index.js' 

287 

288 def generate_global_toc(self): 

289 js = {sec.sid: {'h': sec.headText, 'u': sec.htmlUrl, 'p': '', 's': sec.subSids} for sec in self.sectionMap.values()} 

290 for sec in self.sectionMap.values(): 

291 for sub in sec.subSids: 

292 node = js.get(sub) 

293 if node: 

294 node['p'] = sec.sid 

295 

296 return 'GLOBAL_TOC = ' + json.dumps(js, ensure_ascii=False, indent=4) + '\n' 

297 

298 def generate_search_index(self): 

299 words_map = {} 

300 

301 for sec in self.sectionMap.values(): 

302 words_map[sec.sid] = [] 

303 for node in sec.nodes: 

304 if isinstance(node, MarkdownNode): 

305 self.extract_text(node.el, words_map[sec.sid]) 

306 

307 for sid, words in words_map.items(): 

308 ws = ' '.join(words) 

309 ws = ws.replace("'", '') 

310 ws = re.sub(r'\W+', ' ', ws).lower().strip() 

311 words_map[sid] = ws.split() 

312 

313 all_words = sorted(set(w for ws in words_map.values() for w in ws)) 

314 word_index = {w: n for n, w in enumerate(all_words, 1)} 

315 

316 sections = [] 

317 for sid, words in words_map.items(): 

318 sec = self.sectionMap[sid] 

319 head = sec.headHtml 

320 if sec.parentSid: 

321 parent = self.sectionMap[sec.parentSid] 

322 head += ' (' + parent.headHtml + ')' 

323 sections.append({'h': head, 'u': sec.htmlUrl, 'w': '.' + '.'.join(util.base36(word_index[w]) for w in words) + '.'}) 

324 

325 js = { 

326 'words': '.' + '.'.join(all_words), 

327 'sections': sorted(sections, key=lambda s: s['h']), 

328 } 

329 

330 return 'SEARCH_INDEX = ' + json.dumps(js, ensure_ascii=False, indent=4) + '\n' 

331 

332 def extract_text(self, el: markdown.Element, out: list): 

333 if el.text: 

334 out.append(el.text) 

335 return 

336 if el.children: 

337 for c in el.children: 

338 self.extract_text(c, out) 

339 out.append('.') 

340 

341 ## 

342 

343 def content_for_url(self, url): 

344 if url.endswith('.html'): 

345 sec = self.section_from_url(url) 

346 if sec: 

347 return 'text/html', self.htmlGenerator.buffers[sec.filePath].content 

348 return 

349 

350 m = re.search(self.options.staticDir + '/(.+)$', url) 

351 if not m: 

352 return 

353 

354 fn = m.group(1) 

355 if fn.endswith(self.GLOBAL_TOC_SCRIPT): 

356 return 'application/javascript', self.generate_global_toc() 

357 if fn.endswith(self.SEARCH_INDEX_SCRIPT): 

358 attr = '_CACHED_SEARCH_INDEX' 

359 if not hasattr(self, attr): 

360 setattr(self, attr, self.generate_search_index()) 

361 return 'application/javascript', getattr(self, attr) 

362 

363 for path, fname in self.assetMap.items(): 

364 if fname == fn: 

365 mt = mimetypes.guess_type(path) 

366 return mt[0] if mt else 'text/plain', util.read_file_b(path) 

367 

368 def add_asset(self, path): 

369 if path not in self.assetMap: 

370 self.assetMap[path] = self.unique_asset_filename(path) 

371 return self.options.webRoot + '/' + self.options.staticDir + '/' + self.assetMap[path] 

372 

373 def unique_asset_filename(self, path): 

374 fnames = set(self.assetMap.values()) 

375 fname = os.path.basename(path) 

376 if fname not in fnames: 

377 return fname 

378 n = 1 

379 while True: 

380 base, ext = fname.split('.') 

381 fname2 = f'{base}-{n}.{ext}' 

382 if fname2 not in fnames: 

383 return fname2 

384 n += 1 

385 

def write_assets(self):
    """Copy every registered asset into the static directory of the output."""
    for source_path, file_name in self.assetMap.items():
        target_path = str(os.path.join(self.options.outputDir, self.options.staticDir, file_name))
        util.log.debug(f'copy {source_path!r} => {target_path!r}')
        data = util.read_file_b(source_path)
        util.write_file_b(target_path, data)

391 

392 ## 

393 

def parse_all(self):
    """Parse all collected documents and build the section tree.

    Afterwards ``sectionMap`` holds exactly the sections reachable from the
    root section '/', with TOC nodes expanded and output paths assigned. If
    there is no root section, ``sectionMap`` is left empty.
    """
    parsed = self.sectionMap = {}

    for doc_path in self.docPaths:
        for sec in self.parse_file(doc_path):
            earlier = parsed.get(sec.sid)
            if earlier:
                util.log.warning(f'section redefined {sec.sid!r} from {earlier.sourcePath!r} in {sec.sourcePath!r}')
            # the definition parsed last wins
            parsed[sec.sid] = sec

    root = parsed.get('/')
    if not root:
        util.log.error('no root section found')
        self.sectionMap = {}
        return

    reachable = {}
    self.make_tree(root, None, reachable)

    # sections that nothing refers to are reported and dropped
    for sec in parsed.values():
        if sec.sid not in reachable:
            util.log.warning(f'unbound section {sec.sid!r} in {sec.sourcePath!r}')

    self.sectionMap = reachable

    for sec in reachable.values():
        self.expand_toc_nodes(sec)

    self.add_url_and_path(root, 0)

424 

def parse_file(self, path):
    """Parse the document at ``path`` and return the sections defined in it."""
    parser = FileParser(self, path)
    return parser.sections()

427 

428 def make_tree(self, sec: Section, parent_sec: Section | None, new_map): 

429 if parent_sec: 

430 if sec.parentSid: 

431 util.log.warning(f'rebinding section {sec.sid!r} from {sec.parentSid!r} to {parent_sec.sid!r}') 

432 sec.parentSid = parent_sec.sid 

433 

434 if sec.status == 'ok': 

435 return 

436 

437 if sec.status == 'walk': 

438 util.log.error(f'circular dependency in {sec.sid!r}') 

439 return 

440 

441 sec.status = 'walk' 

442 

443 sub_sids: list[str] = [] 

444 new_nodes: list[ParseNode] = [] 

445 new_map[sec.sid] = sec 

446 

447 for node in sec.nodes: 

448 if isinstance(node, SectionNode): 

449 sub = self.get_section(node.sid) 

450 if sub: 

451 self.make_tree(sub, sec, new_map) 

452 sub_sids.append(sub.sid) 

453 new_nodes.append(node) 

454 continue 

455 

456 if isinstance(node, EmbedNode): 

457 secs = self.sections_from_wildcard_sid(node.sid, sec) 

458 for sub in secs: 

459 self.make_tree(sub, sec, new_map) 

460 sub_sids.append(sub.sid) 

461 new_nodes.append(SectionNode(sid=sub.sid)) 

462 continue 

463 

464 new_nodes.append(node) 

465 

466 sec.nodes = new_nodes 

467 sec.subSids = sub_sids 

468 sec.status = 'ok' 

469 

def expand_toc_nodes(self, sec: Section):
    """Resolve the items of every TOC node in ``sec`` to concrete section sids."""
    for node in sec.nodes:
        if not isinstance(node, TocNode):
            continue
        node.sids = [
            found.sid
            for item in node.items
            for found in self.sections_from_wildcard_sid(item, sec)
        ]

478 

def add_url_and_path(self, sec: Section, split_level):
    """Assign output file, urls, html id and heading level to ``sec`` and its subtree.

    Args:
        sec: section to process.
        split_level: number of leading sid segments that select the output
            file; ``options.fileSplitLevel`` can override it per sid, and the
            override is inherited by the sub-sections.
    """
    if sec.sid in self.options.fileSplitLevel:
        split_level = self.options.fileSplitLevel[sec.sid]

    segments = sec.sid.split('/')[1:]

    single_file = sec.level == 0 or split_level == 0
    if single_file:
        rel_path = 'index.html'
    else:
        rel_path = '/'.join(segments[:split_level]) + '/index.html'

    # the remaining segments identify the section within its file
    sec.htmlId = '-'.join(segments[split_level:])
    sec.filePath = self.options.outputDir + '/' + rel_path
    sec.htmlBaseUrl = self.options.webRoot + '/' + rel_path

    util.log.debug(f'path {sec.sid} -> {sec.filePath} ({split_level})')

    sec.htmlUrl = sec.htmlBaseUrl + '#' + sec.htmlId if sec.htmlId else sec.htmlBaseUrl

    # heading levels restart in every output file and never drop below 1
    sec.headLevel = max(1, sec.level - split_level + 1)

    for sub_sid in sec.subSids:
        self.add_url_and_path(self.sectionMap[sub_sid], split_level)

506 

def make_sid(self, explicit_sid, parent_sid, prev_sid=None, text=None):
    """Compute an absolute section id, or return '' if none can be formed.

    Args:
        explicit_sid: sid given in the source; may be empty, absolute,
            relative, or end with '/' (then the uid of ``text`` is appended).
        parent_sid: sid of the enclosing section, used to resolve relative sids.
        prev_sid: sid of the preceding sibling; its parent path is used when
            there is no ``parent_sid``.
        text: heading text, used to derive a sid when needed.
    """
    explicit_sid = explicit_sid or ''
    text_sid = util.to_uid(text) if text else ''

    if explicit_sid == '/':
        return '/'

    sid = explicit_sid if explicit_sid else text_sid
    if sid.endswith('/'):
        sid = sid + text_sid
    if sid == '' or sid.endswith('/'):
        return ''

    if sid.startswith('/'):
        # already absolute
        return util.normpath(sid)

    if parent_sid:
        return util.normpath(parent_sid + '/' + sid)

    if prev_sid:
        sibling_parent = prev_sid.rpartition('/')[0]
        return util.normpath(sibling_parent + '/' + sid)

    return ''

531 

532 ## 

533 

534 def cached(self, key, fn): 

535 if key not in self.cache: 

536 self.cache[key] = fn() 

537 return self.cache[key] 

538 

539 

class FileParser:
    """Turns one documentation source file into a list of ``Section`` objects."""

    def __init__(self, b: Builder, path):
        self.b = b
        self.path = path

    def sections(self) -> list[Section]:
        """Parse the file and return the sections it defines, in document order."""
        util.log.debug(f'parse {self.path!r}')

        found = []

        # a sentinel at level -1 sits at the bottom of the stack, so there is
        # always an enclosing "section" to attach nodes to
        sentinel = Section(sid='', nodes=[], level=-1, headNode=MarkdownNode(el=markdown.Element(level=-1)))
        open_secs = [sentinel]

        el: markdown.Element
        for el in self.parse():
            if el.type != 'heading':
                open_secs[-1].nodes.append(self._body_node(el))
                continue

            # close deeper sections; a section on the same level is closed
            # too and remembered as the preceding sibling
            while open_secs[-1].headNode.el.level > el.level:
                open_secs.pop()
            sibling = None
            if open_secs[-1].headNode.el.level == el.level:
                sibling = open_secs.pop()

            sec = self.parse_heading(el, open_secs[-1], sibling)
            if sec:
                open_secs.append(sec)
                found.append(sec)

        return found

    def _body_node(self, el: markdown.Element):
        """Return the parse node for a non-heading element."""
        marker = template.GENERATED_NODE
        if el.type == 'block_code' and el.text.startswith(marker):
            # a code block starting with the marker carries a JSON description of a node
            args = json.loads(el.text[len(marker):])
            # NOTE(review): the class is looked up by name among the module
            # globals, so the JSON payload must come from a trusted template
            cls = globals()[args.pop('class')]
            return cls(**args)
        return MarkdownNode(el=el)

    def parse(self) -> list[markdown.Element]:
        """Render the file through the template engine and parse the result as markdown."""
        source = self.b.includeTemplate + util.read_file(self.path)
        context = {
            'options': self.b.options,
            'builder': self.b,
        }
        rendered = template.render(self.b, source, self.path, context)
        return self.b.markdownParser(rendered) if rendered else []

    def parse_heading(self, el: markdown.Element, parent_sec, prev_sec):
        """Create a section for the heading ``el`` and link it into ``parent_sec``.

        A heading without text is recorded as an embed node in the parent.
        Returns the new section, or None if none was created.
        """
        explicit_sid = self.extract_explicit_sid(el)
        text = markdown.text_from_element(el)

        prev_sid = prev_sec.sid if prev_sec else None
        sid = self.b.make_sid(explicit_sid, parent_sec.sid, prev_sid, text)

        if not sid and el.level == 1 and text and not explicit_sid:
            util.log.debug(f'creating implicit root section {text!r} in {self.path!r}')
            sid = '/'

        if not sid:
            util.log.error(f'invalid section id for {text!r}:{explicit_sid!r} in {self.path!r}')
            return None

        if not text:
            parent_sec.nodes.append(EmbedNode(sid=sid))
            return None

        parent_sec.nodes.append(SectionNode(sid=sid))
        el.sid = sid
        head = MarkdownNode(el=el)

        return Section(
            sid=sid,
            level=0 if sid == '/' else sid.count('/'),
            status='',
            sourcePath=self.path,
            headText=text,
            headNode=head,
            nodes=[head],
        )

    def extract_explicit_sid(self, el: markdown.Element) -> str:
        """Strip a trailing ``:sid`` from the heading ``el`` and return the sid ('' if absent)."""
        children = el.children
        if not children or children[-1].type != 'text':
            return ''

        last = children[-1]
        found = re.match(r'^(.*?):(\S+)$', last.text)
        if not found:
            return ''

        # keep only the part before the colon as heading text
        last.text = found.group(1)
        markdown.strip_text_content(el)

        return found.group(2)

640 

641 

class HTMLGenerator:
    """Renders the builder's sections to HTML pages, one buffer per output file."""

    def __init__(self, b: Builder):
        self.b = b
        self.buffers: dict[str, FileBuffer] = {}

    def render_section_heads(self):
        """Render the heading of every section to HTML, plain and as a link."""
        for sec in self.b.sectionMap.values():
            renderer = HTMLRenderer(self.b, sec)
            sec.headHtml = renderer.render_children(sec.headNode.el)
            sec.headHtmlLink = f'<a href="{sec.htmlUrl}">{sec.headHtml}</a>'

    def render_sections(self):
        """Render every section that has no parent, together with its subtree."""
        for sec in self.b.sectionMap.values():
            if not sec.parentSid:
                self.render_section(sec.sid)

    def render_section(self, sid):
        """Render the section ``sid`` and, recursively, the sections it contains."""
        sec = self.b.get_section(sid)
        if not sec:
            return

        util.log.debug(f'render {sid!r}')

        renderer = HTMLRenderer(self.b, sec)

        self.add(sec, f'<section id="{sec.htmlId}" data-sid="{sec.sid}">\n')

        for node in sec.nodes:
            if isinstance(node, MarkdownNode):
                self.add(sec, renderer.render_element(node.el))
            elif isinstance(node, SectionNode):
                self.render_section(node.sid)
            elif isinstance(node, TocNode):
                entries = ''.join(self.render_toc_entry(sub_sid, node.depth) for sub_sid in node.sids)
                self.add(sec, f'<div class="localtoc"><ul>{entries}</ul></div>')
            elif isinstance(node, RawHtmlNode):
                self.add(sec, node.html)

        self.add(sec, '</section>\n')

    def render_toc_entry(self, sid, depth: int):
        """Return the ``<li>`` for section ``sid``, nesting sub-sections while ``depth`` > 1."""
        sec = self.b.get_section(sid)
        if not sec:
            return ''

        nested = ''
        if depth > 1:
            items = [self.render_toc_entry(sub_sid, depth - 1) for sub_sid in sec.subSids]
            if items:
                nested = '<ul>' + ''.join(items) + '</ul>'

        return f'<li data-sid="{sid}">{sec.headHtmlLink}{nested}</li>'

    def render_main_toc(self):
        """Return the TOC entries of the root's sub-sections, or None without a root."""
        root = self.b.get_section('/')
        if not root:
            return None
        entries = [self.render_toc_entry(sub_sid, 999) for sub_sid in root.subSids]
        return '\n'.join(entries)

    def add(self, sec: Section, chunk: str):
        """Append ``chunk`` to the buffer of the file that ``sec`` belongs to."""
        buf = self.buffers.get(sec.filePath)
        if buf is None:
            buf = self.buffers[sec.filePath] = FileBuffer(sids=[], chunks=[], content='')
        buf.sids.append(sec.sid)
        buf.chunks.append(chunk)

    def flush(self):
        """Wrap the chunks of each buffer in the page template to get the final content."""
        page = template.compile(self.b, self.b.options.pageTemplate)

        root = self.b.get_section('/')
        home_url = root.htmlUrl if root else ''

        for path, buf in self.buffers.items():
            args = {
                'path': path,
                'title': self.b.options.title,
                'subTitle': self.b.options.subTitle,
                'main': ''.join(buf.chunks),
                # breadcrumbs lead to the first section written to this file
                'breadcrumbs': self.get_breadcrumbs(buf.sids[0]),
                'home': home_url,
                'builder': self.b,
                'options': self.b.options,
            }
            buf.content = template.call(self.b, page, args)

    def write(self):
        """Write the content of every buffer to its file."""
        for path, buf in self.buffers.items():
            util.log.debug(f'write {path!r}')
            util.write_file(path, buf.content)

    def get_breadcrumbs(self, sid):
        """Return ``(url, heading html)`` pairs from the topmost ancestor down to ``sid``."""
        sec = self.b.get_section(sid)
        if not sec:
            return []

        # collected bottom-up, returned top-down
        trail = []
        while sec:
            trail.append((sec.htmlUrl, sec.headHtml))
            if not sec.parentSid:
                break
            sec = self.b.get_section(sec.parentSid)

        trail.reverse()
        return trail

756 

757 

class HTMLRenderer(markdown.HTMLRenderer):
    """HTML renderer that resolves section links, assets and heading levels."""

    def __init__(self, b: Builder, sec: Section):
        self.b = b
        self.sec = sec

    def r_link(self, el: markdown.Element):
        """Render a link; a target that names a section is replaced by the section url."""
        content = self.render_children(el)
        target = el.target

        if target.startswith(('http:', 'https:')):
            return self.render_link(target, el.title, content, el)

        if target.startswith('//'):
            # a leading '//' marks a plain site link: drop one slash, do not resolve
            return self.render_link(target[1:], el.title, content, el)

        sec = self.b.get_section(self.b.make_sid(target, self.sec.sid))
        if sec:
            return self.render_link(sec.htmlUrl, el.title or sec.headText, content or sec.headHtml, el)

        return self.render_link(target, el.title, content, el)

    def r_image(self, el: markdown.Element):
        """Render an image; a local source is registered as an asset and rewritten to its url."""
        if not el.src:
            return ''

        if not el.src.startswith(('http:', 'https:')):
            # a local image is looked up among the collected assets by path suffix
            found = next((p for p in self.b.assetPaths if p.endswith(el.src)), None)
            if found is None:
                util.log.error(f'asset not found: {el.src!r} ')
                el.src = ''
            else:
                el.src = self.b.add_asset(found)

        return super().r_image(el)

    def r_heading(self, el: markdown.Element):
        """Render a section heading with the level computed for its output file."""
        sec = self.b.section_from_element(el)
        if not sec:
            return None

        content = self.render_children(el)
        tag = 'h' + str(sec.headLevel)

        attrs = {'data-url': sec.htmlUrl}
        if self.b.options.debug:
            # in debug mode the tooltip shows where the section comes from
            attrs['title'] = markdown.escape(sec.sourcePath)

        return f'<{tag}{markdown.attributes(attrs)}>{content}</{tag}>\n'

799 

800 

class MarkdownGenerator:
    """Renders the builder's sections to Markdown, one buffer per output file."""

    def __init__(self, b: Builder):
        self.b = b
        self.buffers: dict[str, FileBuffer] = {}

    def render_sections(self):
        """Render every section that has no parent, together with its subtree."""
        for sec in self.b.sectionMap.values():
            if not sec.parentSid:
                self.render_section(sec.sid)

    def render_section(self, sid):
        """Render the section ``sid`` and, recursively, the sections it contains."""
        sec = self.b.get_section(sid)
        if not sec:
            return

        util.log.debug(f'render {sid!r}')

        mr = MarkdownRenderer(self.b, sec)

        for node in sec.nodes:
            if isinstance(node, MarkdownNode):
                self.add(sec, mr.render_element(node.el))
            elif isinstance(node, SectionNode):
                self.render_section(node.sid)
            elif isinstance(node, RawHtmlNode):
                self.add(sec, node.html)

    def add(self, sec: Section, chunk: str):
        """Append ``chunk`` to the buffer of the file that ``sec`` belongs to."""
        if sec.filePath not in self.buffers:
            self.buffers[sec.filePath] = FileBuffer(sids=[], chunks=[], content='')
        self.buffers[sec.filePath].sids.append(sec.sid)
        self.buffers[sec.filePath].chunks.append(chunk)

    def flush(self):
        """Join the chunks of each buffer into its final content."""
        for buf in self.buffers.values():
            buf.content = ''.join(buf.chunks)

    def write(self):
        """Write every buffer to disk, using '.md' in place of the '.html' extension."""
        for path, buf in self.buffers.items():
            # swap the extension only; a blanket replace('.html', '.md') would
            # also rewrite directory names that happen to contain '.html'
            if path.endswith('.html'):
                path = path[: -len('.html')] + '.md'
            util.log.debug(f'write {path!r}')
            util.write_file(path, buf.content)

847 

848 

class MarkdownRenderer(markdown.MarkdownRenderer):
    """Markdown renderer that resolves section links, assets and heading levels."""

    def __init__(self, b: Builder, sec: Section):
        self.b = b
        self.sec = sec

    def r_link(self, el: markdown.Element):
        """Render a link; a target that names a section is replaced by the section url."""
        c = self.render_children(el)
        if el.target.startswith(('http:', 'https:')):
            return self.render_link(el.target, el.title, c, el)
        if el.target.startswith('//'):
            # a leading '//' marks a plain site link: drop one slash, do not resolve
            return self.render_link(el.target[1:], el.title, c, el)

        sid = self.b.make_sid(el.target, self.sec.sid)
        sec = self.b.get_section(sid)
        if not sec:
            return self.render_link(el.target, el.title, c, el)
        # Fall back to the plain heading text for an empty link: `headHtml` is
        # only assigned by HTMLGenerator.render_section_heads, which the
        # Markdown build never runs, and HTML is not wanted here anyway.
        return self.render_link(sec.htmlUrl, el.title or sec.headText, c or sec.headText, el)

    def r_image(self, el: markdown.Element):
        """Render an image; a local source is registered as an asset and rewritten to its url."""
        if not el.src:
            return ''
        if el.src.startswith(('http:', 'https:')):
            return super().r_image(el)
        # a local image is looked up among the collected assets by path suffix
        paths = [path for path in self.b.assetPaths if path.endswith(el.src)]
        if not paths:
            util.log.error(f'asset not found: {el.src!r} ')
            el.src = ''
            return super().r_image(el)
        el.src = self.b.add_asset(paths[0])
        return super().r_image(el)

    def r_heading(self, el: markdown.Element):
        """Render a section heading with the level computed for its output file."""
        sec = self.b.section_from_element(el)
        if not sec:
            return None
        c = self.render_children(el)
        return ('#' * sec.headLevel) + ' ' + c + '\n\n'