Coverage for gws-app/gws/lib/vendor/dog/markdown.py: 0%

390 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-16 22:59 +0200

1import re 

2from typing import List 

3 

4import mistune 

5from mistune import Markdown 

6 

7import pygments 

8import pygments.util 

9import pygments.lexers 

10import pygments.formatters.html 

11 

12from . import util 

13 

14 

class Element(util.Data):
    """A node of the markdown syntax tree built by ``Parser``."""

    # Node kind, e.g. 'paragraph', 'link', 'table_cell'.
    type: str

    align: str  # set on table cells
    alt: str  # set on images
    children: List['Element']  # nested nodes, if any
    info: str  # info string of a code block
    is_head: bool  # set on table cells
    level: int  # set on headings, lists and list items
    target: str  # set on links
    ordered: bool  # set on lists
    sid: str  # NOTE(review): not referenced in this module - confirm its users
    src: str  # set on images
    start: str  # set on lists
    text: str  # literal text of text-like nodes
    html: str  # raw markup of block_html / inline_html nodes
    title: str  # set on links and images

    classname: str  # inline_decoration_plugin
    attributes: dict  # link_attributes_plugin

    def __repr__(self):
        """Return the repr of the instance's attribute dictionary."""
        fields = vars(self)
        return repr(fields)

38 

39 

def parser() -> Markdown:
    """Create the mistune markdown object used by this module.

    It is configured with an ``AstRenderer`` and the 'table' and 'url'
    plugins plus the two plugins defined in this module.
    """
    renderer = AstRenderer()
    plugins = [
        'table',
        'url',
        inline_decoration_plugin,
        link_attributes_plugin,
    ]
    return mistune.create_markdown(renderer=renderer, plugins=plugins)

43 

44 

45# plugin API reference: https://mistune.lepture.com/en/v2.0.5/advanced.html#create-plugins 

46 

47# plugin: inline decorations 

48# {someclass some text} => <span class="decoration_someclass">some text</span> 

49 

50 

def inline_decoration_plugin(md):
    """Register the ``{classname some text}`` inline rule on *md*.

    A match produces the token ``('inline_decoration', classname, text)``.

    Args:
        md: the markdown object; its ``inline`` parser receives the rule.
    """
    name = 'inline_decoration'
    pattern = r'\{(\w+ .+?)\}'

    def parser(inline, m, state):
        parts = m.group(1).split(None, 1)
        if len(parts) < 2:
            # The pattern also matches '{cls   }', where everything after the
            # class name is whitespace. There is no text to decorate, so keep
            # the input as literal text instead of emitting a token that
            # lacks its text argument.
            return 'text', m.group(0)
        return name, *parts

    md.inline.register_rule(name, pattern, parser)
    md.inline.rules.append(name)

60 

61 

62# plugin: link attributes 

63# https://pandoc.org/MANUAL.html#extension-link_attributes 

64 

65 

def link_attributes_plugin(md):
    """Register the ``{...}`` attribute-list inline rule on *md*.

    The rule only matches directly after ``)`` or a backtick. When the braces
    contain a valid attribute list the token is
    ``('link_attributes', raw_text, attributes)``; otherwise the raw text is
    returned as a plain 'text' token.
    """
    name = 'link_attributes'
    pattern = r'(?<=[)`]){.+?}'

    def parse_link_attributes(inline, m, state):
        raw = m.group(0)
        # drop the surrounding braces before parsing
        parsed = parse_attributes(raw[1:-1])
        if not parsed:
            return 'text', raw
        return name, raw, parsed

    md.inline.register_rule(name, pattern, parse_link_attributes)
    md.inline.rules.append(name)

79 

80 

81## 

82 

83 

def process(text):
    """Convert markdown *text* to an HTML string.

    The text is parsed with ``parser()`` and every resulting top-level
    element is rendered with ``HTMLRenderer``.
    """
    elements = parser()(text)
    renderer = HTMLRenderer()
    return ''.join(map(renderer.render_element, elements))

89 

90 

def strip_text_content(el: Element):
    """Remove trailing whitespace from the end of *el*'s children, in place.

    The last child's text is right-stripped; if nothing remains the child is
    dropped and the new last child is examined. Stops at the first child
    that has no text or that still has text after stripping.
    """
    while el.children:
        last = el.children[-1]
        if not last.text:
            return
        last.text = last.text.rstrip()
        if last.text:
            return
        # the child consisted of whitespace only
        el.children.pop()

99 

100 

def text_from_element(el: Element) -> str:
    """Return the plain text of *el*.

    An element's own text (stripped) wins; otherwise the texts of its
    children are collected recursively and joined with single spaces.
    An element with neither yields an empty string.
    """
    if el.text:
        return el.text.strip()
    if not el.children:
        return ''
    parts = [text_from_element(child) for child in el.children]
    return ' '.join(parts).strip()

107 

108 

109# based on mistune/renderers.AstRenderer 

110 

111 

class AstRenderer:
    """Renderer object handed to mistune; it yields ``Element`` nodes.

    Token names are resolved to the ``p_<name>`` methods of a ``Parser``.
    """

    NAME = 'ast'

    def __init__(self):
        self.parser = Parser()

    def register(self, name, method):
        """Accept a renderer-method registration and ignore it."""
        return None

    def _get_method(self, name):
        """Return the ``Parser`` method that handles token *name*."""
        method_name = f'p_{name}'
        return getattr(self.parser, method_name)

    def finalize(self, elements: List[Element]):
        """Merge 'link_attributes' elements into the element before them.

        If the preceding element is an image, link or codespan, it receives
        the attributes and the 'link_attributes' element is dropped. Otherwise
        the element is kept and retyped as 'text'.
        """
        mergeable = {'image', 'link', 'codespan'}
        merged = []
        for el in elements:
            if el.type != 'link_attributes':
                merged.append(el)
                continue
            if merged and merged[-1].type in mergeable:
                merged[-1].attributes = el.attributes
            else:
                el.type = 'text'
                merged.append(el)
        return merged

136 

137 

138## 

139 

140 

class Parser:
    """Builds ``Element`` nodes, one ``p_<name>`` method per token name.

    Each method stores its arguments on a new ``Element`` whose ``type`` is
    the token name.
    """

    @staticmethod
    def _element(kind, **fields):
        """Create an ``Element`` of type *kind* carrying *fields*."""
        return Element(type=kind, **fields)

    def p_block_code(self, text, info=None):
        return self._element('block_code', text=text, info=info)

    def p_block_error(self, children=None):
        return self._element('block_error', children=children)

    def p_block_html(self, html):
        return self._element('block_html', html=html)

    def p_block_quote(self, children=None):
        return self._element('block_quote', children=children)

    def p_block_text(self, children=None):
        return self._element('block_text', children=children)

    def p_codespan(self, text):
        return self._element('codespan', text=text)

    def p_emphasis(self, children):
        return self._element('emphasis', children=children)

    def p_heading(self, children, level):
        return self._element('heading', children=children, level=level)

    def p_image(self, src, alt='', title=None):
        return self._element('image', src=src, alt=alt, title=title)

    def p_inline_decoration(self, classname, text):
        return self._element('inline_decoration', classname=classname, text=text)

    def p_inline_html(self, html):
        return self._element('inline_html', html=html)

    def p_linebreak(self):
        return self._element('linebreak')

    def p_link(self, target, children=None, title=None):
        # a plain-string label is wrapped into a single text child
        if isinstance(children, str):
            children = [self._element('text', text=children)]
        return self._element('link', target=target, children=children, title=title)

    def p_link_attributes(self, text, attributes):
        return self._element('link_attributes', text=text, attributes=attributes)

    def p_list_item(self, children, level):
        return self._element('list_item', children=children, level=level)

    def p_list(self, children, ordered, level, start=None):
        return self._element('list', children=children, ordered=ordered, level=level, start=start)

    def p_newline(self):
        return self._element('newline')

    def p_paragraph(self, children=None):
        return self._element('paragraph', children=children)

    def p_strong(self, children=None):
        return self._element('strong', children=children)

    def p_table_body(self, children=None):
        return self._element('table_body', children=children)

    def p_table_cell(self, children, align=None, is_head=False):
        return self._element('table_cell', children=children, align=align, is_head=is_head)

    def p_table_head(self, children=None):
        return self._element('table_head', children=children)

    def p_table(self, children=None):
        return self._element('table', children=children)

    def p_table_row(self, children=None):
        return self._element('table_row', children=children)

    def p_text(self, text):
        return self._element('text', text=text)

    def p_thematic_break(self):
        return self._element('thematic_break')

221 

222 

class _Renderer:
    """Base renderer: dispatches an element to the ``r_<type>`` method."""

    def render_children(self, el: Element):
        """Render all children of *el* and concatenate the results.

        Returns an empty string when the element has no children.
        """
        if not el.children:
            return ''
        return ''.join(map(self.render_element, el.children))

    def render_element(self, el: Element):
        """Render *el* with the method named after its type."""
        handler = getattr(self, f'r_{el.type}')
        return handler(el)

232 

233 

class MarkdownRenderer(_Renderer):
    """Render an ``Element`` tree back to markdown text."""

    def render_link(self, href, title, content, el):
        """Format an inline link as ``[content](href "title")``.

        Args:
            href: link destination written into the output.
            title: optional link title; omitted when empty.
            content: already rendered link label.
            el: the link element (unused here, part of the shared signature).
        """
        title = f' "{title}"' if title else ''
        # honour the href argument rather than re-reading el.target, so the
        # method agrees with its signature and with HTMLRenderer.render_link
        return f'[{content}]({href}{title})'

    def r_block_code(self, el: Element):
        """Emit a fenced code block; the fence carries only the first word of the info string."""
        lang = ''
        if el.info:
            words = el.info.split(None, 1)
            # a whitespace-only info string has no words at all
            lang = words[0] if words else ''
        return f'```{lang}\n{el.text}\n```\n'

    def r_block_error(self, el: Element):
        c = self.render_children(el)
        return f'> **ERROR:** {c}\n\n'

    def r_block_html(self, el: Element):
        return el.html + '\n\n'

    def r_block_quote(self, el: Element):
        """Prefix every line of the rendered children with ``>``."""
        # children usually end with blank lines; strip them so the quote does
        # not finish with empty '> ' lines
        body = self.render_children(el).rstrip('\n')
        quoted = ''.join(
            f'> {line}\n' if line else '>\n'
            for line in body.split('\n')
        )
        return quoted + '\n'

    def r_block_text(self, el: Element):
        return self.render_children(el)

    def r_codespan(self, el: Element):
        return f'`{el.text}`'

    def r_emphasis(self, el: Element):
        return f'*{self.render_children(el)}*'

    def r_heading(self, el: Element):
        c = self.render_children(el)
        return f'{"#" * el.level} {c}\n\n'

    def r_image(self, el: Element):
        title = f' "{el.title}"' if el.title else ''
        return f'![{el.alt or ""}]({el.src}{title})'

    def r_inline_decoration(self, el: Element):
        return f'{{{el.classname} {el.text}}}'

    def r_inline_html(self, el: Element):
        return el.html

    def r_linebreak(self, el: Element):
        return '\n'

    def r_link(self, el: Element):
        c = self.render_children(el)
        return self.render_link(el.target, el.title, c or el.target, el)

    def r_list_item(self, el: Element):
        """Render a single list item outside of its list.

        ``Parser.p_list_item`` does not set ``ordered`` on items, so this
        yields a bullet unless the caller has set ``ordered`` on the item.
        ``r_list`` supplies the marker itself and does not use this method.
        """
        marker = '1. ' if getattr(el, 'ordered', False) else '- '
        return self._render_list_item(el, marker)

    def r_list(self, el: Element):
        """Render a list, choosing item markers from the list's own fields.

        Ordered lists are numbered consecutively, beginning at ``el.start``
        (1 when absent or not a number); unordered lists use ``-``.
        """
        number = self._first_number(el)
        parts = []
        for child in el.children or []:
            if child.type != 'list_item':
                parts.append(self.render_element(child))
                continue
            marker = f'{number}. ' if el.ordered else '- '
            parts.append(self._render_list_item(child, marker))
            number += 1
        return ''.join(parts) + '\n'

    @staticmethod
    def _first_number(el: Element):
        """Return the first item number of list *el*, defaulting to 1."""
        try:
            return int(el.start)
        except (TypeError, ValueError):
            return 1

    def _render_list_item(self, el: Element, marker):
        """Render item *el* with the given *marker*, indented by its level."""
        indent = ' ' * (el.level - 1)
        return f'{indent}{marker}{self.render_children(el)}\n'

    def r_newline(self, el: Element):
        return '\n'

    def r_paragraph(self, el: Element):
        c = self.render_children(el)
        return f'{c}\n\n'

    def r_strong(self, el: Element):
        return f'**{self.render_children(el)}**'

    def r_table(self, el: Element):
        return self.render_children(el) + '\n'

    def r_table_head(self, el: Element):
        """Render the header row followed by the alignment separator row."""
        cells = [child for child in el.children or [] if child.type == 'table_cell']
        header = '| ' + ' | '.join(self.render_children(cell) for cell in cells) + ' |\n'

        # the separator row encodes each column's alignment
        markers = {'center': ':---:', 'right': '---:'}
        separators = [markers.get(cell.align, '---') for cell in cells]  # left or None -> '---'

        separator = '| ' + ' | '.join(separators) + ' |\n'
        return header + separator

    def r_table_body(self, el: Element):
        return self.render_children(el)

    def r_table_row(self, el: Element):
        cells = [child for child in el.children or [] if child.type == 'table_cell']
        return '| ' + ' | '.join(self.render_children(cell) for cell in cells) + ' |\n'

    def r_table_cell(self, el: Element):
        return self.render_children(el)

    def r_text(self, el: Element):
        return el.text

    def r_thematic_break(self, el: Element):
        return '---\n\n'

342 

343 

class HTMLRenderer(_Renderer):
    """Render an ``Element`` tree to HTML."""

    def render_link(self, href, title, content, el):
        """Build an ``<a>`` tag.

        ``el.attributes``, when present, are applied last and can therefore
        override ``href`` and ``title``. *href* doubles as the label when
        *content* is empty. *href* is inserted as given; only the title is
        escaped here.
        """
        link_atts = {'href': href}
        if title:
            link_atts['title'] = escape(title)
        if el.attributes:
            link_atts.update(el.attributes)
        label = content or href
        return f'<a{attributes(link_atts)}>{label}</a>'

    ##

    def r_block_code(self, el: Element):
        """Highlight a code block with pygments.

        The info string may hold a language name, an attribute list, or both.
        A 'numbers' attribute turns on table line numbers starting at its
        value; a 'title' attribute adds a title paragraph before the code.
        Unknown languages are logged and rendered with the 'text' lexer.
        """
        # right-strip every line, then drop blank lines at both ends
        lines = [line.rstrip() for line in el.text.split('\n')]
        first, last = 0, len(lines)
        while first < last and not lines[first]:
            first += 1
        while last > first and not lines[last - 1]:
            last -= 1
        code = '\n'.join(lines[first:last])

        lang = None
        atts = {}
        if el.info:
            # 'javascript' or 'javascript title=...' or 'title=...'
            m = re.match(r'^(\w+(?=(\s|$)))?(.*)$', el.info.strip())
            if m:
                lang = m.group(1)
                atts = parse_attributes(m.group(3))
        lang = lang or 'text'

        try:
            lexer = pygments.lexers.get_lexer_by_name(lang, stripall=True)
        except pygments.util.ClassNotFound:
            util.log.warning(f'pygments lexer {lang!r} not found')
            lexer = pygments.lexers.get_lexer_by_name('text', stripall=True)

        options = {'noclasses': True, 'nobackground': True}
        if 'numbers' in atts:
            options['linenos'] = 'table'
            options['linenostart'] = atts['numbers']

        formatter = pygments.formatters.html.HtmlFormatter(**options)
        html = pygments.highlight(code, lexer, formatter)

        if 'title' not in atts:
            return html
        return f'<p class="highlighttitle">{escape(atts["title"])}</p>' + html

    def r_block_error(self, el: Element):
        inner = self.render_children(el)
        return f'<div class="error">{inner}</div>\n'

    def r_block_html(self, el: Element):
        # raw markup is passed through untouched
        return el.html

    def r_block_quote(self, el: Element):
        inner = self.render_children(el)
        return f'<blockquote>\n{inner}</blockquote>\n'

    def r_block_text(self, el: Element):
        return self.render_children(el)

    def r_codespan(self, el: Element):
        code = escape(el.text)
        return f'<code{attributes(el.attributes)}>{code}</code>'

    def r_emphasis(self, el: Element):
        inner = self.render_children(el)
        return f'<em>{inner}</em>'

    def r_heading(self, el: Element):
        """Render ``<hN>``, adding an ``id`` attribute when ``el.id`` is set."""
        inner = self.render_children(el)
        tag = 'h' + str(el.level)
        # NOTE(review): 'id' is not among Element's declared fields;
        # presumably assigned elsewhere - confirm.
        id_att = f' id="{el.id}"' if el.id else ''
        return f'<{tag}{id_att}>{inner}</{tag}>\n'

    def r_image(self, el: Element):
        """Render ``<img/>``; width/height attributes become inline style."""
        atts = {}
        if el.src:
            atts['src'] = el.src
        if el.alt:
            atts['alt'] = escape(el.alt)
        if el.title:
            atts['title'] = escape(el.title)
        if el.attributes:
            atts.update(el.attributes)

        # each dimension is moved to the front of the style attribute;
        # bare numbers are taken as pixels
        for dimension in ('width', 'height'):
            size = atts.pop(dimension, '')
            if not size:
                continue
            if size.isdigit():
                size += 'px'
            atts['style'] = f'{dimension}:{size};' + atts.get('style', '')

        return f'<img{attributes(atts)}/>'

    def r_inline_decoration(self, el: Element):
        inner = escape(el.text)
        return f'<span class="decoration_{el.classname}">{inner}</span>'

    def r_inline_html(self, el: Element):
        # raw markup is passed through untouched
        return el.html

    def r_linebreak(self, el: Element):
        return '<br/>\n'

    def r_link(self, el: Element):
        label = self.render_children(el)
        return self.render_link(el.target, el.title, label, el)

    def r_list_item(self, el: Element):
        inner = self.render_children(el)
        return f'<li>{inner}</li>\n'

    def r_list(self, el: Element):
        inner = self.render_children(el)
        tag = 'ol' if el.ordered else 'ul'
        atts = {'start': el.start} if el.start else {}
        return f'<{tag}{attributes(atts)}>\n{inner}\n</{tag}>\n'

    def r_newline(self, el: Element):
        return ''

    def r_paragraph(self, el: Element):
        inner = self.render_children(el)
        return f'<p>{inner}</p>\n'

    def r_strong(self, el: Element):
        inner = self.render_children(el)
        return f'<strong>{inner}</strong>'

    def r_table_body(self, el: Element):
        inner = self.render_children(el)
        return f'<tbody>\n{inner}</tbody>\n'

    def r_table_cell(self, el: Element):
        inner = self.render_children(el)
        tag = 'th' if el.is_head else 'td'
        atts = {'style': f'text-align:{el.align}'} if el.align else {}
        return f'<{tag}{attributes(atts)}>{inner}</{tag}>'

    def r_table_head(self, el: Element):
        inner = self.render_children(el)
        return f'<thead>\n<tr>{inner}</tr>\n</thead>\n'

    def r_table(self, el: Element):
        inner = self.render_children(el)
        return f'<table class="markdown-table">{inner}</table>\n'

    def r_table_row(self, el: Element):
        inner = self.render_children(el)
        return f'<tr>{inner}</tr>\n'

    def r_text(self, el: Element):
        return escape(el.text)

    def r_thematic_break(self, el: Element):
        return '<hr/>\n'

515 

516 

def escape(s, quote=True):
    """Replace ``&``, ``<`` and ``>`` in *s* with HTML entities.

    Double quotes are replaced too unless *quote* is false. Single quotes
    are left as they are.
    """
    # '&' has to go first so the entities produced below stay intact
    replacements = [('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')]
    if quote:
        replacements.append(('"', '&quot;'))
    for char, entity in replacements:
        s = s.replace(char, entity)
    return s

524 

525 

def attributes(attrs):
    """Format a mapping as HTML attributes, each preceded by a space.

    Values are inserted as given (no escaping). A falsy *attrs* gives an
    empty string.
    """
    if not attrs:
        return ''
    return ''.join(f' {key}="{value}"' for key, value in attrs.items())

532 

533 

534## 

535 

536 

# One attribute of a pandoc-style attribute list, followed by whitespace or
# the end of the input: '#id', '.class', 'key=simple' or 'key="quoted value"'.
_ATTRIBUTE_RE = r"""(?x)
    (
        (\# (?P<id> [\w-]+) )
        |
        (\. (?P<class> [\w-]+) )
        |
        (
            (?P<key> \w [\w-]*)
            =
            (
                " (?P<quoted> [^"]*) "
                |
                (?P<simple> \S+)
            )
        )
    )
    (?: \s+ | \Z )
"""

# compiled once; parse_attributes matches it repeatedly
_ATTRIBUTE_PATTERN = re.compile(_ATTRIBUTE_RE)


def parse_attributes(text):
    """Parse an attribute list such as ``#id .cls key=value key="a b"``.

    Attributes are separated by any whitespace. Keys may contain hyphens
    after the first character (e.g. ``data-role``), like ids and classes.

    Args:
        text: the attribute list without the surrounding braces; ``None``
            is treated as empty.

    Returns:
        A dict with 'id', 'class' (several classes joined by spaces) and the
        key/value pairs; quoted values are stripped of surrounding
        whitespace. An empty dict is returned when the text is empty or when
        any part of it is not a valid attribute.
    """
    text = (text or '').strip()
    res = {}

    pos = 0
    while pos < len(text):
        m = _ATTRIBUTE_PATTERN.match(text, pos)
        if not m:
            # a single malformed item invalidates the whole list
            return {}
        pos = m.end()

        g = m.groupdict()
        if g['id']:
            res['id'] = g['id']
        elif g['class']:
            res['class'] = (res.get('class', '') + ' ' + g['class']).strip()
        else:
            res[g['key']] = g['simple'] or g['quoted'].strip()

    return res
576 return res