Coverage for gws-app/gws/lib/vendor/dog/markdown.py: 0%
390 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-16 23:09 +0200
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-16 23:09 +0200
1import re
2from typing import List
4import mistune
5from mistune import Markdown
7import pygments
8import pygments.util
9import pygments.lexers
10import pygments.formatters.html
12from . import util
class Element(util.Data):
    """Node of the markdown syntax tree.

    ``type`` names the node kind; the remaining fields are only filled in
    for the node kinds that use them.
    """

    type: str  # node kind, e.g. 'paragraph', 'link', 'text'

    align: str  # alignment of a table cell
    alt: str  # alternative text of an image
    children: List['Element']  # nested nodes
    info: str  # info string of a code block
    is_head: bool  # whether a table cell is a header cell
    level: int  # heading or list level
    target: str  # link destination
    ordered: bool  # whether a list is ordered
    sid: str
    src: str  # image source
    start: str  # start value of a list
    text: str  # textual content
    html: str  # raw html of 'block_html' / 'inline_html' nodes
    title: str  # title of a link or an image

    classname: str  # inline_decoration_plugin
    attributes: dict  # link_attributes_plugin

    def __repr__(self):
        # show the instance dictionary, i.e. only the fields that are set
        return f'{vars(self)!r}'
def parser() -> Markdown:
    """Create a mistune markdown parser that renders via `AstRenderer`.

    The 'table' and 'url' plugins are enabled together with the two
    plugins defined in this module.
    """
    plugins = ['table', 'url', inline_decoration_plugin, link_attributes_plugin]
    return mistune.create_markdown(renderer=AstRenderer(), plugins=plugins)
# plugin API reference: https://mistune.lepture.com/en/v2.0.5/advanced.html#create-plugins
# plugin: inline decorations
# {someclass some text} => <span class="decoration_someclass">some text</span>


def inline_decoration_plugin(md):
    """Register the ``{classname text}`` inline rule on a mistune parser.

    Args:
        md: The markdown parser; its ``inline`` parser receives the rule.
    """
    name = 'inline_decoration'
    pattern = r'\{(\w+ .+?)\}'

    def parser(inline, m, state):
        # The pattern also matches input like '{cls  }', where nothing but
        # whitespace follows the class name. Such a match has no text part,
        # so it is passed through as plain text instead of producing an
        # incomplete 'inline_decoration' token.
        parts = m.group(1).split(None, 1)
        if len(parts) != 2:
            return 'text', m.group(0)
        classname, text = parts
        return name, classname, text

    md.inline.register_rule(name, pattern, parser)
    md.inline.rules.append(name)
# plugin: link attributes
# https://pandoc.org/MANUAL.html#extension-link_attributes


def link_attributes_plugin(md):
    """Register the ``{...}`` attribute-list inline rule on a mistune parser.

    The rule only applies directly after a ``)`` or a backtick.
    """
    rule = 'link_attributes'
    regex = r'(?<=[)`]){.+?}'

    def parse_rule(inline, m, state):
        raw = m.group(0)
        # strip the surrounding braces before parsing
        parsed = parse_attributes(raw[1:-1])
        # braces that do not hold valid attributes stay plain text
        return (rule, raw, parsed) if parsed else ('text', raw)

    md.inline.register_rule(rule, regex, parse_rule)
    md.inline.rules.append(rule)
81##
def process(text):
    """Convert markdown ``text`` to an HTML string."""
    tree = parser()(text)
    html_renderer = HTMLRenderer()
    return ''.join(map(html_renderer.render_element, tree))
def strip_text_content(el: Element):
    """Right-strip the trailing text of ``el`` in place.

    Trailing children whose text consists of whitespace only are removed.
    Stripping stops at the first trailing child that has no text at all or
    that still has text after its trailing whitespace is removed.
    """
    while el.children:
        last = el.children[-1]
        if not last.text:
            return
        last.text = last.text.rstrip()
        if last.text:
            return
        # whitespace-only child: drop it and look at the one before
        el.children.pop()
def text_from_element(el: Element) -> str:
    """Return the stripped plain text of ``el``.

    An element's own text wins; otherwise the texts of its children are
    collected recursively and joined with single spaces.
    """
    if el.text:
        return el.text.strip()
    if not el.children:
        return ''
    parts = [text_from_element(child) for child in el.children]
    return ' '.join(parts).strip()
# based on mistune/renderers.AstRenderer


class AstRenderer:
    """Renderer that turns mistune tokens into `Element` nodes."""

    NAME = 'ast'

    def __init__(self):
        self.parser = Parser()

    def register(self, name, method):
        """Ignore the registration; methods are looked up on the parser."""
        pass

    def _get_method(self, name):
        """Return the parser method ``p_<name>`` for the token ``name``."""
        return getattr(self.parser, f'p_{name}')

    def finalize(self, elements: List[Element]):
        """Merge 'link_attributes' elements into the element before them.

        Attributes that follow an image, a link or a codespan are stored
        on that element. Any other 'link_attributes' element is turned
        into a 'text' element and kept.
        """
        mergeable = {'image', 'link', 'codespan'}
        merged = []
        for el in elements:
            if el.type != 'link_attributes':
                merged.append(el)
                continue
            if merged and merged[-1].type in mergeable:
                merged[-1].attributes = el.attributes
            else:
                el.type = 'text'
                merged.append(el)
        return merged
138##
class Parser:
    """Factory of `Element` nodes: one ``p_<type>`` method per token type."""

    @staticmethod
    def _node(kind, **fields):
        # shared constructor; the p_* methods differ only in the fields they store
        return Element(type=kind, **fields)

    def p_block_code(self, text, info=None):
        return self._node('block_code', text=text, info=info)

    def p_block_error(self, children=None):
        return self._node('block_error', children=children)

    def p_block_html(self, html):
        return self._node('block_html', html=html)

    def p_block_quote(self, children=None):
        return self._node('block_quote', children=children)

    def p_block_text(self, children=None):
        return self._node('block_text', children=children)

    def p_codespan(self, text):
        return self._node('codespan', text=text)

    def p_emphasis(self, children):
        return self._node('emphasis', children=children)

    def p_heading(self, children, level):
        return self._node('heading', children=children, level=level)

    def p_image(self, src, alt='', title=None):
        return self._node('image', src=src, alt=alt, title=title)

    def p_inline_decoration(self, classname, text):
        return self._node('inline_decoration', classname=classname, text=text)

    def p_inline_html(self, html):
        return self._node('inline_html', html=html)

    def p_linebreak(self):
        return self._node('linebreak')

    def p_link(self, target, children=None, title=None):
        # a plain string label is wrapped into a single text node
        if isinstance(children, str):
            children = [self._node('text', text=children)]
        return self._node('link', target=target, children=children, title=title)

    def p_link_attributes(self, text, attributes):
        return self._node('link_attributes', text=text, attributes=attributes)

    def p_list_item(self, children, level):
        return self._node('list_item', children=children, level=level)

    def p_list(self, children, ordered, level, start=None):
        return self._node(
            'list',
            children=children,
            ordered=ordered,
            level=level,
            start=start,
        )

    def p_newline(self):
        return self._node('newline')

    def p_paragraph(self, children=None):
        return self._node('paragraph', children=children)

    def p_strong(self, children=None):
        return self._node('strong', children=children)

    def p_table_body(self, children=None):
        return self._node('table_body', children=children)

    def p_table_cell(self, children, align=None, is_head=False):
        return self._node(
            'table_cell',
            children=children,
            align=align,
            is_head=is_head,
        )

    def p_table_head(self, children=None):
        return self._node('table_head', children=children)

    def p_table(self, children=None):
        return self._node('table', children=children)

    def p_table_row(self, children=None):
        return self._node('table_row', children=children)

    def p_text(self, text):
        return self._node('text', text=text)

    def p_thematic_break(self):
        return self._node('thematic_break')
class _Renderer:
    """Base renderer: dispatches each element to the method ``r_<type>``."""

    def render_children(self, el: Element):
        """Render all children of ``el`` and concatenate the results."""
        if not el.children:
            return ''
        return ''.join(map(self.render_element, el.children))

    def render_element(self, el: Element):
        """Render ``el`` with the ``r_<el.type>`` method of this renderer."""
        handler = getattr(self, f'r_{el.type}')
        return handler(el)
class MarkdownRenderer(_Renderer):
    """Renderer that turns an element tree back into markdown text."""

    def render_link(self, href, title, content, el):
        """Return an inline markdown link ``[content](target "title")``.

        NOTE(review): the destination is taken from ``el.target`` and the
        ``href`` argument is not used; ``r_link`` passes ``el.target`` as
        ``href``, so both are the same for that caller.
        """
        title = f' "{title}"' if title else ''
        return f'[{content}]({el.target}{title})'

    def r_block_code(self, el: Element):
        """Return a fenced code block; only the first word of the info string is kept."""
        lang = ''
        if el.info:
            lang = el.info.split(None, 1)[0]
        return f'```{lang}\n{el.text}\n```\n'

    def r_block_error(self, el: Element):
        c = self.render_children(el)
        return f'> **ERROR:** {c}\n\n'

    def r_block_html(self, el: Element):
        return el.html + '\n\n'

    def r_block_quote(self, el: Element):
        """Prefix every line of the rendered children with ``> ``."""
        c = self.render_children(el)
        lines = c.split('\n')
        return ''.join(f'> {line}\n' for line in lines) + '\n'

    def r_block_text(self, el: Element):
        return self.render_children(el)

    def r_codespan(self, el: Element):
        return f'`{el.text}`'

    def r_emphasis(self, el: Element):
        return f'*{self.render_children(el)}*'

    def r_heading(self, el: Element):
        c = self.render_children(el)
        return f'{"#" * el.level} {c}\n\n'

    def r_image(self, el: Element):
        """Return a markdown image ``![alt](src "title")``.

        The title part is omitted when the element has no title, and a
        missing alternative text is rendered as an empty description.
        """
        title = f' "{el.title}"' if el.title else ''
        alt = el.alt or ''
        return f'![{alt}]({el.src}{title})'

    def r_inline_decoration(self, el: Element):
        # doubled braces produce literal braces: {classname text}
        return f'{{{el.classname} {el.text}}}'

    def r_inline_html(self, el: Element):
        return el.html

    def r_linebreak(self, el: Element):
        return '\n'

    def r_link(self, el: Element):
        """Render a link; the target doubles as the label when there are no children."""
        c = self.render_children(el)
        return self.render_link(el.target, el.title, c or el.target, el)

    def r_list_item(self, el: Element):
        """Render one list item, indented by one space per nesting level above the first.

        NOTE(review): ``ordered`` is read from the item itself; the parser in
        this module only sets it on 'list' elements, so items presumably
        always get the ``- `` marker. Confirm before relying on ``1. ``.
        """
        c = self.render_children(el)
        indent = ' ' * (el.level - 1)
        marker = '1. ' if getattr(el, 'ordered', False) else '- '
        return f'{indent}{marker}{c}\n'

    def r_list(self, el: Element):
        c = self.render_children(el)
        return c + '\n'

    def r_newline(self, el: Element):
        return '\n'

    def r_paragraph(self, el: Element):
        c = self.render_children(el)
        return f'{c}\n\n'

    def r_strong(self, el: Element):
        return f'**{self.render_children(el)}**'

    def r_table(self, el: Element):
        return self.render_children(el) + '\n'

    def r_table_head(self, el: Element):
        """Render the header row followed by the alignment separator row."""
        cells = [child for child in el.children if child.type == 'table_cell']
        header = '| ' + ' | '.join(self.render_children(cell) for cell in cells) + ' |\n'

        # Create the separator row based on alignment
        separators = []
        for cell in cells:
            if cell.align == 'center':
                separators.append(':---:')
            elif cell.align == 'right':
                separators.append('---:')
            else:  # left or None
                separators.append('---')

        separator = '| ' + ' | '.join(separators) + ' |\n'
        return header + separator

    def r_table_body(self, el: Element):
        return self.render_children(el)

    def r_table_row(self, el: Element):
        cells = [child for child in el.children if child.type == 'table_cell']
        return '| ' + ' | '.join(self.render_children(cell) for cell in cells) + ' |\n'

    def r_table_cell(self, el: Element):
        return self.render_children(el)

    def r_text(self, el: Element):
        return el.text

    def r_thematic_break(self, el: Element):
        return '---\n\n'
class HTMLRenderer(_Renderer):
    """Renderer that turns an element tree into HTML."""

    def render_link(self, href, title, content, el):
        """Return an ``<a>`` tag; ``href`` doubles as the label when ``content`` is empty."""
        link_atts = {'href': href}
        if title:
            link_atts['title'] = escape(title)
        if el.attributes:
            # attributes from the link_attributes plugin override the defaults
            link_atts.update(el.attributes)
        return f'<a{attributes(link_atts)}>{content or href}</a>'

    ##

    def r_block_code(self, el: Element):
        """Highlight a code block with pygments.

        The info string may hold a language name, attributes, or both.
        The 'numbers' attribute enables line numbers starting at its value,
        the 'title' attribute adds a title paragraph before the code.
        """
        # right-strip every line, then drop blank lines at both ends
        rows = [row.rstrip() for row in el.text.split('\n')]
        first = 0
        while first < len(rows) and not rows[first]:
            first += 1
        last = len(rows)
        while last > first and not rows[last - 1]:
            last -= 1
        code = '\n'.join(rows[first:last])

        lang = ''
        atts = {}
        if el.info:
            # 'javascript' or 'javascript title=...' or 'title=...'
            m = re.match(r'^(\w+(?=(\s|$)))?(.*)$', el.info.strip())
            if m:
                lang = m.group(1)
                atts = parse_attributes(m.group(3))

        lang = lang or 'text'
        try:
            lexer = pygments.lexers.get_lexer_by_name(lang, stripall=True)
        except pygments.util.ClassNotFound:
            # unknown language: warn and highlight as plain text
            util.log.warning(f'pygments lexer {lang!r} not found')
            lexer = pygments.lexers.get_lexer_by_name('text', stripall=True)

        options = {'noclasses': True, 'nobackground': True}
        if 'numbers' in atts:
            options['linenos'] = 'table'
            options['linenostart'] = atts['numbers']

        formatter = pygments.formatters.html.HtmlFormatter(**options)
        html = pygments.highlight(code, lexer, formatter)

        if 'title' in atts:
            heading = escape(atts['title'])
            html = f'<p class="highlighttitle">{heading}</p>{html}'

        return html

    def r_block_error(self, el: Element):
        return f'<div class="error">{self.render_children(el)}</div>\n'

    def r_block_html(self, el: Element):
        return el.html

    def r_block_quote(self, el: Element):
        return f'<blockquote>\n{self.render_children(el)}</blockquote>\n'

    def r_block_text(self, el: Element):
        return self.render_children(el)

    def r_codespan(self, el: Element):
        body = escape(el.text)
        return f'<code{attributes(el.attributes)}>{body}</code>'

    def r_emphasis(self, el: Element):
        return f'<em>{self.render_children(el)}</em>'

    def r_heading(self, el: Element):
        """Return an ``<hN>`` tag, with an ``id`` attribute when the element has one."""
        body = self.render_children(el)
        tag = f'h{el.level}'
        id_att = f' id="{el.id}"' if el.id else ''
        return f'<{tag}{id_att}>{body}</{tag}>\n'

    def r_image(self, el: Element):
        """Return an ``<img>`` tag.

        'width' and 'height' attributes are moved into the ``style``
        attribute; purely numeric values get the 'px' unit.
        """
        img_atts = {}
        if el.src:
            img_atts['src'] = el.src
        if el.alt:
            img_atts['alt'] = escape(el.alt)
        if el.title:
            img_atts['title'] = escape(el.title)
        if el.attributes:
            img_atts.update(el.attributes)
            for dimension in ('width', 'height'):
                size = img_atts.pop(dimension, '')
                if not size:
                    continue
                if size.isdigit():
                    size += 'px'
                # prepend to whatever style is already there
                img_atts['style'] = f'{dimension}:{size};' + img_atts.get('style', '')

        return f'<img{attributes(img_atts)}/>'

    def r_inline_decoration(self, el: Element):
        body = escape(el.text)
        return f'<span class="decoration_{el.classname}">{body}</span>'

    def r_inline_html(self, el: Element):
        return el.html

    def r_linebreak(self, el: Element):
        return '<br/>\n'

    def r_link(self, el: Element):
        label = self.render_children(el)
        return self.render_link(el.target, el.title, label, el)

    def r_list_item(self, el: Element):
        return f'<li>{self.render_children(el)}</li>\n'

    def r_list(self, el: Element):
        """Return an ``<ol>`` or ``<ul>`` tag, with a ``start`` attribute when set."""
        body = self.render_children(el)
        tag = 'ol' if el.ordered else 'ul'
        list_atts = {'start': el.start} if el.start else {}
        return f'<{tag}{attributes(list_atts)}>\n{body}\n</{tag}>\n'

    def r_newline(self, el: Element):
        return ''

    def r_paragraph(self, el: Element):
        return f'<p>{self.render_children(el)}</p>\n'

    def r_strong(self, el: Element):
        return f'<strong>{self.render_children(el)}</strong>'

    def r_table_body(self, el: Element):
        return f'<tbody>\n{self.render_children(el)}</tbody>\n'

    def r_table_cell(self, el: Element):
        """Return a ``<th>`` or ``<td>`` tag, aligned via ``style`` when needed."""
        body = self.render_children(el)
        tag = 'th' if el.is_head else 'td'
        cell_atts = {'style': f'text-align:{el.align}'} if el.align else {}
        return f'<{tag}{attributes(cell_atts)}>{body}</{tag}>'

    def r_table_head(self, el: Element):
        return f'<thead>\n<tr>{self.render_children(el)}</tr>\n</thead>\n'

    def r_table(self, el: Element):
        return f'<table class="markdown-table">{self.render_children(el)}</table>\n'

    def r_table_row(self, el: Element):
        return f'<tr>{self.render_children(el)}</tr>\n'

    def r_text(self, el: Element):
        return escape(el.text)

    def r_thematic_break(self, el: Element):
        return '<hr/>\n'
def escape(s, quote=True):
    """Replace HTML special characters in ``s`` with character entities.

    Args:
        s: The string to escape.
        quote: Whether to escape the double quote as well, which is needed
            when the result is placed inside an attribute value.

    Returns:
        The escaped string.
    """
    # the ampersand goes first, otherwise the entities produced below
    # would be escaped a second time
    s = s.replace('&', '&amp;')
    s = s.replace('<', '&lt;')
    s = s.replace('>', '&gt;')
    if quote:
        s = s.replace('"', '&quot;')
    return s
def attributes(attrs):
    """Format a dict as HTML attributes, each one preceded by a space.

    Values are inserted as they are; an empty or missing dict gives ''.
    """
    if not attrs:
        return ''
    return ''.join(f' {key}="{val}"' for key, val in attrs.items())
534##
# One attribute followed by a space: '#id', '.class', 'key=value' or
# 'key="quoted value"'. Verbose mode, so the layout below is insignificant.
_ATTRIBUTE_RE = r"""(?x)
    (
        (\# (?P<id> [\w-]+) )
        |
        (\. (?P<class> [\w-]+) )
        |
        (
            (?P<key> \w+)
            =
            (
                " (?P<quoted> [^"]*) "
                |
                (?P<simple> \S+)
            )
        )
    )
    \x20
"""


def parse_attributes(text):
    """Parse an attribute list like ``#id .cls key=value key="a b"``.

    Several classes are joined with spaces. If any part of ``text`` is not
    a valid attribute, or ``text`` is blank, an empty dict is returned.
    """
    # every attribute must be followed by a space, hence the sentinel
    rest = text.strip() + ' '
    found = {}

    while rest:
        m = re.match(_ATTRIBUTE_RE, rest)
        if m is None:
            return {}

        rest = rest[m.end():].lstrip()

        if m.group('id'):
            found['id'] = m.group('id')
        elif m.group('class'):
            found['class'] = f"{found.get('class', '')} {m.group('class')}".strip()
        else:
            found[m.group('key')] = m.group('simple') or m.group('quoted').strip()

    return found