Coverage for gws-app/gws/spec/generator/parser.py: 90%
427 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-16 23:09 +0200
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-16 23:09 +0200
"""Parse py source files and create a list of units of interest"""
import ast
import re
from typing import cast

from . import base, util
def parse(gen: base.Generator, parse_all=False):
    """Parse all python sources of all generator chunks.

    Args:
        gen: The generator; its ``chunks`` provide the python paths, the base
            module name and the source directory of each chunk.
        parse_all: Passed on to the file parser; if false, only methods
            marked with an extension decorator are parsed.
    """
    _init_parser(gen)

    for chunk in gen.chunks:
        for path in chunk.paths.get('python', []):
            _parse_path(gen, path, chunk.name, chunk.sourceDir, parse_all)
def _init_parser(gen: base.Generator):
    """Register every name from ``base.v.BUILTINS`` as an ATOM type."""
    for b in base.v.BUILTINS:
        gen.add_type(c=base.c.ATOM, name=b)
def _parse_path(gen: base.Generator, path: str, base_name: str, base_dir: str, parse_all=True):
    """Parse a single python file and register its units with the generator.

    Args:
        gen: The generator that collects the parsed types.
        path: Path of the python file, must start with ``base_dir``.
        base_name: Dotted module name that corresponds to ``base_dir``.
        base_dir: Directory the module name is computed relative to.
        parse_all: Passed to the parser; if false, only methods marked with
            an extension decorator are parsed.

    Raises:
        base.GeneratorError: On any failure; the message contains the
            original error, the file path and the line number, if known.
    """
    pp = None

    base.log.debug(f'parsing {path=}')

    try:
        # <base_dir>/a/b/__init__.py => <base_name>.a.b
        # <base_dir>/a/b/c.py => <base_name>.a.b.c

        if not path.startswith(base_dir):
            raise ValueError(f'invalid path {path!r}')
        # NOTE(review): the remainder presumably starts with '/', which gives
        # an empty first component and hence the dot after base_name
        p = path[len(base_dir) :].split('/')
        f = p.pop().split(DOT)[0]
        if f != '__init__':
            p.append(f)
        mod_name = base_name + DOT.join(p)

        text = util.read_file(path)
        pp = _PythonParser(gen, mod_name, path, text, parse_all)
        pp.run()

    except Exception as exc:
        lineno = '?'
        if pp and pp.context:
            # the innermost node being processed when the error occurred
            lineno = pp.context[-1].lineno
        elif isinstance(exc, SyntaxError) and exc.lineno:
            # the source did not compile, no node context exists yet
            lineno = exc.lineno
        # exceptions raised without arguments have empty 'args'
        msg = str(exc.args[0]) if exc.args else repr(exc)
        raise base.GeneratorError(f'parse error: {msg} in {path}:{lineno}') from exc
##
class _PythonParser:
    """Parser for a single python module.

    Walks the top-level statements of the module AST and registers the module
    itself, its classes, enums, properties, methods, type aliases, constants
    and extension declarations with the generator.
    """

    lines: list[str]
    moduleNode: ast.Module
    moduleName: str
    docs: dict[int, str]
    imports: dict[str, str]

    def __init__(self, gen: base.Generator, module_name: str, path: str, text: str, parse_all: bool):
        """Set up the parser state.

        Args:
            gen: The generator that collects the parsed types.
            module_name: Dotted name of the module being parsed.
            path: Path of the module file.
            text: Source text of the module.
            parse_all: If false, only methods with an extension decorator are parsed.
        """
        self.gen = gen
        self.moduleName = module_name
        self.modulePath = path
        self.tModule = ''
        self.text = text
        # the leading '' shifts indexes by one, presumably to match 1-based line numbers
        self.source_lines = [''] + self.text.splitlines()
        self.is_init = path.endswith('__init__.py')
        # stack of nodes currently being processed, see 'nodes' and 'pos'
        self.context: list = []
        self.parseAll = parse_all

    def run(self):
        """Parse the module text and register everything found in it.

        Modules that contain the ``# gws:nospec`` marker on any line are skipped.
        """

        if any('# gws:nospec' in ln for ln in self.source_lines):
            return

        # in the default 'exec' mode, ast.parse always returns an ast.Module
        self.moduleNode = ast.parse(self.text)

        typ = self.add(
            c=base.c.MODULE,
            name=self.moduleName,
            modPath=self.modulePath,
            doc=self.inner_doc(self.moduleNode),
        )
        self.tModule = typ.uid

        self.imports = self.prepare_imports()

        for node in self.nodes(self.moduleNode.body):
            cc = _cls(node)
            if cc == 'Expr':
                self.parse_ext_declaration(node)
            elif cc == 'ClassDef':
                self.parse_class(node)
            elif cc in {'Assign', 'AnnAssign'}:
                self.parse_assign(
                    node,
                    self.outer_doc(node, self.moduleNode.body),
                    annotated=(cc == 'AnnAssign'),
                )

    def prepare_imports(self):
        """Map names imported at the module level to qualified names.

        Additionally, registers a generator alias for each imported name
        that looks like a type name and is not a builtin.

        Returns:
            A dict ``{local name: qualified name}``.
        """
        imp = {}

        # "import a.b.c as foo" => {foo: a.b.c}
        for node in self.nodes(self.moduleNode.body, 'Import'):
            for nn in node.names:
                imp[nn.asname or nn.name] = nn.name

        for node in self.nodes(self.moduleNode.body, 'ImportFrom'):
            # "from a.b.c import foo" => {foo: a.b.c.foo}
            if node.level == 0:
                for nn in node.names:
                    imp[nn.asname or nn.name] = node.module + DOT + nn.name
                continue

            # relative imports, e.g. in the plain module "a.b.c":
            #   "from . import foo"     => {foo: a.b.foo}
            #   "from .. import foo"    => {foo: a.foo}
            #   "from ..abc import foo" => {foo: a.abc.foo}
            # for an "__init__" file the module name is already the package
            # name, so one component less is stripped

            m = self.moduleName.split(DOT)
            level = node.level - int(self.is_init)
            if level:
                m = m[:-level]
            m = DOT.join(m)
            if node.module:
                m += DOT + node.module
            for nn in node.names:
                imp[nn.asname or nn.name] = m + DOT + nn.name

        # create aliases for imported types
        for alias, target in imp.items():
            if _is_type_name(alias) and not _builtin_name(alias):
                self.gen.aliases[self.moduleName + DOT + alias] = target

        return imp

    def parse_ext_declaration(self, node):
        """Parse an expression statement that might be an extension declaration.

        A declaration is a call whose callee name starts with
        ``base.v.EXT_DECL_PREFIX``. Its first argument is the extension name,
        up to three further arguments are the object, config and props
        classes. One EXT type is added for each of the three, using the
        default class from ``base.v`` where the argument is missing.

        Raises:
            ValueError: If the declaration has no arguments.
        """
        if _cls(node.value) != 'Call':
            return
        call = cast(ast.Call, node.value)
        try:
            decl = _name(call.func)
        except ValueError:
            # the callee is not a plain dotted name
            return
        if not decl.startswith(base.v.EXT_DECL_PREFIX):
            return
        if not call.args:
            raise ValueError('invalid gws.ext declaration')

        args = list(call.args)
        tail = decl.split(DOT).pop() + DOT + _name(args.pop(0))
        self.add(c=base.c.EXT, extName=base.v.EXT_OBJECT_PREFIX + tail, tTarget=self.qname(args.pop(0) if args else base.v.EXT_OBJECT_CLASS))
        self.add(c=base.c.EXT, extName=base.v.EXT_CONFIG_PREFIX + tail, tTarget=self.qname(args.pop(0) if args else base.v.EXT_CONFIG_CLASS))
        self.add(c=base.c.EXT, extName=base.v.EXT_PROPS_PREFIX + tail, tTarget=self.qname(args.pop(0) if args else base.v.EXT_PROPS_CLASS))

    def parse_assign(self, node, doc, annotated):
        """Parse a module level assignment, possibly a type alias or a constant.

        Args:
            node: An ``Assign`` or ``AnnAssign`` node.
            doc: Documentation string for the assigned name.
            annotated: True if the node is an ``AnnAssign``.
        """

        if annotated:
            name_node = node.target
        else:
            if len(node.targets) > 1:
                return
            name_node = node.targets[0]

        if _cls(name_node) != 'Name' or not _is_type_name(name_node.id):
            return

        typ = None
        if hasattr(node, 'annotation'):
            typ = self.type_from_node(node.annotation)

        if typ and typ.name == 'TypeAlias':
            # type alias
            target_type = self.type_from_node(node.value)
            self.add(
                c=base.c.TYPE,
                doc=doc,
                ident=name_node.id,
                name=self.qname(name_node),
                tTarget=target_type.uid,
            )

            return

        # possibly, a constant

        c, value = self.parse_const_value(node.value)
        if c == base.c.LITERAL:
            self.add(
                c=base.c.CONSTANT,
                doc=doc,
                ident=name_node.id,
                name=self.qname(name_node),
                constValue=value,
            )

    def parse_class(self, node):
        """Parse a class definition with its properties and methods.

        Classes whose name does not look like a type name are skipped.
        Classes whose first base is the builtin ``Enum`` are parsed as enums.
        """
        if not _is_type_name(node.name):
            return

        supers = [self.qname(b) for b in node.bases]
        if supers and _builtin_name(supers[0]) == 'Enum':
            return self.parse_enum(node)

        typ = self.add(
            c=base.c.CLASS,
            doc=self.inner_doc(node),
            ident=node.name,
            name=self.qname(node),
            tSupers=[self.type_from_name(s).uid for s in supers if not _builtin_name(s)],
        )

        for nn in self.nodes(node.body):
            cc = _cls(nn)
            if cc in {'Assign', 'AnnAssign'}:
                doc = self.outer_doc(nn, node.body)
                self.parse_property(typ, nn, doc, annotated=(cc == 'AnnAssign'))
            elif cc == 'FunctionDef':
                self.parse_method(typ, nn)

    def parse_enum(self, node):
        """Parse an Enum class, collecting values and docstrings of its items.

        Raises:
            ValueError: If an item value is not a literal.
        """
        docs = {}
        vals = {}

        for nn in self.nodes(node.body):
            if _cls(nn) == 'Assign':
                ident = nn.targets[0].id
                c, value = self.parse_const_value(nn.value)
                if c != base.c.LITERAL:
                    raise ValueError(f'invalid Enum item {ident!r}')
                docs[ident] = self.outer_doc(nn, node.body)
                vals[ident] = value

        self.add(
            c=base.c.ENUM,
            doc=self.inner_doc(node),
            ident=node.name,
            name=self.qname(node),
            enumDocs=docs,
            enumValues=vals,
        )

    def parse_property(self, owner_typ: base.Type, node, doc: str, annotated: bool):
        """Parse a class level assignment as a property of the class.

        Names starting with an underscore and assignments to anything other
        than a plain name are ignored.

        Args:
            owner_typ: The type of the class that owns the property.
            node: An ``Assign`` or ``AnnAssign`` node.
            doc: Documentation string for the property.
            annotated: True if the node is an ``AnnAssign``.
        """
        target = node.target if annotated else node.targets[0]
        if _cls(target) != 'Name':
            # tuple unpacking, attribute or subscript targets have no single name
            return
        ident = target.id
        if ident.startswith('_'):
            return

        typ = self.add(
            c=base.c.PROPERTY,
            name=owner_typ.name + DOT + ident,
            doc=doc,
            ident=ident,
            tOwner=owner_typ.uid,
            tValue='any',
            defaultValue=None,
            hasDefault=False,
        )

        c, value = self.parse_const_value(node.value)
        if c == base.c.LITERAL:
            typ.defaultValue = value
            typ.hasDefault = True
        if c == base.c.EXPR:
            # evaluated later on in normalizer._evaluate_defaults
            typ.defaultExpression = [c, value]

        property_type = None
        if hasattr(node, 'annotation'):
            property_type = self.type_from_node(node.annotation)

        if not property_type:
            # no annotation: derive the type from the literal default, if any
            t = 'any'
            if typ.hasDefault:
                t = type(typ.defaultValue).__name__
            property_type = self.type_from_name(t)

        if property_type:
            if property_type.c == base.c.OPTIONAL:
                # an optional property without an explicit default defaults to None
                typ.tValue = property_type.tTarget
                if not typ.hasDefault:
                    typ.defaultValue = None
                    typ.hasDefault = True
            else:
                typ.tValue = property_type.uid

    def parse_method(self, owner_typ: base.Type, node):
        """Parse a method of a class.

        Unless ``parseAll`` is set, only methods that have an extension
        decorator are parsed, and only their last argument is recorded.

        Args:
            owner_typ: The type of the class that owns the method.
            node: A ``FunctionDef`` node.
        """
        ext = self.gws_decorator(node, 'method')

        if not ext and not self.parseAll:
            return

        typ = self.add(
            c=base.c.METHOD,
            doc=self.inner_doc(node),
            ident=node.name,
            name=owner_typ.name + DOT + node.name,
            tOwner=owner_typ.uid,
            tArgs=[],
            tArg='',
            tReturn='any',
            extName=ext,
        )

        args = node.args.args
        if not self.parseAll:
            # ext methods have only one spec'able arg (the last one)
            args = args[-1:]

        for arg_node in args:
            t = 'any'
            if arg_node.annotation:
                arg_type = self.type_from_node(arg_node.annotation)
                if arg_type:
                    t = arg_type.uid
            typ.tArgs.append(t)
            # after the loop, tArg holds the type of the last argument
            typ.tArg = t

        if node.returns:
            ret_type = self.type_from_node(node.returns)
            typ.tReturn = ret_type.uid if ret_type else 'any'

    def gws_decorator(self, node, kind):
        """Return the full extension name from a decorator of the node.

        Only decorators that are calls with exactly one positional argument
        and whose callee name starts with ``base.v.EXT_PREFIX`` are considered.

        Args:
            node: A node that may have a ``decorator_list``.
            kind: Kind of the decorated object; only ``'method'`` is handled.

        Returns:
            The decorator name joined with its argument, or an empty string.

        Raises:
            ValueError: If a method decorator name does not have five components.
        """
        for d in getattr(node, 'decorator_list', []):
            if _cls(d) != 'Call' or len(d.args) != 1:
                continue

            try:
                name = _name(d.func)
            except ValueError:
                # the callee is not a plain dotted name, so not one of ours
                continue
            if not name.startswith(base.v.EXT_PREFIX):
                continue

            name = name + DOT + _name(d.args[0])
            ns = name.split(DOT)

            if kind == 'method':
                if len(ns) == 5:
                    # gws.ext.command.api.mapGetBox
                    return name
                raise ValueError(f'invalid function decorator {name!r}')

        return ''

    ##

    def type_from_node(self, node) -> base.Type:
        """Create or look up a type for a type declaration node.

        Args:
            node: The AST node of a type alias value or an annotation.

        Raises:
            ValueError: If the node kind is not supported.
        """
        # here, node is a type declaration (an alias or an annotation)

        cc = _cls(node)

        # foo: SomeType
        if cc in {'Str', 'Name', 'Attribute', 'Constant'}:
            return self.type_from_name(self.qname(node))

        # foo: Generic[SomeType]
        if cc == 'Subscript':
            # Subscript(slice=Index(value=Name... in py3.8
            # Subscript(slice=Name... in py3.9
            return self.type_from_name(self.qname(node.value), node.slice.value if _cls(node.slice) == 'Index' else node.slice)

        # foo: [SomeType, SomeType]
        if cc in {'List', 'Tuple'}:
            item_types = [self.type_from_node(e) for e in node.elts]
            return self.add(c=base.c.TUPLE, tItems=[typ.uid for typ in item_types])

        # foo: SomeType | SomeType | ...
        if cc == 'BinOp' and _cls(node.op) == 'BitOr':
            item_types = []
            # "a | b | c" is parsed as "(a | b) | c", so unfold from the right
            while _cls(node) == 'BinOp' and _cls(node.op) == 'BitOr':
                item_types.insert(0, self.type_from_node(node.right))
                node = node.left
            item_types.insert(0, self.type_from_node(node))
            return self.add(c=base.c.UNION, tItems=[typ.uid for typ in item_types])

        raise ValueError(f'unsupported type: {cc!r}')

    def type_from_name(self, name: str, param=None) -> base.Type:
        """Create or look up a type for a type name, possibly parametrized.

        Args:
            name: Qualified type name.
            param: For generic types, the AST node of the subscript.

        Raises:
            ValueError: If the parameter is missing or invalid for the generic.
        """
        if not param and name in self.gen.typeDict:
            return self.gen.typeDict[name]

        g = _builtin_name(name)

        if g == 'Any':
            return self.gen.typeDict['any']

        # literal - 'param' is a value or a tuple of values
        if g == 'Literal':
            if not param:
                raise ValueError('invalid literal')
            elts = param.elts if _cls(param) == 'Tuple' else [param]
            vals = [self.parse_literal_value(e) for e in elts]
            return self.add(c=base.c.LITERAL, literalValues=vals)

        # in other cases, 'param' is a type or a tuple of types

        param_typ = param_items = None
        if param:
            param_typ = self.type_from_node(param)
            if param_typ.c == base.c.TUPLE:
                param_items = param_typ.tItems

        if g == 'Optional':
            if not param_typ:
                raise ValueError('invalid optional type')
            return self.add(c=base.c.OPTIONAL, tTarget=param_typ.uid)

        # iterables are treated as lists
        if g.lower() in {'list', 'iterable'}:
            return self.add(c=base.c.LIST, tItem=param_typ.uid if param_typ else 'any')

        if g.lower() == 'set':
            return self.add(c=base.c.SET, tItem=param_typ.uid if param_typ else 'any')

        if g.lower() == 'dict':
            if param_items:
                if len(param_items) != 2:
                    raise ValueError('invalid dict arguments')
                key, val = param_items
            elif param_typ:
                # a single parameter is the value type, keys are strings
                key = 'str'
                val = param_typ.uid
            else:
                key = 'str'
                val = 'any'
            return self.add(c=base.c.DICT, tKey=key, tValue=val)

        if g == 'Union':
            if not param_items:
                raise ValueError('invalid Union')
            return self.add(c=base.c.UNION, tItems=sorted(param_items))

        if g.lower() == 'tuple':
            if not param_typ:
                return self.add(c=base.c.TUPLE, tItems=[])
            if not param_items:
                raise ValueError('invalid Tuple')
            return self.add(c=base.c.TUPLE, tItems=list(param_items))

        if g.lower() == 'callable':
            if not param_typ:
                return self.add(c=base.c.CALLABLE, tItems=[])
            if not param_items:
                raise ValueError('invalid Callable')
            return self.add(c=base.c.CALLABLE, tItems=list(param_items))

        if param:
            raise ValueError('invalid generic type')

        if g:
            base.log.debug(f'created ATOM for {name!r}, builtin {g!r}')
            return self.add(c=base.c.ATOM, name=name)

        return self.add(c=base.c.UNDEFINED, name=name)

    ##

    @property
    def pos(self):
        """Current position as ``path:lineno``; the line is 0 outside of any node."""
        return self.modulePath + ':' + str(self.context[-1].lineno if self.context else 0)

    def add(self, **kwargs) -> base.Type:
        """Add a type to the generator.

        The current position, the module type uid and a stripped ``doc``
        are set on the arguments before they are passed to ``gen.add_type``.
        """
        kwargs['pos'] = self.pos
        kwargs['tModule'] = self.tModule
        kwargs['doc'] = (kwargs.get('doc') or '').strip()
        typ = self.gen.add_type(**kwargs)
        base.log.debug(f'added {typ.uid=} {vars(typ)=}')
        return typ

    def inner_doc(self, node):
        """Returns a normal docstring (first child of the node)."""

        return self.docstring_from(node.body[0]) if node.body else ''

    def outer_doc(self, node, nodes):
        """Returns a docstring which immediately follows this node in a list of nodes."""

        try:
            nxt = nodes[nodes.index(node) + 1]
        except IndexError:
            # the node is the last one in the list
            return ''
        return self.docstring_from(nxt)

    def docstring_from(self, node):
        """If node is a docstring, return its content."""

        if _cls(node) == 'Expr':
            if _cls(node.value) == 'Constant':
                v = node.value.value
                if isinstance(v, str):
                    return v.strip()
            if _cls(node.value) == 'Str':
                return node.value.s.strip()
        return ''

    def qname(self, node):
        """Return the qualified name for a node or a string name.

        Builtin names are returned in their builtin form, other names
        are resolved using the imports of the module.
        """
        name = _name(node)
        b = _builtin_name(name)
        if b:
            return b
        name = self.qualified(name)
        return name

    def qualified(self, name):
        """Resolve a name against the module imports.

        A name that is not covered by any import is considered local
        and is prefixed with the module name.
        """
        for alias, mod in self.imports.items():
            # already qualified
            if name == mod or name.startswith(mod + DOT):
                return name
            if name == alias:
                return mod
            if name.startswith(alias + DOT):
                return mod + DOT + name[(len(alias) + 1) :]
        return self.moduleName + DOT + name

    def nodes(self, where, *cls):
        """Iterate over nodes, optionally only those of the given classes.

        While a node is being consumed, it is kept on top of the ``context``
        stack, which provides the current line number.

        Args:
            where: A list of AST nodes.
            *cls: Node class names to select; if empty, all nodes are yielded.
        """
        for node in where:
            if not cls or _cls(node) in cls:
                self.context.append(node)
                yield node
                self.context.pop()

    ##

    def parse_literal_value(self, node):
        """Return the scalar literal value of a node.

        Raises:
            ValueError: If the node is not a scalar literal.
        """
        c, value = self.parse_const_value(node)
        if c == base.c.LITERAL and _is_scalar(value):
            return value
        raise ValueError('invalid literal value')

    def parse_const_value(self, node):
        """Parse a node as a constant value.

        Returns:
            A pair ``(category, value)``. The category is ``base.c.LITERAL``
            for values known at parse time, ``base.c.EXPR`` for names and for
            containers that include names, and ``None`` if the node is not a
            supported constant (the value is ``None`` then).
        """
        if node is None:
            return None, None

        cc = _cls(node)

        if cc == 'Num':
            return base.c.LITERAL, node.n

        if cc in ('Str', 'Bytes'):
            return base.c.LITERAL, node.s

        if cc in ('Constant', 'NameConstant'):
            return base.c.LITERAL, node.value

        if cc in {'Name', 'Attribute'}:
            # SomeConstant or Something.someKey - possible constant/enum value
            return base.c.EXPR, self.qname(node)

        if cc in {'List', 'Tuple'}:
            exprlst, lst = [], []
            for elt in node.elts:
                c, value = self.parse_const_value(elt)
                if not c:
                    return None, None
                if c == base.c.LITERAL:
                    lst.append(value)
                exprlst.append([c, value])
            # all elements are literals => the whole list is a literal
            if len(lst) == len(exprlst):
                return base.c.LITERAL, lst
            return base.c.EXPR, exprlst

        if cc == 'Dict':
            exprdct, dct = {}, {}
            for k, v in zip(node.keys, node.values):
                c, key = self.parse_const_value(k)
                # keys must be literals; tuple and dict literals are parsed
                # into lists and dicts, which cannot be used as keys
                if c != base.c.LITERAL or isinstance(key, (list, dict)):
                    return None, None
                c, value = self.parse_const_value(v)
                if not c:
                    return None, None
                if c == base.c.LITERAL:
                    dct[key] = value
                exprdct[key] = [c, value]
            # all values are literals => the whole dict is a literal
            if len(dct) == len(exprdct):
                return base.c.LITERAL, dct
            return base.c.EXPR, exprdct

        return None, None
##
def _is_scalar(val):
    """Check if a value is a string, bytes, a number or a boolean."""
    return isinstance(val, (str, bytes, int, float, bool))
def _is_type_name(name: str) -> bool:
    """Check if a name looks like a type name, i.e. starts with an ASCII capital letter."""
    return bool(name) and re.match(r'^[A-Z]', name) is not None
def _builtin_name(name: str) -> str:
    """Return the builtin a name refers to, or an empty string.

    A name listed in ``base.v.BUILTINS`` or ``base.v.BUILTIN_TYPES`` is
    returned as is. Otherwise, a dotted name that ends or starts with a
    component from ``base.v.BUILTIN_TYPES`` yields that component.
    """
    if name in base.v.BUILTINS or name in base.v.BUILTIN_TYPES:
        return name
    for b in base.v.BUILTIN_TYPES:
        if name.endswith(DOT + b) or name.startswith(b + DOT):
            return b
    return ''
def _is_a(full_name: str, name: str) -> bool:
    """Check if a dotted full name matches a class-like or a module-like name.

    Args:
        full_name: The dotted name to check.
        name: A name like ``Object`` (must be the tail of the full name)
            or like ``some.module`` (must be the head of the full name).
    """
    if full_name == name:
        return True
    if not name:
        # an empty name has no first character to inspect and matches nothing else
        return False
    # if the name is like 'Object', check if the full name ends with it
    # if the name is like 'some.module', check if the full name starts with it
    if name[0].isupper():
        return full_name.endswith(DOT + name)
    return full_name.startswith(name + DOT)
def _cls(node):
    """Return the class name of a node, e.g. ``'Assign'`` for an ``ast.Assign``."""
    return node.__class__.__name__
def _name(node):
    """Return the, possibly dotted, name a node stands for.

    Strings are returned as is. Supported nodes are names, attribute chains,
    string constants (non-string constants yield their ``repr``), class and
    function definitions.

    Raises:
        ValueError: If the node kind has no name.
    """
    if isinstance(node, str):
        return node

    cc = _cls(node)

    if cc == 'Name':
        return node.id
    if cc == 'Attribute':
        # a.b.c is Attribute(Attribute(Name(a), b), c)
        return _name(node.value) + DOT + node.attr
    if cc == 'Str':
        return node.s
    if cc == 'Constant':
        v = node.value
        return v if isinstance(v, str) else repr(v)
    if cc in {'ClassDef', 'FunctionDef'}:
        return node.name

    raise ValueError(f'node name missing in {cc!r}')
def _camelize(name):
    """Convert a ``snake_case`` name to ``camelCase``.

    The first component is kept as is, each following one is capitalized.
    Empty components (from doubled or trailing underscores) add nothing.
    """
    p = name.split('_')
    return p[0] + ''.join(_ucfirst(s) for s in p[1:])


def _ucfirst(s):
    """Return the string with its first character in upper case."""
    # slicing, unlike s[0], also works for an empty string
    return s[:1].upper() + s[1:]
# separator between the components of dotted names
DOT = '.'