Coverage for gws-app/gws/gis/cache/core.py: 30%
157 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-16 23:09 +0200
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-16 23:09 +0200
1"""Cache management."""
3import math
4import os
5import re
7import yaml
9import gws
10import gws.config
11import gws.gis.mpx.config
12import gws.lib.osx
13import gws.lib.lock
14import gws.lib.datetimex as datetimex
# Fallback values used by `seed` when the application or layer cache
# configuration does not provide one.

# Timeout passed to the `mapproxy-seed` subprocess.
DEFAULT_MAX_TIME = 600

# Value of the `-c` option of `mapproxy-seed`.
DEFAULT_CONCURRENCY = 1

# Seconds subtracted from "now" to compute the `refresh_before` time (7 days).
DEFAULT_MAX_AGE = 7 * 24 * 3600

# Highest zoom level seeded when no explicit levels are requested.
DEFAULT_MAX_LEVEL = 3
class Config(gws.Config):
    """Global cache options"""

    # Read by `seed` via `root.app.cfg('cache.seedingMaxTime')`.
    seedingMaxTime: gws.Duration = '600'
    """Max. time for a seeding job."""

    # Read by `seed` via `root.app.cfg('cache.seedingConcurrency')`.
    seedingConcurrency: int = 1
    """Number of concurrent seeding jobs."""
class Grid(gws.Data):
    """Tile statistics for a single zoom level of a cache."""

    # Not assigned anywhere in this module.
    uid: str
    # Zoom level: index into the grid resolutions sorted in descending order.
    z: int
    # Resolution value at this zoom level.
    res: float
    # Number of tiles along the X axis (at least 1).
    maxX: int
    # Number of tiles along the Y axis (at least 1).
    maxY: int
    # maxX * maxY.
    totalTiles: int
    # Number of tile files found on disk; starts at 0, filled by `_update_file_counts`.
    cachedTiles: int
class Entry(gws.Data):
    """A MapProxy cache together with the layers that use it."""

    # Cache uid, a key of the `caches` section in the MapProxy config.
    uid: str
    # Layers whose `mpxCacheUid` equals `uid`.
    layers: list[gws.Layer]
    # The config of this cache, as found in the MapProxy `caches` section.
    mpxCache: dict
    # Per-level grid statistics, keyed by zoom level.
    grids: dict[int, Grid]
    # Created empty by `_enum_entries`.
    config: dict
    # Not assigned anywhere in this module.
    counts: dict
    # Cache directory: `MAPPROXY_CACHE_DIR/<uid>_<crs without colon>`.
    dirname: str
class Status(gws.Data):
    """Result of `status`: known cache entries plus unclaimed directories."""

    # Cache entries for the selected layers.
    entries: list[Entry]
    # Directories directly under the cache dir that belong to none of `entries`.
    staleDirs: list[str]
def status(root: gws.Root, layer_uids=None, with_counts=True) -> Status:
    """Collect the current state of the MapProxy cache.

    Args:
        root: Application root object.
        layer_uids: Optional list of layer UIDs; when given, only caches
            of these layers are reported.
        with_counts: If true, scan the cache directory and count cached tiles.

    Returns:
        A ``Status`` with the cache entries and the list of directories
        in the cache dir that do not belong to any returned entry.
        Note that when ``layer_uids`` filters the entries, directories of
        the remaining (unselected) caches are reported as stale as well.
    """
    entries = []

    mpx_config = gws.gis.mpx.config.create(root)
    if mpx_config:
        entries = _enum_entries(root, mpx_config, layer_uids)

    if with_counts and entries:
        _update_file_counts(entries)

    known_dirs = {entry.dirname for entry in entries}
    stale_dirs = [
        dirname
        for dirname in gws.lib.osx.find_directories(gws.c.MAPPROXY_CACHE_DIR, deep=False)
        if dirname not in known_dirs
    ]

    return Status(entries=entries, staleDirs=stale_dirs)
def cleanup(root: gws.Root):
    """Delete cache directories that do not belong to any configured cache.

    Args:
        root: Application root object.

    Returns:
        None. Every directory reported as stale by ``status`` is removed.
    """
    stale_dirs = status(root, with_counts=False).staleDirs
    for dirname in stale_dirs:
        _remove_dir(dirname)
def drop(root: gws.Root, layer_uids=None):
    """Delete the cache directories of configured caches.

    Args:
        root: Application root object.
        layer_uids: Optional list of layer UIDs; when given, only caches
            of these layers are dropped.

    Returns:
        None. The directory of every matching cache entry is removed.
    """
    report = status(root, layer_uids=layer_uids, with_counts=False)
    for entry in report.entries:
        _remove_dir(entry.dirname)
# A 1x1 palette-based PNG whose first palette entry is fully transparent.
# `seed` writes it to disk and makes MapProxy return it in place of
# a source response that is not an image.
PIXEL_PNG8 = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x03\x00\x00\x00(\xcb4\xbb\x00\x00\x00\x06PLTE\xff\xff\xff\x00\x00\x00U\xc2\xd3~\x00\x00\x00\x01tRNS\x00@\xe6\xd8f\x00\x00\x00\x0cIDATx\xdab`\x00\x080\x00\x00\x02\x00\x01OmY\xe1\x00\x00\x00\x00IEND\xaeB`\x82'
def seed(root: gws.Root, entries: list[Entry], levels: list[int]):
    """Generate and populate the cache for specified layers and zoom levels.

    Builds one MapProxy seed task per cache entry, writes the MapProxy
    and seed configurations to ``CONFIG_DIR`` and runs ``mapproxy-seed``
    while holding a soft file lock. Afterwards, ownership of the cache
    directories is reset to the configured UID/GID.

    Args:
        root: Application root object.
        entries: List of cache entries to seed.
        levels: List of zoom levels to generate cache for. If empty,
            levels ``0..maxLevel`` of the entry's first layer are used
            (``DEFAULT_MAX_LEVEL`` when the layer does not configure it).

    Returns:
        None. Cache is populated with generated tiles. Nothing is done if
        there are no entries or if another seeding job holds the lock.
    """
    # https://mapproxy.github.io/mapproxy/latest/seed.html#seeds
    seeds = {}

    for e in entries:
        cache_uid = e.uid

        # cache options are taken from the first layer of the entry
        c = e.layers[0].cache or gws.Data()
        max_age = c.get('maxAge') or DEFAULT_MAX_AGE
        max_level = c.get('maxLevel') or DEFAULT_MAX_LEVEL

        seeds[cache_uid] = dict(
            caches=[cache_uid],
            # must be a plain list: `yaml.dump` would serialize a `range` object
            # with a python-specific tag instead of a YAML sequence
            levels=levels or list(range(max_level + 1)),
            refresh_before={
                'time': datetimex.to_iso_string(datetimex.to_utc(datetimex.add(seconds=-max_age)), with_tz=''),
            },
        )

    if not seeds:
        gws.log.info('no layers to seed')
        return

    lock_path = gws.c.CONFIG_DIR + '/mapproxy.seed.lock'

    with gws.lib.lock.SoftFileLock(lock_path) as ok:
        if not ok:
            gws.log.info('seeding already running')
            return

        mpx_config = gws.gis.mpx.config.create(root)
        mpx_config_path = gws.c.CONFIG_DIR + '/mapproxy.seed.main.yml'
        gws.u.write_file(mpx_config_path, yaml.dump(mpx_config))

        seed_config_path = gws.c.CONFIG_DIR + '/mapproxy.seed.yml'
        gws.u.write_file(seed_config_path, yaml.dump(dict(seeds=seeds)))

        max_time = root.app.cfg('cache.seedingMaxTime', default=DEFAULT_MAX_TIME)
        concurrency = root.app.cfg('cache.seedingConcurrency', default=DEFAULT_CONCURRENCY)

        # monkeypatch mapproxy to simply store an empty image in case of error
        # NOTE(review): this rewrites the installed mapproxy source file in place,
        # and the path is tied to python3.10 - verify after an interpreter upgrade.
        empty_pixel_path = gws.c.CONFIG_DIR + '/mapproxy.seed.empty.png'
        gws.u.write_file_b(empty_pixel_path, PIXEL_PNG8)
        py = '/usr/local/lib/python3.10/dist-packages/mapproxy/client/http.py'
        s = gws.u.read_file(py)
        s = re.sub(r"raise HTTPClientError\('response is not an image.+", f'return ImageSource({empty_pixel_path!r})', s)
        gws.u.write_file(py, s)

        ts = gws.u.stime()
        gws.log.info(f'START SEEDING jobs={len(seeds)} {max_time=} {concurrency=}')
        gws.log.info('^C ANYTIME TO STOP...')

        cmd = f'''
            /usr/local/bin/mapproxy-seed
            -f {mpx_config_path}
            -c {concurrency}
            {seed_config_path}
        '''
        res = False
        try:
            gws.lib.osx.run(cmd, echo=True, timeout=max_time or DEFAULT_MAX_TIME)
            res = True
        except gws.lib.osx.TimeoutError:
            # running out of time is expected: reported below as incomplete
            pass
        except KeyboardInterrupt:
            # the user stopped seeding: reported below as incomplete
            pass

        # reset the ownership of the cache dirs, even after an incomplete run
        try:
            for p in gws.lib.osx.find_directories(gws.c.MAPPROXY_CACHE_DIR, deep=False):
                gws.lib.osx.run(f'chown -R {gws.c.UID}:{gws.c.GID} {p}', echo=True)
        except Exception as exc:
            gws.log.error(f'failed to chown cache dir: {exc!r}')

        gws.log.info(f'TIME: {gws.u.stime() - ts} sec')
        gws.log.info('SEEDING COMPLETE' if res else 'SEEDING INCOMPLETE, PLEASE TRY AGAIN')
def store_in_web_cache(url: str, img: bytes):
    """Store an image in the web cache.

    The image is first written to a randomly named temporary file in the
    target directory and then renamed to its final name. Failures are
    logged as warnings and not propagated.

    Args:
        url: URL path to use as the cache key. It is appended verbatim
            to ``FASTCACHE_DIR``.
        img: Binary image data to store.

    Returns:
        None. Image is stored in the cache.
    """
    # NOTE(review): `url` is concatenated without normalization - callers
    # are presumably expected to pass a sanitized path; verify.
    path = gws.c.FASTCACHE_DIR + url
    dirname = os.path.dirname(path)
    tmp = dirname + '/' + gws.u.random_string(64)
    try:
        os.makedirs(dirname, 0o755, exist_ok=True)
        gws.u.write_file_b(tmp, img)
        os.rename(tmp, path)
    except OSError:
        gws.log.warning(f'store_in_web_cache FAILED path={path!r}')
        # do not leave a (possibly partial) temporary file behind
        try:
            os.unlink(tmp)
        except OSError:
            pass
def _update_file_counts(entries: list[Entry]):
    """Update cached tile counts for each entry.

    Scans all files in the MapProxy cache directory and, for each PNG tile
    located in the directory of an entry, increments ``cachedTiles`` of the
    entry's grid at the tile's zoom level. Files that do not look like
    tiles, and tiles at zoom levels unknown to the entry, are ignored.

    Args:
        entries: List of cache entries to update.

    Returns:
        None. The entries are updated in-place.
    """
    # we use the mp layout all the way: zz/xxxx/xxxx/yyyy/yyyy.format
    # (only the zoom level is needed for counting)
    tile_re = re.compile(r'(\d+)/(\d+)/(\d+)/(\d+)/(\d+)\.png$')

    for path in gws.lib.osx.find_files(gws.c.MAPPROXY_CACHE_DIR):
        m = tile_re.search(path)
        if not m:
            # not a tile (e.g. a lock or metadata file)
            continue

        z = int(m.group(1))

        for e in entries:
            # the trailing slash prevents matching sibling dirs sharing the prefix
            if path.startswith(e.dirname + '/'):
                g = e.grids.get(z)
                if g:
                    g.cachedTiles += 1
def _enum_entries(root: gws.Root, mpx_config: dict, layer_uids=None) -> list[Entry]:
    """Build cache entries from the layers and the MapProxy configuration.

    A cache entry is created for each MapProxy cache that has storage
    enabled and is referenced by at least one (selected) layer via
    ``mpxCacheUid``. Layers sharing a cache are collected in one entry.

    Args:
        root: Application root object.
        mpx_config: MapProxy configuration dictionary.
        layer_uids: Optional list of layer UIDs to filter by.

    Returns:
        List of cache Entry objects.
    """
    by_uid: dict[str, Entry] = {}

    for layer in root.find_all(gws.ext.object.layer):
        if layer_uids and layer.uid not in layer_uids:
            continue

        for cache_uid, cache_cfg in mpx_config['caches'].items():
            if cache_cfg.get('disable_storage'):
                continue
            if gws.u.get(layer, 'mpxCacheUid') != cache_uid:
                continue

            # the cache is already known: just register one more layer
            if cache_uid in by_uid:
                by_uid[cache_uid].layers.append(layer)
                continue

            grid_cfgs = [mpx_config['grids'][grid_uid] for grid_uid in cache_cfg['grids']]
            # the directory name is derived from the srs of the first grid
            crs_name = grid_cfgs[0]['srs'].replace(':', '')

            entry = Entry(
                uid=cache_uid,
                layers=[layer],
                mpxCache=cache_cfg,
                grids={},
                config={},
                dirname=f'{gws.c.MAPPROXY_CACHE_DIR}/{cache_uid}_{crs_name}',
            )

            for grid_cfg in grid_cfgs:
                # see _calc_grids in mapproxy/grid.py
                bbox = grid_cfg['bbox']
                width = bbox[2] - bbox[0]
                height = bbox[3] - bbox[1]
                tile_size = grid_cfg['tile_size']
                resolutions = sorted(grid_cfg['res'], reverse=True)

                for level, resolution in enumerate(resolutions):
                    cols = max(math.ceil(width // resolution / tile_size[0]), 1)
                    rows = max(math.ceil(height // resolution / tile_size[1]), 1)
                    entry.grids[level] = Grid(
                        z=level,
                        res=resolution,
                        maxX=cols,
                        maxY=rows,
                        totalTiles=cols * rows,
                        cachedTiles=0,
                    )

            by_uid[cache_uid] = entry

    return list(by_uid.values())
def _remove_dir(dirname: str):
    """Delete a directory tree by running ``rm -fr`` on it.

    Args:
        dirname: Path to the directory to remove.

    Returns:
        None. The removal is logged at the info level.
    """
    gws.lib.osx.run(['rm', '-fr', dirname], echo=True)
    gws.log.info(f'removed {dirname}')