Coverage for gws-app/gws/gis/cache/core.py: 30%

157 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-16 22:59 +0200

1"""Cache management.""" 

2 

3import math 

4import os 

5import re 

6 

7import yaml 

8 

9import gws 

10import gws.config 

11import gws.gis.mpx.config 

12import gws.lib.osx 

13import gws.lib.lock 

14import gws.lib.datetimex as datetimex 

15 

# Fallback timeout for the seeding subprocess, used by seed() when
# 'cache.seedingMaxTime' is not configured (presumably seconds -- confirm against gws.lib.osx.run).
DEFAULT_MAX_TIME = 600
# Fallback for the `-c` option passed to mapproxy-seed by seed().
DEFAULT_CONCURRENCY = 1
# Fallback tile age in seconds (7 days); seed() subtracts it from the current time
# to build the 'refresh_before' timestamp.
DEFAULT_MAX_AGE = 7 * 24 * 3600
# Fallback highest zoom level to seed; seed() seeds levels 0..DEFAULT_MAX_LEVEL inclusive.
DEFAULT_MAX_LEVEL = 3

20 

21 

class Config(gws.Config):
    """Global cache options"""

    # Read by seed() via root.app.cfg('cache.seedingMaxTime') and used as the subprocess timeout.
    seedingMaxTime: gws.Duration = '600'
    """Max. time for a seeding job."""
    # Read by seed() via root.app.cfg('cache.seedingConcurrency') and passed to mapproxy-seed as `-c`.
    seedingConcurrency: int = 1
    """Number of concurrent seeding jobs."""

29 

30 

class Grid(gws.Data):
    """Tile statistics for a single zoom level of a cache entry."""

    # NOTE(review): not assigned by the code visible in this module.
    uid: str
    # Zoom level: index of `res` in the grid's resolutions sorted in descending order.
    z: int
    # Resolution at this level, taken from the MapProxy grid's 'res' list.
    res: float
    # Number of tiles along the X axis at this level (at least 1).
    maxX: int
    # Number of tiles along the Y axis at this level (at least 1).
    maxY: int
    # maxX * maxY.
    totalTiles: int
    # Number of tile files found on disk for this level; incremented by _update_file_counts.
    cachedTiles: int

39 

40 

class Entry(gws.Data):
    """A MapProxy cache together with the layers that use it."""

    # Key of the cache in the MapProxy config's 'caches' mapping.
    uid: str
    # All layers whose 'mpxCacheUid' equals `uid`.
    layers: list[gws.Layer]
    # The MapProxy cache configuration dict for `uid`.
    mpxCache: dict
    # Per-level statistics, keyed by zoom level.
    grids: dict[int, Grid]
    # Initialized to an empty dict by _enum_entries.
    config: dict
    # NOTE(review): not assigned by the code visible in this module.
    counts: dict
    # Cache directory: '<MAPPROXY_CACHE_DIR>/<uid>_<srs without colon>'.
    dirname: str

49 

50 

class Status(gws.Data):
    """Result of status(): known cache entries plus orphaned cache directories."""

    # Cache entries found for the configured (optionally filtered) layers.
    entries: list[Entry]
    # Top-level directories in the MapProxy cache dir that belong to none of `entries`.
    staleDirs: list[str]

54 

55 

def status(root: gws.Root, layer_uids=None, with_counts=True) -> Status:
    """Collect the current state of the tile cache.

    Args:
        root: Application root object.
        layer_uids: Optional list of layer UIDs; when given, only caches of
            these layers are reported as entries.
        with_counts: If true, scan the cache directory and fill in the
            number of cached tiles per zoom level.

    Returns:
        A Status with the cache entries and the top-level cache directories
        that do not belong to any of those entries.
    """
    mpx_config = gws.gis.mpx.config.create(root)
    entries = _enum_entries(root, mpx_config, layer_uids) if mpx_config else []

    if with_counts and entries:
        _update_file_counts(entries)

    known_dirs = {entry.dirname for entry in entries}
    stale_dirs = [
        dirname
        for dirname in gws.lib.osx.find_directories(gws.c.MAPPROXY_CACHE_DIR, deep=False)
        if dirname not in known_dirs
    ]

    return Status(entries=entries, staleDirs=stale_dirs)

83 

84 

def cleanup(root: gws.Root):
    """Delete cache directories that no configured cache refers to.

    Args:
        root: Application root object.

    Returns:
        None. Every stale directory reported by status() is removed.
    """
    # tile counts are irrelevant here, so skip the directory scan
    stale_dirs = status(root, with_counts=False).staleDirs
    for dirname in stale_dirs:
        _remove_dir(dirname)

97 

98 

def drop(root: gws.Root, layer_uids=None):
    """Delete the cache directories of active caches.

    Args:
        root: Application root object.
        layer_uids: Optional list of layer UIDs; when given, only caches of
            these layers are dropped.

    Returns:
        None. The directory of every matching cache entry is removed.
    """
    # tile counts are irrelevant here, so skip the directory scan
    active_entries = status(root, layer_uids=layer_uids, with_counts=False).entries
    for entry in active_entries:
        _remove_dir(entry.dirname)

112 

# Raw bytes of a minimal PNG image (the IHDR chunk declares 1x1 pixels, 8-bit palette).
# seed() writes it to disk as the placeholder image that the patched MapProxy client
# returns when a source response is not an image.
PIXEL_PNG8 = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x03\x00\x00\x00(\xcb4\xbb\x00\x00\x00\x06PLTE\xff\xff\xff\x00\x00\x00U\xc2\xd3~\x00\x00\x00\x01tRNS\x00@\xe6\xd8f\x00\x00\x00\x0cIDATx\xdab`\x00\x080\x00\x00\x02\x00\x01OmY\xe1\x00\x00\x00\x00IEND\xaeB`\x82'

114 

def seed(root: gws.Root, entries: list[Entry], levels: list[int]):
    """Generate and populate the cache for specified layers and zoom levels.

    Builds one MapProxy seed task per cache entry, writes the MapProxy main
    and seed configurations into ``gws.c.CONFIG_DIR``, runs ``mapproxy-seed``
    under a file lock and finally chowns the cache directories to
    ``gws.c.UID:gws.c.GID``.

    Args:
        root: Application root object.
        entries: List of cache entries to seed.
        levels: List of zoom levels to generate cache for. If empty, levels
            ``0..maxLevel`` of the entry's first layer (or DEFAULT_MAX_LEVEL)
            are seeded.

    Returns:
        None. Cache is populated with generated tiles. A timeout or a
        keyboard interrupt is not raised; it is reported as
        'SEEDING INCOMPLETE' in the log.
    """
    # https://mapproxy.github.io/mapproxy/latest/seed.html#seeds
    seeds = {}

    for e in entries:
        cache_uid = e.uid

        # cache options are taken from the first layer that uses this cache
        c = e.layers[0].cache or gws.Data()
        max_age = c.get('maxAge') or DEFAULT_MAX_AGE
        max_level = c.get('maxLevel') or DEFAULT_MAX_LEVEL

        seeds[cache_uid] = dict(
            caches=[cache_uid],
            # grids=e.mpxCache['grids'],
            # must be a plain list: yaml.dump serializes a `range` (or tuple)
            # with a python-specific tag instead of a YAML sequence
            levels=list(levels or range(max_level + 1)),
            refresh_before={
                # tiles older than `max_age` seconds are re-generated
                'time': datetimex.to_iso_string(datetimex.to_utc(datetimex.add(seconds=-max_age)), with_tz=''),
            }
        )

    if not seeds:
        gws.log.info('no layers to seed')
        return

    lock_path = gws.c.CONFIG_DIR + '/mapproxy.seed.lock'

    with gws.lib.lock.SoftFileLock(lock_path) as ok:
        # a falsy value is treated as "another seeding job holds the lock"
        if not ok:
            gws.log.info('seeding already running')
            return

        mpx_config = gws.gis.mpx.config.create(root)
        mpx_config_path = gws.c.CONFIG_DIR + '/mapproxy.seed.main.yml'
        gws.u.write_file(mpx_config_path, yaml.dump(mpx_config))

        seed_config_path = gws.c.CONFIG_DIR + '/mapproxy.seed.yml'
        gws.u.write_file(seed_config_path, yaml.dump(dict(seeds=seeds)))

        max_time = root.app.cfg('cache.seedingMaxTime', default=DEFAULT_MAX_TIME)
        concurrency = root.app.cfg('cache.seedingConcurrency', default=DEFAULT_CONCURRENCY)

        # monkeypatch mapproxy to simply store an empty image in case of error
        # NOTE(review): the path is hard-coded to a python3.10 installation -- confirm it matches the runtime image
        empty_pixel_path = gws.c.CONFIG_DIR + '/mapproxy.seed.empty.png'
        gws.u.write_file_b(empty_pixel_path, PIXEL_PNG8)
        py = '/usr/local/lib/python3.10/dist-packages/mapproxy/client/http.py'
        s = gws.u.read_file(py)
        s = re.sub(r"raise HTTPClientError\('response is not an image.+", f'return ImageSource({empty_pixel_path!r})', s)
        gws.u.write_file(py, s)

        ts = gws.u.stime()
        gws.log.info(f'START SEEDING jobs={len(seeds)} {max_time=} {concurrency=}')
        gws.log.info('^C ANYTIME TO STOP...')

        cmd = f'''
            /usr/local/bin/mapproxy-seed
            -f {mpx_config_path}
            -c {concurrency}
            {seed_config_path}
        '''
        res = False
        try:
            gws.lib.osx.run(cmd, echo=True, timeout=max_time or DEFAULT_MAX_TIME)
            res = True
        except gws.lib.osx.TimeoutError:
            # deliberate: a timed-out job is reported as incomplete below
            pass
        except KeyboardInterrupt:
            # deliberate: the user may stop seeding with ^C at any time
            pass

        # best effort: hand the cache directories over to the configured uid/gid
        try:
            for p in gws.lib.osx.find_directories(gws.c.MAPPROXY_CACHE_DIR, deep=False):
                gws.lib.osx.run(f'chown -R {gws.c.UID}:{gws.c.GID} {p}', echo=True)
        except Exception as exc:
            gws.log.error(f'failed to chown cache dir: {exc!r}')

        gws.log.info(f'TIME: {gws.u.stime() - ts} sec')
        gws.log.info('SEEDING COMPLETE' if res else 'SEEDING INCOMPLETE, PLEASE TRY AGAIN')

201 

202 

def store_in_web_cache(url: str, img: bytes):
    """Store an image in the web cache.

    The image is first written to a randomly named temporary file in the
    target directory and then renamed to its final name, so that readers
    never see a partially written file.

    Args:
        url: URL path to use as the cache key; it is appended to
            ``gws.c.FASTCACHE_DIR`` to form the file path.
        img: Binary image data to store.

    Returns:
        None. Image is stored in the cache. An OSError is logged as a
        warning and not raised.
    """
    path = gws.c.FASTCACHE_DIR + url
    dirname = os.path.dirname(path)
    tmp = dirname + '/' + gws.u.random_string(64)
    try:
        os.makedirs(dirname, 0o755, exist_ok=True)
        gws.u.write_file_b(tmp, img)
        os.rename(tmp, path)
    except OSError:
        gws.log.warning(f'store_in_web_cache FAILED path={path!r}')
        # do not leave a (possibly partial) temporary file behind
        try:
            os.unlink(tmp)
        except OSError:
            pass

222 

223 

def _update_file_counts(entries: list[Entry]):
    """Update cached tile counts for each entry.

    Scans all files below ``gws.c.MAPPROXY_CACHE_DIR`` and, for every tile
    file located in an entry's directory, increments ``cachedTiles`` of the
    entry's grid for the tile's zoom level. Files that do not look like
    tiles and zoom levels unknown to the entry are ignored.

    Args:
        entries: List of cache entries to update.

    Returns:
        None. The entries are updated in-place.
    """
    # we use the mp layout all the way: zz/xxxx/xxxx/yyyy/yyyy.format
    tile_re = re.compile(r'(\d+)/(\d+)/(\d+)/(\d+)/(\d+)\.png$')

    # match on 'dirname/' so that a directory whose name merely starts
    # with another entry's dirname is not counted for that entry
    prefixes = [(e.dirname.rstrip('/') + '/', e) for e in entries]

    for path in gws.lib.osx.find_files(gws.c.MAPPROXY_CACHE_DIR):
        m = tile_re.search(path)
        if not m:
            # not a tile (e.g. a lock or metadata file)
            continue

        z = int(m.group(1))

        for prefix, e in prefixes:
            if not path.startswith(prefix):
                continue
            g = e.grids.get(z)
            if g:
                g.cachedTiles += 1

247 

248 

def _enum_entries(root: gws.Root, mpx_config: dict, layer_uids=None) -> list[Entry]:
    """Build the list of cache entries for the configured layers.

    A layer belongs to a cache if its 'mpxCacheUid' equals the cache key in
    ``mpx_config['caches']``; caches with 'disable_storage' set are skipped.
    Layers that share a cache are collected in a single entry.

    Args:
        root: Application root object.
        mpx_config: MapProxy configuration dictionary.
        layer_uids: Optional list of layer UIDs to filter by.

    Returns:
        List of cache Entry objects, one per cache in use.
    """
    by_uid: dict[str, Entry] = {}

    for layer in root.find_all(gws.ext.object.layer):
        if layer_uids and layer.uid not in layer_uids:
            continue

        for cache_uid, cache_cfg in mpx_config['caches'].items():
            if cache_cfg.get('disable_storage') or gws.u.get(layer, 'mpxCacheUid') != cache_uid:
                continue

            if cache_uid in by_uid:
                # cache already known: just register one more layer for it
                by_uid[cache_uid].layers.append(layer)
                continue

            grid_cfgs = [mpx_config['grids'][grid_uid] for grid_uid in cache_cfg['grids']]
            # the directory name carries the srs of the first grid, without the colon
            crs_tag = grid_cfgs[0]['srs'].replace(':', '')

            entry = Entry(
                uid=cache_uid,
                layers=[layer],
                mpxCache=cache_cfg,
                grids={},
                config={},
                dirname=f'{gws.c.MAPPROXY_CACHE_DIR}/{cache_uid}_{crs_tag}',
            )

            for grid_cfg in grid_cfgs:
                # see _calc_grids in mapproxy/grid.py
                bbox = grid_cfg['bbox']
                width = bbox[2] - bbox[0]
                height = bbox[3] - bbox[1]
                tile_size = grid_cfg['tile_size']

                # level 0 is the coarsest (largest) resolution
                resolutions = sorted(grid_cfg['res'], reverse=True)
                for level, resolution in enumerate(resolutions):
                    tiles_x = max(math.ceil(width // resolution / tile_size[0]), 1)
                    tiles_y = max(math.ceil(height // resolution / tile_size[1]), 1)
                    entry.grids[level] = Grid(
                        z=level,
                        res=resolution,
                        maxX=tiles_x,
                        maxY=tiles_y,
                        totalTiles=tiles_x * tiles_y,
                        cachedTiles=0,
                    )

            by_uid[cache_uid] = entry

    return list(by_uid.values())

308 

309 

def _remove_dir(dirname: str):
    """Recursively delete a directory via ``rm -fr`` and log the removal.

    Args:
        dirname: Path to the directory to remove.

    Returns:
        None. The directory is removed from the filesystem.
    """
    gws.lib.osx.run(['rm', '-fr', dirname], echo=True)
    gws.log.info(f'removed {dirname}')