Coverage for gws-app/gws/lib/pdf/__init__.py: 70%

53 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-16 22:59 +0200

1"""PDF utilities.""" 

2 

3import pypdf 

4import gws.lib.mime 

5import gws.lib.osx 

6import gws.lib.image 

7 

8 

def overlay(a_path: str, b_path: str, out_path: str) -> str:
    """Overlay two pdfs page-wise.

    Page ``n`` of pdf b is merged on top of page ``n`` of pdf a. Pages of
    pdf a that have no counterpart in pdf b are copied unchanged; surplus
    pages of pdf b are ignored.

    Args:
        a_path: Path to pdf a.
        b_path: Path to pdf b, which will be placed on top.
        out_path: Path to the output pdf.

    Returns:
        Path to the output pdf.
    """

    # Context managers guarantee both inputs are closed even if reading,
    # merging or writing fails. The inputs are deliberately kept open until
    # the output has been written.
    with open(a_path, 'rb') as fa, open(b_path, 'rb') as fb:
        ra = pypdf.PdfReader(fa)
        rb = pypdf.PdfReader(fb)

        # number of pages available for overlaying, computed once
        b_count = len(rb.pages)

        w = pypdf.PdfWriter()

        for n, page in enumerate(ra.pages):
            if n < b_count:
                # https://github.com/py-pdf/pypdf/issues/2139
                page.transfer_rotation_to_content()
                page.merge_page(rb.pages[n])
            w.add_page(page)

        with open(out_path, 'wb') as out_fp:
            w.write(out_fp)

    return out_path

48 

49 

def concat(paths: list[str], out_path: str) -> str:
    """Concatenate multiple pdfs into one.

    Args:
        paths: Paths to the pdfs.
        out_path: Path to the output pdf.

    Returns:
        Path to the concatenated pdf. If exactly one path is given, that
        path is returned as is and nothing is written to ``out_path``.
    """

    import contextlib

    # only one path given - just return it
    if len(paths) == 1:
        return paths[0]

    # The ExitStack closes every input that was successfully opened, even if
    # a later open(), the reading or the writing raises.
    with contextlib.ExitStack() as stack:
        files = [stack.enter_context(open(p, 'rb')) for p in paths]

        # NB: readers must be kept around until the writer is done
        readers = [pypdf.PdfReader(fp) for fp in files]

        w = pypdf.PdfWriter()

        for r in readers:
            w.append_pages_from_reader(r)

        with open(out_path, 'wb') as out_fp:
            w.write(out_fp)

    return out_path

82 

83 

def page_count(path: str) -> int:
    """Count the pages of a pdf.

    Args:
        path: Path to the pdf.

    Returns:
        The number of pages in the pdf.
    """

    with open(path, 'rb') as pdf_file:
        return len(pypdf.PdfReader(pdf_file).pages)

94 

95 

def to_image_path(
    in_path: str,
    out_path: str,
    size: gws.Size,
    mime: str = gws.lib.mime.PNG,
    page: int = 1,
) -> str:
    """Render a single pdf page to an image file using the ``gs`` command.

    Args:
        in_path: Path to the input pdf.
        out_path: Path to the output image.
        size: Size (width, height) of the output image, passed to ``gs``
            as device width/height points.
        mime: Mime type of the output image. Must be either PNG or JPEG.
        page: Page number to convert (1-indexed). Defaults to 1.

    Returns:
        Path to the output image.

    Raises:
        ValueError: If ``mime`` is neither PNG nor JPEG.
    """

    # pick the gs output device matching the requested mime type
    is_png = mime == gws.lib.mime.PNG
    is_jpeg = mime == gws.lib.mime.JPEG
    if not is_png and not is_jpeg:
        raise ValueError(f'invalid mime type {mime!r}')
    device = 'png16m' if is_png else 'jpeg'

    width, height = size

    # first and last page are the same, so exactly one page is rendered
    args = ['gs', '-q', '-dNOPAUSE', '-dBATCH']
    args.append(f'-dFirstPage={page}')
    args.append(f'-dLastPage={page}')
    args.append(f'-dDEVICEWIDTHPOINTS={width}')
    args.append(f'-dDEVICEHEIGHTPOINTS={height}')
    args.append('-dPDFFitPage=true')
    args.append(f'-sDEVICE={device}')
    args.append('-dTextAlphaBits=4')
    args.append('-dGraphicsAlphaBits=4')
    args.append(f'-sOutputFile={out_path}')
    args.append(f'{in_path}')

    gws.log.debug(' '.join(args))
    gws.lib.osx.run(args)

    return out_path