[reportlab-users] including existing PDF files

Dirk Holtwick holtwick at spirito.de
Thu Jan 19 06:04:26 EST 2006


Hi Joeri,

I have run into the same problem. There is a commercial tool from ReportLab
called PageCatcher which does everything you need.

But there are some other ways to do similar things:

1. Use the very new pyPDF: http://stompstompstomp.com/pyPdf/

2. Use pdfTK command line tool: http://www.accesspdf.com/pdftk/
   (My Python wrapper is attached to this mail)

3. Write a Python wrapper for iText ;-) http://www.lowagie.com/iText/

Dirk

Joeri van Ruth wrote:
> I would like to be able to include existing PDF files in my generated
> documents.  That is, I would like to generate a report and include
> existing documents as an appendix.
> 
> Is it possible to do this using ReportLab?  I could not find a suitable
> operation (readPDF?) in the manual.  Did I miss anything?
> 
> The best I could come up with is writing the document into a file and
> then using external tools (pdf2ps, psmerge, ps2pdf) to merge them into
> a single document.
> 
> Does anyone on this list know a better way?
> 
> Joeri

=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

spirito GmbH
http://www.spirito.de
-------------- next part --------------
# -*- coding: ISO-8859-1 -*-
#############################################
## (C)opyright by Dirk Holtwick, 2005      ##
## All rights reserved                     ##
#############################################

# Module metadata.  The "$Keyword: ... $" values look like version-control
# keyword expansions -- presumably maintained by the VCS, not edited by hand.
__version__ = "$Revision: 1.10 $"
__author__  = "$Author: holtwick $"
__date__    = "$Date: 2005/11/08 15:08:57 $"

import os
import os.path
import popen2
import string
import subprocess
import sys
import tempfile

# Shared bookkeeping for all PDFTKFile instances.
# Directory (created with tempfile.mkdtemp) that holds the temporary files.
_tempdir = None
# Number of PDFTKFile instances currently alive; the directory is created
# when this is <= 0 and removed again when it drops back to <= 0.
_tempopen = 0
# Ever-increasing counter used to give each temporary file a unique name.
_tempcounter = 0

# When true, PDFTK._build_cmd prints the command line before running it.
_debug = 0

class PDFTKFile:
    """Temporary file inside a scratch directory shared by all instances.

    The first live instance creates the directory with tempfile.mkdtemp();
    every instance reserves a unique file name inside it.  When an instance
    is garbage collected its file is deleted, and the directory is removed
    once the last live instance has gone.

    str(instance) yields the full path, so an instance can be dropped
    straight into a command line.
    """

    def __init__(self, suffix="pdf"):
        """Reserve a unique file name (the file itself is not created).

        suffix -- file name extension, without the leading dot.
        """
        global _tempdir, _tempopen, _tempcounter
        # Initialise these first so __del__ stays harmless even when
        # mkdtemp() below raises and the object is only half constructed.
        self.filehandle = None
        self.filename = None
        self._released = True
        if _tempopen <= 0:
            _tempdir = tempfile.mkdtemp()
        self.filename = os.path.join(
            _tempdir,
            "temp%09d.%s" % (_tempcounter, suffix))
        _tempopen += 1
        _tempcounter += 1
        self._released = False

    def open(self, mode="wb"):
        """Open the temporary file and return the file object.

        A handle left over from an earlier call is closed first; otherwise
        it would leak and, on Windows, keep the file from being deleted.
        """
        self._close_handle()
        self.filehandle = open(self.filename, mode)
        return self.filehandle

    def __str__(self):
        """Return the full path of the temporary file."""
        return self.filename

    def _close_handle(self):
        """Close the current file handle, if any, ignoring I/O errors."""
        handle = self.filehandle
        self.filehandle = None
        if handle is not None:
            try:
                handle.close()
            except (IOError, OSError):
                pass

    def _release(self):
        """Delete the file and, for the last instance, the directory.

        Safe to call more than once: only the first call has an effect, so
        the live-instance counter can never be decremented twice.
        """
        global _tempdir, _tempopen
        if getattr(self, "_released", True):
            return
        self._released = True
        self._close_handle()
        try:
            os.unlink(self.filename)
        except OSError:
            # The file was never created or has already been removed.
            pass
        _tempopen -= 1
        if _tempopen <= 0:
            try:
                os.rmdir(_tempdir)
            except OSError:
                # Directory already gone or not empty: leave it alone.
                pass

    def __del__(self):
        try:
            self._release()
        except Exception:
            # A destructor must never raise.  During interpreter shutdown
            # the module globals used by _release() may already be gone.
            pass

class PDFTK:
    """Builds and runs a single pdftk command.

    Typical use: create an instance, register inputs with set_input() /
    add_input(), optionally choose an operation with set_cat() or
    set_background(), then call generate().  Another PDFTK instance may be
    used wherever a file name is accepted; it is rendered to a temporary
    file first.

    The command is passed to pdftk as an argument list (no shell), so file
    names and passwords containing spaces or shell metacharacters are
    handed over unchanged.
    """

    # Permission keywords accepted by set_allow().
    Printing = "Printing"
    DegradedPrinting = "DegradedPrinting"
    ModifyContents = "ModifyContents"
    Assembly = "Assembly"
    CopyContents = "CopyContents"
    ScreenReaders = "ScreenReaders"
    ModifyAnnotations = "ModifyAnnotations"
    FillIn = "FillIn"
    AllFeatures = "AllFeatures"

    # Values for the "encrypt" constructor argument.
    Encrypt40Bit = 40
    Encrypt128Bit = 128

    def __init__(self,
                 path="",
                 compress=False,
                 user_pw="",
                 owner_pw="",
                 encrypt=0,
                 log=None
                 ):
        """Create a command builder.

        path     -- pdftk executable; when empty a few well-known
                    locations are probed.
        compress -- true to pass "compress", otherwise "uncompress".
        user_pw  -- user password for the output, "" for none.
        owner_pw -- owner password for the output, "" for none.
        encrypt  -- Encrypt40Bit, Encrypt128Bit or 0 for no explicit
                    encryption strength.
        log      -- any true value makes log() print its messages.
        """
        if not path:
            if sys.platform == "win32":
                candidates = [r"c:\command\pdftk.exe"]
            else:
                candidates = ["/usr/bin/pdftk", "/usr/local/bin/pdftk"]
            for candidate in candidates:
                if os.path.exists(candidate):
                    path = candidate
                    break
        self._pdftkpath = path
        self._input = []        # "HANDLE=filename" entries, in order
        self._input_pw = []     # "HANDLE=password" entries
        self._output = ""
        self._compress = compress
        self._owner_pw = owner_pw
        self._user_pw = user_pw
        self._encrypt = encrypt
        self._error = ""
        self._allow = []        # permission keywords for "allow"
        self._icount = 0        # number of handles handed out so far
        self._background = ""
        self._cat = ""
        self._tmp = []          # keeps temporary input files alive
        self._log = log

    def log(self, s):
        """Print s when logging was enabled in the constructor."""
        if self._log:
            print(s)

    def test(self):
        """Return True when the configured pdftk executable exists."""
        return os.path.exists(self._pdftkpath)

    def _popen(self, command, catch_output=1):
        """Run command and return the tuple (stdout, stderr).

        command      -- argument list (run without a shell) or, for
                        backward compatibility, a shell command string.
        catch_output -- kept for backward compatibility; both output
                        streams are always captured.

        The same rule applies on every platform: a non-empty second item
        means the call failed.  If the process exits with a non-zero
        status without writing to stderr, or cannot be started at all, an
        explanatory message is returned in its place.
        """
        if isinstance(command, (list, tuple)):
            shown = " ".join(command)
            use_shell = False
        else:
            shown = command
            use_shell = True
        self.log(shown)
        out = err = ""
        try:
            process = subprocess.Popen(
                command,
                shell=use_shell,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
            # communicate() closes stdin and drains both pipes together,
            # which avoids the deadlock of reading them one after another.
            out, err = process.communicate()
            if process.returncode != 0 and not err:
                err = "pdftk exited with status %d" % process.returncode
        except OSError:
            # Executable missing or not runnable.
            err = str(sys.exc_info()[1])
        self.log("result size: %d" % len(out))
        self.log(err)
        return out, err

    def _build_args(self):
        """Return the pdftk argument list for the current settings."""
        args = [self._pdftkpath]
        args.extend(self._input)
        if self._input_pw:
            args.append("input_pw")
            args.extend(self._input_pw)
        if self._background:
            args.extend(["background", self._background])
        elif self._cat:
            # The cat specification is a space separated list of ranges.
            args.append("cat")
            args.extend(self._cat.split())
        args.extend(["output", self._output or "-"])
        if self._encrypt == 40:
            args.append("encrypt_40bit")
        if self._encrypt == 128:
            args.append("encrypt_128bit")
        if self._allow:
            args.append("allow")
            args.extend(self._allow)
        if self._owner_pw:
            args.extend(["owner_pw", self._owner_pw])
        if self._user_pw:
            args.extend(["user_pw", self._user_pw])
        if self._compress:
            args.append("compress")
        else:
            args.append("uncompress")
        args.append("dont_ask")
        return args

    def _build_cmd(self):
        """Run pdftk with the current settings and return its stdout.

        Raises Exception, carrying the command line and the error text,
        when the run fails.
        """
        args = self._build_args()
        shown = " ".join(args)
        if _debug:
            print("SYSTEM: %s" % shown)
        out, err = self._popen(args, not self._output)
        if err:
            raise Exception("%s\n%s" % (shown, err))
        return out

    def _file(self, f):
        """Return a file name for f.

        A PDFTK instance (or subclass instance) is rendered into a
        temporary file, which stays alive as long as this object does;
        anything else is returned unchanged.
        """
        if isinstance(f, PDFTK):
            nf = PDFTKFile()
            self._tmp.append(nf)
            f.generate(str(nf))
            return str(nf)
        return f

    def set_input(self, f, password=""):
        """Make f the only input and return its handle (always "A").

        Earlier inputs, their passwords and any cat specification are
        discarded, and handle numbering starts over.
        """
        self._input = []
        self._input_pw = []
        self._icount = 0
        self._cat = ""
        return self.add_input(f, password)

    def add_input(self, f, password=""):
        """Append f as a further input and return its handle letter.

        f        -- file name or PDFTK instance.
        password -- password of the input PDF, passed via "input_pw".

        With a single input no cat operation is set.  From the second
        input on, the pages of all inputs are concatenated in order unless
        a specification was given with set_cat(), in which case the new
        handle is appended to it.

        Raises IndexError when more than 26 inputs are added.
        """
        if self._icount >= len(string.ascii_uppercase):
            raise IndexError("pdftk input handles are limited to A-Z")
        name = string.ascii_uppercase[self._icount]
        # Resolve the input first: rendering a nested PDFTK may fail, and
        # the handle bookkeeping must not change in that case.
        source = self._file(f)
        self._icount += 1
        self._input.append("%s=%s" % (name, source))
        if password:
            self._input_pw.append("%s=%s" % (name, password))
        if self._cat:
            self._cat += " " + name
        elif len(self._input) > 1:
            self._cat = " ".join(
                [entry.split("=", 1)[0] for entry in self._input])
        return name

    def set_allow(self, *permissions):
        """Set the permissions passed to pdftk's "allow" option.

        permissions -- any of the permission constants of this class;
                       calling without arguments clears the list.
        """
        self._allow = list(permissions)

    def set_compress(self, value=True):
        """Choose between "compress" (true) and "uncompress" (false)."""
        self._compress = value

    def set_owner_pw(self, value=""):
        """Set the owner password of the output ("" for none)."""
        self._owner_pw = value

    def set_user_pw(self, value=""):
        """Set the user password of the output ("" for none)."""
        self._user_pw = value

    def set_background(self, f):
        """Use f (file name or PDFTK instance) as background.

        A background takes precedence over any cat specification.
        """
        self._background = self._file(f)

    def set_cat(self, cat):
        """Set the cat specification, e.g. "A1 B"."""
        self._cat = cat

    set_watermark = set_background

    def generate(self, f=""):
        """Run pdftk and return what it wrote to stdout.

        f -- output file name; when empty the PDF is written to stdout
             and therefore returned.

        Raises Exception when pdftk reports an error.
        """
        self._output = f
        return self._build_cmd()

def _test():
    """Manual smoke test for PDFTKFile.

    Creates two temporary files, prints the path of the second, writes
    to the first and then waits for RETURN.
    """
    written = PDFTKFile()
    untouched = PDFTKFile()

    print(untouched)
    handle = written.open()
    handle.write("test")
    handle.flush()

    # Block until the user presses RETURN.
    raw_input("wait for RETURN")

def _test_first_bg():
    """Manual demo chaining three PDFTK jobs.

    Takes the first page of blank.pdf, puts organisationsplan.pdf behind
    it as background, appends blank.pdf, writes the result to test.pdf and
    opens that file.
    """
    first_page = PDFTK()
    first_page.set_input("blank.pdf")
    first_page.set_cat("A1")

    with_background = PDFTK()
    with_background.set_input(first_page)
    with_background.set_background("organisationsplan.pdf")

    combined = PDFTK()
    combined.set_input(with_background)
    combined.add_input("blank.pdf")
    combined.set_cat("A B")

    combined.generate("test.pdf")
    # "start" is the Windows way to launch the viewer; an "open test.pdf"
    # variant existed for other systems but is not used here.
    os.system("start test.pdf")

if __name__ == "__main__":
    # Executed as a script: run the background/concatenation demo.
    # A quicker check of the installation would be
    # PDFTK(path="c:\\command\\pdftk.exe").test()
    _test_first_bg()


More information about the reportlab-users mailing list