[reportlab-users] TTF unicode support patch

Mon Apr 4 06:47:08 EDT 2005

Here the examples.  See my comment in "textobject.py"

andy at reportlab.com wrote:

> Can you send us a small, 'Hello World'-type script which
> says something in Thai, and also tell us what fonts or
> Acrobat reader plugins are needed to view it?  I would be
> happy to add this to the test suite immediately and to try
> out this patch.  We have to be very careful though, and
> make sure it does not affect any customer applications.
>
> Best Regards,
>
> Andy Robinson

-------------- next part --------------
A non-text attachment was scrubbed...
Name: hello.pdf
Type: application/pdf
Size: 11271 bytes
Desc: not available
Url : http://two.pairlist.net/pipermail/reportlab-users/attachments/20050404/33b40150/hello-0001.pdf
-------------- next part --------------
A non-text attachment was scrubbed...
Name: hello.gif
Type: image/gif
Size: 4062 bytes
Desc: not available
Url : http://two.pairlist.net/pipermail/reportlab-users/attachments/20050404/33b40150/hello-0001.gif
-------------- next part --------------
#Copyright ReportLab Europe Ltd. 2000-2004
#see license.txt for license details
#history http://www.reportlab.co.uk/cgi-bin/viewcvs.cgi/public/reportlab/trunk/reportlab/pdfgen/textobject.py
__version__=''' $Id: textobject.py 2395 2004-06-25 18:10:50Z rgbecker $ '''
__doc__="""
PDFTextObject is an efficient way to add text to a Canvas. Do not
instantiate directly, obtain one from the Canvas instead.

Progress Reports:
8.83, 2000-01-13, gmcm:
    created from pdfgen.py
"""

import string
from types import *
from reportlab.lib import colors
from reportlab.lib.colors import ColorType
from reportlab.lib.utils import fp_str
from reportlab.pdfbase import pdfmetrics

_SeqTypes=(TupleType,ListType)

class PDFTextObject:
    """PDF logically separates text and graphics drawing; text
    operations need to be bracketed between BT (Begin text) and
    ET operators. This class ensures text operations are
    properly encapusalted. Ask the canvas for a text object
    with beginText(x, y).  Do not construct one directly.
    Do not use multiple text objects in parallel; PDF is
    not multi-threaded!

    It keeps track of x and y coordinates relative to its origin."""

    def __init__(self, canvas, x=0,y=0):
        self._code = ['BT']    #no point in [] then append RGB
        self._canvas = canvas  #canvas sets this so it has access to size info
        self._fontname = self._canvas._fontname
        self._fontsize = self._canvas._fontsize
        self._leading = self._canvas._leading
        font = pdfmetrics.getFont(self._fontname)
        self._dynamicFont = getattr(font, '_dynamicFont', 0)
        self._curSubset = -1

        self.setTextOrigin(x, y)

    def getCode(self):
        "pack onto one line; used internally"
        self._code.append('ET')
        return string.join(self._code, ' ')

    def setTextOrigin(self, x, y):
        if self._canvas.bottomup:
            self._code.append('1 0 0 1 %s Tm' % fp_str(x, y)) #bottom up
        else:
            self._code.append('1 0 0 -1 %s Tm' % fp_str(x, y))  #top down

        # The current cursor position is at the text origin
        self._x0 = self._x = x
        self._y0 = self._y = y

    def setTextTransform(self, a, b, c, d, e, f):
        "Like setTextOrigin, but does rotation, scaling etc."
        if not self._canvas.bottomup:
            c = -c    #reverse bottom row of the 2D Transform
            d = -d
        self._code.append('%s Tm' % fp_str(a, b, c, d, e, f))

        # The current cursor position is at the text origin Note that
        # we aren't keeping track of all the transform on these
        # coordinates: they are relative to the rotations/sheers
        # defined in the matrix.
        self._x0 = self._x = e
        self._y0 = self._y = f

    def moveCursor(self, dx, dy):

        """Starts a new line at an offset dx,dy from the start of the
        current line. This does not move the cursor relative to the
        current position, and it changes the current offset of every
        future line drawn (i.e. if you next do a textLine() call, it
        will move the cursor to a position one line lower than the
        position specificied in this call.  """

        # Check if we have a previous move cursor call, and combine
        # them if possible.
        if self._code and self._code[-1][-3:]==' Td':
            L = string.split(self._code[-1])
            if len(L)==3:
                del self._code[-1]
            else:
                self._code[-1] = string.join(L[:-4])

            # Work out the last movement
            lastDx = float(L[-3])
            lastDy = float(L[-2])

            # Combine the two movement
            dx += lastDx
            dy -= lastDy

            # We will soon add the movement to the line origin, so if
            # we've already done this for lastDx, lastDy, remove it
            # first (so it will be right when added back again).
            self._x0 -= lastDx
            self._y0 -= lastDy

        # Output the move text cursor call.
        self._code.append('%s Td' % fp_str(dx, -dy))

        # Keep track of the new line offsets and the cursor position
        self._x0 += dx
        self._y0 += dy
        self._x = self._x0
        self._y = self._y0

    def setXPos(self, dx):
        """Starts a new line dx away from the start of the
        current line - NOT from the current point! So if
        you call it in mid-sentence, watch out."""
        self.moveCursor(dx,0)

    def getCursor(self):
        """Returns current text position relative to the last origin."""
        return (self._x, self._y)

    def getStartOfLine(self):
        """Returns a tuple giving the text position of the start of the
        current line."""
        return (self._x0, self._y0)

    def getX(self):
        """Returns current x position relative to the last origin."""
        return self._x

    def getY(self):
        """Returns current y position relative to the last origin."""
        return self._y

    def _setFont(self, psfontname, size):
        """Sets the font and fontSize
        Raises a readable exception if an illegal font
        is supplied.  Font names are case-sensitive! Keeps track
        of font anme and size for metrics."""
        self._fontname = psfontname
        self._fontsize = size
        font = pdfmetrics.getFont(self._fontname)
        self._dynamicFont = getattr(font, '_dynamicFont', 0)
        if self._dynamicFont:
            self._curSubset = -1
        else:
            pdffontname = self._canvas._doc.getInternalFontName(psfontname)
            self._code.append('%s %s Tf' % (pdffontname, fp_str(size)))

    def setFont(self, psfontname, size, leading = None):
        """Sets the font.  If leading not specified, defaults to 1.2 x
        font size. Raises a readable exception if an illegal font
        is supplied.  Font names are case-sensitive! Keeps track
        of font anme and size for metrics."""
        self._fontname = psfontname
        self._fontsize = size
        if leading is None:
            leading = size * 1.2
        self._leading = leading
        font = pdfmetrics.getFont(self._fontname)
        self._dynamicFont = getattr(font, '_dynamicFont', 0)
        if self._dynamicFont:
            self._curSubset = -1
        else:
            pdffontname = self._canvas._doc.getInternalFontName(psfontname)
            self._code.append('%s %s Tf %s TL' % (pdffontname, fp_str(size), fp_str(leading)))

    def setCharSpace(self, charSpace):
         """Adjusts inter-character spacing"""
         self._charSpace = charSpace
         self._code.append('%s Tc' % fp_str(charSpace))

    def setWordSpace(self, wordSpace):
        """Adjust inter-word spacing.  This can be used
        to flush-justify text - you get the width of the
        words, and add some space between them."""
        self._wordSpace = wordSpace
        self._code.append('%s Tw' % fp_str(wordSpace))

    def setHorizScale(self, horizScale):
        "Stretches text out horizontally"
        self._horizScale = 100 + horizScale
        self._code.append('%s Tz' % fp_str(horizScale))

    def setLeading(self, leading):
        "How far to move down at the end of a line."
        self._leading = leading
        self._code.append('%s TL' % fp_str(leading))

    def setTextRenderMode(self, mode):
        """Set the text rendering mode.

        0 = Fill text
        1 = Stroke text
        2 = Fill then stroke
        3 = Invisible
        4 = Fill text and add to clipping path
        5 = Stroke text and add to clipping path
        6 = Fill then stroke and add to clipping path
        7 = Add to clipping path"""

        assert mode in (0,1,2,3,4,5,6,7), "mode must be in (0,1,2,3,4,5,6,7)"
        self._textRenderMode = mode
        self._code.append('%d Tr' % mode)

    def setRise(self, rise):
        "Move text baseline up or down to allow superscrip/subscripts"
        self._rise = rise
        self._y = self._y - rise    # + ?  _textLineMatrix?
        self._code.append('%s Ts' % fp_str(rise))

    def setStrokeColorRGB(self, r, g, b):
        self._strokeColorRGB = (r, g, b)
        self._code.append('%s RG' % fp_str(r,g,b))

    def setFillColorRGB(self, r, g, b):
        self._fillColorRGB = (r, g, b)
        self._code.append('%s rg' % fp_str(r,g,b))

    def setFillColorCMYK(self, c, m, y, k):
        """Takes 4 arguments between 0.0 and 1.0"""
        self._fillColorCMYK = (c, m, y, k)
        self._code.append('%s k' % fp_str(c, m, y, k))

    def setStrokeColorCMYK(self, c, m, y, k):
        """Takes 4 arguments between 0.0 and 1.0"""
        self._strokeColorCMYK = (c, m, y, k)
        self._code.append('%s K' % fp_str(c, m, y, k))

    def setFillColor(self, aColor):
        """Takes a color object, allowing colors to be referred to by name"""
        if type(aColor) == ColorType:
            rgb = (aColor.red, aColor.green, aColor.blue)
            self._fillColorRGB = rgb
            self._code.append('%s rg' % fp_str(rgb) )
        elif type(aColor) in _SeqTypes:
            l = len(aColor)
            if l==3:
                self._fillColorRGB = aColor
                self._code.append('%s rg' % fp_str(aColor) )
            elif l==4:
                self.setFillColorCMYK(self, aColor[0], aColor[1], aColor[2], aColor[3])
            else:
                raise 'Unknown color', str(aColor)
        else:
            raise 'Unknown color', str(aColor)

    def setStrokeColor(self, aColor):
        """Takes a color object, allowing colors to be referred to by name"""
        if type(aColor) == ColorType:
            rgb = (aColor.red, aColor.green, aColor.blue)
            self._strokeColorRGB = rgb
            self._code.append('%s RG' % fp_str(rgb) )
        elif type(aColor) in _SeqTypes:
            l = len(aColor)
            if l==3:
                self._strokeColorRGB = aColor
                self._code.append('%s RG' % fp_str(aColor) )
            elif l==4:
                self.setStrokeColorCMYK(self, aColor[0], aColor[1], aColor[2], aColor[3])
            else:
                raise 'Unknown color', str(aColor)
        else:
            raise 'Unknown color', str(aColor)

    def setFillGray(self, gray):
        """Sets the gray level; 0.0=black, 1.0=white"""
        self._fillColorRGB = (gray, gray, gray)
        self._code.append('%s g' % fp_str(gray))

    def setStrokeGray(self, gray):
        """Sets the gray level; 0.0=black, 1.0=white"""
        self._strokeColorRGB = (gray, gray, gray)
        self._code.append('%s G' % fp_str(gray))

    ########################################################################################################################

    def _formatText(self, text):
        "Generates PDF text output operator(s)"
        if self._dynamicFont:
            #it's a truetype font and should be utf8.  If an error is raised,

            # Sakesun: if it's unicode, why not convert it to utf-8 here ?
            if isinstance(text, unicode): text = text.encode('utf-8')

            results = []
            font = pdfmetrics.getFont(self._fontname)
            try: #assume UTF8
                stuff = font.splitString(text, self._canvas._doc)
            except UnicodeDecodeError:

                # Sakesun: I don't like the idea of assuming latin1 right here.
                #   Python has a mechanism that allow users to set their own default-encoding
                #   through sitecustomize.py. Why shouldn't we try that first before give up
                #   to latin1 ?

                #assume latin1 as fallback
                #from reportlab.pdfbase.ttfonts import latin1_to_utf8
                #from reportlab.lib.logger import warnOnce
                #warnOnce('non-utf8 data fed to truetype font, assuming latin-1 data')
                #text = latin1_to_utf8(text)
                #stuff = font.splitString(text, self._canvas._doc)

                # Sakesun: Try default-encoding (if any), then convert it to utf-8.
                try:
                    # unicode(text) will convert the text to unicode with python's defaultencoding.
                    text = unicode(text).encode('utf-8')
                    stuff = font.splitString(text, self._canvas._doc)
                except UnicodeDecodeError:
                    # Sakesun: Ok. default-encoding is not working, then it's fine to try latin1.
                    from reportlab.pdfbase.ttfonts import latin1_to_utf8
                    from reportlab.lib.logger import warnOnce
                    warnOnce('non-utf8 data fed to truetype font, assuming latin-1 data')
                    text = latin1_to_utf8(text)
                    stuff = font.splitString(text, self._canvas._doc)

            for subset, chunk in stuff:
                if subset != self._curSubset:
                    pdffontname = font.getSubsetInternalName(subset, self._canvas._doc)
                    results.append("%s %s Tf %s TL" % (pdffontname, fp_str(self._fontsize), fp_str(self._leading)))
                    self._curSubset = subset
                chunk = self._canvas._escape(chunk)
                results.append("(%s) Tj" % chunk)
            return string.join(results, ' ')
        else:
            text = self._canvas._escape(text)
            return "(%s) Tj" % text

    ########################################################################################################################

    def _textOut(self, text, TStar=0):
        "prints string at current point, ignores text cursor"
        self._code.append('%s%s' % (self._formatText(text), (TStar and ' T*' or '')))

    def textOut(self, text):
        """prints string at current point, text cursor moves across."""
        self._x = self._x + self._canvas.stringWidth(text, self._fontname, self._fontsize)
        self._code.append(self._formatText(text))

    def textLine(self, text=''):
        """prints string at current point, text cursor moves down.
        Can work with no argument to simply move the cursor down."""

        # Update the coordinates of the cursor
        self._x = self._x0
        if self._canvas.bottomup:
            self._y = self._y - self._leading
        else:
            self._y = self._y + self._leading

        # Update the location of the start of the line
        # self._x0 is unchanged
        self._y0 = self._y

        # Output the text followed by a PDF newline command
        self._code.append('%s T*' % self._formatText(text))

    def textLines(self, stuff, trim=1):
        """prints multi-line or newlined strings, moving down.  One
        comon use is to quote a multi-line block in your Python code;
        since this may be indented, by default it trims whitespace
        off each line and from the beginning; set trim=0 to preserve
        whitespace."""
        if type(stuff) == StringType:
            lines = string.split(string.strip(stuff), '\n')
            if trim==1:
                lines = map(string.strip,lines)
        elif type(stuff) == ListType:
            lines = stuff
        elif type(stuff) == TupleType:
            lines = stuff
        else:
            assert 1==0, "argument to textlines must be string,, list or tuple"

        # Output each line one at a time. This used to be a long-hand
        # copy of the textLine code, now called as a method.
        for line in lines:
            self.textLine(line)

    def __nonzero__(self):
        'PDFTextObject is true if it has something done after the init'
        return self._code != ['BT']
-------------- next part --------------
#!/usr/bin/env python
# -*- coding: cp874 -*-

from reportlab.pdfgen import canvas
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont

THAI_HELLO = u"\u0e2a\u0e27\u0e31\u0e2a\u0e14\u0e35\u0e04\u0e23\u0e31\u0e1a'"

pdfmetrics.registerFont(TTFont('Tahoma', 'Tahoma.ttf'))

def hello(c):
    c.setFont('Tahoma', 30)
    c.drawString(100,500, '"%s" is "Hello"' % THAI_HELLO)

c = canvas.Canvas("hello.pdf")
hello(c)
c.showPage()
c.save()