[reportlab-users] TrueType fonts and text justification -- solved

Albertas Agejevas alga at pov.lt
Thu Jun 9 13:14:54 EDT 2005


On Tue, May 31, 2005 at 10:20:27AM +0100, Robin Becker wrote:
> Robin Becker wrote:
> >Marius Gedminas wrote:
> >....Thanks Marius, seems like a good solution. However, I cannot get the 
> >patch to apply to standard test_pdfbase_encodings, but it might apply to 
> >the so called version2 branch.
> >
> >Since we've left the CVS repository behind the CVS version 1.8 isn't 
> >much use.
> 
> I am seeing this error with the patched python21-stable branch version

Sorry, here is the correct patch.

Albertas
-------------- next part --------------
This patch works for both the python21-stable branch of ReportLab and
the trunk.

It adds the first 128 Unicode codepoints to the first subset of each
TrueType font.  This is needed for the PDF word spacing attribute to work,
as the space glyph must have code 32.  An additional bonus of this
patch is that all ASCII characters in the output are readable.

Also, code 32 in each subsequent subset is mapped to the space glyph,
so that no other character receives word spacing after it.

Index: test/test_pdfbase_ttfonts.py
===================================================================
--- test/test_pdfbase_ttfonts.py	(revision 2605)
+++ test/test_pdfbase_ttfonts.py	(working copy)
@@ -254,18 +254,34 @@
         "Tests TTFont.splitString"
         doc = PDFDocument()
         font = TTFont("TestFont", "luxiserif.ttf")
-        text = string.join(map(utf8, range(0, 512)), "")
+        text = string.join(map(utf8, range(0, 511)), "")
         allchars = string.join(map(chr, range(0, 256)), "")
-        chunks = [(0, allchars), (1, allchars)]
+        nospace = allchars[:32] + allchars[33:]
+        chunks = [(0, allchars), (1, nospace)]
         self.assertEquals(font.splitString(text, doc), chunks)
         # Do it twice
         self.assertEquals(font.splitString(text, doc), chunks)
 
-        text = string.join(map(utf8, range(511, -1, -1)), "")
+        text = string.join(map(utf8, range(510, -1, -1)), "")
         allchars = string.join(map(chr, range(255, -1, -1)), "")
-        chunks = [(1, allchars), (0, allchars)]
+        nospace = allchars[:223] + allchars[224:]
+        chunks = [(1, nospace), (0, allchars)]
         self.assertEquals(font.splitString(text, doc), chunks)
 
+    def testSplitStringSpaces(self):
+        # In order for justification (word spacing) to work, the space
+        # glyph must have a code 32, and no other character should have
+        # that code in any subset, or word spacing will be applied to it.
+
+        doc = PDFDocument()
+        font = TTFont("TestFont", "luxiserif.ttf")
+        text = string.join(map(utf8, range(512, -1, -1)), "")
+        chunks = font.splitString(text, doc)
+        state = font.state[doc]
+        self.assertEquals(state.assignments[32], 32)
+        self.assertEquals(state.subsets[0][32], 32)
+        self.assertEquals(state.subsets[1][32], 32)
+
     def testSubsetInternalName(self):
         "Tests TTFont.getSubsetInternalName"
         doc = PDFDocument()
@@ -305,10 +321,16 @@
         doc1 = PDFDocument()
         doc2 = PDFDocument()
         font = TTFont("TestFont", "luxiserif.ttf")
-        self.assertEquals(font.splitString('ab', doc1), [(0, '\0\1')])
-        self.assertEquals(font.splitString('b', doc2), [(0, '\0')])
+        self.assertEquals(font.splitString(u'hello ', doc1), [(0, 'hello ')])
+        self.assertEquals(font.splitString(u'hello ', doc2), [(0, 'hello ')])
+        self.assertEquals(
+            font.splitString(u'\u0410\u0411'.encode('UTF-8'), doc1),
+            [(0, '\x80\x81')])
+        self.assertEquals(font.splitString(u'\u0412'.encode('UTF-8'), doc2),
+                          [(0, '\x80')])
         font.addObjects(doc1)
-        self.assertEquals(font.splitString('c', doc2), [(0, '\1')])
+        self.assertEquals(font.splitString(u'\u0413'.encode('UTF-8'), doc2),
+                          [(0, '\x81')])
         font.addObjects(doc2)
 
     def testAddObjects(self):
@@ -324,8 +346,8 @@
         self.assertEquals(pdfFont.Name, internalName)
         self.assertEquals(pdfFont.BaseFont, "SUBSET+LuxiSerif+0")
         self.assertEquals(pdfFont.FirstChar, 0)
-        self.assertEquals(pdfFont.LastChar, 0)
-        self.assertEquals(len(pdfFont.Widths.sequence), 1)
+        self.assertEquals(pdfFont.LastChar, 127)
+        self.assertEquals(len(pdfFont.Widths.sequence), 128)
         toUnicode = doc.idToObject[pdfFont.ToUnicode.name]
         self.assert_(toUnicode.content != "")
         fontDescriptor = doc.idToObject[pdfFont.FontDescriptor.name]
Index: pdfbase/ttfonts.py
===================================================================
--- pdfbase/ttfonts.py	(revision 2605)
+++ pdfbase/ttfonts.py	(working copy)
@@ -935,10 +935,19 @@
         def __init__(self):
             self.assignments = {}
             self.nextCode = 0
-            self.subsets = []
+            self.subsets = [[]]
             self.internalName = None
             self.frozen = 0
 
+            # Let's add the first 128 unicodes to the 0th subset, so ' '
+            # always has code 32 (for word spacing to work) and the ASCII
+            # output is readable
+            for self.nextCode in range(128):
+                n = self.nextCode
+                self.assignments[n] = n
+                self.subsets[0].append(n)
+            self.nextCode = self.nextCode + 1
+
     def __init__(self, name, filename, validate=0):
         """Loads a TrueType font from filename.
 
@@ -976,6 +985,10 @@
             else:
                 if state.frozen:
                     raise pdfdoc.PDFError, "Font %s is already frozen, cannot add new character U+%04X" % (self.fontName, code)
+                if state.nextCode & 0xFF == 32:
+                    # make code 32 always be a space character
+                    state.subsets[state.nextCode >> 8].append(32)
+                    state.nextCode = state.nextCode + 1
                 n = state.nextCode
                 state.nextCode = state.nextCode + 1
                 state.assignments[code] = n


More information about the reportlab-users mailing list