[reportlab-users] new pyRXP
Stuart Bishop
reportlab-users@reportlab.com
Fri, 11 Apr 2003 07:28:26 +1000
--Apple-Mail-4-835946544
Content-Transfer-Encoding: 7bit
Content-Type: text/plain;
charset=US-ASCII;
format=flowed
On Thursday, April 10, 2003, at 09:56 PM, Robin Becker wrote:
>>>> import pyRXPU
>>>> pyRXPU.Parser()(open('001.xml','r').read())
> Traceback (most recent call last):
> File "<interactive input>", line 1, in ?
> Error: Error: EOE in comment in entity "e" defined at line 2 char 1 of
> file:///C:/Python/reportlab/rl_addons/pyRXP/test/sb/001.ent
> in unnamed entity at line 3 char 4 of
> file:///C:/Python/reportlab/rl_ad
> dons/pyRXP/test/sb/001.ent
> EOE in commentParse Failed!
>>>>
That is the correct output (it is not well formed XML). However, when I
run it under OS X it reports malloc errors.
> I certainly don't see any crashes though. I'm using Python 2.2 under
> win32.
OS X just spews out lots of warnings, and segfaults if the test is run
with the -v flag:
.*** malloc[12626]: Deallocation of a pointer not malloced: 0x7478e2;
This could be a double free(), or free() called with the middle of an
allocated block; Try setting environment variable MallocHelp to see
tools to help debug
Red Hat 7.2 segfaults.
The Win32 malloc routines might be more forgiving.
Here is the stack trace from OS X. I may be able to trace this through
with a debugger later today:
#0 0x900042e0 in free_list_remove_ptr ()
#1 0x90003f60 in szone_free ()
#2 0x00620750 in Fclose (file=0x720150) at build/_pyRXPU/stdio16U.c:427
#3 0x0061ec60 in SourceClose (source=0x7234e0) at
build/_pyRXPU/inputU.c:130
#4 0x00618240 in FreeParser (p=0x4568f0) at
build/_pyRXPU/xmlparserU.c:657
#5 0x00624a14 in pyRXPParser_parse (xself=0x39b010, args=0x62e410,
kw=0x7234e0) at build/_pyRXPU/pyRXPU.c:769
Here is a version of pyRXP.c that causes everything to use Python's
memory routines instead of malloc/free. To use it you need to remove
the reference to 'system.c' in setup.py:
--Apple-Mail-4-835946544
Content-Disposition: attachment;
filename=test_xmltestsuite.py
Content-Transfer-Encoding: 7bit
Content-Type: application/octet-stream;
x-unix-mode=0644;
name="test_xmltestsuite.py"
#!/usr/bin/env python
'''
$Id: test_xmltestsuite.py,v 1.1 2003/02/08 16:35:39 zen Exp $
Test parsing and validation against James Clark's test cases,
as downloaded from http://www.jclark.com/xml/
The .zip file should be in the same directory as this script.
Note that the .zip file can be freely distributed in unmodified form
so it could be added to the pyRXP distribution.
'''
__rcs_id__ = '$Id: test_xmltestsuite.py,v 1.1 2003/02/08 16:35:39 zen Exp $'
__version__ = '$Revision: 1.1 $'[11:-2]
__author__ = 'Stuart Bishop <stuart@stuartbishop.net>'
debug = 0
import unittest
import zipfile
import sys
import os
import os.path
import codecs
from glob import glob
# Put every entry found under ../build on the import path before importing
# the extension (presumably so a freshly built pyRXPU is picked up - confirm
# against the build layout).
for built in glob('../build/*'):
    sys.path.append(built)
print >> sys.stderr,'Path is %r' % sys.path
import pyRXPU

# Debug is to help me trace down memory bugs; time is only needed for the
# sleep between parses when debugging.
if debug:
    import time

# 2.2 compatibility - sort of: make sure __file__ is defined, falling back
# to a dummy name inside the current directory.
if '__file__' not in globals():
    __file__ = os.path.join(os.getcwd(),'oops')
class test_pyRXPU(unittest.TestCase):
    ''' Shared helpers for running the XML test suite through pyRXPU.

    No test_* methods are written out here; buildup_test() attaches them
    to the class, and each generated method calls one of the _test_*
    helpers below.
    '''

    # Parser module under test
    mod = pyRXPU

    def parse(self,filename,**kw):
        ''' Parse the named file and return whatever the parser returns.

        Extra keyword arguments are handed to self.mod.Parser, with
        ReturnComments, ExpandEmpty and ReturnProcessingInstructions
        always switched on. Exceptions from the parser propagate.
        '''
        if debug: print >> sys.stderr,'About to parse %s' % filename
        kw = kw.copy()
        kw['ReturnComments'] = 1
        kw['ExpandEmpty'] = 1
        kw['ReturnProcessingInstructions'] = 1
        parser = self.mod.Parser(**kw)

        # Change directory in case we are loading entities from cwd
        retdir = os.getcwd()
        d,n = os.path.split(filename)
        if d:
            # A bare filename has no directory part; chdir('') would fail
            os.chdir(d)
        f = None
        try:
            f = open(n)
            xml = f.read()
            return parser.parse(xml)
        finally:
            # f is still None if open() itself failed
            if f is not None:
                f.close()
            os.chdir(retdir)
            if debug: print >> sys.stderr,'Done parsing %s' % filename
            if debug: print >> sys.stderr,'='*60
            if debug: time.sleep(1)

    def getcanonical(self,filename):
        ''' Parse in the named file, and return it as canonical XML '''
        return self._getcan(self.parse(filename))

    def _getcan(self,node):
        ''' Serialise one parsed node (and its children) as canonical XML.

        A node is either a string, which is returned quoted, or a
        4-item sequence (tag, attrs, kids, junk).
        '''
        if type(node) in (type(''),type(u'')):
            return self._quote(node)

        tag,attrs,kids,junk = node
        if tag == self.mod.commentTagName:
            return u'<!--%s-->' % (kids[0])
        elif tag == self.mod.piTagName:
            return u'<?%s %s?>' % (attrs['name'],kids[0])

        if attrs is None:
            attrs = ''
        else:
            keys = list(attrs.keys())
            keys.sort() # Attributes in lexical order
            attrs = ' '.join(
                ['%s="%s"' % (k,self._quote(attrs[k])) for k in keys]
                )
            if attrs:
                attrs = ' ' + attrs
        text = ''.join([self._getcan(kid) for kid in kids])
        return '<%s%s>%s</%s>' % (tag,attrs,text,tag)

    def _quote(self,txt):
        ''' Escape character data and attribute values for canonical XML.

        The ampersand, less-than, greater-than and double-quote characters
        become the named entities amp, lt, gt and quot; tab, line feed and
        carriage return become the decimal character references 9, 10
        and 13.
        '''
        # The ampersand must be handled first, otherwise the ampersands
        # introduced by the later replacements would be escaped again.
        txt = txt.replace('&','&amp;')
        txt = txt.replace('<','&lt;')
        txt = txt.replace('>','&gt;')
        txt = txt.replace('"','&quot;')
        txt = txt.replace('\x09','&#9;')
        txt = txt.replace('\x0a','&#10;')
        txt = txt.replace('\x0d','&#13;')
        return txt

    def _test_valid(self,inname,outname):
        ''' Check that inname, made canonical, equals the UTF-8 text
        stored in outname. '''
        inxml = self.getcanonical(inname)
        f = codecs.open(outname,mode='r',encoding='utf8')
        try:
            outxml = f.read()
        finally:
            f.close()
        self.assertEqual(inxml,outxml)

    def _test_invalid_parse(self,inname):
        ''' An invalid document must still parse with validation off. '''
        try:
            self.parse(inname,Validate=0)
        except self.mod.error:
            self.fail('Failed to parse %r in non-validating mode' % inname)

    def _test_invalid_validate(self,inname):
        ''' An invalid document must be rejected with validation on. '''
        try:
            self.parse(inname,Validate=1)
        except self.mod.error:
            pass
        else:
            self.fail('Failed to detect validity error in %r' % inname)

    def _test_notwf(self,inname):
        ''' A document that is not well formed must be rejected even
        with validation off. '''
        try:
            self.parse(inname,Validate=0)
        except self.mod.error:
            pass
        else:
            self.fail(
                'Failed to detect that %r was not well formed' % inname
                )
def buildup_test(cls=test_pyRXPU):
    ''' Add test methods to the TestCase

    Unpacks xmltest.zip (expected next to this script) for any member not
    already on disk, sorts the input .xml files into cls.valid,
    cls.invalid and cls.notwf, and then attaches one or two generated
    test methods per input file to cls.
    '''
    cls.valid = []
    cls.invalid = []
    cls.notwf = []

    testdir = os.path.dirname(__file__)
    zipf = zipfile.ZipFile(os.path.join(testdir,'xmltest.zip'))
    try:
        for zipname in zipf.namelist():
            # Directory entries carry no data and cannot be opened as files
            if zipname.endswith('/'):
                continue

            # Extract the files if they don't already exist
            osname = os.path.join(*zipname.split('/')) # For non-unixes
            osname = os.path.join(testdir,osname)
            dirname = os.path.dirname(osname)
            # dirname is empty for a member with no directory part when
            # testdir is empty too; makedirs('') would fail
            if dirname and not os.path.isdir(dirname):
                os.makedirs(dirname)
            if not os.path.isfile(osname):
                f = open(osname,'wb')
                try:
                    f.write(zipf.read(zipname))
                finally:
                    f.close()

            # Add input files to our lists. Anything with 'out' in its
            # name is skipped here; the expected output of a valid test is
            # located through the 'out' directory beside the input instead.
            if os.path.splitext(osname)[1] == '.xml' and zipname.find('out') == -1:
                # 'invalid' has to be tested before 'valid', which it contains
                if zipname.find('invalid') != -1:
                    cls.invalid.append(osname)
                elif zipname.find('not-wf') != -1:
                    cls.notwf.append(osname)
                elif zipname.find('valid') != -1:
                    outname = os.path.join(dirname,'out',os.path.basename(osname))
                    cls.valid.append( (osname,outname) )
    finally:
        zipf.close()

    # In the loops below the file names are bound as default arguments so
    # that each generated method keeps its own value rather than the last
    # one seen by the loop.

    # Add 'valid' tests
    for inname,outname in cls.valid:
        num = int(os.path.splitext(os.path.basename(inname))[0])
        subdir = os.path.split(os.path.split(inname)[0])[1]
        mname = 'test_Valid_%s_%03d' % (subdir,num)
        def doTest(self,inname=inname,outname=outname):
            self._test_valid(inname,outname)
        setattr(cls,mname,doTest)

    # Add 'invalid' tests
    for inname in cls.invalid:
        num = int(os.path.splitext(os.path.basename(inname))[0])
        mname = 'test_InvalidParse_%03d' % (num)
        def doTest(self,inname=inname):
            self._test_invalid_parse(inname)
        setattr(cls,mname,doTest)
        mname = 'test_InvalidValidate_%03d' % (num)
        def doTest(self,inname=inname):
            self._test_invalid_validate(inname)
        setattr(cls,mname,doTest)

    # Add 'not wellformed' tests
    for inname in cls.notwf:
        num = int(os.path.splitext(os.path.basename(inname))[0])
        subdir = os.path.split(os.path.split(inname)[0])[1]
        mname = 'test_NotWellFormed_%s_%03d' % (subdir,num)
        def doTest(self,inname=inname):
            self._test_notwf(inname)
        setattr(cls,mname,doTest)
# Attach the generated test methods at import time so that unittest finds
# them on the class.
buildup_test()

if __name__ == '__main__':
    if debug: raw_input('Enter to start')
    try:
        # unittest.main() ends by raising SystemExit, so the closing debug
        # pause has to sit in a finally clause to be reached at all.
        unittest.main()
    finally:
        if debug: raw_input('Enter to end')
--Apple-Mail-4-835946544
Content-Transfer-Encoding: 7bit
Content-Type: text/plain;
charset=US-ASCII;
format=flowed
The Python docs say this is a good idea as it helps Python manage memory
better. I have no idea if this causes any speed or memory hits.
--
Stuart Bishop <zen@shangri-la.dropbear.id.au>
http://shangri-la.dropbear.id.au/
--Apple-Mail-4-835946544--