From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Jacob Boerema Date: Thu, 4 Feb 2021 15:31:12 -0500 Subject: [PATCH 1/3] tools: start migration to python3 by running 2to3 (cherry picked from commit c5079e972d62858eead44c9029d276d456daf90d) --- tools/xml2po.py | 14 ++++++------ tools/xml2po/__init__.py | 40 +++++++++++++++++----------------- tools/xml2po/modes/docbook.py | 20 ++++++++--------- tools/xml2po/modes/gimphelp.py | 14 ++++++------ 4 files changed, 44 insertions(+), 44 deletions(-) diff --git a/tools/xml2po.py b/tools/xml2po.py index 037e61f27..b3e110dc3 100755 --- a/tools/xml2po.py +++ b/tools/xml2po.py @@ -41,9 +41,9 @@ NULL_STRING = '/dev/null' if not os.path.exists('/dev/null'): NULL_STRING = 'NUL' def usage (with_help = False): - print >> sys.stderr, "Usage: %s [OPTIONS] [XMLFILE]..." % (sys.argv[0]) + print("Usage: %s [OPTIONS] [XMLFILE]..." % (sys.argv[0]), file=sys.stderr) if with_help: - print >> sys.stderr, """ + print(""" OPTIONS may be some of: -a --automatic-tags Automatically decides if tags are to be considered "final" or not @@ -72,7 +72,7 @@ EXAMPLES: using -p option for each XML file: %(command)s -p de.po chapter1.xml > chapter1.de.xml %(command)s -p de.po chapter2.xml > chapter2.de.xml -""" % {'command': sys.argv[0]} +""" % {'command': sys.argv[0]}, file=sys.stderr) def main(argv): @@ -148,7 +148,7 @@ def main(argv): sys.exit(0) if operation == 'update' and output != "-": - print >> sys.stderr, "Option '-o' is not yet supported when updating translations directly. Ignoring this option." + print("Option '-o' is not yet supported when updating translations directly. Ignoring this option.", file=sys.stderr) # Treat remaining arguments as XML files filenames = [] @@ -158,16 +158,16 @@ def main(argv): try: xml2po_main = Main(default_mode, operation, output, options) except IOError: - print >> sys.stderr, "Error: cannot open file %s for writing." % (output) + print("Error: cannot open file %s for writing." % (output), file=sys.stderr) sys.exit(5) if operation == 'merge': if len(filenames) > 1: - print >> sys.stderr, "Error: You can merge translations with only one XML file at a time." + print("Error: You can merge translations with only one XML file at a time.", file=sys.stderr) sys.exit(2) if not mofile: - print >> sys.stderr, "Error: You must specify MO file when merging translations." + print("Error: You must specify MO file when merging translations.", file=sys.stderr) sys.exit(3) xml2po_main.merge(mofile, filenames[0]) diff --git a/tools/xml2po/__init__.py b/tools/xml2po/__init__.py index eec197947..7a07eb226 100644 --- a/tools/xml2po/__init__.py +++ b/tools/xml2po/__init__.py @@ -86,14 +86,14 @@ class MessageOutput: self.messages.append(t) if spacepreserve: self.nowrap[t] = True - if t in self.linenos.keys(): + if t in list(self.linenos.keys()): self.linenos[t].append((self.filename, tag, lineno)) else: self.linenos[t] = [ (self.filename, tag, lineno) ] if (not self.do_translations) and comment and not t in self.comments: self.comments[t] = comment else: - if t in self.linenos.keys(): + if t in list(self.linenos.keys()): self.linenos[t].append((self.filename, tag, lineno)) else: self.linenos[t] = [ (self.filename, tag, lineno) ] @@ -166,7 +166,7 @@ class XMLDocument(object): elif node.isText(): if node.isBlankNode(): if self.app.options.get('expand_entities') or \ - (not (node.prev and not node.prev.isBlankNode() and node.next and not node.next.isBlankNode()) ): + (not (node.prev and not node.prev.isBlankNode() and node.__next__ and not node.next.isBlankNode()) ): #print >>sys.stderr, "BLANK" node.setContent('') else: @@ -176,7 +176,7 @@ class XMLDocument(object): child = node.children while child: self.normalizeNode(child) - child = child.next + child = child.__next__ def normalizeString(self, text, spacepreserve = False): """Normalizes string to be used as key for gettext lookup. @@ -200,7 +200,7 @@ class XMLDocument(object): tree = ctxt.doc() newnode = tree.getRootElement() except: - print >> sys.stderr, """Error while normalizing string as XML:\n"%s"\n""" % (text) + print("""Error while normalizing string as XML:\n"%s"\n""" % (text), file=sys.stderr) return text self.normalizeNode(newnode) @@ -209,7 +209,7 @@ class XMLDocument(object): child = newnode.children while child: result += child.serialize('utf-8') - child = child.next + child = child.__next__ result = re.sub('^ ','', result) result = re.sub(' $','', result) @@ -235,7 +235,7 @@ class XMLDocument(object): ctxt.parseDocument() tree = ctxt.doc() if next: - newnode = tree.children.next + newnode = tree.children.__next__ else: newnode = tree.children @@ -243,7 +243,7 @@ class XMLDocument(object): child = newnode.children while child: result += child.serialize('utf-8') - child = child.next + child = child.__next__ tree.freeDoc() return result @@ -262,7 +262,7 @@ class XMLDocument(object): result += child.content.decode('utf-8') else: result += self.myAttributeSerialize(child) - child = child.next + child = child.__next__ else: result = node.serialize('utf-8') return result @@ -338,7 +338,7 @@ class XMLDocument(object): pass if not newnode: - print >> sys.stderr, """Error while parsing translation as XML:\n"%s"\n""" % (text.encode('utf-8')) + print("""Error while parsing translation as XML:\n"%s"\n""" % (text.encode('utf-8')), file=sys.stderr) return newelem = newnode.getRootElement() @@ -346,13 +346,13 @@ class XMLDocument(object): if newelem and newelem.children: free = node.children while free: - next = free.next + next = free.__next__ free.unlinkNode() free = next if node: copy = newelem.copyNodeList() - next = node.next + next = node.__next__ node.replaceNode(newelem.copyNodeList()) node.next = next @@ -377,7 +377,7 @@ class XMLDocument(object): if child.isText() and child.content.strip() != '': return True else: - child = child.next + child = child.__next__ return False @@ -441,7 +441,7 @@ class XMLDocument(object): outtxt += '<%s>%s' % (starttag, content, endtag) else: outtxt += self.doSerialize(child) - child = child.next + child = child.__next__ if self.app.operation == 'merge': norm_outtxt = self.normalizeString(outtxt, self.app.isSpacePreserveNode(node)) @@ -521,7 +521,7 @@ class XMLDocument(object): outtxt = '' while child: outtxt += self.doSerialize(child) - child = child.next + child = child.__next__ return outtxt def xml_error_handler(arg, ctxt): @@ -565,7 +565,7 @@ class Main(object): try: doc = XMLDocument(xmlfile, self) except Exception as e: - print >> sys.stderr, "Unable to parse XML file '%s': %s" % (xmlfile, str(e)) + print("Unable to parse XML file '%s': %s" % (xmlfile, str(e)), file=sys.stderr) sys.exit(1) self.current_mode.preProcessXml(doc.doc, self.msg) doc.generate_messages() @@ -578,13 +578,13 @@ class Main(object): try: doc = XMLDocument(xmlfile, self) except Exception as e: - print >> sys.stderr, str(e) + print(str(e), file=sys.stderr) sys.exit(1) try: mfile = open(mofile, "rb") except: - print >> sys.stderr, "Can't open MO file '%s'." % (mofile) + print("Can't open MO file '%s'." % (mofile), file=sys.stderr) self.gt = gettext.GNUTranslations(mfile) self.gt.add_fallback(NoneTranslations()) # Has preProcessXml use cases for merge? @@ -607,7 +607,7 @@ class Main(object): try: doc = XMLDocument(xmlfile, self) except Exception as e: - print >> sys.stderr, str(e) + print(str(e), file=sys.stderr) sys.exit(1) doc.generate_messages() @@ -615,7 +615,7 @@ class Main(object): try: doc = XMLDocument(origxml, self) except Exception as e: - print >> sys.stderr, str(e) + print(str(e), file=sys.stderr) sys.exit(1) doc.generate_messages() self.output_po() diff --git a/tools/xml2po/modes/docbook.py b/tools/xml2po/modes/docbook.py index 276a9d9bb..5658ffc1a 100644 --- a/tools/xml2po/modes/docbook.py +++ b/tools/xml2po/modes/docbook.py @@ -43,7 +43,7 @@ try: except ImportError: from md5 import new as md5_new -from basic import basicXmlMode +from .basic import basicXmlMode class docbookXmlMode(basicXmlMode): """Class for special handling of DocBook document types. @@ -95,7 +95,7 @@ class docbookXmlMode(basicXmlMode): ret = self._find_articleinfo(child) if ret: return ret - child = child.next + child = child.__next__ return None def _find_lastcopyright(self, node): @@ -131,7 +131,7 @@ class docbookXmlMode(basicXmlMode): hash = self._md5_for_file(fullpath) else: hash = "THIS FILE DOESN'T EXIST" - print >>sys.stderr, "Warning: image file '%s' not found." % fullpath + print("Warning: image file '%s' not found." % fullpath, file=sys.stderr) msg.outputMessage("@@image: '%s'; md5=%s" % (attr, hash), node.lineNo(), "When image changes, this message will be marked fuzzy or untranslated for you.\n"+ @@ -140,7 +140,7 @@ class docbookXmlMode(basicXmlMode): child = node.children while child: self._output_images(child,msg) - child = child.next + child = child.__next__ def preProcessXml(self, doc, msg): @@ -157,7 +157,7 @@ class docbookXmlMode(basicXmlMode): root = doc.getRootElement() # DocBook documents can be something other than article, handle that as well in the future while root and root.name != 'article' and root.name != 'book': - root = root.next + root = root.__next__ if root and (root.name == 'article' or root.name == 'book'): root.setProp('lang', language) else: @@ -198,10 +198,10 @@ class docbookXmlMode(basicXmlMode): # Perform some tests when ran standalone if __name__ == '__main__': test = docbookXmlMode() - print "Ignored tags : " + repr(test.getIgnoredTags()) - print "Final tags : " + repr(test.getFinalTags()) - print "Space-preserve tags: " + repr(test.getSpacePreserveTags()) + print("Ignored tags : " + repr(test.getIgnoredTags())) + print("Final tags : " + repr(test.getFinalTags())) + print("Space-preserve tags: " + repr(test.getSpacePreserveTags())) - print "Credits from string: '%s'" % test.getStringForTranslators() - print "Explanation for credits:\n\t'%s'" % test.getCommentForTranslators() + print("Credits from string: '%s'" % test.getStringForTranslators()) + print("Explanation for credits:\n\t'%s'" % test.getCommentForTranslators()) diff --git a/tools/xml2po/modes/gimphelp.py b/tools/xml2po/modes/gimphelp.py index 24d4154e6..d72f518ee 100644 --- a/tools/xml2po/modes/gimphelp.py +++ b/tools/xml2po/modes/gimphelp.py @@ -31,7 +31,7 @@ try: except ImportError: from md5 import new as md5_new -from docbook import docbookXmlMode +from .docbook import docbookXmlMode class gimphelpXmlMode(docbookXmlMode): """Class for special handling of gimp-help DocBook document types. @@ -81,7 +81,7 @@ class gimphelpXmlMode(docbookXmlMode): child = node.children while child: self._output_images(child,msg) - child = child.next + child = child.__next__ def preProcessXml(self, doc, msg): """Add additional messages of interest here.""" @@ -91,10 +91,10 @@ class gimphelpXmlMode(docbookXmlMode): # Perform some tests when ran standalone if __name__ == '__main__': test = gimphelpXmlMode() - print "Ignored tags : " + repr(test.getIgnoredTags()) - print "Final tags : " + repr(test.getFinalTags()) - print "Space-preserve tags: " + repr(test.getSpacePreserveTags()) + print("Ignored tags : " + repr(test.getIgnoredTags())) + print("Final tags : " + repr(test.getFinalTags())) + print("Space-preserve tags: " + repr(test.getSpacePreserveTags())) - print "Credits from string: '%s'" % test.getStringForTranslators() - print "Explanation for credits:\n\t'%s'" % test.getCommentForTranslators() + print("Credits from string: '%s'" % test.getStringForTranslators()) + print("Explanation for credits:\n\t'%s'" % test.getCommentForTranslators()) -- 2.29.2 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Jacob Boerema Date: Mon, 29 Mar 2021 17:51:48 -0400 Subject: [PATCH 2/3] tools: complete port of xml2po to python3 Also: - Let it work correctly on Windows by setting encoding for stdout and stderr to utf-8. - Fix setting correct timezone in po files. - Better error handling. - Detect errors in tags in the translated xml text. (cherry picked from commit 299cfb7150857f0045ae42978d8ed917b4b0e4ff) --- tools/xml2po.py | 19 +++- tools/xml2po/__init__.py | 189 +++++++++++++++++++++++++-------- tools/xml2po/modes/gimphelp.py | 2 +- 3 files changed, 163 insertions(+), 47 deletions(-) diff --git a/tools/xml2po.py b/tools/xml2po.py index b3e110dc3..ef8d725da 100755 --- a/tools/xml2po.py +++ b/tools/xml2po.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*- encoding: utf-8 -*- # Copyright (c) 2004, 2005, 2006 Danilo Ĺ egan . # Copyright (c) 2009 Claude Paroz . @@ -21,7 +21,7 @@ # # xml2po -- translate XML documents -VERSION = "0.18.0 (patched by GIMP Documentation Team)" +VERSION = "0.19.0 (patched by GIMP Documentation Team)" # Versioning system (I use this for a long time, so lets explain it to # those Linux-versioning-scheme addicts): @@ -37,11 +37,13 @@ import os import getopt import tempfile +DEBUG_VERBOSITY = 0 + NULL_STRING = '/dev/null' if not os.path.exists('/dev/null'): NULL_STRING = 'NUL' def usage (with_help = False): - print("Usage: %s [OPTIONS] [XMLFILE]..." % (sys.argv[0]), file=sys.stderr) + print(f"Usage: {sys.argv[0]} [OPTIONS] [XMLFILE]...", file=sys.stderr) if with_help: print(""" OPTIONS may be some of: @@ -86,6 +88,11 @@ def main(argv): from xml2po import Main + + # Make sure stdout and stderr output utf-8 even on Windows where it's not the default + sys.stdout = open(sys.stdout.fileno(), 'w', encoding='utf-8', closefd=False) + sys.stderr = open(sys.stderr.fileno(), 'w', encoding='utf-8', closefd=False) + # Default parameters default_mode = 'docbook' operation = 'pot' # 'pot', 'merge', 'update' @@ -136,6 +143,8 @@ def main(argv): operation = 'merge' if 'translationlanguage' not in options: options['translationlanguage'] = os.path.split(os.path.splitext(pofile)[0])[1] + if DEBUG_VERBOSITY > 0: + print(f"Converting {pofile} to {mofile_tmppath} using msgfmt") os.system("msgfmt -o %s %s >%s" % (mofile_tmppath, pofile, NULL_STRING)) and sys.exit(7) mofile = mofile_tmppath elif opt in ('-o', '--output'): @@ -170,6 +179,10 @@ def main(argv): print("Error: You must specify MO file when merging translations.", file=sys.stderr) sys.exit(3) + if DEBUG_VERBOSITY > 0: + print(f"Merge mo file {mofile} with {filenames[0]}") + if pofile: + xml2po_main.pofile = pofile xml2po_main.merge(mofile, filenames[0]) elif operation == 'update': diff --git a/tools/xml2po/__init__.py b/tools/xml2po/__init__.py index 7a07eb226..42fa81f39 100644 --- a/tools/xml2po/__init__.py +++ b/tools/xml2po/__init__.py @@ -26,6 +26,7 @@ import tempfile import gettext import libxml2 +DEBUG_VERBOSITY = 0 NULL_STRING = '/dev/null' if not os.path.exists('/dev/null'): NULL_STRING = 'NUL' @@ -86,14 +87,14 @@ class MessageOutput: self.messages.append(t) if spacepreserve: self.nowrap[t] = True - if t in list(self.linenos.keys()): + if t in self.linenos.keys(): self.linenos[t].append((self.filename, tag, lineno)) else: self.linenos[t] = [ (self.filename, tag, lineno) ] if (not self.do_translations) and comment and not t in self.comments: self.comments[t] = comment else: - if t in list(self.linenos.keys()): + if t in self.linenos.keys(): self.linenos[t].append((self.filename, tag, lineno)) else: self.linenos[t] = [ (self.filename, tag, lineno) ] @@ -101,7 +102,11 @@ class MessageOutput: self.comments[t] = comment def outputHeader(self, out): - import time + from datetime import datetime + # Using time.strftime was not working correctly for me: instead of a + # timezone offset a timezone name was added. This fixes it. + dt = datetime.now() + tz = dt.astimezone().tzinfo out.write("""msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\\n" @@ -113,7 +118,7 @@ msgstr "" "Content-Type: text/plain; charset=UTF-8\\n" "Content-Transfer-Encoding: 8bit\\n" -""" % (time.strftime("%Y-%m-%d %H:%M%z"))) +""" % (dt.astimezone(tz).strftime("%Y-%m-%d %H:%M%z"))) def outputAll(self, out): self.outputHeader(out) @@ -138,6 +143,7 @@ msgstr "" class XMLDocument(object): def __init__(self, filename, app): + self.filename = filename self.app = app self.expand_entities = self.app.options.get('expand_entities') self.ignored_tags = self.app.current_mode.getIgnoredTags() @@ -145,7 +151,13 @@ class XMLDocument(object): ctxt.lineNumbers(1) if self.app.options.get('expand_all_entities'): ctxt.replaceEntities(1) - ctxt.parseDocument() + + try: + ctxt.parseDocument() + except Exception as e: + print("Error parsing XML file '%s': %s" % (filename, str(e)), file=sys.stderr) + sys.exit(1) + self.doc = ctxt.doc() if self.doc.name != filename: raise Exception("Error: I tried to open '%s' but got '%s' -- how did that happen?" % (filename, self.doc.name)) @@ -166,22 +178,23 @@ class XMLDocument(object): elif node.isText(): if node.isBlankNode(): if self.app.options.get('expand_entities') or \ - (not (node.prev and not node.prev.isBlankNode() and node.__next__ and not node.next.isBlankNode()) ): - #print >>sys.stderr, "BLANK" + (not (node.prev and not node.prev.isBlankNode() and node.next and not node.next.isBlankNode()) ): node.setContent('') else: - node.setContent(re.sub('\s+',' ', node.content)) + node.setContent(re.sub(r'\s+',' ', node.content)) elif node.children and node.type == 'element': child = node.children while child: + nextchild = child.next self.normalizeNode(child) - child = child.__next__ + child = nextchild def normalizeString(self, text, spacepreserve = False): """Normalizes string to be used as key for gettext lookup. Removes all unnecessary whitespace.""" + mytext = text if spacepreserve: return text try: @@ -203,13 +216,20 @@ class XMLDocument(object): print("""Error while normalizing string as XML:\n"%s"\n""" % (text), file=sys.stderr) return text + # Not sure if saving the doc here is really necessary. It was one of the + # things done in debugging and don't want to spend time now to check if + # we can remove it. + save_doc = self.doc + self.doc = ctxt.doc() self.normalizeNode(newnode) + self.doc = save_doc result = '' child = newnode.children while child: + nextchild = child.next result += child.serialize('utf-8') - child = child.__next__ + child = nextchild result = re.sub('^ ','', result) result = re.sub(' $','', result) @@ -235,15 +255,16 @@ class XMLDocument(object): ctxt.parseDocument() tree = ctxt.doc() if next: - newnode = tree.children.__next__ + newnode = tree.children.next else: newnode = tree.children result = '' child = newnode.children while child: + nextchild = child.next result += child.serialize('utf-8') - child = child.__next__ + child = nextchild tree.freeDoc() return result @@ -252,6 +273,7 @@ class XMLDocument(object): result = '' if node.children: child = node.children + nextchild = child.next while child: if child.type=='text': result += self.doc.encodeEntitiesReentrant(child.content) @@ -262,7 +284,7 @@ class XMLDocument(object): result += child.content.decode('utf-8') else: result += self.myAttributeSerialize(child) - child = child.__next__ + child = nextchild else: result = node.serialize('utf-8') return result @@ -308,11 +330,81 @@ class XMLDocument(object): return None def replaceAttributeContentsWithText(self, node, text): - node.setContent(text) + try: + node.setContent(text.decode('utf-8')) + except TypeError: + sys.stderr.write("--> replaceAttributeContentsWithText: Failed to decode text to utf-8.") + sys.exit(1) + + def CheckMatchedTags(self, text): + stack = [] + textblock = text + + log=sys.stdout + + # It might be even better to do the below with regex, see e.g. + # https://datadependence.com/2016/03/find-unclosed-tags-using-stacks/ + # However I'm not sure it really matters that much since the text + # blocks usually are fairly small and most don't have a lot of tags. + start_tag = textblock.find('<') + while start_tag > -1: + textblock = textblock[start_tag+1:] + end_tag = textblock.find('>') + if end_tag > -1: + # Found left and right brackets: grab tag + tag = textblock[: end_tag] + # Check that it's not a tag that closes itself and comment tags starting with -1: + tag = tag[: space] + + open_tag = (len(tag) > 0 and tag[0] != '/') + if open_tag: + # Add tag to stack + stack.append(tag) + else: + tag = tag[1:] + if len(stack) == 0: + pass + else: + if stack[-1] == tag: + # Close the block + stack.pop() + else: + print(f"\n========================", file=log) + print(f"Source xml: {self.filename}", file=log) + print(f"Source po : {self.app.pofile}", file=log) + print(f"Translated msgstr:\n{text}\n", file=log) + print(f"WARNING: Found closing tag [{tag}], however we expected [{stack[0]}].", file=log) + print(f"Remaining tags: {str(stack)}", file=log) + if tag in stack: + stack.remove(tag) + print(" Assuming incorrect tag order, found and removed tag from the stack", file=log) + print(f"========================\n", file=log) + textblock = textblock[end_tag+1:] + start_tag = textblock.find('<') + else: + start_tag = -1 + + + if len(stack): + print(f"\n========================", file=log) + print(f"Source xml: {self.filename}", file=log) + print(f"Source po : {self.app.pofile}", file=log) + print(f"ERROR: Found unmatched tags in po msgstr:\n{text}\n", file=log) + print(f"Tags not matched: {str(stack)}", file=log) + print(f"========================\n", file=log) + return False + return True def replaceNodeContentsWithText(self, node, text): """Replaces all subnodes of a node with contents of text treated as XML.""" + if not self.CheckMatchedTags(text): + return + if node.children: starttag = self.startTagForNode(node) endtag = self.endTagForNode(node) @@ -326,7 +418,7 @@ class XMLDocument(object): pass content = '<%s>%s' % (starttag, text, endtag) - tmp = tmp + content.encode('utf-8') + tmp = tmp + content newnode = None try: @@ -338,7 +430,9 @@ class XMLDocument(object): pass if not newnode: - print("""Error while parsing translation as XML:\n"%s"\n""" % (text.encode('utf-8')), file=sys.stderr) + print(f"\n--> Error parsing translation as XML:\n{text}") + # See: https://gitlab.gnome.org/GNOME/libxml2/-/issues/64 + print("--> Note: this might be caused by a bug in libxml2.\n") return newelem = newnode.getRootElement() @@ -346,15 +440,14 @@ class XMLDocument(object): if newelem and newelem.children: free = node.children while free: - next = free.__next__ + nextchild = free.next free.unlinkNode() - free = next + free = nextchild if node: - copy = newelem.copyNodeList() - next = node.__next__ + nextnode = node.next node.replaceNode(newelem.copyNodeList()) - node.next = next + node.__next__ = nextnode else: # In practice, this happens with tags such as " " (only whitespace in between) @@ -374,10 +467,11 @@ class XMLDocument(object): return True child = node.children while child: + nextchild = child.next if child.isText() and child.content.strip() != '': return True else: - child = child.__next__ + child = nextchild return False @@ -432,6 +526,10 @@ class XMLDocument(object): child = node.children while child: + # Although I do not know why, child or child.next gets changed inside the if part below. + # This makes child.next fail when it shouldn't. That's why we store nextchild here + # before going into the if and use that at the end of the loop + nextchild = child.next if (self.isFinalNode(child)) or (child.type == 'element' and self.worthOutputting(child)): myrepl.append(self.processElementTag(child, myrepl, True)) outtxt += '' % (len(myrepl)) @@ -441,20 +539,20 @@ class XMLDocument(object): outtxt += '<%s>%s' % (starttag, content, endtag) else: outtxt += self.doSerialize(child) - child = child.__next__ + child = nextchild if self.app.operation == 'merge': norm_outtxt = self.normalizeString(outtxt, self.app.isSpacePreserveNode(node)) translation = self.app.getTranslation(norm_outtxt) else: - translation = outtxt.decode('utf-8') + translation = outtxt starttag = self.startTagForNode(node) endtag = self.endTagForNode(node) worth = self.worthOutputting(node) if not translation: - translation = outtxt.decode('utf-8') + translation = outtxt if worth and self.app.options.get('mark_untranslated'): node.setLang('C') @@ -463,7 +561,7 @@ class XMLDocument(object): # repl[0] may contain translated attributes with # non-ASCII chars, so implicit conversion to may fail replacement = '<%s>%s' % \ - (repl[0].decode('utf-8'), repl[3], repl[2]) + (repl[0], repl[3], repl[2]) translation = translation.replace('' % (i+1), replacement) if worth: @@ -479,7 +577,7 @@ class XMLDocument(object): def isExternalGeneralParsedEntity(self, node): try: # it would be nice if debugDumpNode could use StringIO, but it apparently cannot - tmp = tempfile.TemporaryFile() + tmp = tempfile.TemporaryFile(encoding='utf-8') node.debugDumpNode(tmp,0) tmp.seek(0) tmpstr = tmp.read() @@ -507,25 +605,31 @@ class XMLDocument(object): if self.isExternalGeneralParsedEntity(node): return node.serialize('utf-8') else: - return self.stringForEntity(node) #content #content #serialize("utf-8") - elif node.type == 'entity_decl': + return self.stringForEntity(node) + elif node.type == 'entity_decl --> serialize': return node.serialize('utf-8') #'<%s>%s' % (startTagForNode(node), node.content, node.name) elif node.type == 'text': - return node.serialize('utf-8') + nodetext = node.serialize('utf-8') + return nodetext elif node.type == 'element': repl = [] (starttag, content, endtag, translation) = self.processElementTag(node, repl, True) - return '<%s>%s' % (starttag, content, endtag) + return '<%s>%s' % (starttag, content.encode('utf-8'), endtag) else: child = node.children outtxt = '' while child: + # Not sure if the same problem with using next.child happens here too + # but we will use nextchild here too just to be sure + nextchild = child.next outtxt += self.doSerialize(child) - child = child.__next__ + child = nextchild return outtxt -def xml_error_handler(arg, ctxt): +def xml_error_handler(ctxt, error): #deactivate error messages from the validation + if DEBUG_VERBOSITY > 0: + print(f"--> xml_error_handler: {error}") pass class Main(object): @@ -538,11 +642,11 @@ class Main(object): self.current_mode = self.load_mode(mode)() # Prepare output if operation == 'update': - self.out = tempfile.TemporaryFile() + self.out = tempfile.TemporaryFile(encoding='utf-8') elif output == '-': self.out = sys.stdout else: - self.out = file(output, 'w') + self.out = open(output, 'w', encoding='utf-8', buffering=1) def load_mode(self, modename): try: @@ -565,7 +669,7 @@ class Main(object): try: doc = XMLDocument(xmlfile, self) except Exception as e: - print("Unable to parse XML file '%s': %s" % (xmlfile, str(e)), file=sys.stderr) + print("Error parsing XML file '%s': %s" % (xmlfile, str(e)), file=sys.stderr) sys.exit(1) self.current_mode.preProcessXml(doc.doc, self.msg) doc.generate_messages() @@ -578,13 +682,13 @@ class Main(object): try: doc = XMLDocument(xmlfile, self) except Exception as e: - print(str(e), file=sys.stderr) + print("Error parsing XML file '%s': %s" % (xmlfile, str(e)), file=sys.stderr) sys.exit(1) - try: mfile = open(mofile, "rb") except: - print("Can't open MO file '%s'." % (mofile), file=sys.stderr) + print("Error opening MO file '%s': %s." % (mofile, str(e)), file=sys.stderr) + sys.exit(1) self.gt = gettext.GNUTranslations(mfile) self.gt.add_fallback(NoneTranslations()) # Has preProcessXml use cases for merge? @@ -607,7 +711,7 @@ class Main(object): try: doc = XMLDocument(xmlfile, self) except Exception as e: - print(str(e), file=sys.stderr) + print("Error parsing XML file '%s': %s" % (xmlfile, str(e)), file=sys.stderr) sys.exit(1) doc.generate_messages() @@ -615,7 +719,7 @@ class Main(object): try: doc = XMLDocument(origxml, self) except Exception as e: - print(str(e), file=sys.stderr) + print("Error parsing XML file '%s': %s" % (origxml, str(e)), file=sys.stderr) sys.exit(1) doc.generate_messages() self.output_po() @@ -646,11 +750,10 @@ class Main(object): text should be a string to look for. """ - #print >>sys.stderr,"getTranslation('%s')" % (text.encode('utf-8')) if not text or text.strip() == '': return text if self.gt: - res = self.gt.ugettext(text.decode('utf-8')) + res = self.gt.gettext(text) return res return text diff --git a/tools/xml2po/modes/gimphelp.py b/tools/xml2po/modes/gimphelp.py index d72f518ee..0e7e31c73 100644 --- a/tools/xml2po/modes/gimphelp.py +++ b/tools/xml2po/modes/gimphelp.py @@ -81,7 +81,7 @@ class gimphelpXmlMode(docbookXmlMode): child = node.children while child: self._output_images(child,msg) - child = child.__next__ + child = child.next def preProcessXml(self, doc, msg): """Add additional messages of interest here.""" -- 2.29.2 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Jacob Boerema Date: Mon, 29 Mar 2021 17:55:35 -0400 Subject: [PATCH 3/3] tools: also update docbook.py to python3. (cherry picked from commit b2d8952713245cbec4fef5b613b984549ca2b2fd) --- tools/xml2po/modes/docbook.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/xml2po/modes/docbook.py b/tools/xml2po/modes/docbook.py index 5658ffc1a..ed395080e 100644 --- a/tools/xml2po/modes/docbook.py +++ b/tools/xml2po/modes/docbook.py @@ -95,7 +95,7 @@ class docbookXmlMode(basicXmlMode): ret = self._find_articleinfo(child) if ret: return ret - child = child.__next__ + child = child.next return None def _find_lastcopyright(self, node): @@ -140,7 +140,7 @@ class docbookXmlMode(basicXmlMode): child = node.children while child: self._output_images(child,msg) - child = child.__next__ + child = child.next def preProcessXml(self, doc, msg): @@ -157,7 +157,7 @@ class docbookXmlMode(basicXmlMode): root = doc.getRootElement() # DocBook documents can be something other than article, handle that as well in the future while root and root.name != 'article' and root.name != 'book': - root = root.__next__ + root = root.next if root and (root.name == 'article' or root.name == 'book'): root.setProp('lang', language) else: @@ -184,7 +184,7 @@ class docbookXmlMode(basicXmlMode): else: ai.addChild(copy) if match.group(3): - copy.newChild(None, "year", match.group(3).encode('utf-8')) + copy.newChild(None, "year", match.group(3)) if match.group(1) and match.group(2): holder = match.group(1)+"(%s)" % match.group(2) elif match.group(1): @@ -193,7 +193,7 @@ class docbookXmlMode(basicXmlMode): holder = match.group(2) else: holder = "???" - copy.newChild(None, "holder", holder.encode('utf-8')) + copy.newChild(None, "holder", holder) # Perform some tests when ran standalone if __name__ == '__main__': -- 2.31.1