Merge pull request #209961 from barryfm/fix/dictd-python3-update

dictd: port the dictionary database build scripts from Python 2 to Python 3
2024-11-17 07:13:23 +01:00 · 2023-01-11 09:34:28 +00:00 · 2023-01-11 09:34:28 +00:00 · 2cab4c5d8b
commit 2cab4c5d8b
parent 2cc918c113 9287d37501
5 changed files with 75 additions and 78 deletions
--- a/pkgs/servers/dict/dictd-wordnet.nix
+++ b/pkgs/servers/dict/dictd-wordnet.nix
@ -1,10 +1,10 @@
-{lib, stdenv, python2, wordnet, writeScript}:
+{lib, stdenv, python3, wordnet, writeScript}:

 stdenv.mkDerivation rec {
  version = "542";
  pname = "dict-db-wordnet";

-  buildInputs = [python2 wordnet];
+  buildInputs = [python3 wordnet];
  convert = ./wordnet_structures.py;

  builder = writeScript "builder.sh" ''
--- a/pkgs/servers/dict/wiktionary/default.nix
+++ b/pkgs/servers/dict/wiktionary/default.nix
@ -1,4 +1,4 @@
-{ lib, stdenv, fetchurl, python2, dict, glibcLocales }:
+{ lib, stdenv, fetchurl, python3, dict, glibcLocales }:

 stdenv.mkDerivation rec {
  pname = "dict-db-wiktionary";
@ -9,8 +9,7 @@ stdenv.mkDerivation rec {
    sha256 = "qsha26LL2513SDtriE/0zdPX1zlnpzk1KKk+R9dSdew=";
  };

-  # script in nixpkgs does not support python2
-  nativeBuildInputs = [ python2 dict glibcLocales ];
+  nativeBuildInputs = [ python3 dict glibcLocales ];

  dontUnpack = true;

@ -18,7 +17,7 @@ stdenv.mkDerivation rec {
    mkdir -p $out/share/dictd/
    cd $out/share/dictd

-    ${python2.interpreter} -O ${./wiktionary2dict.py} "${src}"
+    ${python3.interpreter} -O ${./wiktionary2dict.py} "${src}"
    dictzip wiktionary-en.dict
    echo en_US.UTF-8 > locale
  '';
--- a/pkgs/servers/dict/wiktionary/latest_version.py
+++ b/pkgs/servers/dict/wiktionary/latest_version.py
@ -25,18 +25,18 @@ def nix_prefetch_url(url, algo='sha256'):
    """Prefetches the content of the given URL."""
    print(f'nix-prefetch-url {url}')
    out = subprocess.check_output(['nix-prefetch-url', '--type', algo, url])
-    return out.decode('utf-8').rstrip()
+    return out.rstrip()


 current_version = subprocess.check_output([
    'nix', 'eval', '--raw',
    '-f', dirname(abspath(__file__)) + '/../../../..',
    'dictdDBs.wiktionary.version',
-]).decode('utf-8')
+])

 parser = WiktionaryLatestVersionParser(current_version)

 with urlopen('https://dumps.wikimedia.org/enwiktionary/') as resp:
-    parser.feed(resp.read().decode('utf-8'))
+    parser.feed(resp.read())

 print(parser.latest_version)
--- a/pkgs/servers/dict/wiktionary/wiktionary2dict.py
+++ b/pkgs/servers/dict/wiktionary/wiktionary2dict.py
@ -2,7 +2,6 @@
 # Based on code from wiktiondict by Greg Hewgill
 import re
 import sys
-import codecs
 import os
 import textwrap
 import time
@ -48,7 +47,7 @@ class Delimiter:
        return self.c

 def Tokenise(s):
-    s = unicode(s)
+    s = str(s)
    stack = []
    last = 0
    i = 0
@ -109,17 +108,17 @@ def Tokenise(s):
        yield s[last:i]

 def processSub(templates, tokens, args):
-    t = tokens.next()
-    if not isinstance(t, unicode):
+    t = next(tokens)
+    if not isinstance(t, str):
        raise SyntaxError
    name = t
-    t = tokens.next()
+    t = next(tokens)
    default = None
    if isinstance(t, Delimiter) and t.c == '|':
        default = ""
        while True:
-            t = tokens.next()
-            if isinstance(t, unicode):
+            t = next(tokens)
+            if isinstance(t, str):
                default += t
            elif isinstance(t, OpenDouble):
                default += processTemplateCall(templates, tokens, args)
@ -128,7 +127,7 @@ def processSub(templates, tokens, args):
            elif isinstance(t, CloseTriple):
                break
            else:
-                print "Unexpected:", t
+                print("Unexpected:", t)
                raise SyntaxError()
    if name in args:
        return args[name]
@ -142,14 +141,14 @@ def processTemplateCall(templates, tokens, args):
    template = tokens.next().strip().lower()
    args = {}
    a = 1
-    t = tokens.next()
+    t = next(tokens)
    while True:
        if isinstance(t, Delimiter):
-            name = unicode(a)
+            name = str(a)
            arg = ""
            while True:
-                t = tokens.next()
-                if isinstance(t, unicode):
+                t = next(tokens)
+                if isinstance(t, str):
                    arg += t
                elif isinstance(t, OpenDouble):
                    arg += processTemplateCall(templates, tokens, args)
@ -163,9 +162,9 @@ def processTemplateCall(templates, tokens, args):
                name = arg.strip()
                arg = ""
                while True:
-                    t = tokens.next()
-                    if isinstance(t, (unicode, Equals)):
-                        arg += unicode(t)
+                    t = next(tokens)
+                    if isinstance(t, (str, Equals)):
+                        arg += str(t)
                    elif isinstance(t, OpenDouble):
                        arg += processTemplateCall(templates, tokens, args)
                    elif isinstance(t, OpenTriple):
@ -181,7 +180,7 @@ def processTemplateCall(templates, tokens, args):
        elif isinstance(t, CloseDouble):
            break
        else:
-            print "Unexpected:", t
+            print("Unexpected:", t)
            raise SyntaxError
    #print template, args
    if template[0] == '#':
@ -208,7 +207,7 @@ def processTemplateCall(templates, tokens, args):
            else:
                return ""
        else:
-            print "Unknown ParserFunction:", template
+            print("Unknown ParserFunction:", template)
            sys.exit(1)
    if template not in templates:
        return "{{%s}}" % template
@ -225,13 +224,13 @@ def process(templates, s, args = {}):
    tokens = Tokenise(s)
    try:
        while True:
-            t = tokens.next()
+            t = next(tokens)
            if isinstance(t, OpenDouble):
                r += processTemplateCall(templates, tokens, args)
            elif isinstance(t, OpenTriple):
                r += processSub(templates, tokens, args)
            else:
-                r += unicode(t)
+                r += str(t)
    except StopIteration:
        pass
    return r
@ -250,11 +249,11 @@ def test():
        't6': "t2demo|a",
    }
    def t(text, expected):
-        print "text:", text
+        print("text:", text)
        s = process(templates, text)
        if s != expected:
-            print "got:", s
-            print "expected:", expected
+            print("got:", s)
+            print("expected:", expected)
            sys.exit(1)
    t("{{Name-example}}", "I am a template example, my first name is '''{{{firstName}}}''' and my last name is '''{{{lastName}}}'''. You can reference my page at [[{{{lastName}}}, {{{firstName}}}]].")
    t("{{Name-example | firstName=John | lastName=Smith }}", "I am a template example, my first name is '''John''' and my last name is '''Smith'''. You can reference my page at [[Smith, John]].")
@ -463,7 +462,7 @@ Parts = {
    'Verbal noun': "v.n.",
 }
 PartsUsed = {}
-for p in Parts.keys():
+for p in list(Parts.keys()):
    PartsUsed[p] = 0

 def encode(s):
@ -641,7 +640,7 @@ def formatNormal(word, doc):
                #    r += "  "*(depth-1) + word + " (" + p + ")\n\n"
                r += "  "*(depth-1) + section.heading + "\n\n"
            else:
-                print >>errors, "Unknown part: (%s) %s" % (word, section.heading)
+                print("Unknown part: (%s) %s" % (word, section.heading), file=errors)
                return ""
        elif depth > posdepth:
            return ""
@ -709,8 +708,8 @@ class WikiHandler(xml.sax.ContentHandler):
        if self.element == "text":
            if self.page:
                if self.page in self.long:
-                    print self.page, len(self.text)
-                    print
+                    print(self.page, len(self.text))
+                    print()
                self.doPage(self.page, self.text)
                self.page = None
            self.text = ""
@ -760,8 +759,7 @@ info = """   This file was converted from the original database on:
  Wiktionary is available under the GNU Free Documentation License.
 """ % (time.ctime(), os.path.basename(fn))

-errors = codecs.open("mkdict.err", "w", "utf_8")
-e = codecs.getencoder("utf_8")
+errors = open("mkdict.err", "w")

 Templates = {}
 f = os.popen("bunzip2 -c %s" % fn, "r")
@ -769,10 +767,9 @@ xml.sax.parse(f, TemplateHandler())
 f.close()

 f = os.popen("bunzip2 -c %s" % fn, "r")
-out = codecs.getwriter("utf_8")(
-        os.popen("dictfmt -p wiktionary-en --locale en_US.UTF-8 --columns 0 -u http://en.wiktionary.org", "w"))
+out = os.popen("dictfmt -p wiktionary-en --locale en_US.UTF-8 --columns 0 -u http://en.wiktionary.org", "w")

-out.write(("%%h English Wiktionary\n%s" % info).encode('utf-8'))
+out.write("%%h English Wiktionary\n%s" % info)
 xml.sax.parse(f, WordHandler())
 f.close()
 out.close()
--- a/pkgs/servers/dict/wordnet_structures.py
+++ b/pkgs/servers/dict/wordnet_structures.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #Copyright 2007 Sebastian Hagen
 # This file is part of wordnet_tools.

@ -26,6 +26,7 @@
 # written.

 import datetime
+import math
 from textwrap import TextWrapper

 CAT_ADJECTIVE = 0
@ -227,7 +228,7 @@ original version.\n\n
      rv = ''
      while (e > 0):
         e -= 1
-         d = (i / 64**e)
+         d = math.floor(i / 64**e)
         rv += cls.base64_map[d]
         i = i % (64**e)
      return rv
@ -262,7 +263,7 @@ original version.\n\n
      self.dict_entry_write(file_index, file_data, '00-database-url', '00-database-url\n%s\n' % self.wn_url)


-      words = self.word_data.keys()
+      words = list(self.word_data.keys())
      words.sort()
      for word in words:
         for wi in self.word_data[word]:
@ -306,14 +307,14 @@ if (__name__ == '__main__'):
   wnd = WordnetDict(wn_url=options.wn_url, desc_short=options.desc_short, desc_long=options.desc_long)

   for i in range(0,len(args),2):
-      print 'Opening index file %r...' % args[i]
-      file_index = file(args[i])
-      print 'Opening data file %r...' % args[i+1]
-      file_data = file(args[i+1])
-      print 'Parsing index file and data file...'
+      print('Opening index file %r...' % args[i])
+      file_index = open(args[i])
+      print('Opening data file %r...' % args[i+1])
+      file_data = open(args[i+1])
+      print('Parsing index file and data file...')
      wnd.wn_dict_add(file_index, file_data)

-   print 'All input files parsed. Writing output to index file %r and data file %r.' % (options.oi, options.od)
+   print('All input files parsed. Writing output to index file %r and data file %r.' % (options.oi, options.od))

-   wnd.dict_generate(file(options.oi, 'w'),file(options.od, 'w'))
-   print 'All done.' 
+   wnd.dict_generate(open(options.oi, 'w'),open(options.od, 'w'))
+   print('All done.')