From 065bb52109527ff60f4d9e15fac537dd221e6cb2 Mon Sep 17 00:00:00 2001
From: Abhijit Menon-Sen <ams@2ndQuadrant.com>
Date: Mon, 31 Aug 2015 10:37:09 +0530
Subject: [PATCH 1/3] Be systematic about parsing and validating hostnames and
 addresses

This adds a parse_address(pattern) utility function that returns
(host,port), and uses it wherever where we accept IPv4 and IPv6
addresses and hostnames (or host patterns): the inventory parser
the the add_host action plugin.

It also introduces a more extensive set of unit tests that supersedes
the old add_host unit tests (which didn't actually test add_host, but
only the parsing function).
---
 lib/ansible/inventory/__init__.py          |  24 +--
 lib/ansible/inventory/expand_hosts.py      |   2 +-
 lib/ansible/inventory/ini.py               |  57 ++----
 lib/ansible/parsing/utils/addresses.py     | 200 +++++++++++++++++++++
 lib/ansible/plugins/action/add_host.py     |  36 +---
 test/units/parsing/test_addresses.py       |  56 ++++++
 test/units/plugins/action/test_add_host.py |  47 -----
 7 files changed, 283 insertions(+), 139 deletions(-)
 create mode 100644 lib/ansible/parsing/utils/addresses.py
 create mode 100644 test/units/parsing/test_addresses.py
 delete mode 100644 test/units/plugins/action/test_add_host.py

diff --git a/lib/ansible/inventory/__init__.py b/lib/ansible/inventory/__init__.py
index 2eda3e6649d..657084c724d 100644
--- a/lib/ansible/inventory/__init__.py
+++ b/lib/ansible/inventory/__init__.py
@@ -33,6 +33,7 @@ from ansible.inventory.group import Group
 from ansible.inventory.host import Host
 from ansible.plugins import vars_loader
 from ansible.utils.vars import combine_vars
+from ansible.parsing.utils.addresses import parse_address
 
 try:
     from __main__ import display
@@ -83,27 +84,16 @@ class Inventory(object):
                 host_list = host_list.split(",")
                 host_list = [ h for h in host_list if h and h.strip() ]
 
+        self.parser = None
+
         if host_list is None:
-            self.parser = None
+            pass
         elif isinstance(host_list, list):
-            self.parser = None
             all = Group('all')
             self.groups = [ all ]
-            ipv6_re = re.compile('\[([a-f:A-F0-9]*[%[0-z]+]?)\](?::(\d+))?')
-            for x in host_list:
-                m = ipv6_re.match(x)
-                if m:
-                    all.add_host(Host(m.groups()[0], m.groups()[1]))
-                else:
-                    if ":" in x:
-                        tokens = x.rsplit(":", 1)
-                        # if there is ':' in the address, then this is an ipv6
-                        if ':' in tokens[0]:
-                            all.add_host(Host(x))
-                        else:
-                            all.add_host(Host(tokens[0], tokens[1]))
-                    else:
-                        all.add_host(Host(x))
+            for h in host_list:
+                (host, port) = parse_address(h, allow_ranges=False)
+                all.add_host(Host(host, port))
         elif self._loader.path_exists(host_list):
             #TODO: switch this to a plugin loader and a 'condition' per plugin on which it should be tried, restoring 'inventory pllugins'
             if self._loader.is_directory(host_list):
diff --git a/lib/ansible/inventory/expand_hosts.py b/lib/ansible/inventory/expand_hosts.py
index 0d63ba08bbc..7e1c127c27e 100644
--- a/lib/ansible/inventory/expand_hosts.py
+++ b/lib/ansible/inventory/expand_hosts.py
@@ -44,7 +44,7 @@ def detect_range(line = None):
 
     Returnes True if the given line contains a pattern, else False.
     '''
-    if 0 <= line.find("[") < line.find(":") < line.find("]"):
+    if '[' in line:
         return True
     else:
         return False
diff --git a/lib/ansible/inventory/ini.py b/lib/ansible/inventory/ini.py
index 36115c57975..30408048136 100644
--- a/lib/ansible/inventory/ini.py
+++ b/lib/ansible/inventory/ini.py
@@ -29,6 +29,7 @@ from ansible.inventory.host import Host
 from ansible.inventory.group import Group
 from ansible.inventory.expand_hosts import detect_range
 from ansible.inventory.expand_hosts import expand_hostname_range
+from ansible.parsing.utils.addresses import parse_address
 from ansible.utils.unicode import to_unicode, to_bytes
 
 class InventoryParser(object):
@@ -265,30 +266,20 @@ class InventoryParser(object):
         optional port number that applies to all of them.
         '''
 
-        # Is a port number specified?
-        #
-        # This may be a mandatory :NN suffix on any square-bracketed expression
-        # (IPv6 address, IPv4 address, host name, host pattern), or an optional
-        # :NN suffix on an IPv4 address, host name, or pattern. IPv6 addresses
-        # must be in square brackets if a port is specified.
+        # Can the given hostpattern be parsed as a host with an optional port
+        # specification?
 
-        port = None
+        (pattern, port) = parse_address(hostpattern, allow_ranges=True)
+        if not pattern:
+            self._raise_error("Can't parse '%s' as host[:port]" % hostpattern)
 
-        for type in ['bracketed_hostport', 'hostport']:
-            m = self.patterns[type].match(hostpattern)
-            if m:
-                (hostpattern, port) = m.groups()
-                continue
+        # Once we have separated the pattern, we expand it into list of one or
+        # more hostnames, depending on whether it contains any [x:y] ranges.
 
-        # Now we're left with just the pattern, which results in a list of one
-        # or more hostnames, depending on whether it contains any [x:y] ranges.
-        #
-        # FIXME: We could be more strict here about validation.
-
-        if detect_range(hostpattern):
-            hostnames = expand_hostname_range(hostpattern)
+        if detect_range(pattern):
+            hostnames = expand_hostname_range(pattern)
         else:
-            hostnames = [hostpattern]
+            hostnames = [pattern]
 
         return (hostnames, port)
 
@@ -374,29 +365,3 @@ class InventoryParser(object):
                 $                           # end of the line
             ''', re.X
         )
-
-        # The following patterns match the various ways in which a port number
-        # may be specified on an IPv6 address, IPv4 address, hostname, or host
-        # pattern. All of the above may be enclosed in square brackets with a
-        # mandatory :NN suffix; or all but the first may be given without any
-        # brackets but with an :NN suffix.
-
-        self.patterns['bracketed_hostport'] = re.compile(
-            r'''^
-                \[(.+)\]                    # [host identifier]
-                :([0-9]+)                   # :port number
-                $
-            ''', re.X
-        )
-
-        self.patterns['hostport'] = re.compile(
-            r'''^
-                ((?:                        # We want to match:
-                    [^:\[\]]                # (a non-range character
-                    |                       # ...or...
-                    \[[^\]]*\]              # a complete bracketed expression)
-                )*)                         # repeated as many times as possible
-                :([0-9]+)                   # followed by a port number
-                $
-            ''', re.X
-        )
diff --git a/lib/ansible/parsing/utils/addresses.py b/lib/ansible/parsing/utils/addresses.py
new file mode 100644
index 00000000000..3a128a35249
--- /dev/null
+++ b/lib/ansible/parsing/utils/addresses.py
@@ -0,0 +1,200 @@
+# Copyright 2015 Abhijit Menon-Sen <ams@2ndQuadrant.com>
+#
+# This file is part of Ansible
+#
+# Ansible is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Ansible is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ansible.  If not, see <http://www.gnu.org/licenses/>.
+
+# Make coding more python3-ish
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+import re
+
+# Components that match a numeric or alphanumeric begin:end or begin:end:step
+# range expression inside square brackets.
+
+numeric_range = r'''
+    \[
+        (?:[0-9]+:[0-9]+)               # numeric begin:end
+        (?::[0-9]+)?                    # numeric :step (optional)
+    \]
+'''
+
+alphanumeric_range = r'''
+    \[
+        (?:
+            [a-z]:[a-z]|                # one-char alphabetic range
+            [0-9]+:[0-9]+               # ...or a numeric one
+        )
+        (?::[0-9]+)?                    # numeric :step (optional)
+    \]
+'''
+
+# Components that match a 16-bit portion of an IPv6 address in hexadecimal
+# notation (0..ffff) or an 8-bit portion of an IPv4 address in decimal notation
+# (0..255) or an [x:y(:z)] numeric range.
+
+ipv6_component = r'''
+    (?:
+        [0-9a-f]{{1,4}}|                # 0..ffff
+        {range}                         # or a numeric range
+    )
+'''.format(range=numeric_range)
+
+ipv4_component = r'''
+    (?:
+        [01]?[0-9]{{1,2}}|              # 0..199
+        2[0-4][0-9]|                    # 200..249
+        25[0-5]|                        # 250..255
+        {range}                         # or a numeric range
+    )
+'''.format(range=numeric_range)
+
+patterns = {
+    # This matches a square-bracketed expression with a port specification. What
+    # is inside the square brackets is validated later.
+
+    'bracketed_hostport': re.compile(
+        r'''^
+            \[(.+)\]                    # [host identifier]
+            :([0-9]+)                   # :port number
+            $
+        ''', re.X
+    ),
+
+    # This matches a bare IPv4 address or hostname (or host pattern including
+    # [x:y(:z)] ranges) with a port specification.
+
+    'hostport': re.compile(
+        r'''^
+            ((?:                        # We want to match:
+                [^:\[\]]                # (a non-range character
+                |                       # ...or...
+                \[[^\]]*\]              # a complete bracketed expression)
+            )*)                         # repeated as many times as possible
+            :([0-9]+)                   # followed by a port number
+            $
+        ''', re.X
+    ),
+
+    # This matches an IPv4 address, but also permits range expressions.
+
+    'ipv4': re.compile(
+        r'''^
+            (?:{i4}\.){{3}}{i4}         # Three parts followed by dots plus one
+            $
+        '''.format(i4=ipv4_component), re.X|re.I
+    ),
+
+    # This matches an IPv6 address, but also permits range expressions.
+    #
+    # This expression looks complex, but it really only spells out the various
+    # combinations in which the basic unit of an IPv6 address (0..ffff) can be
+    # written, from :: to 1:2:3:4:5:6:7:8, plus the IPv4-in-IPv6 variants such
+    # as ::ffff:192.0.2.3.
+    #
+    # Note that we can't just use ipaddress.ip_address() because we also have to
+    # accept ranges in place of each component.
+
+    'ipv6': re.compile(
+        r'''^
+            (?:{0}:){{7}}{0}|           # uncompressed: 1:2:3:4:5:6:7:8
+            (?:{0}:){{1,6}}:|           # compressed variants, which are all
+            (?:{0}:)(?:{0}){{1,6}}|     # a::b for various lengths of a,b
+            (?:{0}:){{2}}(?::{0}){{1,5}}|
+            (?:{0}:){{3}}(?::{0}){{1,4}}|
+            (?:{0}:){{4}}(?::{0}){{1,3}}|
+            (?:{0}:){{5}}(?::{0}){{1,2}}|
+            (?:{0}:){{6}}(?::{0})|      # ...all with 2 <= a+b <= 7
+            :(?::{0}){{1,6}}|           # ::ffff(:ffff...)
+            {0}?::|                     # ffff::, ::
+                                        # ipv4-in-ipv6 variants
+            (?:0:){{6}}(?:{0}\.){{3}}{0}|
+            ::(?:ffff:)?(?:{0}\.){{3}}{0}|
+            (?:0:){{5}}ffff:(?:{0}\.){{3}}{0}
+            $
+        '''.format(ipv6_component), re.X|re.I
+    ),
+
+    # This matches a hostname or host pattern including [x:y(:z)] ranges.
+    #
+    # We roughly follow DNS rules here, but also allow ranges (and underscores).
+    # In the past, no systematic rules were enforced about inventory hostnames,
+    # but the parsing context (e.g. shlex.split(), fnmatch.fnmatch()) excluded
+    # various metacharacters anyway.
+    #
+    # We don't enforce DNS length restrictions here (63 characters per label,
+    # 253 characters total) or make any attempt to process IDNs.
+
+    'hostname': re.compile(
+        r'''^                           # We need at least one label,
+            (?:                         # which comprises:
+                [a-z0-9_-]|             # (a valid domain label character
+                {0}                     # or a bracketed alphanumeric range)
+            )+
+            (?:\.(?:[a-z0-9_-]|{0})+)*  # Followed by zero or more .labels
+            $
+        '''.format(alphanumeric_range), re.X|re.I
+    ),
+}
+
+def parse_address(address, allow_ranges=False):
+    """
+    Takes a string and returns a (host, port) tuple. If the host is None, then
+    the string could not be parsed as a host identifier with an optional port
+    specification. If the port is None, then no port was specified.
+
+    The host identifier may be a hostname (qualified or not), an IPv4 address,
+    or an IPv6 address. If allow_ranges is True, then any of those may contain
+    [x:y] range specifications, e.g. foo[1:3] or foo[0:5]-bar[x-z].
+
+    The port number is an optional :NN suffix on an IPv4 address or host name,
+    or a mandatory :NN suffix on any square-bracketed expression: IPv6 address,
+    IPv4 address, or host name. (This means the only way to specify a port for
+    an IPv6 address is to enclose it in square brackets.)
+    """
+
+    # First, we extract the port number if one is specified.
+
+    port = None
+    for type in ['bracketed_hostport', 'hostport']:
+        m = patterns[type].match(address)
+        if m:
+            (address, port) = m.groups()
+            port = int(port)
+            continue
+
+    # What we're left with now must be an IPv4 or IPv6 address, possibly with
+    # numeric ranges, or a hostname with alphanumeric ranges.
+
+    host = None
+    for type in ['ipv4', 'ipv6', 'hostname']:
+        m = patterns[type].match(address)
+        if m:
+            host = address
+            continue
+
+    # If it isn't any of the above, we don't understand it.
+
+    if not host:
+        return (None, None)
+
+    # If we get to this point, we know that any included ranges are valid. If
+    # the caller is prepared to handle them, all is well. Otherwise we treat
+    # it as a parse failure.
+
+    if not allow_ranges and '[' in host:
+        return (None, None)
+
+    return (host, port)
diff --git a/lib/ansible/plugins/action/add_host.py b/lib/ansible/plugins/action/add_host.py
index cf2dab17373..0e7d3187e56 100644
--- a/lib/ansible/plugins/action/add_host.py
+++ b/lib/ansible/plugins/action/add_host.py
@@ -23,6 +23,8 @@ __metaclass__ = type
 import re
 
 from ansible.plugins.action import ActionBase
+from ansible.parsing.utils.addresses import parse_address
+from ansible.errors import AnsibleError, AnsibleParserError
 
 class ActionModule(ActionBase):
     ''' Create inventory hosts and groups in the memory inventory'''
@@ -40,9 +42,11 @@ class ActionModule(ActionBase):
         new_name = self._task.args.get('name', self._task.args.get('hostname', None))
         #vv("creating host via 'add_host': hostname=%s" % new_name)
 
-        new_name, new_port = _parse_ip_host_and_port(new_name)
-        if new_port:
-            self._task.args['ansible_ssh_port'] = new_port
+        name, port = parse_address(new_name, allow_ranges=False)
+        if not name:
+            raise AnsibleError("Invalid inventory hostname: %s" % new_name)
+        if port:
+            self._task.args['ansible_ssh_port'] = port
 
         groups = self._task.args.get('groupname', self._task.args.get('groups', self._task.args.get('group', ''))) 
         # add it to the group if that was specified
@@ -58,28 +62,4 @@ class ActionModule(ActionBase):
             if not k in [ 'name', 'hostname', 'groupname', 'groups' ]:
                 host_vars[k] = self._task.args[k] 
 
-        return dict(changed=True, add_host=dict(host_name=new_name, groups=new_groups, host_vars=host_vars))
-
-def _parse_ip_host_and_port(hostname):
-    """
-    Attempt to parse the hostname and port from a hostname, e.g.,
-
-    some-host-name
-    some-host-name:80
-    8.8.8.8
-    8.8.8.8:80
-    2001:db8:0:1
-    [2001:db8:0:1]:80
-    """
-    if hostname.count(':') > 1:
-        match = re.match(
-            '\[(?P<ip>[^\]]+)\](:(?P<port>[0-9]+))?',
-            hostname
-        )
-        if match:
-            return match.group('ip'), match.group('port')
-        else:
-            return hostname, None
-    elif ':' in hostname:
-        return hostname.rsplit(':', 1)
-    return hostname, None
+        return dict(changed=True, add_host=dict(host_name=name, groups=new_groups, host_vars=host_vars))
diff --git a/test/units/parsing/test_addresses.py b/test/units/parsing/test_addresses.py
new file mode 100644
index 00000000000..2bfb1102181
--- /dev/null
+++ b/test/units/parsing/test_addresses.py
@@ -0,0 +1,56 @@
+import unittest
+
+from ansible.parsing.utils.addresses import parse_address
+
+class TestParseAddress(unittest.TestCase):
+
+    tests = {
+        # IPv4 addresses
+        '192.0.2.3': ['192.0.2.3', None],
+        '192.0.2.3:23': ['192.0.2.3', 23],
+
+        # IPv6 addresses
+        '::': ['::', None],
+        '::1': ['::1', None],
+        '[::1]:442': ['::1', 442],
+        'abcd:ef98:7654:3210:abcd:ef98:7654:3210': ['abcd:ef98:7654:3210:abcd:ef98:7654:3210', None],
+        '[abcd:ef98:7654:3210:abcd:ef98:7654:3210]:42': ['abcd:ef98:7654:3210:abcd:ef98:7654:3210', 42],
+
+        # Hostnames
+        'some-host': ['some-host', None],
+        'some-host:80': ['some-host', 80],
+        'some.host.com:492': ['some.host.com', 492],
+        '[some.host.com]:493': ['some.host.com', 493],
+
+        # Various errors
+        '': [None, None],
+        'some..host': [None, None],
+        'some.': [None, None],
+        '[example.com]': [None, None],
+    }
+
+    range_tests = {
+        '192.0.2.[3:10]': ['192.0.2.[3:10]', None],
+        '192.0.2.[3:10]:23': ['192.0.2.[3:10]', 23],
+        'abcd:ef98::7654:[1:9]': ['abcd:ef98::7654:[1:9]', None],
+        '[abcd:ef98::7654:[6:32]]:2222': ['abcd:ef98::7654:[6:32]', 2222],
+        'foo[a:b]:42': ['foo[a:b]', 42],
+        'foo[a-b]:32': [None, None],
+        'foo[x-y]': [None, None],
+    }
+
+    def test_without_ranges(self):
+        for t in self.tests:
+            test = self.tests[t]
+
+            (host, port) = parse_address(t)
+            assert host == test[0]
+            assert port == test[1]
+
+    def test_with_ranges(self):
+        for t in self.range_tests:
+            test = self.range_tests[t]
+
+            (host, port) = parse_address(t, allow_ranges=True)
+            assert host == test[0]
+            assert port == test[1]
diff --git a/test/units/plugins/action/test_add_host.py b/test/units/plugins/action/test_add_host.py
deleted file mode 100644
index c694d387a3a..00000000000
--- a/test/units/plugins/action/test_add_host.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import unittest
-
-from ansible.plugins.action import add_host
-
-
-class TestAddHost(unittest.TestCase):
-
-    def test_hostname(self):
-        host, port = add_host._parse_ip_host_and_port('some-remote-host')
-        assert host == 'some-remote-host'
-        assert port is None
-
-    def test_hostname_with_port(self):
-        host, port = add_host._parse_ip_host_and_port('some-remote-host:80')
-        assert host == 'some-remote-host'
-        assert port == '80'
-
-    def test_parse_ip_host_and_port_v4(self):
-        host, port = add_host._parse_ip_host_and_port('8.8.8.8')
-        assert host == '8.8.8.8'
-        assert port is None
-
-    def test_parse_ip_host_and_port_v4_and_port(self):
-        host, port = add_host._parse_ip_host_and_port('8.8.8.8:80')
-        assert host == '8.8.8.8'
-        assert port == '80'
-
-    def test_parse_ip_host_and_port_v6(self):
-        host, port = add_host._parse_ip_host_and_port(
-            'dead:beef:dead:beef:dead:beef:dead:beef'
-        )
-        assert host == 'dead:beef:dead:beef:dead:beef:dead:beef'
-        assert port is None
-
-    def test_parse_ip_host_and_port_v6_with_brackets(self):
-        host, port = add_host._parse_ip_host_and_port(
-            '[dead:beef:dead:beef:dead:beef:dead:beef]'
-        )
-        assert host == 'dead:beef:dead:beef:dead:beef:dead:beef'
-        assert port is None
-
-    def test_parse_ip_host_and_port_v6_with_brackets_and_port(self):
-        host, port = add_host._parse_ip_host_and_port(
-            '[dead:beef:dead:beef:dead:beef:dead:beef]:80'
-        )
-        assert host == 'dead:beef:dead:beef:dead:beef:dead:beef'
-        assert port == '80'

From 7479ab47e07efaded3d0aeb3bd169cbf27dd8f7c Mon Sep 17 00:00:00 2001
From: Abhijit Menon-Sen <ams@2ndQuadrant.com>
Date: Thu, 3 Sep 2015 16:32:06 +0530
Subject: [PATCH 2/3] Be stricter about parsing hostname labels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Labels must start with an alphanumeric character, may contain
alphanumeric characters or hyphens, but must not end with a hyphen.
We enforce those rules, but allow underscores wherever hyphens are
accepted, and allow alphanumeric ranges anywhere.

We relax the definition of "alphanumeric" to include Unicode characters
even though such inventory hostnames cannot be used in practice unless
an ansible_ssh_host is set for each of them.

We still don't enforce length restrictions—the fact that we have to
accept ranges makes it more complex, and it doesn't seem especially
worthwhile.
---
 lib/ansible/parsing/utils/addresses.py | 22 +++++++++++++++-------
 test/units/parsing/test_addresses.py   | 13 +++++++++++++
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/lib/ansible/parsing/utils/addresses.py b/lib/ansible/parsing/utils/addresses.py
index 3a128a35249..71675769d27 100644
--- a/lib/ansible/parsing/utils/addresses.py
+++ b/lib/ansible/parsing/utils/addresses.py
@@ -61,6 +61,17 @@ ipv4_component = r'''
     )
 '''.format(range=numeric_range)
 
+# A hostname label, e.g. 'foo' in 'foo.example.com'. Consists of alphanumeric
+# characters plus dashes (and underscores) or valid ranges. The label may not
+# start or end with a hyphen or an underscore. This is interpolated into the
+# hostname pattern below. We don't try to enforce the 63-char length limit.
+
+label = r'''
+    (?:[\w]|{range})                    # Starts with an alphanumeric or a range
+    (?:[\w_-]|{range})*                 # Then zero or more of the same or [_-]
+    (?<![_-])                           # ...as long as it didn't end with [_-]
+'''.format(range=alphanumeric_range)
+
 patterns = {
     # This matches a square-bracketed expression with a port specification. What
     # is inside the square brackets is validated later.
@@ -138,14 +149,11 @@ patterns = {
     # 253 characters total) or make any attempt to process IDNs.
 
     'hostname': re.compile(
-        r'''^                           # We need at least one label,
-            (?:                         # which comprises:
-                [a-z0-9_-]|             # (a valid domain label character
-                {0}                     # or a bracketed alphanumeric range)
-            )+
-            (?:\.(?:[a-z0-9_-]|{0})+)*  # Followed by zero or more .labels
+        r'''^
+            {label}                     # We must have at least one label
+            (?:\.{label})*              # Followed by zero or more .labels
             $
-        '''.format(alphanumeric_range), re.X|re.I
+        '''.format(label=label), re.X|re.I|re.UNICODE
     ),
 }
 
diff --git a/test/units/parsing/test_addresses.py b/test/units/parsing/test_addresses.py
index 2bfb1102181..8f4fe3696e4 100644
--- a/test/units/parsing/test_addresses.py
+++ b/test/units/parsing/test_addresses.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 import unittest
 
 from ansible.parsing.utils.addresses import parse_address
@@ -21,12 +23,19 @@ class TestParseAddress(unittest.TestCase):
         'some-host:80': ['some-host', 80],
         'some.host.com:492': ['some.host.com', 492],
         '[some.host.com]:493': ['some.host.com', 493],
+        'a-b.3foo_bar.com:23': ['a-b.3foo_bar.com', 23],
+        u'fóöbär': [u'fóöbär', None],
+        u'fóöbär:32': [u'fóöbär', 32],
+        u'fóöbär.éxàmplê.com:632': [u'fóöbär.éxàmplê.com', 632],
 
         # Various errors
         '': [None, None],
         'some..host': [None, None],
         'some.': [None, None],
         '[example.com]': [None, None],
+        'some-': [None, None],
+        'some-.foo.com': [None, None],
+        'some.-foo.com': [None, None],
     }
 
     range_tests = {
@@ -34,7 +43,11 @@ class TestParseAddress(unittest.TestCase):
         '192.0.2.[3:10]:23': ['192.0.2.[3:10]', 23],
         'abcd:ef98::7654:[1:9]': ['abcd:ef98::7654:[1:9]', None],
         '[abcd:ef98::7654:[6:32]]:2222': ['abcd:ef98::7654:[6:32]', 2222],
+        u'fóöb[a:c]r.éxàmplê.com:632': [u'fóöb[a:c]r.éxàmplê.com', 632],
+        '[a:b]foo.com': ['[a:b]foo.com', None],
+        'foo[a:b].com': ['foo[a:b].com', None],
         'foo[a:b]:42': ['foo[a:b]', 42],
+        'foo[a-b]-.com': [None, None],
         'foo[a-b]:32': [None, None],
         'foo[x-y]': [None, None],
     }

From 88a20e7a20926728e20f498b5c4efd707ebe38fe Mon Sep 17 00:00:00 2001
From: Abhijit Menon-Sen <ams@2ndQuadrant.com>
Date: Thu, 10 Sep 2015 10:07:19 +0530
Subject: [PATCH 3/3] Fix broken integration test with unicode hostnames

1. The test did "name: '{{hostnames}}.{{item}}'" inside a with_sequence
   loop, which didn't do what was intended: it expanded hostnames into
   an array, appended ".1", and set name to the resulting string. This
   can be converted to a simple with_items loop.

2. Some of the entries in hostnames contained punctuation characters,
   which I see no reason to support in inventory hostnames anyway.

3. Once the add_host failures are fixed, the playbook later fails when
   the unicode hostnames are interpolated into debug output in ssh.py
   due to an encoding error. This is only one of the many places that
   may fail when using unicode inventory hostnames; we work around it
   by providing an ansible_ssh_host setting.
---
 test/integration/unicode.yml | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/test/integration/unicode.yml b/test/integration/unicode.yml
index 1044c252705..6e8e073a79d 100644
--- a/test/integration/unicode.yml
+++ b/test/integration/unicode.yml
@@ -4,10 +4,9 @@
   connection: local
   vars:
     test_var: 'Ī ī Ĭ ĭ Į į İ ı IJ ij Ĵ ĵ Ķ ķ ĸ Ĺ ĺ Ļ ļ Ľ ľ Ŀ ŀ Ł ł Ń ń Ņ ņ Ň ň ʼn Ŋ ŋ Ō ō Ŏ ŏ Ő ő Œ'
-    num_hosts: 5
     hostnames:
-        - 'host-#ϬϭϮϯϰ'
-        - 'host-ͰͱͲͳʹ͵'
+        - 'host-ϬϭϮϯϰ'
+        - 'host-fóöbär'
         - 'host-ΙΚΛΜΝΞ'
         - 'host-στυφχψ'
         - 'host-ϬϭϮϯϰϱ'
@@ -29,11 +28,11 @@
         - 'ā Ă ă Ą ą Ć ć Ĉ ĉ Ċ ċ Č č Ď ď Đ đ Ē ē Ĕ ĕ Ė ė Ę ę Ě ě Ĝ ĝ Ğ ğ Ġ ġ Ģ ģ Ĥ ĥ Ħ ħ Ĩ ĩ'
 
     - add_host:
-        name: '{{hostnames}}.{{item}}'
+        name: '{{item}}'
         groups: 'ĪīĬĭ'
+        ansible_ssh_host: 127.0.0.1
         ansible_connection: local
-        host_id: '{{item}}'
-      with_sequence: start=1 end={{num_hosts}} format=%d
+      with_items: hostnames
 
     - name: 'A task with unicode extra vars'
       debug: var=extra_var