From: Chandrika Srinivasan <chandrika.srinivasan@citrix.com>
Date: Wed, 31 Jan 2018 14:13:28 +0000
CA-267460: Make device multipath eligibility check more robust

Checks for multipath eligibility now include

* multipath -ll on SCSI ID to check if device is already multipathed
* Skip check if device path is not up yet
* Use all devices associated with a SCSI ID for the multipath -c check

Signed-off-by: Chandrika Srinivasan <chandrika.srinivasan@citrix.com>
diff --git a/drivers/mpath_dmp.py b/drivers/mpath_dmp.py
index 94a8dcb..662f970 100755
--- a/drivers/mpath_dmp.py
+++ b/drivers/mpath_dmp.py
@@ -209,35 +209,54 @@ def refresh(sid,npaths):
 
 
 def _is_valid_multipath_device(sid):
-    by_id_path = "/dev/disk/by-id/scsi-"+sid
-    real_path = util.get_real_path(by_id_path)
-    (ret, stdout, stderr) = util.doexec(['/usr/sbin/multipath', '-a', sid])
-    if ret == 1:
-        util.SMlog("Failed to add {}: wwid could be explicitly blacklisted\n"
-                   " Continue with multipath disabled for this SR".format(sid))
-        return False
-    (ret, stdout, stderr) = util.doexec(['/usr/sbin/multipath', '-c',
-                                         real_path])
-    if ret == 1:
-        # This is very fragile but it is not a good sign to fail without
-        # any output. At least until multipath 0.4.9, for example, multipath -c
-        # fails without any log if it is able to retrieve the wwid of the
-        # device.
-        # In this case it is better to fail immediately.
-        if not stdout+stderr:
-            # Attempt to cleanup wwids file before raising
-            try:
-                (ret, stdout, stderr) = util.doexec(['/usr/sbin/multipath',
-                                                     '-w', sid])
-            except OSError:
-                util.SMlog("Error removing {} from wwids file".format(sid))
-            raise xs_errors.XenError('MultipathGenericFailure',
-                                     '"multipath -c" failed without any'
-                                     ' output on {}'.format(real_path))
-        util.SMlog("When dealing with {} returned with:\n"
-                   " {}{} Continue with multipath disabled for this SR"
-                   .format(sid, stdout, stderr))
-        return False
+
+    # Check if device is already multipathed
+    (ret, stdout, stderr) = util.doexec(['/usr/sbin/multipath', '-ll', sid])
+    if not stdout+stderr:
+        (ret, stdout, stderr) = util.doexec(['/usr/sbin/multipath', '-a', sid])
+        if ret == 1:
+            util.SMlog("Failed to add {}: wwid could be explicitly "
+                       "blacklisted\n Continue with multipath disabled for "
+                       "this SR".format(sid))
+            return False
+
+        by_scsid_path = "/dev/disk/by-scsid/"+sid
+        if os.path.exists(by_scsid_path):
+            devs = os.listdir(by_scsid_path)
+        else:
+            util.SMlog("Device {} is not ready yet, skipping multipath check"
+                       .format(by_scsid_path))
+            return False
+        ret = 1
+        # Some paths might be down, check all associated devices
+        for dev in devs:
+            devpath = os.path.join(by_scsid_path, dev)
+            real_path = util.get_real_path(devpath)
+            (ret, stdout, stderr) = util.doexec(['/usr/sbin/multipath', '-c',
+                                                 real_path])
+            if ret == 0:
+                break
+
+        if ret == 1:
+            # This is very fragile but it is not a good sign to fail without
+            # any output. At least until multipath 0.4.9, for example,
+            # multipath -c fails without any log if it is able to retrieve the
+            # wwid of the device.
+            # In this case it is better to fail immediately.
+            if not stdout+stderr:
+                # Attempt to cleanup wwids file before raising
+                try:
+                    (ret, stdout, stderr) = util.doexec(['/usr/sbin/multipath',
+                                                         '-w', sid])
+                except OSError:
+                    util.SMlog("Error removing {} from wwids file".format(sid))
+                raise xs_errors.XenError('MultipathGenericFailure',
+                                         '"multipath -c" failed without any'
+                                         ' output on {}'.format(real_path))
+            util.SMlog("When dealing with {} multipath status returned:\n "
+                       "{}{} Continue with multipath disabled for this SR"
+                       .format(sid, stdout, stderr))
+            return False
     return True
 
 
diff --git a/tests/test_mpath_dmp.py b/tests/test_mpath_dmp.py
index e1282d4..09803c9 100644
--- a/tests/test_mpath_dmp.py
+++ b/tests/test_mpath_dmp.py
@@ -20,7 +20,8 @@ class TestMpathDmp(unittest.TestCase):
 
     @testlib.with_context
     @mock.patch('mpath_dmp.util', autospec=True)
-    def test_is_valid_multipath_device(self, context, util_mod):
+    @mock.patch('mpath_dmp.os', autospec=True)
+    def test_is_valid_multipath_device(self, context, mock_os, util_mod):
         """
         Tests for checking validity of multipath device
         """
@@ -28,26 +29,48 @@ class TestMpathDmp(unittest.TestCase):
         # Setup errors codes
         context.setup_error_codes()
 
+        # Test 'multipath -ll' success
+        util_mod.doexec.side_effect = [(0, "out", "err")]
+        self.assertTrue(mpath_dmp._is_valid_multipath_device("fake_dev"))
+
         # Test 'multipath -a' failure
-        util_mod.doexec.side_effect = [(1, "out", "err")]
+        util_mod.doexec.side_effect = [(0, "", ""), (1, "out", "err")]
+        self.assertFalse(mpath_dmp._is_valid_multipath_device("fake_dev"))
+
+        # Test failure when device is not available
+        mock_os.path.exists.return_value = False
+        util_mod.doexec.side_effect = [(0, "", ""), (0, "out", "err")]
         self.assertFalse(mpath_dmp._is_valid_multipath_device("fake_dev"))
 
         # Test 'multipath -c' with error and empty output
-        util_mod.doexec.side_effect = [(0, "out", "err"), (1, "", ""), OSError()]
+        mock_os.path.exists.return_value = True
+        mock_os.listdir.side_effect = [['sdc']]
+        util_mod.doexec.side_effect = [(0, "", ""), (0, "out", "err"),
+                                       (1, "", ""), OSError()]
         with self.assertRaises(SR.SROSError) as exc:
             mpath_dmp._is_valid_multipath_device("xx")
         self.assertEqual(exc.exception.errno, 431)
 
         # Test 'multipath -c' with error but some output
-        util_mod.doexec.side_effect = [(0, "out", "err"), (1, "xx", "")]
+        mock_os.listdir.side_effect = [['sdc']]
+        util_mod.doexec.side_effect = [(0, "", ""), (0, "out", "err"),
+                                       (1, "xx", "")]
         self.assertFalse(mpath_dmp._is_valid_multipath_device("fake_dev"))
-        util_mod.doexec.side_effect = [(0, "out", "err"), (1, "xx", "yy")]
+
+        mock_os.listdir.side_effect = [['sdc']]
+        util_mod.doexec.side_effect = [(0, "", ""), (0, "out", "err"),
+                                       (1, "xx", "yy")]
         self.assertFalse(mpath_dmp._is_valid_multipath_device("fake_dev"))
-        util_mod.doexec.side_effect = [(0, "out", "err"), (1, "", "yy")]
+
+        mock_os.listdir.side_effect = [['sdc']]
+        util_mod.doexec.side_effect = [(0, "", ""), (0, "out", "err"),
+                                       (1, "", "yy")]
         self.assertFalse(mpath_dmp._is_valid_multipath_device("fake_dev"))
 
         # Test when everything is fine
-        util_mod.doexec.side_effect = [(0, "out", "err"), (0, "out", "err")]
+        mock_os.listdir.side_effect = [['sdc']]
+        util_mod.doexec.side_effect = [(0, "", ""), (0, "out", "err"),
+                                       (0, "out", "err")]
         self.assertTrue(mpath_dmp._is_valid_multipath_device("fake_dev"))
 
     @mock.patch('mpath_dmp.mpp_luncheck.is_RdacLun', autospec=True)
