BioPandas · rasbt · May 11, 2022 · Apr 27, 2022 · Apr 27, 2022 · Apr 27, 2022
diff --git a/.pep8speaks.yml b/.pep8speaks.yml
@@ -0,0 +1,24 @@
+# File : .pep8speaks.yml
+
+scanner:
+    diff_only: True  # If False, the entire file touched by the Pull Request is scanned for errors. If True, only the diff is scanned.
+    linter: flake8  # Other option is pycodestyle
+
+flake8:  # Same as scanner.linter value. Other option is pycodestyle
+    max-line-length: 88  # Default is 79 in PEP 8
+    ignore:  # Errors and warnings to ignore
+        - W504  # line break after binary operator
+
+no_blank_comment: False # If True, no comment is made on PR without any errors.
+descending_issues_order: False  # If True, PEP 8 issues in message will be displayed in descending order of line numbers in the file
+
+message:  # Customize the comment made by the bot
+    opened:  # Messages when a new PR is submitted
+        header: "Hello @{name}! Thanks for opening this PR. "
+                # The keyword {name} is converted into the author's username
+        footer: "Do see the [Hitchhiker's guide to code style](https://goo.gl/hqbW4r)"
+                # The messages can be written as they would over GitHub
+    updated:  # Messages when new commits are added to the PR
+        header: "Hello @{name}! Thanks for updating this PR. "
+        footer: ""  # No need to repeat the style guide link on every update :)
+    no_errors: "There are currently no PEP 8 issues detected in this Pull Request. Cheers! :beers: "
diff --git a/biopandas/__init__.py b/biopandas/__init__.py
@@ -24,5 +24,5 @@
 # 'X.Y.dev0' is the canonical version of 'X.Y.dev'
 #
 
-__version__ = '0.3.0'
+__version__ = "0.3.0"
 __author__ = "Sebastian Raschka <mail@sebastianraschka.com>"
diff --git a/biopandas/mmcif/mmcif_parser.py b/biopandas/mmcif/mmcif_parser.py
@@ -306,7 +306,11 @@ def __dump_cat__(k, v):
                 pad = len(k2)
         pad += 3
         for k2 in v.keys():
-            output += "_%s.%s%s\n" % (k, __pad_string__(k2, pad), __dump_str__(v[k2][0]))
+            output += "_%s.%s%s\n" % (
+                k,
+                __pad_string__(k2, pad),
+                __dump_str__(v[k2][0]),
+            )
     else:
         output += "loop_\n"
         pad = []

diff --git a/biopandas/mmcif/tests/test_read_mmcif.py b/biopandas/mmcif/tests/test_read_mmcif.py
@@ -94,7 +94,8 @@ def test__read_pdb_raises():
     Test if ValueError is raised for wrong file formats."""
 
     expect = (
-        "Wrong file format; allowed file formats are " ".cif, .cif.gz, .mmcif, .mmcif.gz"
+        "Wrong file format; allowed file formats are "
+        ".cif, .cif.gz, .mmcif, .mmcif.gz"
     )
 
     def run_code_1():

diff --git a/biopandas/mol2/mol2_io.py b/biopandas/mol2/mol2_io.py
@@ -27,33 +27,34 @@ def split_multimol2(mol2_path):
         from a gzip (.gz) file.
 
     """
-    if mol2_path.endswith('.mol2'):
+    if mol2_path.endswith(".mol2"):
         open_file = open
-        read_mode = 'r'
-    elif mol2_path.endswith('mol2.gz'):
+        read_mode = "r"
+    elif mol2_path.endswith("mol2.gz"):
         open_file = gzip.open
-        read_mode = 'rb'
+        read_mode = "rb"
     else:
-        raise ValueError('Wrong file format;'
-                         'allowed file formats are .mol2 and .mol2.gz.')
+        raise ValueError(
+            "Wrong file format;" "allowed file formats are .mol2 and .mol2.gz."
+        )
 
-    check = {'rb': b'@<TRIPOS>MOLECULE', 'r': '@<TRIPOS>MOLECULE'}
+    check = {"rb": b"@<TRIPOS>MOLECULE", "r": "@<TRIPOS>MOLECULE"}
 
     with open_file(mol2_path, read_mode) as f:
-        mol2 = ['', []]
+        mol2 = ["", []]
         while True:
             try:
                 line = next(f)
                 if line.startswith(check[read_mode]):
                     if mol2[0]:
-                        yield(mol2)
-                    mol2 = ['', []]
+                        yield (mol2)
+                    mol2 = ["", []]
                     mol2_id = next(f)
                     mol2[0] = mol2_id.rstrip()
                     mol2[1].append(line)
                     mol2[1].append(mol2_id)
                 else:
                     mol2[1].append(line)
             except StopIteration:
-                yield(mol2)
+                yield (mol2)
                 return
diff --git a/biopandas/mol2/pandas_mol2.py b/biopandas/mol2/pandas_mol2.py
@@ -11,15 +11,15 @@
 
 
 COLUMN_NAMES = (
- 'atom_id',
- 'atom_name',
- 'x',
- 'y',
- 'z',
- 'atom_type',
- 'subst_id',
- 'subst_name',
- 'charge'
+    "atom_id",
+    "atom_name",
+    "x",
+    "y",
+    "z",
+    "atom_type",
+    "subst_id",
+    "subst_name",
+    "charge",
 )
 
 COLUMN_TYPES = (int, str, float, float, float, str, int, str, float)
@@ -44,12 +44,13 @@ class PandasMol2(object):
         Location of the MOL2 file that was read in via `read_mol2`
 
     """
+
     def __init__(self):
         self._df = None
-        self.mol2_text = ''
-        self.header = ''
-        self.code = ''
-        self.mol2_path = ''
+        self.mol2_text = ""
+        self.header = ""
+        self.code = ""
+        self.mol2_path = ""
 
     @property
     def df(self):
@@ -59,9 +60,11 @@ def df(self):
     @df.setter
     def df(self, value):
         """Assign a new value to the pandas DataFrame"""
-        raise AttributeError('Please use `PandasMol2._df = ... ` instead\n'
-                             'of `PandasMol2.df = ... ` if you are sure that\n'
-                             'you want to overwrite the `df` attribute.')
+        raise AttributeError(
+            "Please use `PandasMol2._df = ... ` instead\n"
+            "of `PandasMol2.df = ... ` if you are sure that\n"
+            "you want to overwrite the `df` attribute."
+        )
         # self._df = value
 
     def _load_mol2(self, mol2_lines, mol2_code, columns):
@@ -76,11 +79,11 @@ def _load_mol2(self, mol2_lines, mol2_code, columns):
                 col_types.append(columns[i][1])
 
         try:
-            self.mol2_text = ''.join(mol2_lines)
+            self.mol2_text = "".join(mol2_lines)
             self.code = mol2_code
         except TypeError:
             mol2_lines = [m.decode() for m in mol2_lines]
-            self.mol2_text = ''.join(mol2_lines)
+            self.mol2_text = "".join(mol2_lines)
             self.code = mol2_code.decode()
 
         self._df = self._construct_df(mol2_lines, col_names, col_types)
@@ -163,9 +166,9 @@ def read_mol2_from_list(self, mol2_lines, mol2_code, columns=None):
 
     def _construct_df(self, mol2_lines, col_names, col_types):
         """Construct DataFrames from list of PDB lines."""
-        return self._atomsection_to_pandas(self._get_atomsection(mol2_lines),
-                                           col_names=col_names,
-                                           col_types=col_types)
+        return self._atomsection_to_pandas(
+            self._get_atomsection(mol2_lines), col_names=col_names, col_types=col_types
+        )
 
     @staticmethod
     def _get_atomsection(mol2_lst):
@@ -174,26 +177,25 @@ def _get_atomsection(mol2_lst):
         started = False
         first_idx = None
         for idx, s in enumerate(mol2_lst):
-            if s.startswith('@<TRIPOS>ATOM'):
+            if s.startswith("@<TRIPOS>ATOM"):
                 first_idx = idx + 1
                 started = True
-            elif started and s.startswith('@<TRIPOS>'):
+            elif started and s.startswith("@<TRIPOS>"):
                 last_idx_plus1 = idx
                 break
         if first_idx is None:
             # Raise error when file contains no @<TRIPOS>ATOM
             # (i.e. file is no mol2 file)
             raise ValueError(
-                    "Structural data could not be loaded. "
-                    "Is the input file/text in the mol2 format?"
-                )
+                "Structural data could not be loaded. "
+                "Is the input file/text in the mol2 format?"
+            )
         return mol2_lst[first_idx:last_idx_plus1]
 
     @staticmethod
     def _atomsection_to_pandas(mol2_atom_lst, col_names, col_types):
 
-        df = pd.DataFrame([lst.split() for lst in mol2_atom_lst],
-                          columns=col_names)
+        df = pd.DataFrame([lst.split() for lst in mol2_atom_lst], columns=col_names)
 
         for i in range(df.shape[1]):
             df[col_names[i]] = df[col_names[i]].astype(col_types[i])
@@ -222,18 +224,20 @@ def rmsd(df1, df2, heavy_only=True):
 
         """
         if df1.shape[0] != df2.shape[0]:
-            raise AttributeError('DataFrames have unequal lengths')
+            raise AttributeError("DataFrames have unequal lengths")
 
         if heavy_only:
-            d1 = df1[df1['atom_type'] != 'H']
-            d2 = df2[df2['atom_type'] != 'H']
+            d1 = df1[df1["atom_type"] != "H"]
+            d2 = df2[df2["atom_type"] != "H"]
         else:
             d1, d2 = df1, df2
 
-        total = ((d1['x'].values - d2['x'].values)**2 +
-                 (d1['y'].values - d2['y'].values)**2 +
-                 (d1['z'].values - d2['z'].values)**2)
-        rmsd = round((total.sum() / df1.shape[0])**0.5, 4)
+        total = (
+            (d1["x"].values - d2["x"].values) ** 2
+            + (d1["y"].values - d2["y"].values) ** 2
+            + (d1["z"].values - d2["z"].values) ** 2
+        )
+        rmsd = round((total.sum() / df1.shape[0]) ** 0.5, 4)
         return rmsd
 
     def distance(self, xyz=(0.00, 0.00, 0.00)):
@@ -252,8 +256,9 @@ def distance(self, xyz=(0.00, 0.00, 0.00)):
             distance between the atoms in the atom section and `xyz`.
 
         """
-        return np.sqrt(np.sum(self.df[['x', 'y', 'z']]
-                       .subtract(xyz, axis=1)**2, axis=1))
+        return np.sqrt(
+            np.sum(self.df[["x", "y", "z"]].subtract(xyz, axis=1) ** 2, axis=1)
+        )
 
     @staticmethod
     def distance_df(df, xyz=(0.00, 0.00, 0.00)):
@@ -276,5 +281,4 @@ def distance_df(df, xyz=(0.00, 0.00, 0.00)):
 
         """
 
-        return np.sqrt(np.sum(df[['x', 'y', 'z']]
-                       .subtract(xyz, axis=1)**2, axis=1))
+        return np.sqrt(np.sum(df[["x", "y", "z"]].subtract(xyz, axis=1) ** 2, axis=1))
diff --git a/biopandas/mol2/tests/test_mol2_io.py b/biopandas/mol2/tests/test_mol2_io.py
@@ -13,30 +13,25 @@
 
 def test_split_multimol2():
     all_mol2 = []
-    for i in split_multimol2(os.path.join(this_dir,
-                                          'data', '40_mol2_files.mol2')):
+    for i in split_multimol2(os.path.join(this_dir, "data", "40_mol2_files.mol2")):
         all_mol2.append(i[0])
-    assert(all_mol2[1] == 'ZINC04084113')
-    assert(len(all_mol2) == 40)
+    assert all_mol2[1] == "ZINC04084113"
+    assert len(all_mol2) == 40
 
 
 def test_split_multimol2_wrong_format():
 
-    expect = ('Wrong file format;'
-              'allowed file formats are .mol2 and .mol2.gz.')
+    expect = "Wrong file format;" "allowed file formats are .mol2 and .mol2.gz."
 
     def run_code():
-        next(split_multimol2('40_mol2_files.pdb'))
+        next(split_multimol2("40_mol2_files.pdb"))
 
-    assert_raises(ValueError,
-                  expect,
-                  run_code)
+    assert_raises(ValueError, expect, run_code)
 
 
 def test_split_multimol2_gz():
     all_mol2 = []
-    for i in split_multimol2(os.path.join(this_dir,
-                                          'data', '40_mol2_files.mol2.gz')):
+    for i in split_multimol2(os.path.join(this_dir, "data", "40_mol2_files.mol2.gz")):
         all_mol2.append(i[0])
-    assert(all_mol2[1].decode() == 'ZINC04084113')
-    assert(len(all_mol2) == 40)
+    assert all_mol2[1].decode() == "ZINC04084113"
+    assert len(all_mol2) == 40