BioPandas · rasbt · Oct 30, 2020 · Oct 22, 2020 · Oct 22, 2020 · Oct 22, 2020
diff --git a/biopandas/__init__.py b/biopandas/__init__.py
@@ -24,5 +24,5 @@
 # 'X.Y.dev0' is the canonical version of 'X.Y.dev'
 #
 
-__version__ = '0.2.7'
+__version__ = '0.3.0'
 __author__ = "Sebastian Raschka <mail@sebastianraschka.com>"
diff --git a/biopandas/mol2/mol2_io.py b/biopandas/mol2/mol2_io.py
@@ -9,7 +9,8 @@
 
 def split_multimol2(mol2_path):
     r"""
-    Splits a multi-mol2 file into individual Mol2 file contents.
+    Generator function that
+    splits a multi-mol2 file into individual Mol2 file contents.
 
     Parameters
     -----------
@@ -26,12 +27,16 @@ def split_multimol2(mol2_path):
         from a gzip (.gz) file.
 
     """
-    if mol2_path.endswith('.gz'):
+    if mol2_path.endswith('.mol2'):
+        open_file = open
+        read_mode = 'r'
+    elif mol2_path.endswith('mol2.gz'):
         open_file = gzip.open
         read_mode = 'rb'
     else:
-        open_file = open
-        read_mode = 'r'
+        raise ValueError('Wrong file format;'
+                         'allowed file formats are .mol2 and .mol2.gz.')
+
     check = {'rb': b'@<TRIPOS>MOLECULE', 'r': '@<TRIPOS>MOLECULE'}
 
     with open_file(mol2_path, read_mode) as f:

diff --git a/biopandas/mol2/pandas_mol2.py b/biopandas/mol2/pandas_mol2.py
@@ -169,15 +169,31 @@ def _construct_df(self, mol2_lines, col_names, col_types):
 
     @staticmethod
     def _get_atomsection(mol2_lst):
-        """Returns atom section from mol2 provided as list of strings"""
+        """Returns atom section from mol2 provided as list of strings.
+
+        The section starts on the line after '@<TRIPOS>ATOM' and ends before
+        the next '@<TRIPOS>' line of another record type, or at the end of
+        the list when no such line follows.
+        Raises ValueError if data is not provided in the mol2 format."""
         started = False
+        first_idx = None
+        # None as slice end keeps all remaining lines when the ATOM
+        # section is the last one in the list
+        last_idx_plus1 = None
         for idx, s in enumerate(mol2_lst):
             if s.startswith('@<TRIPOS>ATOM'):
                 first_idx = idx + 1
                 started = True
             elif started and s.startswith('@<TRIPOS>'):
                 last_idx_plus1 = idx
                 break
+        if first_idx is None:
+            # Raise error when file contains no @<TRIPOS>ATOM
+            # (i.e. file is no mol2 file)
+            raise ValueError(
+                    "Structural data could not be loaded. "
+                    "Is the input file/text in the mol2 format?"
+                )
         return mol2_lst[first_idx:last_idx_plus1]
 
     @staticmethod

diff --git a/biopandas/mol2/tests/test_mol2_io.py b/biopandas/mol2/tests/test_mol2_io.py
@@ -6,6 +6,7 @@
 
 import os
 from biopandas.mol2.mol2_io import split_multimol2
+from biopandas.testutils import assert_raises
 
 this_dir = os.path.dirname(os.path.realpath(__file__))
 
@@ -19,6 +20,19 @@ def test_split_multimol2():
     assert(len(all_mol2) == 40)
 
 
+def test_split_multimol2_wrong_format():
+    """A path ending in neither .mol2 nor .mol2.gz raises ValueError."""
+    wrong_path = '40_mol2_files.pdb'
+    message = ('Wrong file format;'
+               'allowed file formats are .mol2 and .mol2.gz.')
+
+    def request_first_molecule():
+        # A generator body does not run until its first item is requested.
+        next(split_multimol2(wrong_path))
+
+    assert_raises(ValueError, message, request_first_molecule)
+
+
 def test_split_multimol2_gz():
     all_mol2 = []
     for i in split_multimol2(os.path.join(this_dir,

diff --git a/biopandas/mol2/tests/test_pandas_mol2.py b/biopandas/mol2/tests/test_pandas_mol2.py
@@ -82,3 +82,17 @@ def overwrite():
     assert_raises(AttributeError,
                   expect,
                   overwrite)
+
+
+def test__get_atomsection_raises():
+    """_get_atomsection raises ValueError when no @<TRIPOS>ATOM line exists."""
+
+    lines_without_atom_record = ["", ""]
+    message = ("Structural data could not be loaded. "
+               "Is the input file/text in the mol2 format?")
+
+    def extract_atom_section():
+        PandasMol2()._get_atomsection(lines_without_atom_record)
+
+    assert_raises(ValueError, message,
+                  extract_atom_section)
diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py
@@ -245,12 +245,19 @@ def _init_get_dict():
     def _read_pdb(path):
         """Read PDB file from local drive."""
         r_mode = 'r'
-        openf = open
-        if path.endswith('.gz'):
+        if path.endswith('.pdb'):
+            openf = open
+        elif path.endswith('pdb.gz'):
             r_mode = 'rb'
             openf = gzip.open
+        else:
+            raise ValueError(
+                'Wrong file format; allowed file formats are .pdb and .pdb.gz.'
+            )
+
         with openf(path, r_mode) as f:
             txt = f.read()
+
         if path.endswith('.gz'):
             if sys.version_info[0] >= 3:
                 txt = txt.decode('utf-8')
@@ -370,6 +377,12 @@ def _construct_df(pdb_lines):
                     df[c['id']] = pd.Series(np.nan, index=df.index)
 
             dfs[r[0]] = df
+
+        # issue a warning if no atoms have been loaded
+        if len(dfs['ATOM']) == 0:
+            warnings.warn('No ATOM entries have been loaded. '
+                          'Is the input file/text in the pdb format?')
+
         return dfs
 
     def amino3to1(self, record='ATOM',

diff --git a/biopandas/pdb/tests/test_read_pdb.py b/biopandas/pdb/tests/test_read_pdb.py
@@ -10,6 +10,7 @@
 import numpy as np
 import pandas as pd
 from nose.tools import raises
+from biopandas.testutils import assert_raises
 try:
     from urllib.request import urlopen
     from urllib.error import HTTPError, URLError
@@ -56,6 +57,28 @@ def test__read_pdb():
     assert txt == three_eiy
 
 
+def test__read_pdb_raises():
+    """Test private _read_pdb with unsupported file extensions.
+
+    Both a plain and a gzipped mol2 file name must raise ValueError
+    with the message that lists the allowed formats.
+    """
+
+    message = ('Wrong file format; allowed file formats'
+               ' are .pdb and .pdb.gz.')
+
+    def read_plain_mol2():
+        PandasPdb()._read_pdb("protein.mol2")
+
+    def read_gzipped_mol2():
+        PandasPdb()._read_pdb("protein.mol2.gz")
+
+    # Neither name ends in .pdb or .pdb.gz, so both calls must raise
+    # the same ValueError.
+    for attempt in (read_plain_mol2, read_gzipped_mol2):
+        assert_raises(ValueError, message, attempt)
+
+
 def test_fetch_pdb():
     """Test fetch_pdb"""
 

diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
@@ -3,6 +3,27 @@
 The CHANGELOG for the current development version is available at
 [https://github.com/rasbt/biopandas/blob/master/docs/sources/CHANGELOG.md](https://github.com/rasbt/biopandas/blob/master/docs/sources/CHANGELOG.md).
 
+
+### 0.3.0 (TBA)
+
+##### Downloads
+
+- [Source code (zip)](https://github.com/rasbt/biopandas/archive/v0.3.0.zip)
+- [Source code (tar.gz)](https://github.com/rasbt/biopandas/archive/v0.3.0.tar.gz)
+
+##### New Features
+
+- A `PandasPdb.read_pdb_from_list` method was added, analogous to the existing `PandasMol2.read_mol2_from_list` method (via PR [72](https://github.com/rasbt/biopandas/pull/72/files) by [dominiquesydow](https://github.com/dominiquesydow))
+
+##### Changes
+
+- The `read_pdb` and `read_mol2` functionality now raises a `ValueError` with a clearer error message when the input has the wrong file format. (via PR [73](https://github.com/rasbt/biopandas/pull/73/files) by [dominiquesydow](https://github.com/dominiquesydow))
+
+##### Bug Fixes
+
+- -
+
+
 ### 0.2.7 (08-04-2020)
 
 ##### Downloads