Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion biopandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
#

__version__ = '0.2.7'
__version__ = '0.3.0'
__author__ = "Sebastian Raschka <mail@sebastianraschka.com>"
13 changes: 9 additions & 4 deletions biopandas/mol2/mol2_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@

def split_multimol2(mol2_path):
r"""
Splits a multi-mol2 file into individual Mol2 file contents.
Generator function that
splits a multi-mol2 file into individual Mol2 file contents.

Parameters
-----------
Expand All @@ -26,12 +27,16 @@ def split_multimol2(mol2_path):
from a gzip (.gz) file.

"""
if mol2_path.endswith('.gz'):
if mol2_path.endswith('.mol2'):
open_file = open
read_mode = 'r'
elif mol2_path.endswith('mol2.gz'):
open_file = gzip.open
read_mode = 'rb'
else:
open_file = open
read_mode = 'r'
raise ValueError('Wrong file format;'
'allowed file formats are .mol2 and .mol2.gz.')

check = {'rb': b'@<TRIPOS>MOLECULE', 'r': '@<TRIPOS>MOLECULE'}

with open_file(mol2_path, read_mode) as f:
Expand Down
11 changes: 10 additions & 1 deletion biopandas/mol2/pandas_mol2.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,15 +169,24 @@ def _construct_df(self, mol2_lines, col_names, col_types):

@staticmethod
def _get_atomsection(mol2_lst):
    """Return the atom section of a mol2 record given as a list of strings.

    Parameters
    ----------
    mol2_lst : list of str
        Lines of a single mol2 record.

    Returns
    -------
    list of str
        The lines after the '@<TRIPOS>ATOM' header, up to but not
        including the next line that starts with '@<TRIPOS>'. If no
        such line follows, all remaining lines are returned.

    Raises
    ------
    ValueError
        If no line starts with '@<TRIPOS>ATOM', i.e. the data is not
        provided in the mol2 format.

    """
    first_idx = None
    # A slice end of None means "through the end of the list"; this keeps
    # the return statement valid when the ATOM section is the last record
    # (previously the end index was unbound in that case).
    last_idx_plus1 = None
    for idx, s in enumerate(mol2_lst):
        if s.startswith('@<TRIPOS>ATOM'):
            first_idx = idx + 1
        elif first_idx is not None and s.startswith('@<TRIPOS>'):
            last_idx_plus1 = idx
            break
    if first_idx is None:
        # Raise error when file contains no @<TRIPOS>ATOM
        # (i.e. file is no mol2 file)
        raise ValueError(
            "Structural data could not be loaded. "
            "Is the input file/text in the mol2 format?"
        )
    return mol2_lst[first_idx:last_idx_plus1]

@staticmethod
Expand Down
14 changes: 14 additions & 0 deletions biopandas/mol2/tests/test_mol2_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import os
from biopandas.mol2.mol2_io import split_multimol2
from biopandas.testutils import assert_raises

this_dir = os.path.dirname(os.path.realpath(__file__))

Expand All @@ -19,6 +20,19 @@ def test_split_multimol2():
assert(len(all_mol2) == 40)


def test_split_multimol2_wrong_format():
    """Check that split_multimol2 rejects a path with an unsupported suffix."""
    expected_msg = 'Wrong file format;allowed file formats are .mol2 and .mol2.gz.'
    bad_path = '40_mol2_files.pdb'

    def pull_first_molecule():
        # Request the first item so the function body actually executes.
        next(split_multimol2(bad_path))

    assert_raises(ValueError, expected_msg, pull_first_molecule)


def test_split_multimol2_gz():
all_mol2 = []
for i in split_multimol2(os.path.join(this_dir,
Expand Down
14 changes: 14 additions & 0 deletions biopandas/mol2/tests/test_pandas_mol2.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,17 @@ def overwrite():
assert_raises(AttributeError,
expect,
overwrite)


def test__get_atomsection_raises():
    """Check that non-mol2 input lines make _get_atomsection raise ValueError."""
    expected_msg = ("Structural data could not be loaded. "
                    "Is the input file/text in the mol2 format?")
    not_mol2 = ["", ""]

    def extract_atoms():
        PandasMol2()._get_atomsection(not_mol2)

    assert_raises(ValueError, expected_msg, extract_atoms)
17 changes: 15 additions & 2 deletions biopandas/pdb/pandas_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,12 +245,19 @@ def _init_get_dict():
def _read_pdb(path):
"""Read PDB file from local drive."""
r_mode = 'r'
openf = open
if path.endswith('.gz'):
if path.endswith('.pdb'):
openf = open
elif path.endswith('pdb.gz'):
r_mode = 'rb'
openf = gzip.open
else:
raise ValueError(
'Wrong file format; allowed file formats are .pdb and .pdb.gz.'
)

with openf(path, r_mode) as f:
txt = f.read()

if path.endswith('.gz'):
if sys.version_info[0] >= 3:
txt = txt.decode('utf-8')
Expand Down Expand Up @@ -370,6 +377,12 @@ def _construct_df(pdb_lines):
df[c['id']] = pd.Series(np.nan, index=df.index)

dfs[r[0]] = df

# issue a warning if no atoms have been loaded
if len(dfs['ATOM']) == 0:
warnings.warn('No ATOM entries have been loaded. '
'Is the input file/text in the pdb format?')

return dfs

def amino3to1(self, record='ATOM',
Expand Down
23 changes: 23 additions & 0 deletions biopandas/pdb/tests/test_read_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import numpy as np
import pandas as pd
from nose.tools import raises
from biopandas.testutils import assert_raises
try:
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
Expand Down Expand Up @@ -56,6 +57,28 @@ def test__read_pdb():
assert txt == three_eiy


def test__read_pdb_raises():
    """Check that the private _read_pdb raises ValueError for paths that
    end in neither .pdb nor .pdb.gz."""
    expected_msg = 'Wrong file format; allowed file formats are .pdb and .pdb.gz.'

    # Same order as before: plain mol2 first, then the gzipped variant.
    for bad_path in ("protein.mol2", "protein.mol2.gz"):

        def attempt_read(path=bad_path):
            PandasPdb()._read_pdb(path)

        assert_raises(ValueError, expected_msg, attempt_read)


def test_fetch_pdb():
"""Test fetch_pdb"""

Expand Down
21 changes: 21 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,27 @@
The CHANGELOG for the current development version is available at
[https://github.com/rasbt/biopandas/blob/master/docs/sources/CHANGELOG.md](https://github.com/rasbt/biopandas/blob/master/docs/sources/CHANGELOG.md).


### 0.3.0 (TBA)

##### Downloads

- [Source code (zip)](https://github.com/rasbt/biopandas/archive/v0.3.0.zip)
- [Source code (tar.gz)](https://github.com/rasbt/biopandas/archive/v0.3.0.tar.gz)

##### New Features

- A `PandasPdb.read_pdb_from_list` method was added, analogous to the existing `PandasMol2.read_mol2_from_list` (via PR [72](https://github.com/rasbt/biopandas/pull/72/files) by [dominiquesydow](https://github.com/dominiquesydow))

##### Changes

- The `read_pdb` and `read_mol2` functionality now raises a `ValueError` with an improved error message when the input has an unsupported file format. (via PR [73](https://github.com/rasbt/biopandas/pull/73/files) by [dominiquesydow](https://github.com/dominiquesydow))

##### Bug Fixes

- -


### 0.2.7 (08-04-2020)

##### Downloads
Expand Down