Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .pep8speaks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# File : .pep8speaks.yml

scanner:
diff_only: True # If False, the entire file touched by the Pull Request is scanned for errors. If True, only the diff is scanned.
linter: flake8 # Other option is pycodestyle

flake8: # Same as scanner.linter value. Other option is pycodestyle
max-line-length: 88 # Default is 79 in PEP 8
ignore: # Errors and warnings to ignore
- W504 # line break after binary operator

no_blank_comment: False # If True, no comment is made on PR without any errors.
descending_issues_order: False # If True, PEP 8 issues in message will be displayed in descending order of line numbers in the file

message: # Customize the comment made by the bot
opened: # Messages when a new PR is submitted
header: "Hello @{name}! Thanks for opening this PR. "
# The keyword {name} is converted into the author's username
footer: "Do see the [Hitchhiker's guide to code style](https://goo.gl/hqbW4r)"
# The messages can be written as they would be in a GitHub comment (Markdown is supported)
updated: # Messages when new commits are added to the PR
header: "Hello @{name}! Thanks for updating this PR. "
footer: "" # Why repeat the link to the style guide every time? :)
no_errors: "There are currently no PEP 8 issues detected in this Pull Request. Cheers! :beers: "
2 changes: 1 addition & 1 deletion biopandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
#

__version__ = '0.3.0'
__version__ = "0.3.0"
__author__ = "Sebastian Raschka <mail@sebastianraschka.com>"
6 changes: 5 additions & 1 deletion biopandas/mmcif/mmcif_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,11 @@ def __dump_cat__(k, v):
pad = len(k2)
pad += 3
for k2 in v.keys():
output += "_%s.%s%s\n" % (k, __pad_string__(k2, pad), __dump_str__(v[k2][0]))
output += "_%s.%s%s\n" % (
k,
__pad_string__(k2, pad),
__dump_str__(v[k2][0]),
)
else:
output += "loop_\n"
pad = []
Expand Down
3 changes: 2 additions & 1 deletion biopandas/mmcif/tests/test_read_mmcif.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@ def test__read_pdb_raises():
Test if ValueError is raised for wrong file formats."""

expect = (
"Wrong file format; allowed file formats are " ".cif, .cif.gz, .mmcif, .mmcif.gz"
"Wrong file format; allowed file formats are "
".cif, .cif.gz, .mmcif, .mmcif.gz"
)

def run_code_1():
Expand Down
23 changes: 12 additions & 11 deletions biopandas/mol2/mol2_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,33 +27,34 @@ def split_multimol2(mol2_path):
from a gzip (.gz) file.

"""
if mol2_path.endswith('.mol2'):
if mol2_path.endswith(".mol2"):
open_file = open
read_mode = 'r'
elif mol2_path.endswith('mol2.gz'):
read_mode = "r"
elif mol2_path.endswith("mol2.gz"):
open_file = gzip.open
read_mode = 'rb'
read_mode = "rb"
else:
raise ValueError('Wrong file format;'
'allowed file formats are .mol2 and .mol2.gz.')
raise ValueError(
"Wrong file format;" "allowed file formats are .mol2 and .mol2.gz."
)

check = {'rb': b'@<TRIPOS>MOLECULE', 'r': '@<TRIPOS>MOLECULE'}
check = {"rb": b"@<TRIPOS>MOLECULE", "r": "@<TRIPOS>MOLECULE"}

with open_file(mol2_path, read_mode) as f:
mol2 = ['', []]
mol2 = ["", []]
while True:
try:
line = next(f)
if line.startswith(check[read_mode]):
if mol2[0]:
yield(mol2)
mol2 = ['', []]
yield (mol2)
mol2 = ["", []]
mol2_id = next(f)
mol2[0] = mol2_id.rstrip()
mol2[1].append(line)
mol2[1].append(mol2_id)
else:
mol2[1].append(line)
except StopIteration:
yield(mol2)
yield (mol2)
return
82 changes: 43 additions & 39 deletions biopandas/mol2/pandas_mol2.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@


COLUMN_NAMES = (
'atom_id',
'atom_name',
'x',
'y',
'z',
'atom_type',
'subst_id',
'subst_name',
'charge'
"atom_id",
"atom_name",
"x",
"y",
"z",
"atom_type",
"subst_id",
"subst_name",
"charge",
)

COLUMN_TYPES = (int, str, float, float, float, str, int, str, float)
Expand All @@ -44,12 +44,13 @@ class PandasMol2(object):
Location of the MOL2 file that was read in via `read_mol2`

"""

def __init__(self):
self._df = None
self.mol2_text = ''
self.header = ''
self.code = ''
self.mol2_path = ''
self.mol2_text = ""
self.header = ""
self.code = ""
self.mol2_path = ""

@property
def df(self):
Expand All @@ -59,9 +60,11 @@ def df(self):
@df.setter
def df(self, value):
"""Assign a new value to the pandas DataFrame"""
raise AttributeError('Please use `PandasMol2._df = ... ` instead\n'
'of `PandasMol2.df = ... ` if you are sure that\n'
'you want to overwrite the `df` attribute.')
raise AttributeError(
"Please use `PandasMol2._df = ... ` instead\n"
"of `PandasMol2.df = ... ` if you are sure that\n"
"you want to overwrite the `df` attribute."
)
# self._df = value

def _load_mol2(self, mol2_lines, mol2_code, columns):
Expand All @@ -76,11 +79,11 @@ def _load_mol2(self, mol2_lines, mol2_code, columns):
col_types.append(columns[i][1])

try:
self.mol2_text = ''.join(mol2_lines)
self.mol2_text = "".join(mol2_lines)
self.code = mol2_code
except TypeError:
mol2_lines = [m.decode() for m in mol2_lines]
self.mol2_text = ''.join(mol2_lines)
self.mol2_text = "".join(mol2_lines)
self.code = mol2_code.decode()

self._df = self._construct_df(mol2_lines, col_names, col_types)
Expand Down Expand Up @@ -163,9 +166,9 @@ def read_mol2_from_list(self, mol2_lines, mol2_code, columns=None):

def _construct_df(self, mol2_lines, col_names, col_types):
"""Construct DataFrames from list of PDB lines."""
return self._atomsection_to_pandas(self._get_atomsection(mol2_lines),
col_names=col_names,
col_types=col_types)
return self._atomsection_to_pandas(
self._get_atomsection(mol2_lines), col_names=col_names, col_types=col_types
)

@staticmethod
def _get_atomsection(mol2_lst):
Expand All @@ -174,26 +177,25 @@ def _get_atomsection(mol2_lst):
started = False
first_idx = None
for idx, s in enumerate(mol2_lst):
if s.startswith('@<TRIPOS>ATOM'):
if s.startswith("@<TRIPOS>ATOM"):
first_idx = idx + 1
started = True
elif started and s.startswith('@<TRIPOS>'):
elif started and s.startswith("@<TRIPOS>"):
last_idx_plus1 = idx
break
if first_idx is None:
# Raise error when file contains no @<TRIPOS>ATOM
# (i.e. file is no mol2 file)
raise ValueError(
"Structural data could not be loaded. "
"Is the input file/text in the mol2 format?"
)
"Structural data could not be loaded. "
"Is the input file/text in the mol2 format?"
)
return mol2_lst[first_idx:last_idx_plus1]

@staticmethod
def _atomsection_to_pandas(mol2_atom_lst, col_names, col_types):

df = pd.DataFrame([lst.split() for lst in mol2_atom_lst],
columns=col_names)
df = pd.DataFrame([lst.split() for lst in mol2_atom_lst], columns=col_names)

for i in range(df.shape[1]):
df[col_names[i]] = df[col_names[i]].astype(col_types[i])
Expand Down Expand Up @@ -222,18 +224,20 @@ def rmsd(df1, df2, heavy_only=True):

"""
if df1.shape[0] != df2.shape[0]:
raise AttributeError('DataFrames have unequal lengths')
raise AttributeError("DataFrames have unequal lengths")

if heavy_only:
d1 = df1[df1['atom_type'] != 'H']
d2 = df2[df2['atom_type'] != 'H']
d1 = df1[df1["atom_type"] != "H"]
d2 = df2[df2["atom_type"] != "H"]
else:
d1, d2 = df1, df2

total = ((d1['x'].values - d2['x'].values)**2 +
(d1['y'].values - d2['y'].values)**2 +
(d1['z'].values - d2['z'].values)**2)
rmsd = round((total.sum() / df1.shape[0])**0.5, 4)
total = (
(d1["x"].values - d2["x"].values) ** 2
+ (d1["y"].values - d2["y"].values) ** 2
+ (d1["z"].values - d2["z"].values) ** 2
)
rmsd = round((total.sum() / df1.shape[0]) ** 0.5, 4)
return rmsd

def distance(self, xyz=(0.00, 0.00, 0.00)):
Expand All @@ -252,8 +256,9 @@ def distance(self, xyz=(0.00, 0.00, 0.00)):
distance between the atoms in the atom section and `xyz`.

"""
return np.sqrt(np.sum(self.df[['x', 'y', 'z']]
.subtract(xyz, axis=1)**2, axis=1))
return np.sqrt(
np.sum(self.df[["x", "y", "z"]].subtract(xyz, axis=1) ** 2, axis=1)
)

@staticmethod
def distance_df(df, xyz=(0.00, 0.00, 0.00)):
Expand All @@ -276,5 +281,4 @@ def distance_df(df, xyz=(0.00, 0.00, 0.00)):

"""

return np.sqrt(np.sum(df[['x', 'y', 'z']]
.subtract(xyz, axis=1)**2, axis=1))
return np.sqrt(np.sum(df[["x", "y", "z"]].subtract(xyz, axis=1) ** 2, axis=1))
23 changes: 9 additions & 14 deletions biopandas/mol2/tests/test_mol2_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,30 +13,25 @@

def test_split_multimol2():
all_mol2 = []
for i in split_multimol2(os.path.join(this_dir,
'data', '40_mol2_files.mol2')):
for i in split_multimol2(os.path.join(this_dir, "data", "40_mol2_files.mol2")):
all_mol2.append(i[0])
assert(all_mol2[1] == 'ZINC04084113')
assert(len(all_mol2) == 40)
assert all_mol2[1] == "ZINC04084113"
assert len(all_mol2) == 40


def test_split_multimol2_wrong_format():

expect = ('Wrong file format;'
'allowed file formats are .mol2 and .mol2.gz.')
expect = "Wrong file format;" "allowed file formats are .mol2 and .mol2.gz."

def run_code():
next(split_multimol2('40_mol2_files.pdb'))
next(split_multimol2("40_mol2_files.pdb"))

assert_raises(ValueError,
expect,
run_code)
assert_raises(ValueError, expect, run_code)


def test_split_multimol2_gz():
all_mol2 = []
for i in split_multimol2(os.path.join(this_dir,
'data', '40_mol2_files.mol2.gz')):
for i in split_multimol2(os.path.join(this_dir, "data", "40_mol2_files.mol2.gz")):
all_mol2.append(i[0])
assert(all_mol2[1].decode() == 'ZINC04084113')
assert(len(all_mol2) == 40)
assert all_mol2[1].decode() == "ZINC04084113"
assert len(all_mol2) == 40
Loading