Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 60 additions & 15 deletions biopandas/mmcif/pandas_mmcif.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,15 @@

import gzip
import sys
from typing import Dict
import warnings
from distutils.version import LooseVersion
from typing import Dict, Optional
from urllib.error import HTTPError, URLError
from urllib.request import urlopen

import numpy as np
import pandas as pd

try:
from urllib.error import HTTPError, URLError
from urllib.request import urlopen
except ImportError:
raise ValueError("Python 2.7 is no longer supported")

import warnings
from distutils.version import LooseVersion

from ..pdb.engines import amino3to1dict
from .engines import ANISOU_DF_COLUMNS, mmcif_col_types
from .mmcif_parser import load_cif_data
Expand Down Expand Up @@ -72,20 +67,51 @@ def read_mmcif(self, path):
self.code = self.data["entry"]["id"][0].lower()
return self

def fetch_mmcif(self, pdb_code: Optional[str] = None, uniprot_id: Optional[str] = None, source: str = "pdb"):
    """Fetch mmCIF file contents from the PDB (rcsb.org) or the AlphaFold database.

    Exactly one of `pdb_code` and `uniprot_id` must be given, and it must
    match `source`: a PDB code for `"pdb"`, a UniProt ID for the AlphaFold
    sources (https://alphafold.ebi.ac.uk/).

    Parameters
    ----------
    pdb_code : str, optional
        A 4-letter PDB code, e.g., `"3eiy"` to retrieve structures from the PDB.
        Defaults to `None`.

    uniprot_id : str, optional
        A UniProt Identifier, e.g., `"Q5VSL9"` to retrieve structures from the
        AF2 database. Defaults to `None`.

    source : str
        The source to retrieve the structure from (`"pdb"`, `"alphafold2-v1"`
        or `"alphafold2-v2"`). Defaults to `"pdb"`.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If neither or both identifiers are given, if the identifier does not
        match `source`, or if `source` is not one of the supported values.

    """
    # Map each supported AlphaFold source name to its model version number.
    af2_versions = {"alphafold2-v1": 1, "alphafold2-v2": 2}

    # Sanitize input: exactly one of the two identifiers must be provided.
    if (pdb_code is None) == (uniprot_id is None):
        raise ValueError("Please provide either a PDB code or a UniProt ID.")

    # The identifier kind has to match the requested source.
    if uniprot_id is not None and source == "pdb":
        raise ValueError("Please use a 'pdb_code' instead of 'uniprot_id' for source='pdb'.")
    if pdb_code is not None and source in af2_versions:
        raise ValueError(f"Please use a 'uniprot_id' instead of 'pdb_code' for source={source}.")

    if source == "pdb":
        self.mmcif_path, self.mmcif_text = self._fetch_mmcif(pdb_code)
    elif source in af2_versions:
        self.mmcif_path, self.mmcif_text = self._fetch_af2(uniprot_id, af2_versions[source])
    else:
        raise ValueError(f"Invalid source: {source}. Please use one of 'pdb', 'alphafold2-v1' or 'alphafold2-v2'.")

    # Build the data frames from the downloaded text.
    self._df = self._construct_df(text=self.mmcif_text)
    return self

Expand Down Expand Up @@ -121,6 +147,25 @@ def _fetch_mmcif(pdb_code):
print(f"URL Error {e.args}")
return url, txt

@staticmethod
def _fetch_af2(uniprot_id: str, af2_version: int = 2):
    """Load MMCIF file from https://alphafold.ebi.ac.uk/.

    Parameters
    ----------
    uniprot_id : str
        UniProt identifier, e.g., `"Q5VSL9"`. It is upper-cased before being
        inserted into the download URL.

    af2_version : int
        Model version number used in the file name. Defaults to 2.

    Returns
    -------
    tuple
        `(url, txt)`: the requested URL and the UTF-8 decoded file contents.
        `txt` is `None` when the request fails with an HTTP or URL error
        (the error is printed rather than raised).

    """
    txt = None
    url = f"https://alphafold.ebi.ac.uk/files/AF-{uniprot_id.upper()}-F1-model_v{af2_version}.cif"

    try:
        # The context manager closes the HTTP response even if reading or
        # decoding fails.
        with urlopen(url) as response:
            txt = response.read().decode("utf-8")
    except HTTPError as e:
        # HTTPError subclasses URLError, so it has to be caught first.
        print(f"HTTP Error {e.code}")
    except URLError as e:
        print(f"URL Error {e.args}")
    return url, txt

@staticmethod
def _read_mmcif(path):
"""Read MMCIF file from local drive."""
Expand Down
Empty file.
Loading