.
The Open Protein Structure Annotation Network
PDB Keyword
.

Convert:PDB

    Table of contents
    No headers

     

    using System;
    using System.Xml;
    using System.Text;
    using System.Collections.Generic;
    using System.Net;
    using System.IO;
    
    
    namespace PDBConvert {
        /// <summary>
        /// Converts a PDB entry, identified by its alias, into an RDF/XML document.
        /// The entry's XML header and its GO terms are downloaded from www.pdb.org and
        /// re-expressed with the UniProt "core" vocabulary: one rdf:Description for the
        /// structure, one per polymer chain, and one per GO term annotation.
        /// </summary>
        public class PDBConvert : ConvertInterface.ConvertPlugin {

            // Namespace URIs used when building the RDF output. They never change at
            // runtime, so they are compile-time constants rather than mutable statics.
            private const string rdfNS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
            private const string pdbNS = "http://purl.uniprot.org/pdb/";
            private const string coreNS = "http://purl.uniprot.org/core/";
            private const string xsiNS = "http://www.w3.org/2001/XMLSchema-instance";
            private const string skosNS = "http://www.w3.org/2004/02/skos/core#";
            private const string rdfsNS = "http://www.w3.org/2000/01/rdf-schema#";
            private const string owlNS = "http://www.w3.org/2002/07/owl#";

            // Namespace bound to the PDBx prefix when the source document does not
            // declare that prefix itself.
            private const string defaultPdbxNS = "http://pdbml.pdb.org/schema/pdbx-v32.xsd";

            /// <summary>
            /// Builds the RDF document for one PDB entry.
            /// </summary>
            /// <param name="alias">PDB identifier; it is upper-cased (culture-invariant) for all generated URIs.</param>
            /// <returns>A new document whose root is rdf:RDF.</returns>
            /// <exception cref="ArgumentNullException"><paramref name="alias"/> is null.</exception>
            /// <exception cref="ArgumentException"><paramref name="alias"/> is empty.</exception>
            /// <exception cref="WebException">One of the downloads from www.pdb.org failed.</exception>
            public XmlDocument ConvertAlias(string alias) {
                if (alias == null) {
                    throw new ArgumentNullException("alias");
                }
                if (alias.Length == 0) {
                    throw new ArgumentException("A PDB identifier is required.", "alias");
                }

                XmlDocument pdbdoc = RetrievePDBXml(alias);

                // ToUpperInvariant: identifiers must not depend on the current culture
                // (for example the Turkish dotted/dotless "i").
                string uppralias = alias.ToUpperInvariant();
                string structureUri = pdbNS + uppralias;

                XmlDocument doc = new XmlDocument();

                XmlElement rdfElem = doc.CreateElement("rdf:RDF", rdfNS);
                rdfElem.SetAttribute("xmlns:PDB", pdbNS);
                rdfElem.SetAttribute("xmlns:core", coreNS);
                rdfElem.SetAttribute("xmlns:xsi", xsiNS);
                rdfElem.SetAttribute("xmlns:skos", skosNS);
                rdfElem.SetAttribute("xmlns:rdfs", rdfsNS);
                rdfElem.SetAttribute("xmlns:owl", owlNS);
                doc.AppendChild(rdfElem);

                // Description of the structure itself.
                XmlElement descrElem = doc.CreateElement("rdf:Description", rdfNS);
                SetRdfAttribute(doc, descrElem, "about", structureUri);

                AppendResource(doc, descrElem, "rdf:type", rdfNS, coreNS + "Protein_Structure");
                AppendResource(doc, descrElem, "rdf:type", rdfNS, coreNS + "Protein");
                AppendResource(doc, descrElem, "owl:sameAs", owlNS, structureUri);

                // seeAlso EBI element - BAD LINK (hard-coded placeholder kept from the original).
                AppendResource(doc, descrElem, "rdf:seeAlso", rdfNS, "http://purl.org/Proteins/EBI/EBI-4151");

                descrElem.AppendChild(RetrieveCitations(doc, pdbdoc));

                // One core:component link per polymer chain.
                Dictionary<string, string> chainslist = RetrievePolyChains(doc, pdbdoc);
                foreach (string id in chainslist.Keys) {
                    AppendResource(doc, descrElem, "core:component", coreNS, structureUri + "_" + id);
                }
                rdfElem.AppendChild(descrElem);

                // One description per chain (chain id -> entity id).
                foreach (KeyValuePair<string, string> chain in chainslist) {
                    rdfElem.AppendChild(RetrieveChainsDetails(doc, pdbdoc, chain.Key, uppralias, chain.Value));
                }

                foreach (XmlNode node in RetrieveGoTerms(doc, uppralias)) {
                    rdfElem.AppendChild(node);
                }

                return doc;
            }

            /// <summary>
            /// Builds the rdf:Description element for a single chain.
            /// </summary>
            /// <param name="doc">Output document used to create elements.</param>
            /// <param name="pdbdoc">Source document the values are read from.</param>
            /// <param name="id">Chain identifier, used in the chain URI.</param>
            /// <param name="alias">Entry identifier, used in the chain URI.</param>
            /// <param name="entity_id">Attribute value used to select the matching source elements.</param>
            /// <returns>The chain description; it is not attached to <paramref name="doc"/>.</returns>
            private XmlElement RetrieveChainsDetails(XmlDocument doc, XmlDocument pdbdoc, string id, string alias, string entity_id) {
                XmlElement descrPolyElem = doc.CreateElement("rdf:Description", rdfNS);
                SetRdfAttribute(doc, descrPolyElem, "about", pdbNS + alias + "_" + id);

                // The sequence element is always emitted, with empty text when no sequence is found.
                string seq = GetElement(pdbdoc, "PDBx:entity_polyCategory", "PDBx:entity_poly", entity_id, "PDBx:pdbx_seq_one_letter_code_can");
                AppendText(doc, descrPolyElem, "rdf:value", rdfNS, seq ?? string.Empty);

                AppendResource(doc, descrPolyElem, "rdf:type", rdfNS, coreNS + "Part");

                // PDB polymer type.
                string type = GetElement(pdbdoc, "PDBx:entity_polyCategory", "PDBx:entity_poly", entity_id, "PDBx:type");
                if (type != null) {
                    AppendText(doc, descrPolyElem, "rdf:title", rdfNS, type);
                }

                // Common name of the natural source.
                string name = GetElement(pdbdoc, "PDBx:entity_src_natCategory", "PDBx:entity_src_nat", entity_id, "PDBx:common_name");
                if (name != null) {
                    AppendText(doc, descrPolyElem, "core:commonName", coreNS, name);
                }

                // Free-text details of the natural source.
                string details = GetElement(pdbdoc, "PDBx:entity_src_natCategory", "PDBx:entity_src_nat", entity_id, "PDBx:details");
                if (details != null) {
                    AppendText(doc, descrPolyElem, "core:detail", coreNS, details);
                }

                string taxon = GetElement(pdbdoc, "PDBx:entity_src_natCategory", "PDBx:entity_src_nat", entity_id, "PDBx:pdbx_ncbi_taxonomy_id");
                if (taxon != null) {
                    AppendResource(doc, descrPolyElem, "core:taxon", coreNS, "http://purl.uniprot.org/taxonomy/" + taxon);
                }

                // Link to UniProt only when the reference database is "UNP" (ordinal match)
                // and an accession is actually present; otherwise the link would end in "/uniprot/".
                string dbname = GetElement(pdbdoc, "PDBx:struct_refCategory", "PDBx:struct_ref", entity_id, "PDBx:db_name");
                if (string.Equals(dbname, "UNP", StringComparison.Ordinal)) {
                    string accession = GetElement(pdbdoc, "PDBx:struct_refCategory", "PDBx:struct_ref", entity_id, "PDBx:pdbx_db_accession");
                    if (!string.IsNullOrEmpty(accession)) {
                        AppendResource(doc, descrPolyElem, "owl:sameAs", owlNS, "http://purl.uniprot.org/uniprot/" + accession);
                    }
                }

                return descrPolyElem;
            }

            /// <summary>
            /// Maps every chain id listed in the entity_poly elements to its entity id.
            /// </summary>
            /// <param name="doc">Unused; kept so the signature stays unchanged.</param>
            /// <param name="pdbdoc">Source document to scan.</param>
            /// <returns>
            /// Chain id -> entity id. A chain id listed by several entities keeps the last one.
            /// Entries lacking an entity_id attribute or a pdbx_strand_id child are skipped.
            /// </returns>
            private Dictionary<string, string> RetrievePolyChains(XmlDocument doc, XmlDocument pdbdoc) {
                // Prefer the namespace the document itself binds to the PDBx prefix so that
                // other schema versions still match; fall back to the historical URI.
                string pdbxNS = defaultPdbxNS;
                if (pdbdoc.DocumentElement != null) {
                    string declared = pdbdoc.DocumentElement.GetNamespaceOfPrefix("PDBx");
                    if (!string.IsNullOrEmpty(declared)) {
                        pdbxNS = declared;
                    }
                }

                XmlNamespaceManager nsmgr = new XmlNamespaceManager(pdbdoc.NameTable);
                nsmgr.AddNamespace("PDBx", pdbxNS);

                Dictionary<string, string> outMap = new Dictionary<string, string>();
                XmlNodeList nodeList = pdbdoc.SelectNodes("//PDBx:datablock/PDBx:entity_polyCategory/PDBx:entity_poly", nsmgr);
                foreach (XmlNode node in nodeList) {
                    XmlAttribute entityAttr = node.Attributes["entity_id"];
                    XmlNode strandNode = node.SelectSingleNode("PDBx:pdbx_strand_id", nsmgr);
                    if (entityAttr == null || strandNode == null) {
                        continue;
                    }

                    foreach (string rawChain in strandNode.InnerText.Split(new char[] { ',' })) {
                        // Tolerate "A, B": whitespace must not leak into the chain URI.
                        string chain = rawChain.Trim();
                        if (chain.Length > 0) {
                            outMap[chain] = entityAttr.Value;
                        }
                    }
                }
                return outMap;
            }

            /// <summary>
            /// Builds the core:citation element for the citation selected by the value "primary".
            /// </summary>
            /// <param name="doc">Output document used to create elements.</param>
            /// <param name="pdbdoc">Source document the values are read from.</param>
            /// <returns>
            /// The citation element; title, journal name and year are added only when found.
            /// It is not attached to <paramref name="doc"/>.
            /// </returns>
            private XmlElement RetrieveCitations(XmlDocument doc, XmlDocument pdbdoc) {
                XmlElement citElement = doc.CreateElement("core:citation", coreNS);
                SetRdfAttribute(doc, citElement, "parseType", "Resource");

                AppendResource(doc, citElement, "rdf:type", rdfNS, coreNS + "Journal_Citation");

                string title = GetElement(pdbdoc, "PDBx:citationCategory", "PDBx:citation", "primary", "PDBx:title");
                if (title != null) {
                    AppendText(doc, citElement, "core:title", coreNS, title);
                }

                string journal = GetElement(pdbdoc, "PDBx:citationCategory", "PDBx:citation", "primary", "PDBx:journal_abbrev");
                if (journal != null) {
                    AppendText(doc, citElement, "core:name", coreNS, journal);
                }

                string year = GetElement(pdbdoc, "PDBx:citationCategory", "PDBx:citation", "primary", "PDBx:year");
                if (year != null) {
                    XmlElement yearEle = AppendText(doc, citElement, "core:date", coreNS, year);
                    SetRdfAttribute(doc, yearEle, "datatype", "http://www.w3.org/2001/XMLSchema#gYear");
                }

                return citElement;
            }

            /// <summary>
            /// Looks up the text of parent/child/childchild, where element names are compared
            /// as written in the document (including prefix) and the child must satisfy
            /// <see cref="HasAttributes"/> for <paramref name="childattrconstraint"/>.
            /// </summary>
            /// <returns>The text of the LAST matching element in document order, or null when none matches.</returns>
            private string GetElement(XmlDocument pdbdoc, string parent, string child, string childattrconstraint, string childchild) {
                string toreturn = null;
                foreach (XmlNode parentNode in pdbdoc.GetElementsByTagName(parent)) {
                    foreach (XmlNode childNode in parentNode.ChildNodes) {
                        if (childNode.Name != child || !HasAttributes(childNode.Attributes, childattrconstraint)) {
                            continue;
                        }
                        foreach (XmlNode valueNode in childNode.ChildNodes) {
                            if (valueNode.Name == childchild) {
                                // No break on purpose: later matches overwrite earlier ones.
                                toreturn = valueNode.InnerText;
                            }
                        }
                    }
                }
                return toreturn;
            }

            /// <summary>
            /// Tells whether any attribute in <paramref name="attributes"/> has the value <paramref name="attr"/>.
            /// The attribute name is not checked.
            /// </summary>
            /// <returns>
            /// True when <paramref name="attr"/> is the empty string (no constraint) or some attribute
            /// value equals it; false otherwise, including when <paramref name="attributes"/> is null.
            /// </returns>
            private static bool HasAttributes(XmlAttributeCollection attributes, string attr) {
                if (attr == "") {
                    return true; // no attribute constraints.
                }
                if (attributes == null) {
                    return false; // non-element nodes expose no attribute collection.
                }

                foreach (XmlAttribute attribute in attributes) {
                    if (attribute.Value == attr) {
                        return true;
                    }
                }
                return false;
            }

            /// <summary>
            /// Downloads http://www.pdb.org/pdb/files/{alias}.xml?headerOnly=YES and parses it.
            /// </summary>
            /// <param name="alias">PDB identifier; it is URL-escaped before being placed in the request.</param>
            /// <exception cref="ArgumentNullException"><paramref name="alias"/> is null.</exception>
            /// <exception cref="WebException">The request failed (the message is also written to the console).</exception>
            public XmlDocument RetrievePDBXml(string alias) {
                if (alias == null) {
                    throw new ArgumentNullException("alias");
                }
                return DownloadXml("http://www.pdb.org/pdb/files/" + Uri.EscapeDataString(alias) + ".xml?headerOnly=YES");
            }

            /// <summary>
            /// Downloads the GO terms of an entry from http://www.pdb.org/pdb/rest/goTerms and
            /// turns every //goTerms/term element into an rdf:Description of the annotated chain
            /// carrying one core:classifiedWith link.
            /// </summary>
            /// <param name="doc">Output document used to create the returned elements.</param>
            /// <param name="alias">Entry identifier, used in the request and in the chain URIs.</param>
            /// <returns>Unattached description elements; terms lacking a chainId or id attribute are skipped.</returns>
            /// <exception cref="ArgumentNullException"><paramref name="doc"/> or <paramref name="alias"/> is null.</exception>
            /// <exception cref="WebException">The request failed (the message is also written to the console).</exception>
            public List<XmlNode> RetrieveGoTerms(XmlDocument doc, string alias) {
                if (doc == null) {
                    throw new ArgumentNullException("doc");
                }
                if (alias == null) {
                    throw new ArgumentNullException("alias");
                }

                XmlDocument gdoc = DownloadXml("http://www.pdb.org/pdb/rest/goTerms?structureId=" + Uri.EscapeDataString(alias));

                List<XmlNode> outlist = new List<XmlNode>();
                foreach (XmlNode node in gdoc.SelectNodes("//goTerms/term")) {
                    XmlAttribute chainAttr = node.Attributes["chainId"];
                    XmlAttribute goAttr = node.Attributes["id"];
                    if (chainAttr == null || goAttr == null) {
                        continue;
                    }

                    XmlElement goLinkElem = doc.CreateElement("rdf:Description", rdfNS);
                    SetRdfAttribute(doc, goLinkElem, "about", pdbNS + alias + "_" + chainAttr.Value);
                    AppendResource(doc, goLinkElem, "core:classifiedWith", coreNS, "http://purl.uniprot.org/go/" + goAttr.Value);
                    outlist.Add(goLinkElem);
                }

                return outlist;
            }

            /// <summary>
            /// Performs a GET on <paramref name="uristr"/>, decodes the body as UTF-8 and parses it as XML.
            /// The response, stream and reader are always disposed.
            /// </summary>
            /// <exception cref="WebException">Logged to the console, then rethrown.</exception>
            private static XmlDocument DownloadXml(string uristr) {
                HttpWebRequest httpRequest = (HttpWebRequest)WebRequest.Create(new Uri(uristr));
                try {
                    using (HttpWebResponse response = (HttpWebResponse)httpRequest.GetResponse())
                    using (Stream responseStream = response.GetResponseStream())
                    using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8)) {
                        XmlDocument xdoc = new XmlDocument();
                        xdoc.Load(reader);
                        return xdoc;
                    }
                } catch (WebException e) {
                    Console.WriteLine("WebException: " + e.Message);
                    // Rethrow: continuing without a response only hid the failure behind a
                    // NullReferenceException further down.
                    throw;
                }
            }

            /// <summary>
            /// Sets the attribute rdf:{localName} on <paramref name="element"/>.
            /// The attribute is created with an explicit prefix and local name because the
            /// three-argument XmlElement.SetAttribute overload expects a local name, not "rdf:xxx".
            /// </summary>
            private static void SetRdfAttribute(XmlDocument doc, XmlElement element, string localName, string value) {
                XmlAttribute attribute = doc.CreateAttribute("rdf", localName, rdfNS);
                attribute.Value = value;
                element.SetAttributeNode(attribute);
            }

            /// <summary>
            /// Appends a child element that carries only an rdf:resource attribute.
            /// </summary>
            private static XmlElement AppendResource(XmlDocument doc, XmlElement parent, string qualifiedName, string ns, string resourceUri) {
                XmlElement child = doc.CreateElement(qualifiedName, ns);
                SetRdfAttribute(doc, child, "resource", resourceUri);
                parent.AppendChild(child);
                return child;
            }

            /// <summary>
            /// Appends a child element whose content is the given text.
            /// </summary>
            private static XmlElement AppendText(XmlDocument doc, XmlElement parent, string qualifiedName, string ns, string text) {
                XmlElement child = doc.CreateElement(qualifiedName, ns);
                child.InnerText = text;
                parent.AppendChild(child);
                return child;
            }
        }
    
    
    }
    

    Reviews

    References

     

    No references found.

    Tag page
    • No tags

    Files (0)

     
    You must log in to post a comment.
    All content on this site is licensed under a Creative Commons Attribution 3.0 License
    Powered by MindTouch