D Ban, C A Smith, B L de Groot, C Griesinger, D Lee Recent advances in measuring the kinetics of biomolecules by NMR relaxation dispersion spectroscopy Journal Article Arch Biochem Biophys, 628 , pp. 81-91, 2017.
% Journal article record. The acronym in the title is braced so sentence-casing styles keep it upper-case; pages use the BibTeX "--" range form.
% NOTE(review): tppubtype is not a standard BibTeX/biblatex field (presumably exporter metadata); the date value looks like a placeholder month/day -- confirm before relying on it.
@article{Ban:2017:Arch-Biochem-Biophys:28576576,
  title     = {Recent advances in measuring the kinetics of biomolecules by {NMR} relaxation dispersion spectroscopy},
  author    = {D Ban and C A Smith and B L de Groot and C Griesinger and D Lee},
  doi       = {10.1016/j.abb.2017.05.016},
  year      = {2017},
  date      = {2017-01-01},
  journal   = {Arch Biochem Biophys},
  volume    = {628},
  pages     = {81--91},
  abstract  = {Protein function can be modulated or dictated by the amplitude and timescale of biomolecular motion, therefore it is imperative to study protein dynamics. Nuclear Magnetic Resonance (NMR) spectroscopy is a powerful technique capable of studying timescales of motion that range from those faster than molecular reorientation on the picosecond timescale to those that occur in real-time. Across this entire regime, NMR observables can report on the amplitude of atomic motion, and the kinetics of atomic motion can be ascertained with a wide variety of experimental techniques from real-time to milliseconds and several nanoseconds to picoseconds. Still a four orders of magnitude window between several nanoseconds and tens of microseconds has remained elusive. Here, we highlight new relaxation dispersion NMR techniques that serve to cover this "hidden-time" window up to hundreds of nanoseconds that achieve atomic resolution while studying the molecule under physiological conditions.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Protein function can be modulated or dictated by the amplitude and timescale of biomolecular motion, therefore it is imperative to study protein dynamics. 
Nuclear Magnetic Resonance (NMR) spectroscopy is a powerful technique capable of studying timescales of motion that range from those faster than molecular reorientation on the picosecond timescale to those that occur in real-time. Across this entire regime, NMR observables can report on the amplitude of atomic motion, and the kinetics of atomic motion can be ascertained with a wide variety of experimental techniques from real-time to milliseconds and several nanoseconds to picoseconds. Still a four orders of magnitude window between several nanoseconds and tens of microseconds has remained elusive. Here, we highlight new relaxation dispersion NMR techniques that serve to cover this "hidden-time" window up to hundreds of nanoseconds that achieve atomic resolution while studying the molecule under physiological conditions. |
C A Smith, D Ban, S Pratihar, K Giller, M Paulat, S Becker, C Griesinger, D Lee, B L de Groot Allosteric switch regulates protein-protein binding through collective motion Journal Article Proc Natl Acad Sci U S A, 113 (12), pp. 3269-3274, 2016.
% Journal article record; the page range is written with "--" as BibTeX styles expect.
% NOTE(review): tppubtype is not a standard BibTeX/biblatex field (presumably exporter metadata); standard styles ignore unknown fields.
@article{Smith:2016:Proc-Natl-Acad-Sci-U-S-A:26961002,
  title     = {Allosteric switch regulates protein-protein binding through collective motion},
  author    = {C A Smith and D Ban and S Pratihar and K Giller and M Paulat and S Becker and C Griesinger and D Lee and B L de Groot},
  doi       = {10.1073/pnas.1519609113},
  year      = {2016},
  date      = {2016-03-01},
  journal   = {Proc Natl Acad Sci U S A},
  volume    = {113},
  number    = {12},
  pages     = {3269--3274},
  abstract  = {Many biological processes depend on allosteric communication between different parts of a protein, but the role of internal protein motion in propagating signals through the structure remains largely unknown. Through an experimental and computational analysis of the ground state dynamics in ubiquitin, we identify a collective global motion that is specifically linked to a conformational switch distant from the binding interface. This allosteric coupling is also present in crystal structures and is found to facilitate multispecificity, particularly binding to the ubiquitin-specific protease (USP) family of deubiquitinases. The collective motion that enables this allosteric communication does not affect binding through localized changes but, instead, depends on expansion and contraction of the entire protein domain. The characterization of these collective motions represents a promising avenue for finding and manipulating allosteric networks.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Many biological processes depend on allosteric communication between different parts of a protein, but the role of internal protein motion in propagating signals through the structure remains largely unknown. 
Through an experimental and computational analysis of the ground state dynamics in ubiquitin, we identify a collective global motion that is specifically linked to a conformational switch distant from the binding interface. This allosteric coupling is also present in crystal structures and is found to facilitate multispecificity, particularly binding to the ubiquitin-specific protease (USP) family of deubiquitinases. The collective motion that enables this allosteric communication does not affect binding through localized changes but, instead, depends on expansion and contraction of the entire protein domain. The characterization of these collective motions represents a promising avenue for finding and manipulating allosteric networks. |
C A Smith, D Ban, S Pratihar, K Giller, C Schwiegk, B L de Groot, S Becker, C Griesinger, D Lee Population Shuffling of Protein Conformations Journal Article Angew Chem Int Ed Engl, 54 (1), pp. 207-10, 2015.
% Journal article record; the abbreviated page range "207-10" is expanded to the full "207--210" form.
% NOTE(review): the citation key says 2014 while year/date say 2015 (presumably online-first vs. print year -- confirm). The key is intentionally left unchanged so existing citations keep resolving.
@article{Smith:2014:Angew-Chem-Int-Ed-Engl:25377083,
  title     = {Population Shuffling of Protein Conformations},
  author    = {C A Smith and D Ban and S Pratihar and K Giller and C Schwiegk and B L de Groot and S Becker and C Griesinger and D Lee},
  doi       = {10.1002/anie.201408890},
  year      = {2015},
  date      = {2015-01-01},
  journal   = {Angew Chem Int Ed Engl},
  volume    = {54},
  number    = {1},
  pages     = {207--210},
  abstract  = {Motions play a vital role in the functions of many proteins. Discrete conformational transitions to excited states, happening on timescales of hundreds of microseconds, have been extensively characterized. On the other hand, the dynamics of the ground state are widely unexplored. Newly developed high-power relaxation dispersion experiments allow the detection of motions up to a one-digit microsecond timescale. These experiments showed that side chains in the hydrophobic core as well as at protein-protein interaction surfaces of both ubiquitin and the third immunoglobulin binding domain of protein G move on the microsecond timescale. Both proteins exhibit plasticity to this microsecond motion through redistribution of the populations of their side-chain rotamers, which interconvert on the picosecond to nanosecond timescale, making it likely that this "population shuffling" process is a general mechanism.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Motions play a vital role in the functions of many proteins. Discrete conformational transitions to excited states, happening on timescales of hundreds of microseconds, have been extensively characterized. On the other hand, the dynamics of the ground state are widely unexplored. 
Newly developed high-power relaxation dispersion experiments allow the detection of motions up to a one-digit microsecond timescale. These experiments showed that side chains in the hydrophobic core as well as at protein-protein interaction surfaces of both ubiquitin and the third immunoglobulin binding domain of protein G move on the microsecond timescale. Both proteins exhibit plasticity to this microsecond motion through redistribution of the populations of their side-chain rotamers, which interconvert on the picosecond to nanosecond timescale, making it likely that this "population shuffling" process is a general mechanism. |
T M Sabo, C A Smith, D Ban, A Mazur, D Lee, C Griesinger ORIUM: optimized RDC-based Iterative and Unified Model-free analysis Journal Article J Biomol NMR, 58 (4), pp. 287-301, 2014.
% Journal article record. The acronyms in the title are braced so sentence-casing styles keep them upper-case; the entry's own page range uses "--".
% Page ranges inside the abstract are part of quoted in-text citations and are left as published.
% NOTE(review): the abstract contains non-ASCII Greek letters; fine with Biber/UTF-8, but classic 8-bit BibTeX toolchains may need LaTeX escapes -- confirm the target toolchain.
@article{Sabo:2014:J-Biomol-NMR:24013952,
  title     = {{ORIUM}: optimized {RDC}-based Iterative and Unified Model-free analysis},
  author    = {T M Sabo and C A Smith and D Ban and A Mazur and D Lee and C Griesinger},
  doi       = {10.1007/s10858-013-9775-1},
  year      = {2014},
  date      = {2014-04-01},
  journal   = {J Biomol NMR},
  volume    = {58},
  number    = {4},
  pages     = {287--301},
  abstract  = {Residual dipolar couplings (RDCs) are NMR parameters that provide both structural and dynamic information concerning inter-nuclear vectors, such as N-H(N) and Cα-Hα bonds within the protein backbone. Two approaches for extracting this information from RDCs are the model free analysis (MFA) (Meiler et al. in J Am Chem Soc 123:6098-6107, 2001; Peti et al. in J Am Chem Soc 124:5822-5833, 2002) and the direct interpretation of dipolar couplings (DIDCs) (Tolman in J Am Chem Soc 124:12020-12030, 2002). Both methods have been incorporated into iterative schemes, namely the self-consistent RDC based MFA (SCRM) (Lakomek et al. in J Biomol NMR 41:139-155, 2008) and iterative DIDC (Yao et al. in J Phys Chem B 112:6045-6056, 2008), with the goal of removing the influence of structural noise in the MFA and DIDC formulations. Here, we report a new iterative procedure entitled Optimized RDC-based Iterative and Unified Model-free analysis (ORIUM). ORIUM unifies theoretical concepts developed in the MFA, SCRM, and DIDC methods to construct a computationally less demanding approach to determine these structural and dynamic parameters. In all schemes, dynamic averaging reduces the actual magnitude of the alignment tensors complicating the determination of the absolute values for the generalized order parameters. To readdress this scaling issue that has been previously investigated (Lakomek et al. in J Biomol NMR 41:139-155, 2008; Salmon et al. in Angew Chem Int Edit 48:4154-4157, 2009), a new method is presented using only RDC data to establish a lower bound on protein motion, bypassing the requirement of Lipari-Szabo order parameters. ORIUM and the new scaling procedure are applied to the proteins ubiquitin and the third immunoglobulin domain of protein G (GB3). Our results indicate good agreement with the SCRM and iterative DIDC approaches and signify the general applicability of ORIUM and the proposed scaling for the extraction of inter-nuclear vector structural and dynamic content.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Residual dipolar couplings (RDCs) are NMR parameters that provide both structural and dynamic information concerning inter-nuclear vectors, such as N-H(N) and Cα-Hα bonds within the protein backbone. Two approaches for extracting this information from RDCs are the model free analysis (MFA) (Meiler et al. in J Am Chem Soc 123:6098-6107, 2001; Peti et al. in J Am Chem Soc 124:5822-5833, 2002) and the direct interpretation of dipolar couplings (DIDCs) (Tolman in J Am Chem Soc 124:12020-12030, 2002). Both methods have been incorporated into iterative schemes, namely the self-consistent RDC based MFA (SCRM) (Lakomek et al. in J Biomol NMR 41:139-155, 2008) and iterative DIDC (Yao et al. in J Phys Chem B 112:6045-6056, 2008), with the goal of removing the influence of structural noise in the MFA and DIDC formulations. Here, we report a new iterative procedure entitled Optimized RDC-based Iterative and Unified Model-free analysis (ORIUM). ORIUM unifies theoretical concepts developed in the MFA, SCRM, and DIDC methods to construct a computationally less demanding approach to determine these structural and dynamic parameters. In all schemes, dynamic averaging reduces the actual magnitude of the alignment tensors complicating the determination of the absolute values for the generalized order parameters. 
To readdress this scaling issue that has been previously investigated (Lakomek et al. in J Biomol NMR 41:139-155, 2008; Salmon et al. in Angew Chem Int Edit 48:4154-4157, 2009), a new method is presented using only RDC data to establish a lower bound on protein motion, bypassing the requirement of Lipari-Szabo order parameters. ORIUM and the new scaling procedure are applied to the proteins ubiquitin and the third immunoglobulin domain of protein G (GB3). Our results indicate good agreement with the SCRM and iterative DIDC approaches and signify the general applicability of ORIUM and the proposed scaling for the extraction of inter-nuclear vector structural and dynamic content. |
N Ollikainen, C A Smith, J S Fraser, T Kortemme Flexible backbone sampling methods to model and design protein alternative conformations Journal Article Methods Enzymol, 523 , pp. 61-85, 2013.
% Record stored as an article with volume and pages only (no issue number in the source); the page range uses "--".
% NOTE(review): tppubtype is not a standard BibTeX/biblatex field (presumably exporter metadata); standard styles ignore unknown fields.
@article{Ollikainen:2013:Methods-Enzymol:23422426,
  title     = {Flexible backbone sampling methods to model and design protein alternative conformations},
  author    = {N Ollikainen and C A Smith and J S Fraser and T Kortemme},
  doi       = {10.1016/B978-0-12-394292-0.00004-7},
  year      = {2013},
  date      = {2013-01-01},
  journal   = {Methods Enzymol},
  volume    = {523},
  pages     = {61--85},
  abstract  = {Sampling alternative conformations is key to understanding how proteins work and engineering them for new functions. However, accurately characterizing and modeling protein conformational ensembles remain experimentally and computationally challenging. These challenges must be met before protein conformational heterogeneity can be exploited in protein engineering and design. Here, as a stepping stone, we describe methods to detect alternative conformations in proteins and strategies to model these near-native conformational changes based on backrub-type Monte Carlo moves in Rosetta. We illustrate how Rosetta simulations that apply backrub moves improve modeling of point mutant side-chain conformations, native side-chain conformational heterogeneity, functional conformational changes, tolerated sequence space, protein interaction specificity, and amino acid covariation across protein-protein interfaces. We include relevant Rosetta command lines and RosettaScripts to encourage the application of these types of simulations to other systems. Our work highlights that critical scoring and sampling improvements will be necessary to approximate conformational landscapes. Challenges for the future development of these methods include modeling conformational changes that propagate away from designed mutation sites and modulating backbone flexibility to predictively design functionally important conformational heterogeneity.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Sampling alternative conformations is key to understanding how proteins work and engineering them for new functions. However, accurately characterizing and modeling protein conformational ensembles remain experimentally and computationally challenging. These challenges must be met before protein conformational heterogeneity can be exploited in protein engineering and design. Here, as a stepping stone, we describe methods to detect alternative conformations in proteins and strategies to model these near-native conformational changes based on backrub-type Monte Carlo moves in Rosetta. We illustrate how Rosetta simulations that apply backrub moves improve modeling of point mutant side-chain conformations, native side-chain conformational heterogeneity, functional conformational changes, tolerated sequence space, protein interaction specificity, and amino acid covariation across protein-protein interfaces. We include relevant Rosetta command lines and RosettaScripts to encourage the application of these types of simulations to other systems. Our work highlights that critical scoring and sampling improvements will be necessary to approximate conformational landscapes. Challenges for the future development of these methods include modeling conformational changes that propagate away from designed mutation sites and modulating backbone flexibility to predictively design functionally important conformational heterogeneity. |
C A Smith, C A Shi, M K Chroust, T E Bliska, M J Kelly, M P Jacobson, T Kortemme Design of a phosphorylatable PDZ domain with peptide-specific affinity changes Journal Article Structure, 21 (1), pp. 54-64, 2013.
% Journal article record. The acronym in the title is braced against sentence-casing and the page range uses "--".
% Three stray dagger characters in the abstract (an encoding artifact where spaces belong) are replaced with plain spaces.
@article{Smith:2013:Structure:23159126,
  title     = {Design of a phosphorylatable {PDZ} domain with peptide-specific affinity changes},
  author    = {C A Smith and C A Shi and M K Chroust and T E Bliska and M J Kelly and M P Jacobson and T Kortemme},
  doi       = {10.1016/j.str.2012.10.007},
  year      = {2013},
  date      = {2013-01-01},
  journal   = {Structure},
  volume    = {21},
  number    = {1},
  pages     = {54--64},
  abstract  = {Phosphorylation is one of the most common posttranslational modifications controlling cellular protein activity. Here, we describe a combined computational and experimental strategy to design new phosphorylation sites into globular proteins to regulate their functions. We target a peptide recognition protein, the Erbin PDZ domain, to be phosphorylated by cAMP-dependent protein kinase. Comparing the five successful designs to the unsuccessful cases, we find a trade-off between protein stability and the ability to be modified by phosphorylation. In two designs, Erbin's peptide binding function is modified by phosphorylation, where the presence of the phosphate group destabilizes peptide binding. One of these showed an additional switch in specificity by introducing favorable interactions between a designed arginine in the peptide and phosphoserine on the PDZ domain. Because of the diversity of PDZ domains, this opens avenues for the design of related phosphoswitchable domains to create a repertoire of regulatable interaction parts for synthetic biology.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Phosphorylation is one of the most common posttranslational modifications controlling cellular protein activity. 
Here, we describe a combined computational and experimental strategy to design new phosphorylation sites into globular proteins to regulate their functions. We target a peptide recognition protein, the Erbin PDZ domain, to be phosphorylated by cAMP-dependent protein kinase. Comparing the five successful designs to the unsuccessful cases, we find a trade-off between protein stability and the ability to be modified by phosphorylation. In two designs, Erbin's peptide binding function is modified by phosphorylation, where the presence of the phosphate group destabilizes peptide binding. One of these showed an additional switch in specificity by introducing favorable interactions between a designed arginine in the peptide and phosphoserine on the PDZ domain. Because of the diversity of PDZ domains, this opens avenues for the design of related phosphoswitchable domains to create a repertoire of regulatable interaction parts for synthetic biology. |
Andrew Leaver-Fay, Michael Tyka, Steven M Lewis, Oliver F Lange, James Thompson, Ron Jacak, Kristian W Kaufmann, Douglas P Renfrew, Colin A Smith, Will Sheffler, Ian W Davis, Seth Cooper, Adrien Treuille, Daniel J Mandell, Florian Richter, Yih-En Andrew Ban, Sarel J Fleishman, Jacob E Corn, David E Kim, Sergey Lyskov, Monica Berrondo, Stuart Mentzer, Zoran Popovic, James J Havranek, John Karanicolas, Rhiju Das, Jens Meiler, Tanja Kortemme, Jeffrey J Gray, Brian Kuhlman, David Baker, Philip Bradley ROSETTA3: An Object-Oriented Software Suite for the Simulation and Design of Macromolecules Journal Article Methods Enzymol, 487 , pp. 545-574, 2011.
% Record with full author given names. The software name in the title is braced so sentence-casing styles keep it upper-case; the page range uses "--".
% NOTE(review): this key uses a shorter scheme than the neighbouring Name:Year:Journal:ID keys; it is left unchanged so existing citations keep resolving.
@article{Leaver-Fay:2011,
  title     = {{ROSETTA3}: An Object-Oriented Software Suite for the Simulation and Design of Macromolecules},
  author    = {Andrew Leaver-Fay and Michael Tyka and Steven M Lewis and Oliver F Lange and James Thompson and Ron Jacak and Kristian W Kaufmann and Douglas P Renfrew and Colin A Smith and Will Sheffler and Ian W Davis and Seth Cooper and Adrien Treuille and Daniel J Mandell and Florian Richter and Yih-En Andrew Ban and Sarel J Fleishman and Jacob E Corn and David E Kim and Sergey Lyskov and Monica Berrondo and Stuart Mentzer and Zoran Popovic and James J Havranek and John Karanicolas and Rhiju Das and Jens Meiler and Tanja Kortemme and Jeffrey J Gray and Brian Kuhlman and David Baker and Philip Bradley},
  doi       = {10.1016/B978-0-12-381270-4.00019-6},
  year      = {2011},
  date      = {2011-01-01},
  journal   = {Methods Enzymol},
  volume    = {487},
  pages     = {545--574},
  abstract  = {We have recently completed a full rearchitecturing of the ROSETTA molecular modeling program, generalizing and expanding its existing functionality. The new architecture enables the rapid prototyping of novel protocols by providing easy-to-use interfaces to powerful tools for molecular modeling. The source code of this rearchitecturing has been released as ROSETTA3 and is freely available for academic use. At the time of its release, it contained 470,000 lines of code. Counting currently unpublished protocols at the time of this writing, the source includes 1,285,000 lines. Its rapid growth is a testament to its ease of use. This chapter describes the requirements for our new architecture, justifies the design decisions, sketches out central classes, and highlights a few of the common tasks that the new software can perform.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
We have recently completed a full rearchitecturing of the ROSETTA molecular modeling program, generalizing and expanding its existing functionality. The new architecture enables the rapid prototyping of novel protocols by providing easy-to-use interfaces to powerful tools for molecular modeling. The source code of this rearchitecturing has been released as ROSETTA3 and is freely available for academic use. At the time of its release, it contained 470,000 lines of code. Counting currently unpublished protocols at the time of this writing, the source includes 1,285,000 lines. Its rapid growth is a testament to its ease of use. This chapter describes the requirements for our new architecture, justifies the design decisions, sketches out central classes, and highlights a few of the common tasks that the new software can perform. |
C A Smith, T Kortemme Predicting the Tolerated Sequences for Proteins and Protein Interfaces Using RosettaBackrub Flexible Backbone Design Journal Article PLoS One, 6 (7), 2011.
% Journal article record. The camelCase tool name in the title is braced so sentence-casing styles do not lowercase it.
% NOTE(review): no pages/article-number field is present in the source; the journal presumably identifies articles by an e-number -- confirm via the DOI and add it if required by the target style.
@article{Smith:2011:PLoS-One:21789164,
  title     = {Predicting the Tolerated Sequences for Proteins and Protein Interfaces Using {RosettaBackrub} Flexible Backbone Design},
  author    = {C A Smith and T Kortemme},
  doi       = {10.1371/journal.pone.0020451},
  year      = {2011},
  date      = {2011-01-01},
  journal   = {PLoS One},
  volume    = {6},
  number    = {7},
  abstract  = {Predicting the set of sequences that are tolerated by a protein or protein interface, while maintaining a desired function, is useful for characterizing protein interaction specificity and for computationally designing sequence libraries to engineer proteins with new functions. Here we provide a general method, a detailed set of protocols, and several benchmarks and analyses for estimating tolerated sequences using flexible backbone protein design implemented in the Rosetta molecular modeling software suite. The input to the method is at least one experimentally determined three-dimensional protein structure or high-quality model. The starting structure(s) are expanded or refined into a conformational ensemble using Monte Carlo simulations consisting of backrub backbone and side chain moves in Rosetta. The method then uses a combination of simulated annealing and genetic algorithm optimization methods to enrich for low-energy sequences for the individual members of the ensemble. To emphasize certain functional requirements (e.g. forming a binding interface), interactions between and within parts of the structure (e.g. domains) can be reweighted in the scoring function. Results from each backbone structure are merged together to create a single estimate for the tolerated sequence space. We provide an extensive description of the protocol and its parameters, all source code, example analysis scripts and three tests applying this method to finding sequences predicted to stabilize proteins or protein interfaces. The generality of this method makes many other applications possible, for example stabilizing interactions with small molecules, DNA, or RNA. Through the use of within-domain reweighting and/or multistate design, it may also be possible to use this method to find sequences that stabilize particular protein conformations or binding interactions over others.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Predicting the set of sequences that are tolerated by a protein or protein interface, while maintaining a desired function, is useful for characterizing protein interaction specificity and for computationally designing sequence libraries to engineer proteins with new functions. Here we provide a general method, a detailed set of protocols, and several benchmarks and analyses for estimating tolerated sequences using flexible backbone protein design implemented in the Rosetta molecular modeling software suite. The input to the method is at least one experimentally determined three-dimensional protein structure or high-quality model. The starting structure(s) are expanded or refined into a conformational ensemble using Monte Carlo simulations consisting of backrub backbone and side chain moves in Rosetta. The method then uses a combination of simulated annealing and genetic algorithm optimization methods to enrich for low-energy sequences for the individual members of the ensemble. To emphasize certain functional requirements (e.g. forming a binding interface), interactions between and within parts of the structure (e.g. domains) can be reweighted in the scoring function. Results from each backbone structure are merged together to create a single estimate for the tolerated sequence space. 
We provide an extensive description of the protocol and its parameters, all source code, example analysis scripts and three tests applying this method to finding sequences predicted to stabilize proteins or protein interfaces. The generality of this method makes many other applications possible, for example stabilizing interactions with small molecules, DNA, or RNA. Through the use of within-domain reweighting and/or multistate design, it may also be possible to use this method to find sequences that stabilize particular protein conformations or binding interactions over others. |
C A Smith, T Kortemme Structure-Based Prediction of the Peptide Sequence Space Recognized by Natural and Synthetic PDZ Domains Journal Article J Mol Biol, 402 (2), pp. 460-474, 2010.
% Journal article record. The acronym in the title is braced against sentence-casing and the page range uses "--".
% The percent sign inside the abstract field is escaped: if a style writes the abstract into the bibliography output, a bare percent would start a TeX comment and swallow the rest of the line.
@article{Smith:2010:J-Mol-Biol:20654621,
  title     = {Structure-Based Prediction of the Peptide Sequence Space Recognized by Natural and Synthetic {PDZ} Domains},
  author    = {C A Smith and T Kortemme},
  doi       = {10.1016/j.jmb.2010.07.032},
  year      = {2010},
  date      = {2010-09-01},
  journal   = {J Mol Biol},
  volume    = {402},
  number    = {2},
  pages     = {460--474},
  abstract  = {Protein-protein recognition, frequently mediated by members of large families of interaction domains, is one of the cornerstones of biological function. Here, we present a computational, structure-based method to predict the sequence space of peptides recognized by PDZ domains, one of the largest families of recognition proteins. As a test set, we use the considerable amount of recent phage display data that describe the peptide recognition preferences for 169 naturally occurring and engineered PDZ domains. For both wild-type PDZ domains and single point mutants, we find that 70-80\% of the most frequently observed amino acids by phage display are predicted within the top five ranked amino acids. Phage display frequently identified recognition preferences for amino acids different from those present in the original crystal structure. Notably, in about half of these cases, our algorithm correctly captures these preferences, indicating that it can predict mutations that increase binding affinity relative to the starting structure. We also find that we can computationally recapitulate specificity changes upon mutation, a key test for successful forward design of protein-protein interface specificity. Across all evaluated data sets, we find that incorporation backbone sampling improves accuracy substantially, irrespective of using a crystal or NMR structure as the starting conformation. Finally, we report successful prediction of several amino acid specificity changes from blind tests in the DREAM4 peptide recognition domain specificity prediction challenge. Because the foundational methods developed here are structure based, these results suggest that they can be more generally applied to specificity prediction and redesign of other protein-protein interfaces that have structural information but lack phage display data.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Protein-protein recognition, frequently mediated by members of large families of interaction domains, is one of the cornerstones of biological function. Here, we present a computational, structure-based method to predict the sequence space of peptides recognized by PDZ domains, one of the largest families of recognition proteins. As a test set, we use the considerable amount of recent phage display data that describe the peptide recognition preferences for 169 naturally occurring and engineered PDZ domains. For both wild-type PDZ domains and single point mutants, we find that 70-80% of the most frequently observed amino acids by phage display are predicted within the top five ranked amino acids. Phage display frequently identified recognition preferences for amino acids different from those present in the original crystal structure. Notably, in about half of these cases, our algorithm correctly captures these preferences, indicating that it can predict mutations that increase binding affinity relative to the starting structure. We also find that we can computationally recapitulate specificity changes upon mutation, a key test for successful forward design of protein-protein interface specificity. Across all evaluated data sets, we find that incorporation backbone sampling improves accuracy substantially, irrespective of using a crystal or NMR structure as the starting conformation. 
Finally, we report successful prediction of several amino acid specificity changes from blind tests in the DREAM4 peptide recognition domain specificity prediction challenge. Because the foundational methods developed here are structure based, these results suggest that they can be more generally applied to specificity prediction and redesign of other protein-protein interfaces that have structural information but lack phage display data. |
F Lauck, C A Smith, G F Friedland, E L Humphris, T Kortemme RosettaBackrub--a web server for flexible backbone protein structure modeling and design Journal Article Nucleic Acids Res, 38 Suppl , pp. 569-575, 2010.
% Journal article record. The camelCase server name in the title is braced against sentence-casing and the page range uses "--".
% The original combined value "38 Suppl" is split: the numeric volume stays in volume, the supplement designation moves to number (the issue within the volume).
% NOTE(review): pages in this journal's web-server issues usually carry a "W" prefix -- confirm against the DOI before changing the numbers.
@article{Lauck:2010:Nucleic-Acids-Res:20462859,
  title     = {{RosettaBackrub}--a web server for flexible backbone protein structure modeling and design},
  author    = {F Lauck and C A Smith and G F Friedland and E L Humphris and T Kortemme},
  doi       = {10.1093/nar/gkq369},
  year      = {2010},
  date      = {2010-07-01},
  journal   = {Nucleic Acids Res},
  volume    = {38},
  number    = {Suppl},
  pages     = {569--575},
  abstract  = {The RosettaBackrub server (http://kortemmelab.ucsf.edu/backrub) implements the Backrub method, derived from observations of alternative conformations in high-resolution protein crystal structures, for flexible backbone protein modeling. Backrub modeling is applied to three related applications using the Rosetta program for structure prediction and design: (I) modeling of structures of point mutations, (II) generating protein conformational ensembles and designing sequences consistent with these conformations and (III) predicting tolerated sequences at protein-protein interfaces. The three protocols have been validated on experimental data. Starting from a user-provided single input protein structure in PDB format, the server generates near-native conformational ensembles. The predicted conformations and sequences can be used for different applications, such as to guide mutagenesis experiments, for ensemble-docking approaches or to generate sequence libraries for protein design.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
The RosettaBackrub server (http://kortemmelab.ucsf.edu/backrub) implements the Backrub method, derived from observations of alternative conformations in high-resolution protein crystal structures, for flexible backbone protein modeling. 
Backrub modeling is applied to three related applications using the Rosetta program for structure prediction and design: (I) modeling of structures of point mutations, (II) generating protein conformational ensembles and designing sequences consistent with these conformations and (III) predicting tolerated sequences at protein-protein interfaces. The three protocols have been validated on experimental data. Starting from a user-provided single input protein structure in PDB format, the server generates near-native conformational ensembles. The predicted conformations and sequences can be used for different applications, such as to guide mutagenesis experiments, for ensemble-docking approaches or to generate sequence libraries for protein design. |
G D Friedland, A J Linares, C A Smith, T Kortemme A simple model of backbone flexibility improves modeling of side-chain conformational variability Journal Article J Mol Biol, 380 (4), pp. 757-774, 2008. @article{Friedland:2008:J-Mol-Biol:18547586, title = {A simple model of backbone flexibility improves modeling of side-chain conformational variability}, author = {G D Friedland and A J Linares and C A Smith and T Kortemme}, doi = {10.1016/j.jmb.2008.05.006}, year = {2008}, date = {2008-07-01}, journal = {J Mol Biol}, volume = {380}, number = {4}, pages = {757-774}, abstract = {The considerable flexibility of side-chains in folded proteins is important for protein stability and function, and may have a role in mediating allosteric interactions. While sampling side-chain degrees of freedom has been an integral part of several successful computational protein design methods, the predictions of these approaches have not been directly compared to experimental measurements of side-chain motional amplitudes. In addition, protein design methods frequently keep the backbone fixed, an approximation that may substantially limit the ability to accurately model side-chain flexibility. Here, we describe a Monte Carlo approach to modeling side-chain conformational variability and validate our method against a large dataset of methyl relaxation order parameters derived from nuclear magnetic resonance (NMR) experiments (17 proteins and a total of 530 data points). We also evaluate a model of backbone flexibility based on Backrub motions, a type of conformational change frequently observed in ultra-high-resolution X-ray structures that accounts for correlated side-chain backbone movements. The fixed-backbone model performs reasonably well with an overall rmsd between computed and predicted side-chain order parameters of 0.26. Notably, including backbone flexibility leads to significant improvements in modeling side-chain order parameters for ten of the 17 proteins in the set. 
Greater accuracy of the flexible backbone model results from both increases and decreases in side-chain flexibility relative to the fixed-backbone model. This simple flexible-backbone model should be useful for a variety of protein design applications, including improved modeling of protein-protein interactions, design of proteins with desired flexibility or rigidity, and prediction of correlated motions within proteins.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The considerable flexibility of side-chains in folded proteins is important for protein stability and function, and may have a role in mediating allosteric interactions. While sampling side-chain degrees of freedom has been an integral part of several successful computational protein design methods, the predictions of these approaches have not been directly compared to experimental measurements of side-chain motional amplitudes. In addition, protein design methods frequently keep the backbone fixed, an approximation that may substantially limit the ability to accurately model side-chain flexibility. Here, we describe a Monte Carlo approach to modeling side-chain conformational variability and validate our method against a large dataset of methyl relaxation order parameters derived from nuclear magnetic resonance (NMR) experiments (17 proteins and a total of 530 data points). We also evaluate a model of backbone flexibility based on Backrub motions, a type of conformational change frequently observed in ultra-high-resolution X-ray structures that accounts for correlated side-chain backbone movements. The fixed-backbone model performs reasonably well with an overall rmsd between computed and predicted side-chain order parameters of 0.26. Notably, including backbone flexibility leads to significant improvements in modeling side-chain order parameters for ten of the 17 proteins in the set. 
Greater accuracy of the flexible backbone model results from both increases and decreases in side-chain flexibility relative to the fixed-backbone model. This simple flexible-backbone model should be useful for a variety of protein design applications, including improved modeling of protein-protein interactions, design of proteins with desired flexibility or rigidity, and prediction of correlated motions within proteins. |
C A Smith, T Kortemme Backrub-like backbone simulation recapitulates natural protein conformational variability and improves mutant side-chain prediction Journal Article J Mol Biol, 380 (4), pp. 742-756, 2008. @article{Smith:2008:J-Mol-Biol:18547585, title = {Backrub-like backbone simulation recapitulates natural protein conformational variability and improves mutant side-chain prediction}, author = {C A Smith and T Kortemme}, doi = {10.1016/j.jmb.2008.05.023}, year = {2008}, date = {2008-07-01}, journal = {J Mol Biol}, volume = {380}, number = {4}, pages = {742-756}, abstract = {Incorporation of effective backbone sampling into protein simulation and design is an important step in increasing the accuracy of computational protein modeling. Recent analysis of high-resolution crystal structures has suggested a new model, termed backrub, to describe localized, hinge-like alternative backbone and side-chain conformations observed in the crystal lattice. The model involves internal backbone rotations about axes between C-alpha atoms. Based on this observation, we have implemented a backrub-inspired sampling method in the Rosetta structure prediction and design program. We evaluate this model of backbone flexibility using three different tests. First, we show that Rosetta backrub simulations recapitulate the correlation between backbone and side-chain conformations in the high-resolution crystal structures upon which the model was based. As a second test of backrub sampling, we show that backbone flexibility improves the accuracy of predicting point-mutant side-chain conformations over fixed backbone rotameric sampling alone. Finally, we show that backrub sampling of triosephosphate isomerase loop 6 can capture the millisecond/microsecond oscillation between the open and closed states observed in solution. Our results suggest that backrub sampling captures a sizable fraction of localized conformational changes that occur in natural proteins. 
Application of this simple model of backbone motions may significantly improve both protein design and atomistic simulations of localized protein flexibility.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Incorporation of effective backbone sampling into protein simulation and design is an important step in increasing the accuracy of computational protein modeling. Recent analysis of high-resolution crystal structures has suggested a new model, termed backrub, to describe localized, hinge-like alternative backbone and side-chain conformations observed in the crystal lattice. The model involves internal backbone rotations about axes between C-alpha atoms. Based on this observation, we have implemented a backrub-inspired sampling method in the Rosetta structure prediction and design program. We evaluate this model of backbone flexibility using three different tests. First, we show that Rosetta backrub simulations recapitulate the correlation between backbone and side-chain conformations in the high-resolution crystal structures upon which the model was based. As a second test of backrub sampling, we show that backbone flexibility improves the accuracy of predicting point-mutant side-chain conformations over fixed backbone rotameric sampling alone. Finally, we show that backrub sampling of triosephosphate isomerase loop 6 can capture the millisecond/microsecond oscillation between the open and closed states observed in solution. Our results suggest that backrub sampling captures a sizable fraction of localized conformational changes that occur in natural proteins. Application of this simple model of backbone motions may significantly improve both protein design and atomistic simulations of localized protein flexibility. |
George Nicola, Colin A Smith, Ruben Abagyan New method for the assessment of all drug-like pockets across a structural genome Journal Article J Comput Biol, 15 (3), pp. 231-240, 2008. @article{Nicola:2008, title = {New method for the assessment of all drug-like pockets across a structural genome}, author = {George Nicola and Colin A Smith and Ruben Abagyan}, doi = {10.1089/cmb.2007.0178}, year = {2008}, date = {2008-04-01}, journal = {J Comput Biol}, volume = {15}, number = {3}, pages = {231-240}, publisher = {MARY ANN LIEBERT INC}, address = {140 HUGUENOT STREET, 3RD FL, NEW ROCHELLE, NY 10801 USA}, abstract = {With the increasing wealth of structural information available for human pathogens, it is now becoming possible to leverage that information to aid in rational selection of targets for inhibitor discovery. We present a methodology for assessing the drugability of all small-molecule binding pockets in a pathogen. Our approach incorporates accurate pocket identification, sequence conservation with a similar organism, sequence conservation with the host, and structure resolution. This novel method is applied to 21 structures from the malarial parasite Plasmodium falciparum. Based on our survey of the structural genome, we selected enoyl-acyl carrier protein reductase (ENR) as a promising candidate for virtual screening based inhibitor discovery.}, keywords = {}, pubstate = {published}, tppubtype = {article} } With the increasing wealth of structural information available for human pathogens, it is now becoming possible to leverage that information to aid in rational selection of targets for inhibitor discovery. We present a methodology for assessing the drugability of all small-molecule binding pockets in a pathogen. Our approach incorporates accurate pocket identification, sequence conservation with a similar organism, sequence conservation with the host, and structure resolution. 
This novel method is applied to 21 structures from the malarial parasite Plasmodium falciparum. Based on our survey of the structural genome, we selected enoyl-acyl carrier protein reductase (ENR) as a promising candidate for virtual screening based inhibitor discovery. |
Grace O'Maille, Eden P Go, Linh Hoang, Elizabeth J Want, Colin Smith, Paul O'Maille, Anders Nordstrom, Hirotoshi Morita, Chuan Qin, Wilasinee Uritboonthai, Junefredo Apon, Richard Moore, James Garrett, Gary Siuzdak Metabolomics relative quantitation with mass spectrometry using chemical derivatization and isotope labeling Journal Article Spectr-Int J, 22 (5), pp. 327-343, 2008. @article{OMaille:2008, title = {Metabolomics relative quantitation with mass spectrometry using chemical derivatization and isotope labeling}, author = {Grace O'Maille and Eden P Go and Linh Hoang and Elizabeth J Want and Colin Smith and Paul O'Maille and Anders Nordstrom and Hirotoshi Morita and Chuan Qin and Wilasinee Uritboonthai and Junefredo Apon and Richard Moore and James Garrett and Gary Siuzdak}, doi = {10.3233/SPE-2008-0361}, year = {2008}, date = {2008-01-01}, journal = {Spectr-Int J}, volume = {22}, number = {5}, pages = {327-343}, publisher = {IOS PRESS}, address = {NIEUWE HEMWEG 6B, 1013 BG AMSTERDAM, NETHERLANDS}, abstract = {Comprehensive detection and quantitation of metabolites from a biological source constitute the major challenges of current metabolomics research. Two chemical derivatization methodologies, butylation and amination, were applied to human serum for ionization enhancement of a broad spectrum of metabolite classes, including steroids and amino acids. LC-ESI-MS analysis of the derivatized serum samples provided a significant signal elevation across the total ion chromatogram to over a 100-fold increase in ionization efficiency. It was also demonstrated that derivatization combined with isotopically labeled reagents facilitated the relative quantitation of derivatized metabolites from individual as well as pooled samples.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Comprehensive detection and quantitation of metabolites from a biological source constitute the major challenges of current metabolomics research. 
Two chemical derivatization methodologies, butylation and amination, were applied to human serum for ionization enhancement of a broad spectrum of metabolite classes, including steroids and amino acids. LC-ESI-MS analysis of the derivatized serum samples provided a significant signal elevation across the total ion chromatogram to over a 100-fold increase in ionization efficiency. It was also demonstrated that derivatization combined with isotopically labeled reagents facilitated the relative quantitation of derivatized metabolites from individual as well as pooled samples. |
George Nicola, Colin A Smith, Edinson Lucumi, Mack R Kuo, Luchezar Karagyozov, David A Fidock, James C Sacchettini, Ruben Abagyan Discovery of novel inhibitors targeting enoyl-acyl carrier protein reductase in Plasmodium falciparum by structure-based virtual screening Journal Article Biochem Biophys Res Commun, 358 (3), pp. 686-691, 2007. @article{Nicola:2007, title = {Discovery of novel inhibitors targeting enoyl-acyl carrier protein reductase in Plasmodium falciparum by structure-based virtual screening}, author = {George Nicola and Colin A Smith and Edinson Lucumi and Mack R Kuo and Luchezar Karagyozov and David A Fidock and James C Sacchettini and Ruben Abagyan}, doi = {10.1016/j.bbrc.2007.04.113}, year = {2007}, date = {2007-07-01}, journal = {Biochem Biophys Res Commun}, volume = {358}, number = {3}, pages = {686-691}, publisher = {ACADEMIC PRESS INC ELSEVIER SCIENCE}, address = {525 B ST, STE 1900, SAN DIEGO, CA 92101-4495 USA}, abstract = {There is a dire need for novel therapeutics to treat the virulent malarial parasite, Plasmodium falciparum. Recently, the X-ray crystal structure of enoyl-acyl carrier protein reductase (ENR) in complex with triclosan has been determined and provides an opportunity for the rational design of novel inhibitors targeting the active site of ENR. Here, we report the discovery of several compounds by virtual screening and their experimental validation as high potency PfENR inhibitors. (c) 2007 Elsevier Inc. All rights reserved.}, keywords = {}, pubstate = {published}, tppubtype = {article} } There is a dire need for novel therapeutics to treat the virulent malarial parasite, Plasmodium falciparum. Recently, the X-ray crystal structure of enoyl-acyl carrier protein reductase (ENR) in complex with triclosan has been determined and provides an opportunity for the rational design of novel inhibitors targeting the active site of ENR. 
Here, we report the discovery of several compounds by virtual screening and their experimental validation as high potency PfENR inhibitors. (c) 2007 Elsevier Inc. All rights reserved. |
Elizabeth J Want, Colin A Smith, Chuan Qin, K C VanHorne, Gary Siuzdak Phospholipid capture combined with non-linear chromatographic correction for improved serum metabolite profiling Journal Article Metabolomics, 2 (3), pp. 145-154, 2006. @article{Want:2006b, title = {Phospholipid capture combined with non-linear chromatographic correction for improved serum metabolite profiling}, author = {Elizabeth J Want and Colin A Smith and Chuan Qin and K C VanHorne and Gary Siuzdak}, doi = {10.1007/s11306-006-0028-0}, year = {2006}, date = {2006-09-01}, journal = {Metabolomics}, volume = {2}, number = {3}, pages = {145-154}, publisher = {SPRINGER}, address = {233 SPRING STREET, NEW YORK, NY 10013 USA}, abstract = {Serum analysis with LC/MS can yield thousands of potential metabolites. However, in metabolomics, biomarkers of interest will often be of low abundance, and ionization suppression from high abundance endogenous metabolites such as phospholipids may prevent the detection of these metabolites. Here a cerium-modified column and methyl-tert-butyl-ether (MTBE) liquid-liquid extraction were employed to remove phospholipids from serum in order to obtain a more comprehensive metabolite profile. XCMS, an in-house developed data analysis software platform, showed that the intensity of existing endogenous metabolites increased, and that new metabolites were observed. This application of phospholipid capture in combination with XCMS non-linear data processing has enormous potential in metabolite profiling, for biomarker detection and quantitation.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Serum analysis with LC/MS can yield thousands of potential metabolites. However, in metabolomics, biomarkers of interest will often be of low abundance, and ionization suppression from high abundance endogenous metabolites such as phospholipids may prevent the detection of these metabolites. 
Here a cerium-modified column and methyl-tert-butyl-ether (MTBE) liquid-liquid extraction were employed to remove phospholipids from serum in order to obtain a more comprehensive metabolite profile. XCMS, an in-house developed data analysis software platform, showed that the intensity of existing endogenous metabolites increased, and that new metabolites were observed. This application of phospholipid capture in combination with XCMS non-linear data processing has enormous potential in metabolite profiling, for biomarker detection and quantitation. |
C A Smith, E J Want, G O'Maille, R Abagyan, G Siuzdak XCMS: processing mass spectrometry data for metabolite profiling using nonlinear peak alignment, matching, and identification Journal Article Anal Chem, 78 (3), pp. 779-787, 2006. @article{Smith:2006:Anal-Chem:16448051, title = {XCMS: processing mass spectrometry data for metabolite profiling using nonlinear peak alignment, matching, and identification}, author = {C A Smith and E J Want and G O'Maille and R Abagyan and G Siuzdak}, doi = {10.1021/ac051437y}, year = {2006}, date = {2006-02-01}, journal = {Anal Chem}, volume = {78}, number = {3}, pages = {779-787}, abstract = {Metabolite profiling in biomarker discovery, enzyme substrate assignment, drug activity/specificity determination, and basic metabolic research requires new data preprocessing approaches to correlate specific metabolites to their biological origin. Here we introduce an LC/MS-based data analysis approach, XCMS, which incorporates novel nonlinear retention time alignment, matched filtration, peak detection, and peak matching. Without using internal standards, the method dynamically identifies hundreds of endogenous metabolites for use as standards, calculating a nonlinear retention time correction profile for each sample. Following retention time correction, the relative metabolite ion intensities are directly compared to identify changes in specific endogenous metabolites, such as potential biomarkers. The software is demonstrated using data sets from a previously reported enzyme knockout study and a large-scale study of plasma samples. XCMS is freely available under an open-source license at http://metlin.scripps.edu/download/.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Metabolite profiling in biomarker discovery, enzyme substrate assignment, drug activity/specificity determination, and basic metabolic research requires new data preprocessing approaches to correlate specific metabolites to their biological origin. 
Here we introduce an LC/MS-based data analysis approach, XCMS, which incorporates novel nonlinear retention time alignment, matched filtration, peak detection, and peak matching. Without using internal standards, the method dynamically identifies hundreds of endogenous metabolites for use as standards, calculating a nonlinear retention time correction profile for each sample. Following retention time correction, the relative metabolite ion intensities are directly compared to identify changes in specific endogenous metabolites, such as potential biomarkers. The software is demonstrated using data sets from a previously reported enzyme knockout study and a large-scale study of plasma samples. XCMS is freely available under an open-source license at http://metlin.scripps.edu/download/. |
EJ Want, G O'Maille, CA Smith, TR Brandon, W Uritboonthai, C Qin, SA Trauger, G Siuzdak Solvent-dependent metabolite distribution, clustering, and protein extraction for serum profiling with mass spectrometry Journal Article Anal Chem, 78 (3), pp. 743-752, 2006. @article{Want:2006a, title = {Solvent-dependent metabolite distribution, clustering, and protein extraction for serum profiling with mass spectrometry}, author = {EJ Want and G O'Maille and CA Smith and TR Brandon and W Uritboonthai and C Qin and SA Trauger and G Siuzdak}, doi = {10.1021/ac051312t}, year = {2006}, date = {2006-02-01}, journal = {Anal Chem}, volume = {78}, number = {3}, pages = {743-752}, publisher = {AMER CHEMICAL SOC}, address = {1155 16TH ST, NW, WASHINGTON, DC 20036 USA}, abstract = {The aim of metabolite profiling is to monitor all metabolites within a biological sample for applications in basic biochemical research as well as pharmacokinetic studies and biomarker discovery. Here, novel data analysis software, XCMS, was used to monitor all metabolite features detected from an array of serum extraction methods, with application to metabolite profiling using electrospray liquid chromatography/mass spectrometry (ESI-LC/MS). The XCMS software enabled the comparison of methods with regard to reproducibility, the number and type of metabolite features detected, and the similarity of these features between different extraction methods. Extraction efficiency with regard to metabolite feature hydrophobicity was examined through the generation of unique feature density distribution plots, displaying feature distribution along chromatographic time. Hierarchical clustering was performed to highlight similarities in the metabolite features observed between the extraction methods. Protein extraction efficiency was determined using the Bradford assay, and the residual proteins were identified using nano-LC/MS/MS. 
Additionally, the identification of four of the most intensely ionized serum metabolites using FTMS and tandem mass spectrometry was reported. The extraction methods, ranging from organic solvents and acids to heat denaturation, varied widely in both protein removal efficiency and the number of mass spectral features detected. Methanol protein precipitation followed by centrifugation was found to be the most effective, straightforward, and reproducible approach, resulting in serum extracts containing over 2000 detected metabolite features and less than 2% residual protein. Interestingly, the combination of all approaches produced over 10 000 unique metabolite features, a number that is indicative of the complexity of the human metabolome and the potential of metabolomics in biomarker discovery.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The aim of metabolite profiling is to monitor all metabolites within a biological sample for applications in basic biochemical research as well as pharmacokinetic studies and biomarker discovery. Here, novel data analysis software, XCMS, was used to monitor all metabolite features detected from an array of serum extraction methods, with application to metabolite profiling using electrospray liquid chromatography/mass spectrometry (ESI-LC/MS). The XCMS software enabled the comparison of methods with regard to reproducibility, the number and type of metabolite features detected, and the similarity of these features between different extraction methods. Extraction efficiency with regard to metabolite feature hydrophobicity was examined through the generation of unique feature density distribution plots, displaying feature distribution along chromatographic time. Hierarchical clustering was performed to highlight similarities in the metabolite features observed between the extraction methods. 
Protein extraction efficiency was determined using the Bradford assay, and the residual proteins were identified using nano-LC/MS/MS. Additionally, the identification of four of the most intensely ionized serum metabolites using FTMS and tandem mass spectrometry was reported. The extraction methods, ranging from organic solvents and acids to heat denaturation, varied widely in both protein removal efficiency and the number of mass spectral features detected. Methanol protein precipitation followed by centrifugation was found to be the most effective, straightforward, and reproducible approach, resulting in serum extracts containing over 2000 detected metabolite features and less than 2% residual protein. Interestingly, the combination of all approaches produced over 10 000 unique metabolite features, a number that is indicative of the complexity of the human metabolome and the potential of metabolomics in biomarker discovery. |
CA Smith, G O'Maille, EJ Want, C Qin, SA Trauger, TR Brandon, DE Custodio, R Abagyan, G Siuzdak METLIN - A metabolite mass spectral database Journal Article Ther Drug Monit, 27 (6), pp. 747-751, 2005. @article{Smith:2005a, title = {METLIN - A metabolite mass spectral database}, author = {CA Smith and G O'Maille and EJ Want and C Qin and SA Trauger and TR Brandon and DE Custodio and R Abagyan and G Siuzdak}, year = {2005}, date = {2005-12-01}, journal = {Ther Drug Monit}, volume = {27}, number = {6}, pages = {747-751}, publisher = {LIPPINCOTT WILLIAMS \& WILKINS}, address = {530 WALNUT ST, PHILADELPHIA, PA 19106-3261 USA}, abstract = {Endogenous metabolites have gained increasing interest over the past 5 years largely for their implications in diagnostic and pharmaceutical biomarker discovery. METLIN (http://metlin.scripps.edu), a freely accessible web-based data repository, has been developed to assist in a broad array of metabolite research and to facilitate metabolite identification through mass analysis. METLIN includes an annotated list of known metabolite structural information that is easily cross-correlated with its catalogue of high-resolution Fourier transform mass spectrometry (FTMS) spectra, tandem mass spectrometry (MS/MS) spectra, and LC/MS data.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Endogenous metabolites have gained increasing interest over the past 5 years largely for their implications in diagnostic and pharmaceutical biomarker discovery. METLIN (http://metlin.scripps.edu), a freely accessible web-based data repository, has been developed to assist in a broad array of metabolite research and to facilitate metabolite identification through mass analysis. METLIN includes an annotated list of known metabolite structural information that is easily cross-correlated with its catalogue of high-resolution Fourier transform mass spectrometry (FTMS) spectra, tandem mass spectrometry (MS/MS) spectra, and LC/MS data. |
YX Huang, CA Smith, HB Song, BP Morgan, R Abagyan, S Tomlinson Insights into the human CD59 complement binding interface toward engineering new therapeutics Journal Article J Biol Chem, 280 (40), pp. 34073-34079, 2005. @article{Huang:2005, title = {Insights into the human CD59 complement binding interface toward engineering new therapeutics}, author = {YX Huang and CA Smith and HB Song and BP Morgan and R Abagyan and S Tomlinson}, doi = {10.1074/jbc.M504922200}, year = {2005}, date = {2005-10-01}, journal = {J Biol Chem}, volume = {280}, number = {40}, pages = {34073-34079}, publisher = {AMER SOC BIOCHEMISTRY MOLECULAR BIOLOGY INC}, address = {9650 ROCKVILLE PIKE, BETHESDA, MD 20814-3996 USA}, abstract = {CD59 is a 77-amino acid membrane glycoprotein that plays an important role in regulating the terminal pathway of complement by inhibiting formation of the cytolytic membrane attack complex ( MAC or C5b-9). The MAC is formed by the self assembly of C5b, C6, C7, C8, and multiple C9 molecules, with CD59 functioning by binding C5b-8 and C5b-9 in the assembling complex. We performed a scanning alanine mutagenesis screen of residues 16 - 57, a region previously identified to contain the C8/C9 binding interface. We have also created an improved NMR model from previously published data for structural understanding of CD59. Based on the scanning mutagenesis data, refined models, and additional site-specific mutations, we identified a binding interface that is much broader than previously thought. In addition to identifying substitutions that decreased CD59 activity, a surprising number of substitutions significantly enhanced CD59 activity. Because CD59 has significant therapeutic potential for the treatment of various inflammatory conditions, we investigated further the ability to enhance CD59 activity by additional mutagenesis studies. 
Based on the enhanced activity of membrane-bound mutant CD59 molecules, clinically relevant soluble mutant CD59-based proteins were prepared and shown to have up to a 3-fold increase in complement inhibitory activity.}, keywords = {}, pubstate = {published}, tppubtype = {article} } CD59 is a 77-amino acid membrane glycoprotein that plays an important role in regulating the terminal pathway of complement by inhibiting formation of the cytolytic membrane attack complex ( MAC or C5b-9). The MAC is formed by the self assembly of C5b, C6, C7, C8, and multiple C9 molecules, with CD59 functioning by binding C5b-8 and C5b-9 in the assembling complex. We performed a scanning alanine mutagenesis screen of residues 16 - 57, a region previously identified to contain the C8/C9 binding interface. We have also created an improved NMR model from previously published data for structural understanding of CD59. Based on the scanning mutagenesis data, refined models, and additional site-specific mutations, we identified a binding interface that is much broader than previously thought. In addition to identifying substitutions that decreased CD59 activity, a surprising number of substitutions significantly enhanced CD59 activity. Because CD59 has significant therapeutic potential for the treatment of various inflammatory conditions, we investigated further the ability to enhance CD59 activity by additional mutagenesis studies. Based on the enhanced activity of membrane-bound mutant CD59 molecules, clinically relevant soluble mutant CD59-based proteins were prepared and shown to have up to a 3-fold increase in complement inhibitory activity. |
CA Smith, W Huber, RC Gentleman Interactive Outputs Incollection Gentleman, Robert; Carey, Vincent J; Huber, Wolfgang; Irizarry, Rafael A; Dudoit, Sandrine (Ed.): Bioinformatics and Computational Biology Solutions Using R and Bioconductor, pp. 147-160, Springer, New York, 2005. @incollection{Smith:2005b, title = {Interactive Outputs}, author = {CA Smith and W Huber and RC Gentleman}, editor = {Robert Gentleman and Vincent J. Carey and Wolfgang Huber and Rafael A. Irizarry and Sandrine Dudoit}, doi = {10.1007/0-387-29362-0_9}, year = {2005}, date = {2005-01-01}, booktitle = {Bioinformatics and Computational Biology Solutions Using R and Bioconductor}, pages = {147-160}, publisher = {Springer}, address = {New York}, chapter = {9}, series = {Statistics for Biology and Health}, abstract = {In this chapter, we discuss creation of interactive outputs. We focus on the generation of reports, marked up in HTML, that link sets of genes with on-line resources, such as those supplied by the EBI or the NCBI, and which can be shared between different investigators. We discuss both the simple creation of these pages as well as some of the underlying software tools that can be used to construct new and different outputs. Although linked Web pages form the most commonly used outputs, we also consider some other tools that can be used to produce Web graphics that respond to the mouse in different ways.}, keywords = {}, pubstate = {published}, tppubtype = {incollection} } In this chapter, we discuss creation of interactive outputs. We focus on the generation of reports, marked up in HTML, that link sets of genes with on-line resources, such as those supplied by the EBI or the NCBI, and which can be shared between different investigators. We discuss both the simple creation of these pages as well as some of the underlying software tools that can be used to construct new and different outputs. 
Although linked Web pages form the most commonly used outputs, we also consider some other tools that can be used to produce Web graphics that respond to the mouse in different ways. |
CA Smith Browser-based Affymetrix Analysis and Annotation Incollection Gentleman, Robert; Carey, Vincent J; Huber, Wolfgang; Irizarry, Rafael A; Dudoit, Sandrine (Ed.): Bioinformatics and Computational Biology Solutions Using R and Bioconductor, pp. 313-326, Springer, New York, 2005.

@comment{Smith:2005c -- chapter 18 (pp. 313--326) of the edited volume named in booktitle.
  Proper nouns in title and booktitle are brace-protected so sentence-casing styles keep their capitals.
  pubstate and tppubtype are not standard BibTeX fields; presumably metadata written by the
  exporting tool -- confirm before removing.}
@incollection{Smith:2005c,
  author    = {Smith, C. A.},
  title     = {Browser-based {Affymetrix} Analysis and Annotation},
  editor    = {Gentleman, Robert and Carey, Vincent J. and Huber, Wolfgang and Irizarry, Rafael A. and Dudoit, Sandrine},
  booktitle = {Bioinformatics and Computational Biology Solutions Using {R} and {Bioconductor}},
  series    = {Statistics for Biology and Health},
  chapter   = {18},
  pages     = {313--326},
  publisher = {Springer},
  address   = {New York},
  year      = {2005},
  doi       = {10.1007/0-387-29362-0_18},
  abstract  = {webbioc is a CGI-based interface to Bioconductor methods for preprocessing and analyzing Affymetrix data. It wraps up the functionality of a number of Bioconductor packages into a consistent environment that can be deployed for use by small groups or large departments. Without ever seeing a command prompt, it will take the user from raw data to annotated lists of the most significantly differentially expressed genes. It will optionally make use of a back-end computer cluster for batch processing. This chapter will discuss the appropriate circumstances under which webbioc should be deployed and the pros and cons of using it versus the typical command line environment of R. Installation and configuration will be fully covered. Use of the Web-based interface will be visually demonstrated. Finally, we will describe how to expand the interface by adding additional analysis modules.},
  pubstate  = {published},
  tppubtype = {incollection},
}

webbioc is a CGI-based interface to Bioconductor methods for preprocessing and analyzing Affymetrix data. 
It wraps up the functionality of a number of Bioconductor packages into a consistent environment that can be deployed for use by small groups or large departments. Without ever seeing a command prompt, it will take the user from raw data to annotated lists of the most significantly differentially expressed genes. It will optionally make use of a back-end computer cluster for batch processing. This chapter will discuss the appropriate circumstances under which webbioc should be deployed and the pros and cons of using it versus the typical command line environment of R. Installation and configuration will be fully covered. Use of the Web-based interface will be visually demonstrated. Finally, we will describe how to expand the interface by adding additional analysis modules. |
RC Gentleman, VJ Carey, DM Bates, B Bolstad, M Dettling, S Dudoit, B Ellis, L Gautier, YC Ge, J Gentry, K Hornik, T Hothorn, W Huber, S Iacus, R Irizarry, F Leisch, C Li, M Maechler, AJ Rossini, G Sawitzki, C Smith, G Smyth, L Tierney, JYH Yang, JH Zhang Bioconductor: open software development for computational biology and bioinformatics Journal Article Genome Biol, 5 (10), 2004.

@comment{Gentleman:2004 -- journal article, volume 5, issue 10; no page range is recorded for it.
  Names are stored as Last, Initials with spaced initials so every initial survives abbreviation.
  address holds the publisher city only.
  pubstate and tppubtype are not standard BibTeX fields; presumably metadata written by the
  exporting tool -- confirm before removing.}
@article{Gentleman:2004,
  author    = {Gentleman, R. C. and Carey, V. J. and Bates, D. M. and Bolstad, B. and Dettling, M. and Dudoit, S. and Ellis, B. and Gautier, L. and Ge, Y. C. and Gentry, J. and Hornik, K. and Hothorn, T. and Huber, W. and Iacus, S. and Irizarry, R. and Leisch, F. and Li, C. and Maechler, M. and Rossini, A. J. and Sawitzki, G. and Smith, C. and Smyth, G. and Tierney, L. and Yang, J. Y. H. and Zhang, J. H.},
  title     = {{Bioconductor}: open software development for computational biology and bioinformatics},
  journal   = {Genome Biology},
  volume    = {5},
  number    = {10},
  year      = {2004},
  publisher = {BioMed Central Ltd},
  address   = {London},
  doi       = {10.1186/gb-2004-5-10-r80},
  abstract  = {The Bioconductor project is an initiative for the collaborative creation of extensible software for computational biology and bioinformatics. The goals of the project include: fostering collaborative development and widespread use of innovative software, reducing barriers to entry into interdisciplinary scientific research, and promoting the achievement of remote reproducibility of research results. We describe details of our aims and methods, identify current challenges, compare Bioconductor to other open bioinformatics projects, and provide working examples.},
  pubstate  = {published},
  tppubtype = {article},
}

The Bioconductor project is an initiative for the collaborative creation of extensible software for computational biology and bioinformatics. 
The goals of the project include: fostering collaborative development and widespread use of innovative software, reducing barriers to entry into interdisciplinary scientific research, and promoting the achievement of remote reproducibility of research results. We describe details of our aims and methods, identify current challenges, compare Bioconductor to other open bioinformatics projects, and provide working examples. |