Links to Exploration step
Le document en format XML
<record><TEI><teiHeader><fileDesc><titleStmt><title xml:lang="en">eGenomics: Cataloguing Our Complete Genome Collection III</title>
<author><name sortKey="Field, Dawn" sort="Field, Dawn" uniqKey="Field D" first="Dawn" last="Field">Dawn Field</name>
<affiliation><nlm:aff id="a1">Molecular Evolution and Bioinformatics Section, Oxford Centre for Ecology and Hydrology, Mansfield Road, Oxford, Oxfordshire OX1 3SR, UK</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Garrity, George" sort="Garrity, George" uniqKey="Garrity G" first="George" last="Garrity">George Garrity</name>
<affiliation><nlm:aff id="a2">Department of Microbiology and Molecular Genetics, Michigan State University, East Lansing, MI 48824, USA</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Gray, Tanya" sort="Gray, Tanya" uniqKey="Gray T" first="Tanya" last="Gray">Tanya Gray</name>
<affiliation><nlm:aff id="a1">Molecular Evolution and Bioinformatics Section, Oxford Centre for Ecology and Hydrology, Mansfield Road, Oxford, Oxfordshire OX1 3SR, UK</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Selengut, Jeremy" sort="Selengut, Jeremy" uniqKey="Selengut J" first="Jeremy" last="Selengut">Jeremy Selengut</name>
<affiliation><nlm:aff id="a3">The Institute for Genomic Research, 9712 Medical Center Drive, Rockville, MD 20850, USA</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Sterk, Peter" sort="Sterk, Peter" uniqKey="Sterk P" first="Peter" last="Sterk">Peter Sterk</name>
<affiliation><nlm:aff id="a4">European Molecular Biology Laboratory Outstation–The European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Thomson, Nick" sort="Thomson, Nick" uniqKey="Thomson N" first="Nick" last="Thomson">Nick Thomson</name>
<affiliation><nlm:aff id="a5">The Pathogen Sequencing Unit, The Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SA, UK</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Tatusova, Tatiana" sort="Tatusova, Tatiana" uniqKey="Tatusova T" first="Tatiana" last="Tatusova">Tatiana Tatusova</name>
<affiliation><nlm:aff id="a6">National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 8600 Rockville Pike, Bethesda, MD 20894, USA</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Cochrane, Guy" sort="Cochrane, Guy" uniqKey="Cochrane G" first="Guy" last="Cochrane">Guy Cochrane</name>
<affiliation><nlm:aff id="a4">European Molecular Biology Laboratory Outstation–The European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Glockner, Frank Oliver" sort="Glockner, Frank Oliver" uniqKey="Glockner F" first="Frank Oliver" last="Glöckner">Frank Oliver Glöckner</name>
<affiliation><nlm:aff id="a7">Microbial Genomics Group, Max Planck Institute for Marine Microbiology and International University Bremen, 28359 Bremen, Germany</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Kottmann, Renzo" sort="Kottmann, Renzo" uniqKey="Kottmann R" first="Renzo" last="Kottmann">Renzo Kottmann</name>
<affiliation><nlm:aff id="a7">Microbial Genomics Group, Max Planck Institute for Marine Microbiology and International University Bremen, 28359 Bremen, Germany</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Lister, Allyson L" sort="Lister, Allyson L" uniqKey="Lister A" first="Allyson L." last="Lister">Allyson L. Lister</name>
<affiliation><nlm:aff id="a8">CISBAN and School of Computing Science, Newcastle University, Newcastle upon Tyne NE1 7RU, UK</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Tateno, Yoshio" sort="Tateno, Yoshio" uniqKey="Tateno Y" first="Yoshio" last="Tateno">Yoshio Tateno</name>
<affiliation><nlm:aff id="a9">Center for Information Biology and DNA Data Bank of Japan, National Institute of Genetics, Research Organization of Information and Systems, Shizuoka 441-8540, Japan</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Vaughan, Robert" sort="Vaughan, Robert" uniqKey="Vaughan R" first="Robert" last="Vaughan">Robert Vaughan</name>
<affiliation><nlm:aff id="a4">European Molecular Biology Laboratory Outstation–The European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK</nlm:aff>
</affiliation>
</author>
</titleStmt>
<publicationStmt><idno type="wicri:source">PMC</idno>
<idno type="pmc">1872051</idno>
<idno type="url">http://www.ncbi.nlm.nih.gov/pmc/articles/PMC1872051</idno>
<idno type="RBID">PMC:1872051</idno>
<idno type="doi">10.1155/2007/47304</idno>
<idno type="pmid">NONE</idno>
<date when="2007">2007</date>
<idno type="wicri:Area/Pmc/Corpus">000257</idno>
</publicationStmt>
<sourceDesc><biblStruct><analytic><title xml:lang="en" level="a" type="main">eGenomics: Cataloguing Our Complete Genome Collection III</title>
<author><name sortKey="Field, Dawn" sort="Field, Dawn" uniqKey="Field D" first="Dawn" last="Field">Dawn Field</name>
<affiliation><nlm:aff id="a1">Molecular Evolution and Bioinformatics Section, Oxford Centre for Ecology and Hydrology, Mansfield Road, Oxford, Oxfordshire OX1 3SR, UK</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Garrity, George" sort="Garrity, George" uniqKey="Garrity G" first="George" last="Garrity">George Garrity</name>
<affiliation><nlm:aff id="a2">Department of Microbiology and Molecular Genetics, Michigan State University, East Lansing, MI 48824, USA</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Gray, Tanya" sort="Gray, Tanya" uniqKey="Gray T" first="Tanya" last="Gray">Tanya Gray</name>
<affiliation><nlm:aff id="a1">Molecular Evolution and Bioinformatics Section, Oxford Centre for Ecology and Hydrology, Mansfield Road, Oxford, Oxfordshire OX1 3SR, UK</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Selengut, Jeremy" sort="Selengut, Jeremy" uniqKey="Selengut J" first="Jeremy" last="Selengut">Jeremy Selengut</name>
<affiliation><nlm:aff id="a3">The Institute for Genomic Research, 9712 Medical Center Drive, Rockville, MD 20850, USA</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Sterk, Peter" sort="Sterk, Peter" uniqKey="Sterk P" first="Peter" last="Sterk">Peter Sterk</name>
<affiliation><nlm:aff id="a4">European Molecular Biology Laboratory Outstation–The European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Thomson, Nick" sort="Thomson, Nick" uniqKey="Thomson N" first="Nick" last="Thomson">Nick Thomson</name>
<affiliation><nlm:aff id="a5">The Pathogen Sequencing Unit, The Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SA, UK</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Tatusova, Tatiana" sort="Tatusova, Tatiana" uniqKey="Tatusova T" first="Tatiana" last="Tatusova">Tatiana Tatusova</name>
<affiliation><nlm:aff id="a6">National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 8600 Rockville Pike, Bethesda, MD 20894, USA</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Cochrane, Guy" sort="Cochrane, Guy" uniqKey="Cochrane G" first="Guy" last="Cochrane">Guy Cochrane</name>
<affiliation><nlm:aff id="a4">European Molecular Biology Laboratory Outstation–The European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Glockner, Frank Oliver" sort="Glockner, Frank Oliver" uniqKey="Glockner F" first="Frank Oliver" last="Glöckner">Frank Oliver Glöckner</name>
<affiliation><nlm:aff id="a7">Microbial Genomics Group, Max Planck Institute for Marine Microbiology and International University Bremen, 28359 Bremen, Germany</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Kottmann, Renzo" sort="Kottmann, Renzo" uniqKey="Kottmann R" first="Renzo" last="Kottmann">Renzo Kottmann</name>
<affiliation><nlm:aff id="a7">Microbial Genomics Group, Max Planck Institute for Marine Microbiology and International University Bremen, 28359 Bremen, Germany</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Lister, Allyson L" sort="Lister, Allyson L" uniqKey="Lister A" first="Allyson L." last="Lister">Allyson L. Lister</name>
<affiliation><nlm:aff id="a8">CISBAN and School of Computing Science, Newcastle University, Newcastle upon Tyne NE1 7RU, UK</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Tateno, Yoshio" sort="Tateno, Yoshio" uniqKey="Tateno Y" first="Yoshio" last="Tateno">Yoshio Tateno</name>
<affiliation><nlm:aff id="a9">Center for Information Biology and DNA Data Bank of Japan, National Institute of Genetics, Research Organization of Information and Systems, Shizuoka 441-8540, Japan</nlm:aff>
</affiliation>
</author>
<author><name sortKey="Vaughan, Robert" sort="Vaughan, Robert" uniqKey="Vaughan R" first="Robert" last="Vaughan">Robert Vaughan</name>
<affiliation><nlm:aff id="a4">European Molecular Biology Laboratory Outstation–The European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK</nlm:aff>
</affiliation>
</author>
</analytic>
<series><title level="j">Comparative and Functional Genomics</title>
<idno type="ISSN">1531-6912</idno>
<idno type="eISSN">1532-6268</idno>
<imprint><date when="2007">2007</date>
</imprint>
</series>
</biblStruct>
</sourceDesc>
</fileDesc>
<profileDesc><textClass></textClass>
</profileDesc>
</teiHeader>
<front><div type="abstract" xml:lang="en"><p>This meeting report summarizes the proceedings of the “eGenomics: Cataloguing our Complete Genome Collection III” workshop held September 11–13, 2006, at the National Institute for Environmental <italic>e</italic>Science (NIE<italic>e</italic>S), Cambridge, United Kingdom. This 3rd workshop of the Genomic Standards Consortium was divided into two parts. The first half of the three-day workshop was dedicated to reviewing the genomic diversity of our current and future genome and metagenome collection, and exploring linkages to a series of existing projects through formal presentations. The second half was dedicated to strategic discussions. Outcomes of the workshop include a revised “Minimum Information about a Genome Sequence” (MIGS) specification (v1.1), consensus on a variety of features to be added to the Genome Catalogue (GCat), agreement by several researchers to adopt MIGS for imminent genome publications, and an agreement by the EBI and NCBI to input their genome collections into GCat for the purpose of quantifying the amount of optional data already available (e.g., for geographic location coordinates) and working towards a single, global list of all public genomes and metagenomes.</p>
</div>
</front>
<back><div1 type="bibliography"><listBibl><biblStruct><analytic><author><name sortKey="Field, D" uniqKey="Field D">D Field</name>
</author>
<author><name sortKey="Garrity, G" uniqKey="Garrity G">G Garrity</name>
</author>
<author><name sortKey="Morrison, N" uniqKey="Morrison N">N Morrison</name>
</author>
</analytic>
</biblStruct>
<biblStruct><analytic><author><name sortKey="Martiny, Jbh" uniqKey="Martiny J">JBH Martiny</name>
</author>
<author><name sortKey="Field, D" uniqKey="Field D">D Field</name>
</author>
</analytic>
</biblStruct>
<biblStruct><analytic><author><name sortKey="Field, D" uniqKey="Field D">D Field</name>
</author>
<author><name sortKey="Morrison, N" uniqKey="Morrison N">N Morrison</name>
</author>
<author><name sortKey="Selengut, J" uniqKey="Selengut J">J Selengut</name>
</author>
<author><name sortKey="Sterk, P" uniqKey="Sterk P">P Sterk</name>
</author>
</analytic>
</biblStruct>
<biblStruct><analytic><author><name sortKey="Baldauf, Sl" uniqKey="Baldauf S">SL Baldauf</name>
</author>
<author><name sortKey="Bhattacharya, D" uniqKey="Bhattacharya D">D Bhattacharya</name>
</author>
<author><name sortKey="Cockrill, J" uniqKey="Cockrill J">J Cockrill</name>
</author>
<author><name sortKey="Hugenholtz, P" uniqKey="Hugenholtz P">P Hugenholtz</name>
</author>
<author><name sortKey="Pawlowski, J" uniqKey="Pawlowski J">J Pawlowski</name>
</author>
<author><name sortKey="Simpson, Agb" uniqKey="Simpson A">AGB Simpson</name>
</author>
</analytic>
</biblStruct>
<biblStruct><analytic><author><name sortKey="Edwards, Ra" uniqKey="Edwards R">RA Edwards</name>
</author>
<author><name sortKey="Rodriguez Brito, B" uniqKey="Rodriguez Brito B">B Rodriguez-Brito</name>
</author>
<author><name sortKey="Wegley, L" uniqKey="Wegley L">L Wegley</name>
</author>
</analytic>
</biblStruct>
<biblStruct><analytic><author><name sortKey="Edwards, R" uniqKey="Edwards R">R Edwards</name>
</author>
</analytic>
</biblStruct>
<biblStruct><analytic><author><name sortKey="Morrison, N" uniqKey="Morrison N">N Morrison</name>
</author>
<author><name sortKey="Cochrane, G" uniqKey="Cochrane G">G Cochrane</name>
</author>
<author><name sortKey="Faruque, N" uniqKey="Faruque N">N Faruque</name>
</author>
</analytic>
</biblStruct>
<biblStruct><analytic><author><name sortKey="Field, D" uniqKey="Field D">D Field</name>
</author>
<author><name sortKey="Sansone, S A" uniqKey="Sansone S">S-A Sansone</name>
</author>
</analytic>
</biblStruct>
<biblStruct><analytic><author><name sortKey="Garrity, G" uniqKey="Garrity G">G Garrity</name>
</author>
<author><name sortKey="Lyons, C" uniqKey="Lyons C">C Lyons</name>
</author>
</analytic>
</biblStruct>
<biblStruct><analytic><author><name sortKey="Lombardot, T" uniqKey="Lombardot T">T Lombardot</name>
</author>
<author><name sortKey="Kottmann, R" uniqKey="Kottmann R">R Kottmann</name>
</author>
<author><name sortKey="Pfeffer, H" uniqKey="Pfeffer H">H Pfeffer</name>
</author>
</analytic>
</biblStruct>
<biblStruct><analytic><author><name sortKey="Liolios, K" uniqKey="Liolios K">K Liolios</name>
</author>
<author><name sortKey="Tavernarakis, N" uniqKey="Tavernarakis N">N Tavernarakis</name>
</author>
<author><name sortKey="Hugenholtz, P" uniqKey="Hugenholtz P">P Hugenholtz</name>
</author>
<author><name sortKey="Kyrpides, Nc" uniqKey="Kyrpides N">NC Kyrpides</name>
</author>
</analytic>
</biblStruct>
<biblStruct><analytic><author><name sortKey="Whetzel, Pl" uniqKey="Whetzel P">PL Whetzel</name>
</author>
<author><name sortKey="Brinkman, Rr" uniqKey="Brinkman R">RR Brinkman</name>
</author>
<author><name sortKey="Causton, Hc" uniqKey="Causton H">HC Causton</name>
</author>
</analytic>
</biblStruct>
<biblStruct><analytic><author><name sortKey="Angly, Fe" uniqKey="Angly F">FE Angly</name>
</author>
<author><name sortKey="Felts, B" uniqKey="Felts B">B Felts</name>
</author>
<author><name sortKey="Breitbart, M" uniqKey="Breitbart M">M Breitbart</name>
</author>
</analytic>
</biblStruct>
<biblStruct><analytic><author><name sortKey="Glockner, Fo" uniqKey="Glockner F">FO Glöckner</name>
</author>
<author><name sortKey="Kube, M" uniqKey="Kube M">M Kube</name>
</author>
<author><name sortKey="Bauer, M" uniqKey="Bauer M">M Bauer</name>
</author>
</analytic>
</biblStruct>
<biblStruct><analytic><author><name sortKey="Rabus, R" uniqKey="Rabus R">R Rabus</name>
</author>
<author><name sortKey="Ruepp, A" uniqKey="Ruepp A">A Ruepp</name>
</author>
<author><name sortKey="Frickey, T" uniqKey="Frickey T">T Frickey</name>
</author>
</analytic>
</biblStruct>
<biblStruct><analytic><author><name sortKey="Bauer, M" uniqKey="Bauer M">M Bauer</name>
</author>
<author><name sortKey="Kube, M" uniqKey="Kube M">M Kube</name>
</author>
<author><name sortKey="Teeling, H" uniqKey="Teeling H">H Teeling</name>
</author>
</analytic>
</biblStruct>
</listBibl>
</div1>
</back>
</TEI>
<pmc article-type="meeting-report"><pmc-dir>properties open_access</pmc-dir>
<front><journal-meta><journal-id journal-id-type="nlm-ta">Comp Funct Genomics</journal-id>
<journal-id journal-id-type="iso-abbrev">Comp. Funct. Genomics</journal-id>
<journal-id journal-id-type="publisher-id">CFG</journal-id>
<journal-title-group><journal-title>Comparative and Functional Genomics</journal-title>
</journal-title-group>
<issn pub-type="ppub">1531-6912</issn>
<issn pub-type="epub">1532-6268</issn>
<publisher><publisher-name>Hindawi Publishing Corporation</publisher-name>
</publisher>
</journal-meta>
<article-meta><article-id pub-id-type="pmc">1872051</article-id>
<article-id pub-id-type="doi">10.1155/2007/47304</article-id>
<article-categories><subj-group subj-group-type="heading"><subject>Meeting Report</subject>
</subj-group>
</article-categories>
<title-group><article-title>eGenomics: Cataloguing Our Complete Genome Collection III</article-title>
</title-group>
<contrib-group><contrib contrib-type="author"><name><surname>Field</surname>
<given-names>Dawn</given-names>
</name>
<xref ref-type="aff" rid="a1"><sup>1</sup>
</xref>
<xref ref-type="corresp" rid="cor1">*</xref>
</contrib>
<contrib contrib-type="author"><name><surname>Garrity</surname>
<given-names>George</given-names>
</name>
<xref ref-type="aff" rid="a2"><sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author"><name><surname>Gray</surname>
<given-names>Tanya</given-names>
</name>
<xref ref-type="aff" rid="a1"><sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author"><name><surname>Selengut</surname>
<given-names>Jeremy</given-names>
</name>
<xref ref-type="aff" rid="a3"><sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author"><name><surname>Sterk</surname>
<given-names>Peter</given-names>
</name>
<xref ref-type="aff" rid="a4"><sup>4</sup>
</xref>
</contrib>
<contrib contrib-type="author"><name><surname>Thomson</surname>
<given-names>Nick</given-names>
</name>
<xref ref-type="aff" rid="a5"><sup>5</sup>
</xref>
</contrib>
<contrib contrib-type="author"><name><surname>Tatusova</surname>
<given-names>Tatiana</given-names>
</name>
<xref ref-type="aff" rid="a6"><sup>6</sup>
</xref>
</contrib>
<contrib contrib-type="author"><name><surname>Cochrane</surname>
<given-names>Guy</given-names>
</name>
<xref ref-type="aff" rid="a4"><sup>4</sup>
</xref>
</contrib>
<contrib contrib-type="author"><name><surname>Glöckner</surname>
<given-names>Frank Oliver</given-names>
</name>
<xref ref-type="aff" rid="a7"><sup>7</sup>
</xref>
</contrib>
<contrib contrib-type="author"><name><surname>Kottmann</surname>
<given-names>Renzo</given-names>
</name>
<xref ref-type="aff" rid="a7"><sup>7</sup>
</xref>
</contrib>
<contrib contrib-type="author"><name><surname>Lister</surname>
<given-names>Allyson L.</given-names>
</name>
<xref ref-type="aff" rid="a8"><sup>8</sup>
</xref>
</contrib>
<contrib contrib-type="author"><name><surname>Tateno</surname>
<given-names>Yoshio</given-names>
</name>
<xref ref-type="aff" rid="a9"><sup>9</sup>
</xref>
</contrib>
<contrib contrib-type="author"><name><surname>Vaughan</surname>
<given-names>Robert</given-names>
</name>
<xref ref-type="aff" rid="a4"><sup>4</sup>
</xref>
</contrib>
</contrib-group>
<aff id="a1"><sup>1</sup>
Molecular Evolution and Bioinformatics Section, Oxford Centre for Ecology and Hydrology, Mansfield Road, Oxford, Oxfordshire OX1 3SR, UK</aff>
<aff id="a2"><sup>2</sup>
Department of Microbiology and Molecular Genetics, Michigan State University, East Lansing, MI 48824, USA</aff>
<aff id="a3"><sup>3</sup>
The Institute for Genomic Research, 9712 Medical Center Drive, Rockville, MD 20850, USA</aff>
<aff id="a4"><sup>4</sup>
European Molecular Biology Laboratory Outstation–The European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK</aff>
<aff id="a5"><sup>5</sup>
The Pathogen Sequencing Unit, The Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SA, UK</aff>
<aff id="a6"><sup>6</sup>
National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 8600 Rockville Pike, Bethesda, MD 20894, USA</aff>
<aff id="a7"><sup>7</sup>
Microbial Genomics Group, Max Planck Institute for Marine Microbiology and International University Bremen, 28359 Bremen, Germany</aff>
<aff id="a8"><sup>8</sup>
CISBAN and School of Computing Science, Newcastle University, Newcastle upon Tyne NE1 7RU, UK</aff>
<aff id="a9"><sup>9</sup>
Center for Information Biology and DNA Data Bank of Japan, National Institute of Genetics, Research Organization of Information and Systems, Shizuoka 441-8540, Japan</aff>
<author-notes><corresp id="cor1">*Dawn Field: <email>dfield@ceh.ac.uk</email>
</corresp>
<fn fn-type="other"><p>Recommended by Stephen Oliver</p>
</fn>
</author-notes>
<pub-date pub-type="ppub"><year>2007</year>
</pub-date>
<pub-date pub-type="epub"><day>30</day>
<month>4</month>
<year>2007</year>
</pub-date>
<volume>2007</volume>
<elocation-id>47304</elocation-id>
<history><date date-type="received"><day>28</day>
<month>12</month>
<year>2006</year>
</date>
<date date-type="accepted"><day>28</day>
<month>12</month>
<year>2006</year>
</date>
</history>
<permissions><copyright-statement>Copyright © 2007 Dawn Field et al.</copyright-statement>
<copyright-year>2007</copyright-year>
<license license-type="open-access"><license-p>This is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
</license>
</permissions>
<abstract><p>This meeting report summarizes the proceedings of the “eGenomics: Cataloguing our Complete Genome Collection III” workshop held September 11–13, 2006, at the National Institute for Environmental <italic>e</italic>Science (NIE<italic>e</italic>S), Cambridge, United Kingdom. This 3rd workshop of the Genomic Standards Consortium was divided into two parts. The first half of the three-day workshop was dedicated to reviewing the genomic diversity of our current and future genome and metagenome collection, and exploring linkages to a series of existing projects through formal presentations. The second half was dedicated to strategic discussions. Outcomes of the workshop include a revised “Minimum Information about a Genome Sequence” (MIGS) specification (v1.1), consensus on a variety of features to be added to the Genome Catalogue (GCat), agreement by several researchers to adopt MIGS for imminent genome publications, and an agreement by the EBI and NCBI to input their genome collections into GCat for the purpose of quantifying the amount of optional data already available (e.g., for geographic location coordinates) and working towards a single, global list of all public genomes and metagenomes.</p>
</abstract>
</article-meta>
</front>
<body><sec id="sec1"><title>1. INTRODUCTION</title>
<p>The Genomic Standards Consortium (GSC) is an initiative working towards richer
descriptions of our collection of genomes and metagenomes (further information about the Genomic Standards Consortium can be found at
<ext-link ext-link-type="uri" xlink:href="http://gensc.sf.net">http://gensc.sf.net</ext-link>
) [<xref ref-type="bibr" rid="B1">1</xref>
]. Established in September 2005, the goal of this international community is to promote mechanisms standardizing the description of genomes and the exchange and integration of genomic data.
Genomic sequencing projects are being completed at a rapid pace that will only
increase as the application of ultra-high-throughput methods becomes commonplace. The primary aim of developing a new genomic standard is to ensure that those researchers generating genomes contribute to an increase in the quality and quantity of
metadata, so that interpretation and analyses of the genome
collection can be carried out in a comprehensive and unhindered
manner, especially from an ecological and environmental
perspective [<xref ref-type="bibr" rid="B2">2</xref>
]. More background information about the GSC can be found at its website <ext-link ext-link-type="uri" xlink:href="http://gensc.sf.net">http://gensc.sf.net</ext-link>
.</p>
<p>The 3rd workshop was organized by Dawn Field (Oxford Centre for
Ecology and Hydrology) and Tatiana Tatusova (National Center for
Biotechnology Information) and took place at the National
Institute for Environmental <italic>e</italic>Science (NIE<italic>e</italic>S) in Cambridge, England, on 11–13 September, 2006. Participants included
developers of community-based standards, computer scientists,
researchers building genomic databases and conducting large-scale
comparative genomic analyses, and biologists from various
disciplines who are applying genomic data in their own settings.
These participants included representatives of major sequence databases
(DDBJ/EMBL/NCBI) and sequencing centres (JGI/Sanger/TIGR), a
combination which proved essential for building the future roadmap
for the GSC. The workshop built upon the previous two workshops,
the first of which [<xref ref-type="bibr" rid="B1">1</xref>
] led to the formation of the GSC and the second of which [<xref ref-type="bibr" rid="B3">3</xref>
] aided its integration
with the wider “OMICS” standardization community.</p>
<p>The workshop began with an introduction from the organizers. Dawn
Field (Oxford Centre for Ecology and Hydrology) welcomed returning
and new participants and emphasized the need to place GSC
activities within the context of wider international
standardization activities, many of which were represented by
speakers at this meeting. Tatiana Tatusova (NCBI Entrez Genomes)
further set the context for the event by relating her memories of
the phenomenal growth in the number of genomes over the past 10
years. She also underscored the need to work with the wider
community, highlighting the recent ASM/NCBI Workshop on Microbial
Genome Annotation, Washington, DC, USA and the National Academy of
Sciences study of metagenomics as two examples of recent allied initiatives.</p>
</sec>
<sec id="sec2"><title>2. SESSION I: OVERVIEW OF OUR CURRENT AND
FUTURE GENOME COLLECTION</title>
<p>As Dave Ussery (Technical University Denmark), the session chair,
stated in his introduction, the first session was designed to
“remind everyone of the problem.” Sandie Baldauf (University of
York) kicked off the meeting with an overview of eukaryotic
diversity by reviewing current understanding of the eukaryotic
tree of life [<xref ref-type="bibr" rid="B4">4</xref>
]. For each of the eight major lineages of
eukaryotes, she described the salient features of representative
species and presented an estimate of the number of finished and
future genomes that would be available for specific taxa. To date,
animals and fungi remain the best sampled taxa by far, while the majority of eukaryotic lineages are represented only by an EST project, or not at all.
Single-celled eukaryotes were highlighted as fascinating, not only
because of their unusual molecular biology (e.g., ciliates have
massively scrambled genes, euglenoid plastid genomes have
twintrons (introns within introns) and trypanosomes have massive
RNA editing of mitochondrial transcripts), but also because of
their intriguing biological features (e.g., dinoflagellates cause
various types of toxic shellfish poisoning and produce the most
potent toxins known to science). Eukaryotic diversity will remain
under-sampled for the near future, but numbers of known species
are expected to increase rapidly. Eukaryotic microbial genomics is
only now beginning its exponential growth phase just as bacterial
microbial genomics did 10 years ago. Difficulties arise in the
selection of eukaryotic genomes as some, like many protists, have
extraordinarily large genomes and include large quantities of repetitive DNA.</p>
<p>Rob Edwards (San Diego State University) started his talk with
slides of a sampling trip to Christmas Island, explaining this was
the reason he missed the first GSC workshop. Rob proceeded to
describe a range of metagenomic data sets from a variety of
environments that have been generated with 454 pyrosequencing
technology. In total, Rob has collected information from 71
libraries (12 from collaborators), 2 of which were published, and
12 of which were in the INSDC databases at the time of the
workshop. He remarked that while the rate of sequencing has
increased tremendously, the average read length has not and is
currently 103 bp. Short-read length and massive
amounts of data continue to make the informatics of 454 data sets
challenging. However, Rob showed several examples where such data
is providing insights into the genes and functions of organisms
from a range of habitats (see, e.g., [<xref ref-type="bibr" rid="B5">5</xref>
]). With the
growth in environmental metagenomics projects, he stressed the
importance of including global positioning system (GPS)
coordinates (latitude and longitude) for each sample. These are
critical fields, already found in the MIGS specification and
supported by the optional “/lat_lon”
qualifier in INSDC files. Submission to public databases is in progress, and all datasets are available from
<ext-link ext-link-type="uri" xlink:href="http://scums.sdsu.edu/">http://scums.sdsu.edu/</ext-link>
. For further reading, Rob has authored a white paper on random community genomics [<xref ref-type="bibr" rid="B6">6</xref>
].</p>
<p>George Kowalchuk (Netherlands Institute of Ecology) discussed the
Dutch Ecogenomics program
(<ext-link ext-link-type="uri" xlink:href="http://www.ecogenomics.nl">http://www.ecogenomics.nl</ext-link>), a cooperative
effort of institutions and companies financed by natural gas tax
revenues and overseen by the Netherlands Genomics Initiative
(<ext-link ext-link-type="uri" xlink:href="http://www.genomics.nl">http://www.genomics.nl</ext-link>
). Molecular methods in
microbial ecology of soils are starting to answer the simple
question of “what is there?” and it is proposed that integrated
(meta-)genomics approaches will start to provide a greater
understanding of the more important question of “what are they
doing?” The major themes of the program are bioremediation,
ecological insurance, ecotoxicology and disease suppression
(health interactions), all of which are brought together via
overarching bioinformatics and technological platforms. He
stressed that there is a growing need to work on a “microbially
relevant” scale to pick apart the biology of complex communities
and to do so, his group is currently using targeted metagenomic
approaches and sequencing of key taxa within characterized
communities. Such studies will help characterize the normal
operating range of life support functions via an integrated
ecogenomics approach and the study of the interactions of internal
and external stress factors.</p>
<p>Paul Gilna (University of California, San Diego) gave an
introductory talk about a new project to build a community
resource for microbial ecologists who use metagenomics to study
natural diversity. The recently launched “Community
Cyberinfrastructure for Advanced Marine Microbial Ecology Research
and Analysis” project, or CAMERA, has a five-year grant of
$24.5 million from the Moore Foundation to build the computational
infrastructure required for large-scale analyses of metagenomic
data sets, with special emphasis on the global ocean survey (GOS)
samples from the Sorcerer II voyage. In addition to sequences, he
pointed out that each sample site could be linked to vast
quantities of other data, including terabytes of satellite data.
Metadata that has been captured for the GOS samples includes
information on the site, sampling, and experimental parameters
(i.e., filter applied to separate different size organisms prior
to sequencing or insert size). He stressed the need to learn from
history and to remember that the growth of databases in the coming
years is not linear. CAMERA intends to use next generation
computational infrastructure including tiled-wall
videoconferencing rooms, the lambda rail (10 GB network), and
the TeraGrid (1000s of CPUs) to provide access to data for
metagenomic researchers across the US and beyond.</p>
<p>The clear theme to emerge from the opening session was a sense of
the vast number of genomes and metagenomes that will be available
in the near future, the potential this technology offers to better
understand the natural world, and the wide range of technological
advances that will be derived from these efforts. There was a
general feeling that the global genomics initiative was comparable
to the space race of the 20th century and the overall social and
economic benefits would be as great or greater. As such, it sets
the stage for a further set of presentations on how the
international community can ensure that this data can be dealt
with and used at its full potential.</p>
</sec>
<sec id="sec3"><title>3. SESSION II: DATABASES AND METADATA CAPTURE
AND EXCHANGE EFFORTS</title>
<p>George Garrity (Michigan State University) chaired the next
session on international metadata capture and exchange efforts.
Tatiana Tatusova (NCBI) spoke on the sequencing project registry
and how information about genome sequencing projects will be
exchanged between collaborators using a web services protocol.
Persistent identifiers for genomes and genes are part of the
essential infrastructure for the future organization of the
complete genome collection [<xref ref-type="bibr" rid="B7">7</xref>
]. After presenting on the annotation of the complete <italic>E. coli</italic>
K-12 genome and
genomic resources at DDBJ, Yoshio Tateno (DDBJ) spoke about the
systematic evaluation and classification of the predicted proteins
in the complete bacterial genomes in the INSDC. In the Gene Trek
in Prokaryote Space (GTPS) project, proteins in the bacterial
genomes have first been predicted using Glimmer and RBSfinder, and
then evaluated and classified by BLASTP and InterPro into six
grades. The predicted proteins have then been further compared
with all genes in the bacterial division of the INSDC. The results
of the comparison were also used for the evaluation and
classification. Among all predicted proteins (1,254,150), 556,815
were evaluated as currently reliable ones. The methods and results
of GTPS are presented at <ext-link ext-link-type="uri" xlink:href="http://gtps.ddbj.nig.ac.jp">http://gtps.ddbj.nig.ac.jp</ext-link>
.</p>
<p>Natalia Maltsev (Argonne National Laboratory) was present at the
first GSC workshop but could not attend this meeting; therefore
Dawn Field presented the Maltsev lab's new project to make genomic
annotations freely available in GFF3 format. The repository can be
found at <ext-link ext-link-type="ftp" xlink:href="ftp://ftp.mcs.anl.gov/pub/compbio/PUMA2/gff/gff_files">ftp://ftp.mcs.anl.gov/pub/compbio/PUMA2/gff/gff_files</ext-link>
.
A recurring theme throughout the workshop was the strong desire
of members of the GSC to see downstream analyses held in various
databases seamlessly integrated with INSDC files to produce an
integrated source of information about genomes. The group agreed
that GFF3 was a viable approach that should be supported but that
a significant amount of community consensus-building would have to
precede such activities as it is possible to create GFF-compliant
files that are not easily integrated because, for example, they
use different sets of features or optional fields.</p>
<p>At the end of this session Peter Sterk (EBI), Dawn Field (CEH
Oxford), and Tanya Gray (CEH Oxford) presented an overview of the
current status of the MIGS specification, its implementation as an
XML schema, and a demonstration of the alpha release of the Genome
Catalogue (GCat) software. This introduction was aimed at
providing a background for discussions on Day 2. Progress since
the last two workshops has included the following.
<list list-type="roman-lower"><list-item><p>Launch of the GSC website: <ext-link ext-link-type="uri" xlink:href="http://gensc.sf.net">http://gensc.sf.net</ext-link>
.</p>
</list-item>
<list-item><p>The publication of a special issue of the journal
<italic>OMICS</italic>
: a journal of integrative biology [<xref ref-type="bibr" rid="B8">8</xref>
], which included contributions by GSC members and the meeting report from
the 2nd GSC workshop [<xref ref-type="bibr" rid="B3">3</xref>
].</p>
</list-item>
<list-item><p>The drafting of MIGS 1.0 checklist and implementation as an XML schema
(with the initiation of suitable controlled vocabularies).</p>
</list-item>
<list-item><p>Alpha release of the Genome Catalogue (GCat) software.</p>
</list-item>
</list>
</p>
<p>In brief, the current version of MIGS that emerged from the first two
GSC workshops has now been implemented as an XML schema for
the purpose of discussing the information to be captured. The GCat
software has been developed to provide a web interface that is
generated “on-the-fly” from an underlying XML schema. GCat
is designed to have a low development overhead which makes it
especially useful in the short term while the MIGS specification
is in flux. The benefit of this early implementation is that the
GSC can support both the discussion of MIGS with case study
genomes and the collection of MIGS-compliant genome reports. GCat
has been developed in collaboration with the GSC implementation working group.</p>
</sec>
<sec id="sec4"><title>4. SESSION III: ALLIED PROJECTS AND
ONTOLOGY DEVELOPMENT</title>
<p>On the second day of the workshop, the focus shifted to allied
projects that are already leading the way in the area of the
standardization and integration of biological information. Dawn
Field chaired a session on a series of such projects, which had
all been selected for their immediate relevance to the GSC and its
future aims. George Garrity (Michigan State University) presented
the NamesforLife (N4L) (<ext-link ext-link-type="uri" xlink:href="http://www.names4life.com">http://www.names4life.com</ext-link>
)
project [<xref ref-type="bibr" rid="B9">9</xref>
], a prototype of which is accessible via the DOI resolver (<ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1601/tx.0">http://dx.doi.org/10.1601/tx.0</ext-link>
). This prototype aims to disambiguate and future-proof biological nomenclature by combating the knowledge bleed that occurs when
information dispersed in the scientific literature and databases
is no longer accessible because key search terms (names) and
concepts (taxa) have changed over time. The N4L technology is
based on a semantic resolution service that couples Digital Object
Identifiers (DOIs) with an ontology that expresses nomenclatural
acts and taxonomic concepts as a collection of XML information
objects that are persistently addressable and resolve
nomenclatural acts in a contemporaneous manner. A key benefit of
using DOIs is the ease of integration with the published
literature, databases, and other electronic sources of information
that have already adopted this standard.</p>
<p>George also spoke briefly about a related initiative, led by Rick
Stevens (Argonne National Laboratory) and Eddy Rubin (Joint
Genome Institute) to produce draft genome sequences for all of the
taxonomic-type strains of prokaryotes. The project would take
approximately five years to complete, provide much needed reference
genomes that are essential for correct assembly of metagenomes,
fill existing phylogenetic gaps, and provide a foundation for
meaningful linkage to vast amounts of data, information, and
knowledge about these organisms that would significantly enhance
inference. The obvious benefits of the proposal were immediately
seized upon by participants.</p>
<p>Chris Taylor (European Bioinformatics Institute) described the new
MIBBI project, an initiative aimed at bringing the MIxxx
community of “Minimum Information” checklist developers together
to create a unified source of “OMICS” checklists
(<ext-link ext-link-type="uri" xlink:href="http://mibbi.sf.net">http://mibbi.sf.net</ext-link>
). The future goal of the project is to
formulate an MIBBI Foundry in which participants will commit
themselves to the integration of the ever-growing list of
checklists such that the community can work towards multiomic
standards. MIBBI has been driven by the Protein Standards
Initiative (PSI), the Reporting Structures for Biological
Investigations (RSBI), and the GSC. It represents a valuable
opportunity for the GSC to work more closely with a wide range of
standardization activities in the “OMICS” and allied sciences.</p>
<p>The next three talks focused on international efforts at ontology
development. Michael Ashburner (University of Cambridge) gave the
history of ontology work that has arisen from the development of
the Gene Ontology (GO). From the beginning, one “problem” with
GO was that it contained many other implicit ontologies—chemical
compounds, for example. Over time, an increasing number of
ontologies appeared, and GO developers became concerned that each
was being developed independently. The GO developers essentially
wanted a one-stop-shop, and established the Open Biomedical
Ontologies (OBO) Library as a sourceforge site
(<ext-link ext-link-type="uri" xlink:href="http://obo.sf.net">http://obo.sf.net</ext-link>
), encouraging colleagues to submit their ontologies to the collection. Now the
number of registered ontologies has increased to more than fifty
and OBO will be taken over by the recently founded NIH-funded
National Center for Biomedical Ontologies (NCBO). Through this
funding it will be possible to add more functions and services to
OBO. Within a year, compound terms like “myoblast fusion” should
be deconvoluted by explicitly referencing orthogonal ontologies
(in this case the cell-type ontology). Finally, the OBO-Foundry is
an effort to propagate best practices and to develop truly orthogonal ontologies.</p>
<p>Trish Whetzel (University of Pennsylvania) presented an overview
of the ontology for biomedical investigations (OBI, previously
known as the ontology for functional genomics investigation, or
FuGO). OBI aims to provide an ontology for the unambiguous
description of the components of biomedical (biological)
investigations including the design, protocols and
instrumentation, material, data, and types of analyses used. The
application of this ontology to the annotation of a wide range of
investigations would allow consistent annotation of data across
technological and biological domains, thus enabling powerful
concept-driven queries over the data. She presented an overview of
which parts of the MIGS specification could be placed within OBI.</p>
<p>Additionally, phenotypic descriptions in MIGS could use the newly
established phenotype and trait ontology (PATO), described by Suzi
Lewis (Berkeley). The development of this ontology has been driven
by the need to help the biomedical community describe the
phenotypes associated with specific genes in different taxa, but
is now becoming wider in scope due to interest from a variety of
communities. Both OBI and PATO are part of the OBO Foundry, which
aims to provide a unified set of ontologies that can explicitly
describe organisms and their molecules, phenotypes, and traits.
The day when the semantic resolution espoused by OBO becomes
possible is drawing ever nearer.</p>
<p>The last speaker in the session was Frank Oliver Glöckner (Max
Planck Institute for Marine Microbiology) who discussed the need
to place sequences into their proper environmental context (e.g.,
marine, terrestrial, symbiotic). He suggested that the exact location
(GPS), depth (altitude), and time (x, y, z, t) of any sample be
recorded in any molecular field study. This geospatial information
can then be used as a universal anchor, allowing sequence data to be interpreted
in the context of prevailing biodiversity and habitat parameters.
It will also allow supplementing the on-site information with
dynamic data layers from global monitoring systems leading to an
integrated ecosystem assessment.</p>
<p>He introduced the International Census of Marine Microbes (ICoMM)
initiative (<ext-link ext-link-type="uri" xlink:href="http://icomm.mbl.edu">http://icomm.mbl.edu</ext-link>
) as an additional source of geo-referenced data for microbial diversity, detailed the Metafunctions project
(<ext-link ext-link-type="uri" xlink:href="http://www.metafunctions.org">www.metafunctions.org</ext-link>
) that integrates genomic information with habitat parameters, and described the design and use of the Megx.net database [<xref ref-type="bibr" rid="B10">10</xref>
]. Furthermore, he introduced “Minimum Information about a Metagenomic Sequence” (MIMS) as an integrated extension of MIGS
(<ext-link ext-link-type="uri" xlink:href="http://gensc.sf.net">http://gensc.sf.net</ext-link>
). In addition to the core information captured in MIGS on latitude, longitude, depth (altitude), time, and date of sampling, MIMS would capture a more extensive list of habitat parameters that provide a rich set of contextual data for the sake
of hypothesis generation and testing as well as ecosystems biology.</p>
<p>In the absence of Nikos Kyrpides (Joint Genome Institute), Dawn
Field briefly demonstrated the Genomes Online Database (GOLD v2.0)
[<xref ref-type="bibr" rid="B11">11</xref>
], in particular pointing out the new search engine and the inclusion of the descriptors phenotype, ecotype, disease, project
relevance, and availability, together with their controlled vocabularies.
Nikos reports that while many authors submit data directly to
GOLD, he still curates a large amount of data and is keen to have
community input. The group expressed interest in having these
controlled vocabularies made available to the wider community. It
also underscored the value of GOLD as an authoritative genomic
database with an extensive user community that should be tightly
integrated into any future GSC strategy.</p>
</sec>
<sec id="sec5"><title>5. SESSION IV: GROUP DISCUSSION OF THE MIGS SPECIFICATION, THE GENOME
CATALOGUE, AND FUTURE TERM CAPTURE ACTIVITIES</title>
<p>This session marked the shift in the workshop from formal
presentations to group discussion. All participants moved to a
computer room where each had access to a computer for the purpose
of evaluating the GSC website and the Genome Catalogue. The
session was led by Dawn Field and Jeremy Selengut (TIGR) and started with
a discussion of the MIGS specification. In particular, the group
focused on fields which were candidates for removal from the
current specification, which helped the group to better define the
general scope of the specification. As a result, the GSC agreed
that all fields must meet the following criteria:
<list list-type="roman-lower"><list-item><p>to be an <italic>appropriate extension</italic>
of existing INSDC qualifiers
and information collected in the INSDC Project Metadata database;</p>
</list-item>
<list-item><p>to consist of <italic>objective facts</italic>
about
genomic investigations (information that, ideally, the generators
of a genome can best provide, but this does not exclude input by
relevant experts);</p>
</list-item>
<list-item><p>to contain <italic>specific information</italic>
about the genome sequenced,
while general information (e.g., about a species) should be held in authoritative databases;</p>
</list-item>
<list-item><p>to include <italic>clearly defined</italic>
pieces of information
using values selected from controlled vocabularies.</p>
</list-item>
</list>
</p>
<p>It was clear from initial discussions that each part of the MIGS
specification was of varying importance to each researcher. To get
a good overview of the importance of each field in the
specification, a lightning round vote was taken for all fields in
the specification, and the number of votes recorded. It was found
that a few clear cases could be made for dropping or compressing
fields by using them as controlled vocabulary terms in other
fields of a more general nature. A complete list of modifications
used to produce MIGS v1.1 following this workshop can be found in
the GSC Wiki under “MIGS Change Log”
(<ext-link ext-link-type="uri" xlink:href="http://gensc.sf.net">http://gensc.sf.net</ext-link>
).</p>
<p>The discussions then shifted to the issues surrounding the use of
the Genome Catalogue by the GSC and the development of future
content. Jeremy Selengut talked about how it would be
possible, using an intelligent interface, to step users through
the input of data far more easily. By presenting more general
questions first, users could be guided by relevant,
context-dependent input forms. For example, users who selected
“draft” genome would then be prompted to fill in information for
“estimated size of genome” while those who selected “complete” genome would not. Similarly, submitters of metagenomic data would not be burdened with questions only relevant to single-isolate
studies and vice-versa.</p>
<p>Rob Edwards (SDSU) then presented an excellent case study for the
GSC by relating his experiences with metadata capture for his
collection of metagenomic libraries. This collection of data sets
makes an excellent case study. His take-home message is that
researchers cannot be expected to comply with standards of
annotation that are presented <italic>post hoc</italic>
. Rather, the best
chance of gaining compliance is to have such standards at the
start of experimental work. Rob also stressed that, as a potential
adopter of such a system, he would not be willing to enter data
two or more times. He emphasized the need for having no redundancy
in the submission procedure developed for users (e.g., to INSDC
and GCat), which would require a tight linkage between submission forms.</p>
<p>The group followed this with a discussion of the offer made prior
to the workshop by EMBL participants Guy Cochrane and Bob Vaughan
to enter the EMBL genomes into GCat for the purpose of generating
useful content which might encourage authors to submit further
information. Tatiana Tatusova (NCBI) offered to do the same for
the NCBI genome collection. It was agreed that doing so would
allow the GSC to quantify the amount of optional information
(e.g., lat_lon) that is already available in INSDC fields and
make it possible to work together towards a single, global list of
genomes and metagenomes in the public domain.</p>
<p>Finally, the group briefly discussed the capture of terms in
genome reports and agreed to continue work towards the posting of
controlled vocabularies already in use by the community to the GSC
website. All terms used to complete MIGS-compliant genome reports
will be submitted by default to OBI [<xref ref-type="bibr" rid="B12">12</xref>
] unless a more relevant ontology already exists.</p>
</sec>
<sec id="sec6"><title>6. DAY 3: ROADMAP AND WRAP-UP DISCUSSIONS</title>
<p>To start the day, Peter Sterk led a panel discussion with members
of the INSDC. The INSDC was represented by Bob Vaughan and Guy
Cochrane of EMBL, Tatiana Tatusova of the NCBI, and Yoshio Tateno
of the DDBJ. The INSDC has a long history of describing nucleotide
sequences and is now dedicating substantial efforts to building
custom solutions for managing genomic data [<xref ref-type="bibr" rid="B7">7</xref>
]. Guy Cochrane began the session by giving an introduction to the INSDC and
outlined how the collaborators come together each May to hold an
annual meeting in which formal proposals for changes to INSDC
policy can be considered. It was agreed that Guy, through EMBL, would
take forward an agenda item to present the MIGS specification at
the May 2007 meeting (it had already been briefly introduced in
May 2006) and report back to the GSC.</p>
<p>The main issue addressed in this panel session was that of the
“MIGS-to-INSDC” mapping, which provides a defined way for
information in MIGS to be formatted for inclusion in
EMBL/DDBJ/GenBank documents. Developed by Bob and Guy and approved
by the INSDC, this mapping places each MIGS field into the
official INSDC feature table (see
<ext-link ext-link-type="uri" xlink:href="http://www.ebi.ac.uk/embl/WebFeat/index.html">http://www.ebi.ac.uk/embl/WebFeat/index.html</ext-link>
).
The most frequently used optional qualifier in the mapping is
/isolation_source. When many fields in MIGS go into a single
qualifier, they will be written out as modifiers of feature qualifiers (e.g.,
/isolation_source=“altitude: 1500 M” or
/note=“ploidy level: tetraploid”). It was further
discussed that any field not already mapped into an INSDC
qualifier could be placed into community-regulated structured
comments (using the convention of tag-value pairs) by the
submitters of the original sequences. Guy Cochrane also raised the
issue of some MIGS fields becoming a formal part of the INSDC
optional source qualifiers. He suggested that EMBL would take
forward a proposal to add “health/disease status of host” to the
next INSDC Collaborators meeting in May 2007.</p>
</sec>
<sec id="sec7"><title>7. THE GSC ROADMAP</title>
<p>The final session of the meeting was moderated by the GSC
coordinators (George Garrity, Nick Thomson, Jeremy Selengut, Peter
Sterk, Tatiana Tatusova, and Dawn Field). This session was
dedicated to summarizing agreed action points and building
consensus on the way forward. During the discussions on Day 2,
Paul Gilna (UCSD) observed that the GSC is well placed to lead by
example on the issue of adoption of MIGS. This fact came into
focus on Day 3 when the GSC agreed that, as part of developing a
presence in the genomics community, members would work to develop
a logo, advertise the GSC website, advertise the GSC goals and
aims in relevant public presentations, talk to their home
institutions about adoption of MIGS, and request official
permission to use the logos of participating projects and
institutions on the GSC website. Perhaps most importantly GSC
participants agreed to complete MIGS-compliant genome reports.</p>
<p>In brief, based on workshop discussions the GSC has developed the
following ten-point Roadmap.</p>
<p>(1) <italic>Update MIGS to version 1.1 before genome reports are
accepted and post to the website for further community
consultation.</italic>
Now available on the web, this version is more
streamlined and strongly typed for the sake of future validation
(e.g., selection from a controlled vocabulary is now expected for most values).</p>
<p>(2) <italic>Implement GCat identifiers</italic>
. The group agreed they
should take the form <italic>NNNNNN</italic>
_GCAT (where <italic>N</italic>
is a number from 0 to 9) to avoid any confusion with INSDC accession numbers. In
taking this step, the GSC has paved the way towards creation and
adoption of a community infrastructure for supporting MIGS compliance.</p>
<p>(3) <italic>Produce a production version of GCat ready to accept
published genome reports. </italic>
This is available at
<ext-link ext-link-type="uri" xlink:href="http://gensc.sf.net">http://gensc.sf.net</ext-link>
.</p>
<p>(4) <italic>Develop guidelines for the submission of genome
reports. </italic>
These guidelines will emphasize that genomes should be
submitted first to the INSDC and that the GSC then recommends that
INSDC refer authors to GCat.</p>
<p>(5) <italic>Actively work to generate MIGS-compliant genome
reports</italic>
. The first valid reports are in the catalogue and the GSC
will work with curators at key institutions (EBI, NCBI, JGI, TIGR,
Sanger Institute) to curate further reports.</p>
<p>(6) <italic>Build a batch upload facility into GCat</italic>
. This is
required to allow GCat to deal with EMBL- and NCBI-produced lists
of information about their genomes for the sake of populating GCat
with content, quantifying the amount of optional information
already associated with these genomes (e.g., the lat_lon
qualifier) and working towards producing a single, global list of
genomes in the public domain. Likewise, it will be necessary to
support the future submission of Rob Edwards's complete set of
metagenomic datasets and, subsequent to the meeting, the full set
of Sanger Institute genomes volunteered by Nick Thomson.</p>
<p>(7) <italic>Develop a policy on ownership of the contents of the
genome reports</italic>
. It was agreed that all data should be placed into
the public domain as soon as deposited, and that a system of
curation by the GSC and social tagging by any member of the
community should be developed. Tatiana Tatusova (NCBI) stated that
all data <italic>must</italic>
be completely open access for NCBI's participation.</p>
<p>(8) <italic>Seek funding to help support the implementation and
adoption of MIGS</italic>
. Critical to the future success of this
initiative will be the capacity of the GSC to find resources for
curation (e.g., aid submitters, validate submissions, work with
the INSDC, make sure information content stays compatible with
changes in the MIGS specification and the introduction of new
controlled vocabulary terms/ontologies). Although curation-based
activities are difficult to fund, they are vital for the success
of the project, and to the community as a whole. The cost of data
curation by individual researchers is difficult to estimate, but
is considerably higher, both in financial and productivity terms.
There is a pressing need for the community to work with vetted
datasets. Matt Kane (National Science Foundation) spoke briefly on
the Research Coordination Network program offered by NSF which
could be an opportunity to pursue for further support of
networking, workshops, and related activities. The GSC agreed to
explore this option further.</p>
<p>(9) <italic>Return to NIEeS in 2007 for a 4th workshop</italic>
. Ideally,
all of the above activities would be completed or well under way
at the time of the 4th workshop and the GSC would be able to
extend its Roadmap accordingly.</p>
<p>(10) <italic>Complete MIGS 2.0</italic>
. We plan to complete MIGS 2.0, a
production version of MIGS complete with an appropriate set of
terms formalized within OBI [<xref ref-type="bibr" rid="B12">12</xref>
] and
other relevant ontologies, and a significantly improved version of the Genome Catalogue, by October 2007.</p>
<p>In conclusion, this workshop has produced an improved version of
MIGS (v1.1), an updated XML schema (v1.1), consensus on a wide
range of features to add to GCat, and further actions within the
group that support the generation and submission of MIGS-compliant
genome reports to GCat. Since the workshop, GCat identifiers have
been implemented and the first MIGS-compliant genome reports for
published and unpublished projects have been submitted
[<xref ref-type="bibr" rid="B13">13</xref>
–<xref ref-type="bibr" rid="B16">16</xref>
]. A variety of value-added features have also been developed within the Genome Catalogue including the ability to view genomes on a map based on their latitude and longitude and the ability to access information using REST style web services.
If the GSC can meet its target of producing the infrastructure
required to support MIGS (specification, a working repository, and
access to appropriate terms) it should put the community in a
stronger position to push for enforcement of compliance. The GSC
continues to make its open call for support and involvement in
this initiative. The GSC welcomes new members, links to new
projects, and researchers willing to describe the genomes with the
submission of MIGS-compliant genome reports as part of the further
development of this project. Anyone interested in knowing more
about or joining this effort is encouraged to contact any of the
coordinators or join the GSC mailing lists
(<ext-link ext-link-type="uri" xlink:href="http://gensc.sf.net">http://gensc.sf.net</ext-link>
).</p>
</sec>
</body>
<back><ack><title>ACKNOWLEDGMENTS</title>
<p>The authors acknowledge the invaluable contributions of all of the
participants who attended the workshop. This workshop was funded
and hosted by NIE<italic>e</italic>
S and an NERC International Opportunities Fund
Award (NE/3521773/1) to the first author.</p>
</ack>
<ref-list><ref id="B1"><label>1</label>
<element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Field</surname>
<given-names>D</given-names>
</name>
<name><surname>Garrity</surname>
<given-names>G</given-names>
</name>
<name><surname>Morrison</surname>
<given-names>N</given-names>
</name>
<etal></etal>
</person-group>
<article-title>Meeting report: eGenomics: cataloguing our complete genome collection I</article-title>
<source><italic>Comparative and Functional Genomics</italic>
</source>
<year>2006</year>
<volume>6</volume>
<fpage>357</fpage>
<lpage>362</lpage>
<pub-id pub-id-type="pmid">18629203</pub-id>
</element-citation>
</ref>
<ref id="B2"><label>2</label>
<element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Martiny</surname>
<given-names>JBH</given-names>
</name>
<name><surname>Field</surname>
<given-names>D</given-names>
</name>
</person-group>
<article-title>Ecological perspectives on the sequenced genome collection</article-title>
<source><italic>Ecology Letters</italic>
</source>
<year>2005</year>
<volume>8</volume>
<issue>12</issue>
<fpage>1334</fpage>
<lpage>1345</lpage>
</element-citation>
</ref>
<ref id="B3"><label>3</label>
<element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Field</surname>
<given-names>D</given-names>
</name>
<name><surname>Morrison</surname>
<given-names>N</given-names>
</name>
<name><surname>Selengut</surname>
<given-names>J</given-names>
</name>
<name><surname>Sterk</surname>
<given-names>P</given-names>
</name>
</person-group>
<article-title>Meeting report: eGenomics: cataloguing our complete genome collection II</article-title>
<source><italic>OMICS: A Journal of Integrative Biology</italic>
</source>
<year>2006</year>
<volume>10</volume>
<issue>2</issue>
<fpage>100</fpage>
<lpage>104</lpage>
<pub-id pub-id-type="pmid">16901213</pub-id>
</element-citation>
</ref>
<ref id="B4"><label>4</label>
<element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Baldauf</surname>
<given-names>SL</given-names>
</name>
<name><surname>Bhattacharya</surname>
<given-names>D</given-names>
</name>
<name><surname>Cockrill</surname>
<given-names>J</given-names>
</name>
<name><surname>Hugenholtz</surname>
<given-names>P</given-names>
</name>
<name><surname>Pawlowski</surname>
<given-names>J</given-names>
</name>
<name><surname>Simpson</surname>
<given-names>AGB</given-names>
</name>
</person-group>
<person-group person-group-type="editor"><name><surname>Cracraft</surname>
<given-names>J</given-names>
</name>
<name><surname>Donoghue</surname>
<given-names>MJ</given-names>
</name>
</person-group>
<article-title>The tree of life: an overview</article-title>
<source><italic>Assembling the Tree of Life</italic>
</source>
<year>2004</year>
<publisher-loc>Oxford, UK</publisher-loc>
<publisher-name>Oxford University Press</publisher-name>
<fpage>43</fpage>
<lpage>75</lpage>
</element-citation>
</ref>
<ref id="B5"><label>5</label>
<element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Edwards</surname>
<given-names>RA</given-names>
</name>
<name><surname>Rodriguez-Brito</surname>
<given-names>B</given-names>
</name>
<name><surname>Wegley</surname>
<given-names>L</given-names>
</name>
<etal></etal>
</person-group>
<article-title>Using pyrosequencing to shed light on deep mine microbial ecology under extreme hydrogeologic conditions</article-title>
<source><italic>BMC Genomics</italic>
</source>
<year>2006</year>
<volume>7</volume>
<fpage>57</fpage>
<pub-id pub-id-type="pmid">16549033</pub-id>
</element-citation>
</ref>
<ref id="B6"><label>6</label>
<element-citation publication-type="other"><person-group person-group-type="author"><name><surname>Edwards</surname>
<given-names>R</given-names>
</name>
</person-group>
<article-title>Random Community Genomics</article-title>
<year>2006</year>
<comment><ext-link ext-link-type="uri" xlink:href="http://phage.sdsu.edu/~rob/">http://phage.sdsu.edu/~rob/</ext-link>
.</comment>
</element-citation>
</ref>
<ref id="B7"><label>7</label>
<element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Morrison</surname>
<given-names>N</given-names>
</name>
<name><surname>Cochrane</surname>
<given-names>G</given-names>
</name>
<name><surname>Faruque</surname>
<given-names>N</given-names>
</name>
<etal></etal>
</person-group>
<article-title>Concept of sample in OMICS technology</article-title>
<source><italic>OMICS: A Journal of Integrative Biology</italic>
</source>
<year>2006</year>
<volume>10</volume>
<issue>2</issue>
<fpage>127</fpage>
<lpage>137</lpage>
<pub-id pub-id-type="pmid">16901217</pub-id>
</element-citation>
</ref>
<ref id="B8"><label>8</label>
<element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Field</surname>
<given-names>D</given-names>
</name>
<name><surname>Sansone</surname>
<given-names>S-A</given-names>
</name>
</person-group>
<article-title>A special issue on data standards</article-title>
<source><italic>OMICS: A Journal of Integrative Biology</italic>
</source>
<year>2006</year>
<volume>10</volume>
<issue>2</issue>
<fpage>84</fpage>
<lpage>93</lpage>
</element-citation>
</ref>
<ref id="B9"><label>9</label>
<element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Garrity</surname>
<given-names>G</given-names>
</name>
<name><surname>Lyons</surname>
<given-names>C</given-names>
</name>
</person-group>
<article-title>Future-proofing biological nomenclature</article-title>
<source><italic>OMICS: A Journal of Integrative Biology</italic>
</source>
<year>2003</year>
<volume>7</volume>
<issue>1</issue>
<fpage>31</fpage>
<lpage>33</lpage>
<pub-id pub-id-type="pmid">12831553</pub-id>
</element-citation>
</ref>
<ref id="B10"><label>10</label>
<element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lombardot</surname>
<given-names>T</given-names>
</name>
<name><surname>Kottmann</surname>
<given-names>R</given-names>
</name>
<name><surname>Pfeffer</surname>
<given-names>H</given-names>
</name>
<etal></etal>
</person-group>
<article-title>Megx.net—database resources for marine ecological genomics</article-title>
<source><italic>Nucleic Acids Research</italic>
</source>
<year>2006</year>
<volume>34</volume>
<issue>Database issue</issue>
<fpage>D390</fpage>
<lpage>D393</lpage>
<pub-id pub-id-type="pmid">16381894</pub-id>
</element-citation>
</ref>
<ref id="B11"><label>11</label>
<element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liolios</surname>
<given-names>K</given-names>
</name>
<name><surname>Tavernarakis</surname>
<given-names>N</given-names>
</name>
<name><surname>Hugenholtz</surname>
<given-names>P</given-names>
</name>
<name><surname>Kyrpides</surname>
<given-names>NC</given-names>
</name>
</person-group>
<article-title>The Genomes On Line Database (GOLD) v.2: a monitor of genome projects worldwide</article-title>
<source><italic>Nucleic Acids Research</italic>
</source>
<year>2006</year>
<volume>34</volume>
<issue>Database issue</issue>
<fpage>D332</fpage>
<lpage>D334</lpage>
<pub-id pub-id-type="pmid">16381880</pub-id>
</element-citation>
</ref>
<ref id="B12"><label>12</label>
<element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Whetzel</surname>
<given-names>PL</given-names>
</name>
<name><surname>Brinkman</surname>
<given-names>RR</given-names>
</name>
<name><surname>Causton</surname>
<given-names>HC</given-names>
</name>
<etal></etal>
</person-group>
<article-title>Development of FuGO: an ontology for functional genomics investigations</article-title>
<source><italic>OMICS: A Journal of Integrative Biology</italic>
</source>
<year>2006</year>
<volume>10</volume>
<issue>2</issue>
<fpage>199</fpage>
<lpage>204</lpage>
<pub-id pub-id-type="pmid">16901226</pub-id>
</element-citation>
</ref>
<ref id="B13"><label>13</label>
<element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Angly</surname>
<given-names>FE</given-names>
</name>
<name><surname>Felts</surname>
<given-names>B</given-names>
</name>
<name><surname>Breitbart</surname>
<given-names>M</given-names>
</name>
<etal></etal>
</person-group>
<article-title>The marine viromes of four oceanic regions</article-title>
<source><italic>PLoS Biology</italic>
</source>
<year>2006</year>
<volume>4</volume>
<issue>11</issue>
<fpage>2121</fpage>
<lpage>2131</lpage>
</element-citation>
</ref>
<ref id="B14"><label>14</label>
<element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Glöckner</surname>
<given-names>FO</given-names>
</name>
<name><surname>Kube</surname>
<given-names>M</given-names>
</name>
<name><surname>Bauer</surname>
<given-names>M</given-names>
</name>
<etal></etal>
</person-group>
<article-title>Complete genome sequence of the marine planctomycete Pirellula sp. strain 1</article-title>
<source><italic>Proceedings of the National Academy of Sciences of the United States of America</italic>
</source>
<year>2003</year>
<volume>100</volume>
<issue>14</issue>
<fpage>8298</fpage>
<lpage>8303</lpage>
<pub-id pub-id-type="pmid">12835416</pub-id>
</element-citation>
</ref>
<ref id="B15"><label>15</label>
<element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rabus</surname>
<given-names>R</given-names>
</name>
<name><surname>Ruepp</surname>
<given-names>A</given-names>
</name>
<name><surname>Frickey</surname>
<given-names>T</given-names>
</name>
<etal></etal>
</person-group>
<article-title>The genome of Desulfotalea psychrophila, a sulfate-reducing bacterium from permanently cold Arctic sediments</article-title>
<source><italic>Environmental Microbiology</italic>
</source>
<year>2004</year>
<volume>6</volume>
<issue>9</issue>
<fpage>887</fpage>
<lpage>902</lpage>
<pub-id pub-id-type="pmid">15305914</pub-id>
</element-citation>
</ref>
<ref id="B16"><label>16</label>
<element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bauer</surname>
<given-names>M</given-names>
</name>
<name><surname>Kube</surname>
<given-names>M</given-names>
</name>
<name><surname>Teeling</surname>
<given-names>H</given-names>
</name>
<etal></etal>
</person-group>
<article-title>Whole genome analysis of the marine Bacteroidetes ‘Gramella forsetii’ reveals adaptations to degradation of polymeric organic matter</article-title>
<source><italic>Environmental Microbiology</italic>
</source>
<year>2006</year>
<volume>8</volume>
<issue>12</issue>
<fpage>2201</fpage>
<lpage>2213</lpage>
<pub-id pub-id-type="pmid">17107561</pub-id>
</element-citation>
</ref>
</ref-list>
</back>
</pmc>
</record>
Pour manipuler ce document sous Unix (Dilib)
EXPLOR_STEP=$WICRI_ROOT/Ticri/CIDE/explor/CyberinfraV1/Data/Pmc/Corpus
HfdSelect -h $EXPLOR_STEP/biblio.hfd -nk 0002570 | SxmlIndent | more
Ou
HfdSelect -h $EXPLOR_AREA/Data/Pmc/Corpus/biblio.hfd -nk 0002570 | SxmlIndent | more
Pour mettre un lien sur cette page dans le réseau Wicri
{{Explor lien |wiki= Ticri/CIDE |area= CyberinfraV1 |flux= Pmc |étape= Corpus |type= RBID |clé= |texte= }}
This area was generated with Dilib version V0.6.25.