<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN" "http://jats.nlm.nih.gov/publishing/1.1/JATS-journalpublishing1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.1">
   <front>
      <journal-meta>
         <journal-id journal-id-type="publisher-id">peerj</journal-id>
         <journal-id journal-id-type="pmc">peerj</journal-id>
         <journal-id journal-id-type="nlm-ta">PeerJ</journal-id>
         <journal-title-group>
            <journal-title>PeerJ</journal-title>
            <abbrev-journal-title abbrev-type="publisher">PeerJ</abbrev-journal-title>
         </journal-title-group>
         <issn pub-type="epub">2167-8359</issn>
         <publisher>
            <publisher-name>PeerJ Inc.</publisher-name>
            <publisher-loc>San Diego, USA</publisher-loc>
         </publisher>
      </journal-meta>
      <article-meta>
         <article-id pub-id-type="publisher-id">6800</article-id>
         <article-id pub-id-type="doi">10.7717/peerj.6800</article-id>
         <article-categories>
            <subj-group subj-group-type="categories">
               <subject>Bioinformatics</subject>
               <subject>Genomics</subject>
               <subject>Marine Biology</subject>
               <subject>Microbiology</subject>
               <subject>Virology</subject>
            </subj-group>
         </article-categories>
         <title-group>
            <article-title>Long-read viral metagenomics captures abundant and microdiverse viral populations and their niche-defining genomic islands</article-title>
         </title-group>
         <contrib-group content-type="authors">
            <contrib id="author-1" contrib-type="author">
               <name>
                  <surname>Warwick-Dugdale</surname>
                  <given-names>Joanna</given-names>
               </name><xref ref-type="aff" rid="aff-1">1</xref><xref ref-type="aff" rid="aff-2">2</xref></contrib>
            <contrib id="author-2" contrib-type="author">
               <name>
                  <surname>Solonenko</surname>
                  <given-names>Natalie</given-names>
               </name><xref ref-type="aff" rid="aff-3">3</xref></contrib>
            <contrib id="author-3" contrib-type="author">
               <name>
                  <surname>Moore</surname>
                  <given-names>Karen</given-names>
               </name><xref ref-type="aff" rid="aff-2">2</xref></contrib>
            <contrib id="author-4" contrib-type="author">
               <name>
                  <surname>Chittick</surname>
                  <given-names>Lauren</given-names>
               </name><xref ref-type="aff" rid="aff-3">3</xref></contrib>
            <contrib id="author-5" contrib-type="author">
               <name>
                  <surname>Gregory</surname>
                  <given-names>Ann C.</given-names>
               </name><xref ref-type="aff" rid="aff-3">3</xref></contrib>
            <contrib id="author-6" contrib-type="author">
               <name>
                  <surname>Allen</surname>
                  <given-names>Michael J.</given-names>
               </name><xref ref-type="aff" rid="aff-1">1</xref><xref ref-type="aff" rid="aff-2">2</xref></contrib>
            <contrib id="author-7" contrib-type="author">
               <name>
                  <surname>Sullivan</surname>
                  <given-names>Matthew B.</given-names>
               </name><xref ref-type="aff" rid="aff-3">3</xref><xref ref-type="aff" rid="aff-4">4</xref></contrib>
            <contrib id="author-8" contrib-type="author" corresp="yes">
               <name>
                  <surname>Temperton</surname>
                  <given-names>Ben</given-names>
               </name>
               <email>b.temperton@exeter.ac.uk</email><xref ref-type="aff" rid="aff-2">2</xref></contrib>
            <aff id="aff-1"><label>1</label><institution>Plymouth Marine Laboratory</institution>, <city>Plymouth</city>, <state>Devon</state>, <country>United Kingdom</country></aff>
            <aff id="aff-2"><label>2</label><institution>School of Biosciences, University of Exeter</institution>, <city>Exeter</city>, <state>Devon</state>, <country>United Kingdom</country></aff>
            <aff id="aff-3"><label>3</label><institution>Department of Microbiology, Ohio State University</institution>, <city>Columbus</city>, <state>OH</state>, <country>United States of America</country></aff>
            <aff id="aff-4"><label>4</label><institution>Civil, Environmental and Geodetic Engineering, Ohio State University</institution>, <city>Columbus</city>, <state>OH</state>, <country>United States of America</country></aff>
         </contrib-group>
         <contrib-group content-type="editors">
            <contrib contrib-type="editor">
               <name>
                  <surname>Nelson</surname>
                  <given-names>Craig</given-names>
               </name>
            </contrib>
         </contrib-group>
         <pub-date pub-type="epub" date-type="pub" iso-8601-date="2019-04-25">
            <day>25</day>
            <month>4</month>
            <year iso-8601-date="2019">2019</year>
         </pub-date>
         <volume>7</volume>
         <elocation-id>e6800</elocation-id>
         <history>
            <date date-type="received" iso-8601-date="2018-11-15">
               <day>15</day>
               <month>11</month>
               <year iso-8601-date="2018">2018</year>
            </date>
            <date date-type="accepted" iso-8601-date="2019-03-14">
               <day>14</day>
               <month>3</month>
               <year iso-8601-date="2019">2019</year>
            </date>
         </history>
         <permissions>
            <copyright-statement>©2019 Warwick-Dugdale et al.</copyright-statement>
            <copyright-year>2019</copyright-year>
            <copyright-holder>Warwick-Dugdale et al.</copyright-holder>
            <license xlink:href="http://creativecommons.org/licenses/by/4.0/">
               <license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, reproduction and adaptation in any medium and for any purpose provided that it is properly attributed. For attribution, the original author(s), title, publication source (PeerJ) and either DOI or URL of the article must be cited.</license-p>
            </license>
         </permissions>
         <self-uri xlink:href="https://peerj.com/articles/6800"/>
         <abstract>
            <p>Marine viruses impact global biogeochemical cycles via their influence on host community structure and function, yet our understanding of viral ecology is constrained by limitations in host culturing and a lack of reference genomes and ‘universal’ gene markers to facilitate community surveys. Short-read viral metagenomic studies have provided clues to viral function and first estimates of global viral gene abundance and distribution, but their assemblies are confounded by populations with high levels of strain evenness and nucleotide diversity (microdiversity), limiting assembly of some of the most abundant viruses on Earth. Such features also challenge assembly across genomic islands containing niche-defining genes that drive ecological speciation. These populations and features may be successfully captured by single-virus genomics and fosmid-based approaches, at least in abundant taxa, but at considerable cost and technical expertise. Here we established a low-cost, low-input, high throughput alternative sequencing and informatics workflow to improve viral metagenomic assemblies using short-read and long-read technology. The ‘VirION’ (Viral, long-read metagenomics via MinION sequencing) approach was first validated using mock communities where it was found to be as relatively quantitative as short-read methods and provided significant improvements in recovery of viral genomes. We then applied VirION to the first metagenome from a natural viral community from the Western English Channel. In comparison to a short-read only approach, VirION: (i) increased number and completeness of assembled viral genomes; (ii) captured abundant, highly microdiverse virus populations, and (iii) captured more and longer genomic islands. Together, these findings suggest that VirION provides a high throughput and cost-effective alternative to fosmid and single-virus genomic approaches to more comprehensively explore viral communities in nature.</p>
         </abstract>
         <kwd-group kwd-group-type="author">
            <kwd>Viral Metagenomics</kwd>
            <kwd>Virus</kwd>
            <kwd>Virome</kwd>
            <kwd>Metagenome</kwd>
            <kwd>Assembly</kwd>
            <kwd>Viral ecology</kwd>
            <kwd>Long-read sequencing</kwd>
            <kwd>Marine Microbiology</kwd>
         </kwd-group>
         <funding-group>
            <award-group id="fund-1">
               <funding-source>Bermuda Institute of Ocean Sciences as part of the BIOS-SCOPE program</funding-source>
            </award-group>
            <award-group id="fund-2">
               <funding-source>Royal Society and the Natural Environment Research Council (NERC)</funding-source>
               <award-id>NE/P008534/1</award-id>
               <award-id>NE/R010935/1</award-id>
            </award-group>
            <award-group id="fund-3">
               <funding-source>NERC Great Western Four+ (GW4+) Doctoral Training Partnership PhD</funding-source>
               <award-id>NE/L002434/1</award-id>
            </award-group>
            <award-group id="fund-4">
               <funding-source>Gordon and Betty Moore Foundation</funding-source>
               <award-id>#3790</award-id>
               <award-id>5488</award-id>
            </award-group>
            <funding-statement>Major support was provided by a fellowship to Ben Temperton from the Bermuda Institute of Ocean Sciences as part of the BIOS-SCOPE program; the Royal Society and the Natural Environment Research Council (NERC) (NE/P008534/1 and NE/R010935/1 to Ben Temperton). Additional support was from a NERC Great Western Four+ (GW4+) Doctoral Training Partnership PhD to Joanna Warwick-Dugdale (NE/L002434/1) and the Gordon and Betty Moore Foundation (awards #3790 and 5488) to Matthew B. Sullivan. There was no additional external funding received for this study. The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement>
         </funding-group>
      </article-meta>
   </front>
   <body>
      <sec sec-type="intro">
         <title>Introduction</title>
         <p>The marine bacterial communities that regulate global carbon biogeochemical cycles are themselves structured by selective, phage-mediated lysis (<xref ref-type="bibr" rid="ref-79">Weinbauer, 2004</xref>; <xref ref-type="bibr" rid="ref-70">Suttle, 2007</xref>). Bacteria co-evolve with their phages and exchange genetic information, and phages even ‘reprogram’ hosts during infection so as to channel host metabolism towards phage replication (<xref ref-type="bibr" rid="ref-23">Forterre, 2013</xref>; <xref ref-type="bibr" rid="ref-29">Hurwitz, Hallam &amp; Sullivan, 2013</xref>; <xref ref-type="bibr" rid="ref-31">Hurwitz &amp; U’Ren, 2016</xref>). Over the last decade, the convergence of high throughput sequencing and the use of universal taxonomic marker genes for bacteria have revolutionised our understanding of microbial ecology (<xref ref-type="bibr" rid="ref-74">Torsvik &amp; Ovreaas, 2002</xref>; <xref ref-type="bibr" rid="ref-75">Treusch et al., 2009</xref>; <xref ref-type="bibr" rid="ref-72">Thompson et al., 2017</xref>). Problematically, however, viral ecologists lack parallel approaches. First, PCR-amplified marker genes are limited to a narrow subset of the viral community, and require degeneracies and amplification conditions that undermine the quantitative nature of the data (<xref ref-type="bibr" rid="ref-68">Sullivan, 2015</xref>). Second, while short-read viral metagenomics studies to date have provided clues to viral function (e.g., virally encoded, host-derived central metabolism genes, known as Auxiliary Metabolic Genes: AMGs) (<xref ref-type="bibr" rid="ref-9">Breitbart et al., 2007</xref>; <xref ref-type="bibr" rid="ref-29">Hurwitz, Hallam &amp; Sullivan, 2013</xref>), and first estimates of global viral gene abundance and distribution (<xref ref-type="bibr" rid="ref-11">Brum et al., 2015</xref>; <xref ref-type="bibr" rid="ref-62">Roux et al., 2016a</xref>), they suffer from technical limitations. 
This is because short-read assemblies are composites of populations ‘features’ (<xref ref-type="bibr" rid="ref-51">Mizuno, Ghai &amp; Rodriguez-Valera, 2014</xref>), with successful assembly a function of coverage and branch resolution in assembly graphs (<xref ref-type="bibr" rid="ref-71">Temperton &amp; Giovannoni, 2012</xref>; <xref ref-type="bibr" rid="ref-60">Olson et al., 2017</xref>). This limits our ability to assemble viral populations where multiple strains are abundant and microdiverse (<xref ref-type="bibr" rid="ref-63">Roux et al., 2017</xref>), as well as genomic regions of high diversity, such as genomic islands (GIs), which, in microbes, often contain niche-defining genes that drive ecological speciation (<xref ref-type="bibr" rid="ref-14">Coleman et al., 2006</xref>). In these latter regions, assembly is impeded by low coverage and/or repeat regions at the boundaries (<xref ref-type="bibr" rid="ref-51">Mizuno, Ghai &amp; Rodriguez-Valera, 2014</xref>; <xref ref-type="bibr" rid="ref-4">Ashton et al., 2015</xref>).</p>
         <p>These are not just technical limitations—emerging data suggests that these obstacles alter our understanding of viral roles on important taxa and global carbon biogeochemistry. For example, the globally dominant members of the chemoheterotrophic order Pelagibacterales comprise up to 25% of all bacterioplankton and are major contributors in the conversion of marine dissolved organic matter back to atmospheric CO<sub>2</sub> (<xref ref-type="bibr" rid="ref-26">Giovannoni, 2017</xref>). Their associated viruses dominate global oceans (<xref ref-type="bibr" rid="ref-82">Zhao et al., 2013</xref>; <xref ref-type="bibr" rid="ref-46">Martinez-Hernandez et al., 2018</xref>) and are likely to contribute significantly to carbon turnover in surface water by release of labile intracellular carbon during lysis (<xref ref-type="bibr" rid="ref-69">Suttle, 2005</xref>; <xref ref-type="bibr" rid="ref-70">Suttle, 2007</xref>). However, the genomes of viruses associated with Pelagibacterales contain numerous GIs and/or high microdiversity (<xref ref-type="bibr" rid="ref-82">Zhao et al., 2013</xref>; <xref ref-type="bibr" rid="ref-46">Martinez-Hernandez et al., 2018</xref>). Such features fragment genomes in short-read assemblies, which reduces representation following contig size-selection for downstream analyses (<xref ref-type="bibr" rid="ref-45">Martinez-Hernandez et al., 2017</xref>; <xref ref-type="bibr" rid="ref-63">Roux et al., 2017</xref>). Though single-virus genomics (<xref ref-type="bibr" rid="ref-45">Martinez-Hernandez et al., 2017</xref>) and fosmid-based approaches (<xref ref-type="bibr" rid="ref-53">Mizuno et al., 2013</xref>; <xref ref-type="bibr" rid="ref-52">Mizuno et al., 2016</xref>) can overcome such issues, these methods are technically challenging and costly to implement.</p>
         <p>Alternatively, recent advances in long-read sequencing technology might be leveraged to better capture microdiverse viral populations and genomic islands. Such approaches can yield very long reads (&gt;800 kbp) (<xref ref-type="bibr" rid="ref-32">Jain et al., 2015</xref>; <xref ref-type="bibr" rid="ref-33">Jain et al., 2018</xref>; <xref ref-type="bibr" rid="ref-42">Loman, Quick &amp; Simpson, 2015</xref>), which would be long enough to capture complete genomes of double-stranded DNA bacteriophages (‘phages’) (10–617.5 kbp (<xref ref-type="bibr" rid="ref-44">Mahmoudabadi &amp; Phillips, 2018</xref>)). At a minimum, such long reads could span genomic global- and local repeat regions, which tangle the De Bruijn Graph and fragment the assembly (<xref ref-type="bibr" rid="ref-36">Koren &amp; Phillippy, 2015</xref>). Long reads may also overcome assembly challenges in regions of low coverage, to improve overall assembly of genomes from both cultured isolates (<xref ref-type="bibr" rid="ref-81">Wick et al., 2017</xref>) and metagenomics (<xref ref-type="bibr" rid="ref-24">Frank et al., 2016</xref>; <xref ref-type="bibr" rid="ref-20">Driscoll et al., 2017</xref>). It is also probable that long-read assemblies using overlap-layout-consensus would be less prone to microdiversity-associated fragmentation of genomes observed in De Bruijn Graph approaches (<xref ref-type="bibr" rid="ref-45">Martinez-Hernandez et al., 2017</xref>; <xref ref-type="bibr" rid="ref-63">Roux et al., 2017</xref>).</p>
         <p>The challenge is that long-read technologies (both from PacBio and Oxford Nanopore) currently require large amounts of input DNA (micrograms; <xref ref-type="bibr" rid="ref-33">Jain et al., 2018</xref>, instead of nanograms commonly available from natural viral communities in seawater; <xref ref-type="bibr" rid="ref-28">Hurwitz et al., 2013</xref>). Furthermore, PacBio subreads and nanopore reads have high error rates (5–10%), with the former enriched in insertion errors and the latter enriched in insertion-deletion errors (<xref ref-type="bibr" rid="ref-80">Weirather et al., 2017</xref>). Indel errors shift the reading frame of the DNA sequence and confound gene-calling algorithms, artificially inflating the number of identified stop codons and producing shorter gene calls (<xref ref-type="bibr" rid="ref-78">Warr &amp; Watson, 2019</xref>). This is a particular problem for viral metagenomics as the median length of genes in dsDNA phages is approximately half that of their bacterial hosts (408 bp vs 801 bp, respectively) (<xref ref-type="bibr" rid="ref-10">Brocchieri &amp; Karlin, 2005</xref>; <xref ref-type="bibr" rid="ref-44">Mahmoudabadi &amp; Phillips, 2018</xref>), and the vast majority of viral genes in both dsDNA viral isolates and viral metagenomes (&gt;50% and up to 93%, respectively) have no known function (<xref ref-type="bibr" rid="ref-30">Hurwitz &amp; Sullivan, 2013</xref>; <xref ref-type="bibr" rid="ref-44">Mahmoudabadi &amp; Phillips, 2018</xref>), making it difficult to evaluate the quality of gene calls from metagenomic assemblies.</p>
         <p>Here, we adapted a Long-Read Linker-Amplified Shotgun Library (LASL) approach for quantitative viral metagenomics (<xref ref-type="bibr" rid="ref-21">Duhaime et al., 2012</xref>) to obtain sufficient quantities of high-molecular weight DNA from nanograms of viral community dsDNA for sequencing using the MinION sequencer from Oxford Nanopore Technology. We then established a bioinformatic workflow to combine such long-read data with complementary short-read sequencing data to maximise the advantages and minimise the weaknesses of both sequencing technologies. Following validation on mock viral communities, we applied our new approach to the first marine viral metagenome from the Western English Channel (WEC). Here, we present the first use of long-read sequencing technology for viral metagenomics and show that this novel approach provides significant benefits when combined with short-read metagenomics.</p>
      </sec>
      <sec sec-type="materials|methods">
         <title>Materials &amp; Methods</title>
         <sec>
            <title>Construction of the mock viral community</title>
            <p>A mock viral community comprised of six isolated and sequenced marine Caudovirales with genome sizes ranging from 38.5–129.4 kbp was produced as described previously (<xref ref-type="bibr" rid="ref-65">Roux et al., 2016b</xref>). Briefly, viruses were cultivated from host <italic>Pseudoalteromonas</italic> or <italic>Cellulophaga</italic> via plaque assay, collected into MSM buffer (0.45 M NaCl, 0.05 M Mg, 0.05 M Tris base, pH 7.6) and purified by 0.2 µm filtration followed by treatment with DNase I (100 U/mL for 2 hr at RT; terminated by the addition of 0.1 M EGTA and 0.1 M EDTA). Viral capsids were enumerated via epifluorescence microscopy (SYBR Gold; wet mount method) (<xref ref-type="bibr" rid="ref-56">Noble, 2001</xref>; <xref ref-type="bibr" rid="ref-16">Cunningham et al., 2015</xref>). 1.4 × 10<sup>9</sup> virus particles from each culture were pooled, and DNA extracted via the Wizard<sup>®</sup> DNA Clean-up System (Promega A7280). DNA was quantified via Qubit fluorometer (Life Technologies).</p>
         </sec>
         <sec>
            <title>Construction of the Western English Channel viral metagenome</title>
            <p>A total of 20 L of seawater was collected in rosette-mounted Niskin bottles at a depth of 5 m from the Western Channel Observatory (WCO; <ext-link ext-link-type="uri" xlink:href="http://www.westernchannelobservatory.org.uk/">http://www.westernchannelobservatory.org.uk/</ext-link>) coastal station ‘L4’ (50°15.0′N; 4°13.0′W) on the 28th September 2016. Seawater was transferred immediately to a clean collection bottle, and processed to remove the cellular fraction (within 4 h of collection) via sequential filtration through glass fibre (GF/D: pore size 2.7 µm) and polyethersulfone (pore size 0.22 µm) filters in a 142 mm polycarbonate rig, with peristaltic pump. Precipitation of viruses from filtrate (denoted as the viral fraction) and primary concentration of virus particles was conducted by iron chloride flocculation and collection on 1.0 µm polycarbonate filters (<xref ref-type="bibr" rid="ref-34">John et al., 2011</xref>); filters were stored in the dark at 4 °C. Viruses were resuspended in ascorbate-EDTA buffer (0.1 M EDTA, 0.2 M MgCl<sub>2</sub>, 0.2 M ascorbic acid, pH 6.1), and transferred to Amicon Ultra 100 kDa centrifugal filter units (Millipore UFC910024) (<xref ref-type="bibr" rid="ref-28">Hurwitz et al., 2013</xref>) that had been pre-treated with 1% bovine serum albumin buffer to minimise capsid-filter adhesion (<xref ref-type="bibr" rid="ref-19">Deng et al., 2014</xref>) and flushed with SM buffer (0.1 M NaCl; 0.05 M Tris–HCl; 0.008 M MgCl<sub>2</sub>). Following concentration to 500–600 µL, virus particles were washed with SM buffer (<xref ref-type="bibr" rid="ref-7">Bonilla et al., 2016</xref>) and purified with DNase I (100 U/mL; 2 h at RT) to remove unprotected DNA (i.e., non-encapsulated DNA); DNase I activity was terminated by the addition of 0.1 M EGTA and 0.1 M EDTA (<xref ref-type="bibr" rid="ref-28">Hurwitz et al., 2013</xref>). 
Viral DNA was extracted from concentrated and purified viral particles using the Wizard<sup>®</sup> DNA Clean-up System (Promega A7280), removing PCR inhibitors (<xref ref-type="bibr" rid="ref-34">John et al., 2011</xref>).</p>
         </sec>
         <sec>
            <title>Library preparation, amplification and sequencing</title>
            <p>For short-read sequencing, Illumina libraries were generated from 1 ng of either mock viral community DNA (<xref ref-type="supplementary-material" rid="supp-2">Table S1</xref>), or 1 ng of environmental viral-fraction DNA, using Nextera XT v2 kits (Illumina) and the manufacturer’s protocol. After 12 cycles of amplification, the concentration and distribution in fragment sizes of the Illumina libraries were determined via Qubit and Bioanalyzer (Agilent), respectively. DNA was sequenced as 2 × 300 bp paired-end sequence reads, on a HiSeq 2500 (Illumina Inc.) in rapid mode, by the Exeter Sequencing Service (University of Exeter, UK).</p>
            <p>For VirION libraries (<xref ref-type="fig" rid="fig-1">Fig. 1</xref>), 20 ng (mock viral community) or 100 ng (WEC viral-fraction) of DNA was sheared to fragments averaging 8 kbp length via g-TUBE (Covaris 520079) as required to optimise MinION flow cell sequencing efficiency/yield (Oxford Nanopore Technologies: ONT). End-repair of DNA fragments, amplification of DNA with PCR-adapter ligation (i.e., Linker Amplified Shotgun Library: LASL preparation), and preparation of MinION-compatible libraries were performed following the manufacturer’s protocols for “2D Low input genomic DNA with PCR” using the ‘Ligation Sequencing kit 2D’ (ONT SQK-LSK208). PCR reaction conditions were modified with reference to NEBNext High-Fidelity 2X PCR Master Mix (NEB M0541S) manufacturer’s instructions in order to maximise DNA yield, whilst minimising production of chimeric sequences, as follows: 3 min at 95 °C (initial denaturation), 15 cycles of: 15 s at 95 °C (denaturation), 15 s at 62 °C (annealing), 5 min at 72 °C (extension); finally, 5 min at 72 °C (final extension), followed by 0.4 × AMPure bead clean-up. ∼1.5 µg of end-repaired, amplified DNA was carried forward for sequencing adapter ligation followed by purification of adapted DNA using MyOne C1 Streptavidin beads (Thermo Fisher Scientific Inc. 65001). The prepared long read library was sequenced on a single MinION Mk 1B flow cell with R9.4 pore chemistry for 48 h (Note—to remain up to date with changing ONT chemistry, a 1D ligation version of this protocol has also been tested and is available on protocols.io (<ext-link ext-link-type="uri" xlink:href="https://www.protocols.io/view/virion-long-read-low-input-viral-metagenomic-sequ-p8fdrtn">https://www.protocols.io/view/virion-long-read-low-input-viral-metagenomic-sequ-p8fdrtn</ext-link>)). Quality control of short and long read libraries was performed as described in <xref ref-type="supplementary-material" rid="supp-1">Supplemental Information</xref>. 
High-quality sequence data were used to generate short-read De Bruijn Graph assemblies (using metaSPAdes v. 3.11; <xref ref-type="bibr" rid="ref-58">Nurk et al., 2017</xref>), hybrid long-read scaffolded De Bruijn Graph assemblies (using metaSPAdes, with the --nanopore parameter), and long-read overlap-layout consensus assemblies (with Canu; <xref ref-type="bibr" rid="ref-38">Koren et al., 2017</xref>) following optimisation for metagenomic data (see <xref ref-type="supplementary-material" rid="supp-1">Supplemental Information</xref>). Rates of chimerism in both VirION reads (formed during PCR amplification) and assemblies (formed by mis-assembly) were evaluated by aligning reads and contigs, respectively, from the mock viral community to their associated genomes (<xref ref-type="supplementary-material" rid="supp-2">Table S1</xref>).</p>
            <fig id="fig-1">
               <object-id pub-id-type="doi">10.7717/peerj.6800/fig-1</object-id><label>Figure 1</label><caption>
                  <title>Workflow for preparation of free-viral fraction DNA for MinION sequencing.</title>
                  <p>The long-read viral metagenomic method (VirION) developed includes FeCl<sub>3</sub> flocculation and resuspension (FFR), shearing of extracted viral DNA (to 8–9 kbp), random linker amplification (Linker Amplified Shotgun Library: LASL), MinION library preparation, and nanopore (Oxford Nanopore Technologies; ONT) sequencing.</p>
               </caption>
               <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/6800/fig-1.png"/>
            </fig>
         </sec>
         <sec>
            <title>Maximizing the benefits of long read and short read assemblies</title>
            <p>We developed a bioinformatic pipeline to maximise the benefits of VirION reads for viral metagenomics (<xref ref-type="fig" rid="fig-2">Fig. 2</xref>). Briefly, long-read assembly contigs of VirION reads were ‘polished’ with matching short-read sequences to remove sequencing error via consensus base-calling (using Pilon; <xref ref-type="bibr" rid="ref-77">Walker et al., 2014</xref>, v1.22). In order to capture the longest assemblies available from the short-read data, scaffolds from short-read and hybrid assemblies were combined and dereplicated using a cut-off of 95% average nucleotide identity over 80% of the length (via MUMmer v3.23; <xref ref-type="bibr" rid="ref-18">Delcher, Salzberg &amp; Phillippy, 2003</xref>) to cluster highly similar contigs into viral populations (<xref ref-type="bibr" rid="ref-62">Roux et al., 2016a</xref>). The longest representatives of each population were carried forward for analysis. Population representatives &gt;10 kbp were pooled with polished long-read assembly contigs &gt;10 kbp and evaluated with VirSorter (<xref ref-type="bibr" rid="ref-64">Roux et al., 2015</xref>) (in virome decontamination mode) to identify putative viral contigs. Contigs classified as category 3 (deemed unusual, but not necessarily viral; <xref ref-type="bibr" rid="ref-64">Roux et al., 2015</xref>) were excluded from downstream analyses. Circular contigs (i.e., where the contig has matching ends) were identified by VirSorter and used as a proxy for successful assembly of a complete genome. 
Matching short-read data was then mapped against the representative viral population sequences (using bowtie2; <xref ref-type="bibr" rid="ref-40">Langmead &amp; Salzberg, 2012</xref>) for use in evaluating (1) relative abundances of contigs; (2) whether long read assembly captured more microdiverse genomes; and (3) recovery of genomic islands and their predicted functional composition at the population level (see <xref ref-type="supplementary-material" rid="supp-1">Supplemental Information</xref>).</p>
            <fig id="fig-2">
               <object-id pub-id-type="doi">10.7717/peerj.6800/fig-2</object-id><label>Figure 2</label><caption>
                  <title>Bioinformatics pipeline for VirION reads and complementary short-read sequencing for viral metagenomes.</title>
                  <p>The VirION bioinformatic pipeline combines short-read (Illumina) and long-read (MinION) sequencing to maximise the advantages of both sequencing platforms. Viral metagenomic short-read data and VirION reads from the Western English Channel were processed for identification of putative viral genomes as follows: (1) Short-read contigs and contigs scaffolded with VirION reads were generated via De Bruijn Graph Assembly using metaSPAdes (<xref ref-type="bibr" rid="ref-58">Nurk et al., 2017</xref>), and (2) de-replicated via average nucleotide identity of 95% similarity across 80% length. Separately, (3) long, error-prone VirION reads were assembled via overlap layout consensus Assembly using Canu (<xref ref-type="bibr" rid="ref-38">Koren et al., 2017</xref>) and (4) error-corrected via alignment of Illumina reads and consensus base calling with Pilon (<xref ref-type="bibr" rid="ref-77">Walker et al., 2014</xref>). (5) Putative viral genomes were identified using VirSorter (<xref ref-type="bibr" rid="ref-64">Roux et al., 2015</xref>). (6) Relative and global abundances of the Western English Channel viral contigs were calculated via competitive recruitment of short read data with FastVirome Explorer (<xref ref-type="bibr" rid="ref-73">Tithi et al., 2018</xref>), and lastly, (7) viral clusters based on shared proteins were produced from Western English Channel viral contigs clustered with contigs from the Global Ocean Virome (<xref ref-type="bibr" rid="ref-62">Roux et al., 2016a</xref>) and NCBI’s RefSeq database (v.8.4 among others—see <xref ref-type="supplementary-material" rid="supp-3">Table S2</xref>) using vConTACT2 (<xref ref-type="bibr" rid="ref-6">Bolduc et al., 2017</xref>).</p>
               </caption>
               <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/6800/fig-2.png"/>
            </fig>
            <p>To direct future sampling efforts in environmental samples, we evaluated the short-read sequencing depth at which inclusion of long reads in hybrid assemblies offered no advantage in genome recovery. High-quality short read sequences were randomly subsampled in triplicate to seven discrete depths representing between 10% and 70% of the full dataset (using seqtk <ext-link ext-link-type="uri" xlink:href="https://github.com/lh3/seqtk">https://github.com/lh3/seqtk</ext-link>). Subsampled reads were then assembled with and without scaffolding support from VirION reads (with metaSPAdes; <xref ref-type="bibr" rid="ref-58">Nurk et al., 2017</xref>). Scaffolds &gt;10 kbp in replicated assemblies were classified as viral using VirSorter (<xref ref-type="bibr" rid="ref-64">Roux et al., 2015</xref>) in virome decontamination mode and the number of scaffolds classified as viral was calculated for each replicated assembly. Statistical significance of the difference in the number of viral or circular viral contigs between hybrid and short-read assemblies was calculated by a two-sided Student <italic>t</italic>-test between triplicate replicates at each sequencing depth.</p>
         </sec>
         <sec>
            <title>Validation of error correction of long reads in viral metagenomic data</title>
            <p>We evaluated whether it was possible to use short-read data to correct base-calling errors in long-read environmental metagenomic data in a similar way to that used for genomes of bacteria and eukaryotes from axenic samples (<xref ref-type="bibr" rid="ref-77">Walker et al., 2014</xref>). Western English Channel short-read data were sub-sampled to different sequencing depths, in triplicate; sub-samples were then mapped against the long-read assemblies of VirION reads. For error-correction with Pilon (<xref ref-type="bibr" rid="ref-77">Walker et al., 2014</xref>) and median coverage, the total number of fixed deletions and fixed insertions at each coverage depth were calculated. We then evaluated whether error-correction could be used to reduce the impact of frameshift errors on predicted gene length. Predicted coding sequences were identified using MetaGeneAnnotator (<xref ref-type="bibr" rid="ref-57">Noguchi, Taniguchi &amp; Itoh, 2008</xref>) on the following: (1) uncorrected VirION reads; (2) long-read assemblies of VirION reads; (3) long-read assemblies of VirION reads polished with the full short-read dataset; (4) contigs from scaffolded short-read assemblies; (5) contigs from the hybrid assembly. Distributions of the lengths of predicted coding sequences were compared against those in the genomes of <italic>Caudovirales</italic> from the NCBI RefSeq database (v.8.4), predicted proteins from the Global Ocean Virome (GOV; <xref ref-type="bibr" rid="ref-62">Roux et al., 2016a</xref>), and the single-amplified viruses in <xref ref-type="bibr" rid="ref-45">Martinez-Hernandez et al. (2017)</xref>. Effect size of different assembly types on genomic island length and density and associated 95% confidence intervals (CI) were calculated from bootstrapped medians (<xref ref-type="bibr" rid="ref-15">Cumming, 2014</xref>). For each bootstrap, 1000 predicted proteins were randomly subsampled from each dataset and their median length was calculated.</p>
         </sec>
         <sec>
            <title>Analysis of Tig404—a contig closely related to Pelagiphage HTVC010P</title>
            <p>Phage contigs closely related to Pelagiphage HTVC010P were identified via clustering of viruses at the ICTV-accepted level of genera by shared gene content (vContact2; <xref ref-type="bibr" rid="ref-6">Bolduc et al., 2017</xref>). Within this viral cluster, contig <italic>tig404</italic>, from the polished long-read assembly of VirION reads, was identified as a circular viral contig by VirSorter (<xref ref-type="bibr" rid="ref-64">Roux et al., 2015</xref>). Whole genome alignment was performed with MUMmer (<xref ref-type="bibr" rid="ref-18">Delcher, Salzberg &amp; Phillippy, 2003</xref>) to calculate average nucleotide identity to HTVC010P. Contigs from short-read only and hybrid assemblies that shared 95% nucleotide identity over 80% of their length with tig404 were identified and mapped back to their respective loci with MUMmer (<xref ref-type="bibr" rid="ref-18">Delcher, Salzberg &amp; Phillippy, 2003</xref>). Genomic islands and nucleotide diversity of tig404 were calculated as described previously. To evaluate the contents of a 5.3 kb genomic island, unpolished VirION reads were mapped back against the tig404 genome and those which mapped to at least 100 bp on the borders of the genomic island were extracted. Mapped reads extending at least 1 kb into the genomic island were used as a query in a tBLASTx best-BLAST (<xref ref-type="bibr" rid="ref-12">Camacho et al., 2009</xref>) search against the NCBI NR database to annotate the reads whilst minimising the adverse impact of sequencing error within the uncorrected reads.</p>
         </sec>
         <sec>
            <title>Estimating relative abundance and viral clusters of WEC viruses in viral metagenomes</title>
            <p>FastViromeExplorer (<xref ref-type="bibr" rid="ref-73">Tithi et al., 2018</xref>) v.1.1 was used to quantify the relative abundances of WEC viral contigs. FastViromeExplorer is built upon the Kallisto (<xref ref-type="bibr" rid="ref-8">Bray et al., 2016</xref>) framework and competitively recruits reads against contigs, allowing for accurate recruitment to contigs that may share a degree of sequence similarity. Briefly, high quality short read datasets from the Global Ocean Virome (<xref ref-type="bibr" rid="ref-62">Roux et al., 2016a</xref>) and from our Western English Channel sample were randomly subsampled to 10 million reads using seqtk (<ext-link ext-link-type="uri" xlink:href="https://github.com/lh3/seqtk">https://github.com/lh3/seqtk</ext-link>) to standardise per-sample sequencing effort, with the number of reads selected to balance detection of lower abundance viral populations with maximising the number of samples that could be included in the survey. Subsampled reads for each sample were recruited against a Kallisto index comprising (1) The viral genomes &gt;10 kbp identified in this study; (2) A selection of phage genomes &gt;10 kbp from key metagenomic studies (<xref ref-type="bibr" rid="ref-62">Roux et al., 2016a</xref>; <xref ref-type="bibr" rid="ref-45">Martinez-Hernandez et al., 2017</xref>; <xref ref-type="bibr" rid="ref-43">Luo et al., 2017</xref>); (3) Cultured viruses from the NCBI RefSeq viral database (v8.4) (<xref ref-type="supplementary-material" rid="supp-3">Table S2</xref>). Contigs &gt;10 kbp were selected to maximise accuracy of VirSorter to correctly identify viral contigs (<xref ref-type="bibr" rid="ref-64">Roux et al., 2015</xref>). For inclusion in downstream abundance analyses, contigs with less than 40% coverage as calculated by FastViromeExplorer were classified as having zero abundance to avoid over-representation of partial matches (<xref ref-type="bibr" rid="ref-73">Tithi et al., 2018</xref>). 
The top 100 most abundant contigs from each sample were also selected for downstream analyses. All phage genomes &gt;10 kbp (including those from RefSeq) were processed using VirSorter (v.1.03) on the CyVerse cyberinfrastructure (<xref ref-type="bibr" rid="ref-48">Merchant et al., 2016</xref>) to standardise gene-calling prior to clustering of viruses into ICTV-recognised genera by shared gene content using vContact2 (<xref ref-type="bibr" rid="ref-6">Bolduc et al., 2017</xref>). In the final stage of clustering, vContact2 uses ClusterONE (<xref ref-type="bibr" rid="ref-55">Nepusz, Yu &amp; Paccanaro, 2012</xref>) and assigns a <italic>p</italic>-value to a cluster depending on whether the in-cluster edge weights are significantly higher than the out-cluster edge weights. <italic>Q</italic>-values were calculated from cluster <italic>p</italic>-values using the qvalue R package (<xref ref-type="bibr" rid="ref-17">Dabney, Storey &amp; Warnes, 2015</xref>) to account for multiple testing and a <italic>q</italic>-value cutoff of &lt;0.05 was used to identify statistically significant clusters.</p>
         </sec>
      </sec>
      <sec>
         <title>Results &amp; Discussion</title>
         <p>Here, we present the first use of long-read sequencing technology for viral metagenomics and show that this novel approach provides significant benefits when combined with short-read metagenomics. Our bioinformatics pipeline overcame the high sequencing error associated with long-read technology and the addition of long reads enabled capture of complete viral genomes which were globally ubiquitous, and not represented by short-read only assemblies. Long-read assemblies also significantly improved the capture of viral genomic islands, demonstrating that this advance will facilitate better understanding of niche-differentiation and ecological speciation of viruses in environmental samples.</p>
         <sec>
            <title>Assembly of VirION reads successfully captured mock viral community genomes and retained relative abundance information</title>
            <p>VirION sequencing of the mock viral community produced 359,338 high quality (Q&gt;10) long reads (median length: 4,099 bp; max length 18,644 bp). 95% of the reads (341,718) mapped back to the genomes of the mock viral community. Considering viral DNA was sheared to 6–8 kbp fragments, the length of amplicons following LASL were shorter than expected, presumably due to preferential PCR amplification of shorter fragments (<xref ref-type="bibr" rid="ref-66">Shagin et al., 1999</xref>) (<xref ref-type="supplementary-material" rid="supp-7">Fig. S1A</xref>) and preferential diffusion (and thus sequencing) of shorter reads within the flowcell microfluidics (<xref ref-type="supplementary-material" rid="supp-7">Fig. S1B</xref>). Only 0.95% of LASL amplified reads were classified as chimeric (mapping to more than one location of the same or different genomes of the mock viral community), suggesting 15 rounds of PCR was sufficiently low to minimise production of chimeric artifacts, supporting previous findings (<xref ref-type="bibr" rid="ref-41">Laver et al., 2016</xref>). Several methods have been developed for sequencing dsDNA viral metagenomes without skewing relative abundance information important for comparative ecology, including an LASL approach optimised for 454 sequencing (<xref ref-type="bibr" rid="ref-21">Duhaime et al., 2012</xref>; <xref ref-type="bibr" rid="ref-28">Hurwitz et al., 2013</xref>) and Nextera sequencing (<xref ref-type="bibr" rid="ref-65">Roux et al., 2016b</xref>). Median per-genome coverages of VirION reads and short-read Nextera datasets (5.6 M 2 × 300 bp paired-end) from the mock viral community were strongly correlated (<italic>R</italic><sup>2</sup> = 0.975, <italic>p</italic> &lt; 0.001, <xref ref-type="fig" rid="fig-3">Fig. 3A</xref>), indicating that the LASL approach used here for multi-kilobasepair fragments retained relative abundance information observed in previous LASL approaches.</p>
            <p>Long-read and short-read assemblies of the mock viral community captured &gt;99.7% of the six mock viral community genomes (<xref ref-type="supplementary-material" rid="supp-2">Table S1</xref>). Neither the short-read only assembly, hybrid assembly nor long-read assemblies were able to capture all six genomes in six complete contigs. Long-read methods gave the most contiguous assemblies, capturing the six genomes across 14 contigs. In comparison, short-read only assemblies recovered the genomes across 26 contigs, whereas hybrid assembly reduced the number of contigs to 21. As expected, we identified &gt;250 times more indel errors in long-read only assemblies than in the short-read assemblies scaffolded with long reads (average of 474 vs &lt;2 indels per 100 kbp, respectively). Polishing of long-read only assemblies with short read data reduced the indel error rate to 22.78 per 100 kbp, indicating this was a successful strategy for reducing indel error of long-read assemblies in metagenomic samples, but was not able to remove such errors completely. There was no evidence of chimerism in any of the assemblies, indicating that Canu’s <italic>in silico</italic> correction of chimeras (<xref ref-type="bibr" rid="ref-38">Koren et al., 2017</xref>) successfully removed the low number of chimeric sequences observed in the VirION reads during assembly.</p>
         </sec>
         <sec>
            <title>Combining VirION reads with short read data improves viral metagenomic assembly in an environmental virome</title>
            <p>Long read sequencing of an environmental virome from the Western English Channel produced 108,718 high quality VirION reads (median length: 3,625 bp; max length: 17,019 bp, total yield of 0.39 Gbp). It is worth noting that recent developments of MinION technology have improved flowcell yields to &gt;10 Gbp (<italic>pers comms</italic>). Therefore, our analyses here represent low coverage of the viral community with long read data compared to currently available (and fast-improving) technology.</p>
            <p>Scaffolding short-read assemblies using VirION reads provided a small but significant increase in the number of putative viral genomes recovered (between 1.1- and 1.5-fold increase, Student <italic>t</italic>-test, <italic>p</italic> &lt; 0.05) compared to short-read only assemblies up to a short-read sequencing depth of ∼12 Gbp (<xref ref-type="fig" rid="fig-3">Fig. 3B</xref>). Above this depth, there was no significant difference between short-read assemblies with and without scaffolding, suggesting assembly of short-read data was capturing most of the viral community above this sequencing depth. For comparison, the median sequencing depth of 137 Illumina sequenced viral metagenomes from the Global Ocean Virome survey (study <ext-link ext-link-type="uri" xlink:href="https://www.ebi.ac.uk/ena/data/search?query=PRJEB27181">PRJEB27181</ext-link> in the European Nucleotide Archive) was 8.67 Gbp (IQR = 5.22 Gbp), with 110 out of 137 samples sequenced to a depth of &lt;12 Gbp. Inclusion of VirION reads in hybrid assemblies significantly increased the number of ‘complete’ (i.e., circular contigs) viral genomes recovered once short-read sequencing depth increased above 12 Gbp (1.5- to 2.0-fold, Student <italic>t</italic>-test, <italic>p</italic> &lt; 0.05) (<xref ref-type="fig" rid="fig-3">Fig. 3B</xref>). Details of differences in means and <italic>p</italic>-values at each depth are available in <xref ref-type="supplementary-material" rid="supp-4">Tables S3</xref> and <xref ref-type="supplementary-material" rid="supp-5">S4</xref>. When the full (30.8 Gbp) short-read dataset was used, the inclusion of long reads for scaffolding De Bruijn Graph assemblies increased the median length of recovered viral genomes by an average of 1.8 kbp compared to short-read only assemblies (Mann–Whitney <italic>U</italic> test, <italic>n</italic> <sub>1</sub> = 1,400, <italic>n</italic> <sub>2</sub> = 879, <italic>p</italic>-value &lt; 0.001). 
With an estimated mean gene density of 1.4 genes per kb in phage dsDNA genomes (<xref ref-type="bibr" rid="ref-44">Mahmoudabadi &amp; Phillips, 2018</xref>), this increased length represents an extra 2.5 genes per contig.</p>
            <fig id="fig-3">
               <object-id pub-id-type="doi">10.7717/peerj.6800/fig-3</object-id><label>Figure 3</label><caption>
                  <title>Comparative performances of short-read and long-read data for the identification of marine viral genomes.</title>
                  <p>(A) Relative abundances of genome-mapped VirION reads and short-reads from a mock viral community composed of 6 different tailed bacteriophages. CBA: <italic>Cellulophaga</italic> phage; PSA: <italic>Pseudoalteromonas</italic> phage. The relative abundances of mock viral community members were strongly correlated using both approaches, showing amplification of sheared viral DNA for VirION sequencing was as quantitative as short read approaches for estimating relative viral abundance. (B) Efficiency of short-read only and hybrid sequencing approaches for detection of viral genomes at various depths/coverages of Illumina data using triplicate random subsamples of short read data from the Western English Channel viral metagenome: At all coverage depths tested, hybrid assemblies generated more circular (i.e., putatively complete) viral genomes than short-read assemblies; Below 10 Gbp of short-read data, hybrid assemblies captured more viral genomes (&gt;10 kbp) than short-read assemblies. Comparisons within grey boxes were found to be statistically significant (Student <italic>t</italic>-test).</p>
               </caption>
               <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/6800/fig-3.png"/>
            </fig>
            <p>Polishing of long-read assemblies of WEC VirION reads using complementary short-read data removed a maximum of 172,854 insertion errors and 12,674 deletion errors (<xref ref-type="supplementary-material" rid="supp-8">Fig. S2</xref>). Error correction reached an asymptote at ∼9 Gbp of short-read sequencing data, with a median coverage of ∼70. As expected, the errors associated with long-read sequencing adversely affected the lengths of protein predictions (<xref ref-type="supplementary-material" rid="supp-9">Fig. S3</xref>), in accordance with previous findings (<xref ref-type="bibr" rid="ref-78">Warr &amp; Watson, 2019</xref>). Proteins predicted from uncorrected VirION reads (median length of 72 aa, 70–74 aa 95% CI) were shorter (median difference = 61 aa, 53–69 aa 95% CI) than those from RefSeq Caudovirales genomes (median length of 133 aa, 126–141 aa 95% CI), and much shorter (median difference = 88 aa, 83–95 95% CI) than those from the GOV dataset (median length of 160 aa, 149–173 95% CI). Assembly of long reads with Canu includes a consensus-based error-correction step (<xref ref-type="bibr" rid="ref-38">Koren et al., 2017</xref>), which increased median predicted protein lengths to 87 aa (median difference of 15 aa, 14–15 95%CI) compared to raw VirION reads. Polishing of long-read assemblies of VirION reads with short read data was highly effective in restoring the length of predicted proteins (median length 127 aa, 120–135 aa 95%CI) to lengths similar to those observed in RefSeq Caudovirales (median length = 133 aa, 126–141 aa 95%CI). Proteins from polished reads had a median difference of −6 aa (−18–6 95%CI) compared to RefSeq Caudovirales proteins. This suggests that not all frameshift errors were corrected in the long-read assemblies, corroborated by evidence of increased indel errors observed in long-read assemblies of mock viral community data compared to short-read assemblies.</p>
            <p>Interestingly, predicted protein lengths from the GOV dataset (<xref ref-type="bibr" rid="ref-62">Roux et al., 2016a</xref>) (median length = 160 aa), short-read only assembly of the WEC virome (median length = 157 aa); hybrid assembly of the WEC virome (median length = 160 aa) and data from single-amplified viral genomes (<xref ref-type="bibr" rid="ref-45">Martinez-Hernandez et al., 2017</xref>) (median length = 152 aa) were all of similar length and 19 to 27 aa longer compared to those from RefSeq Caudovirales genomes, and 25 to 33 aa longer than those from WEC polished long-read assemblies. In comparison, median predicted protein length in 899 dsDNA phages was previously estimated at 136 aa (<xref ref-type="bibr" rid="ref-44">Mahmoudabadi &amp; Phillips, 2018</xref>)—similar to those found in our polished long-read assemblies from VirION reads. Thus, either both the RefSeq Caudovirales dataset and that of Mahmoudabadi and Phillips are under-representing longer viral predicted proteins found in marine viral metagenomes, or predicted protein lengths in viral genomes from metagenomic data are longer than those observed in cultured representatives. Whether this difference is biological or an artifact of metagenomic assembly and gene calling is an interesting area for further investigation.</p>
            <p><italic>Assembly and mapping of VirION reads captures more information about potential niche-defining genomic islands than short-read only or hybrid assemblies</italic>: In marine bacteria, genomic islands have been identified as playing an important role in niche specialisation that drives ecological speciation (<xref ref-type="bibr" rid="ref-14">Coleman et al., 2006</xref>). Genomic islands have also been found to be a common feature of viral genomes and are typically enriched in functions associated with host recognition (<xref ref-type="bibr" rid="ref-51">Mizuno, Ghai &amp; Rodriguez-Valera, 2014</xref>). At all nucleotide identity cut-offs tested, genomic islands captured on long-read assemblies were between 145 bp (112–184 bp 95%CI) and 225 bp (189–259 bp, 95% CI) longer than those captured on short-read only or hybrid assemblies (<xref ref-type="fig" rid="fig-4">Fig. 4A</xref>, <xref ref-type="supplementary-material" rid="supp-10">Fig. S4A</xref>). There were no significant differences between the lengths of genomic islands captured on short-read only or hybrid assemblies. The largest genomic islands in each assembly type were 2.47 kbp, 5.75 kbp and 5.65 kbp in short-read only assemblies, hybrid assemblies and long-read assemblies, respectively. In comparison, the largest genomic islands identified in fosmid-based viral metagenomes were ∼4.6 kbp (<xref ref-type="bibr" rid="ref-51">Mizuno, Ghai &amp; Rodriguez-Valera, 2014</xref>), suggesting that both hybrid and long-read approaches capture similar length genomic islands as previous fosmid-based methods. Similarly, the density of GIs was significantly greater in long-read assemblies (at between 40 bp (20–60 bp, 95%CI) and 100 bp (80–110 bp, 95%CI) of GI per kbp of genome) compared to short-read or hybrid assemblies (<xref ref-type="fig" rid="fig-4">Fig. 4B</xref>, <xref ref-type="supplementary-material" rid="supp-10">Fig. S4B</xref>). 
Again, there was no significant difference between short-read only and hybrid assemblies. At a nucleotide identity cut-off of 98% for read mapping, GIs in long-read assemblies were longer than those at 92% and 95% (by 59 bp (18–106 bp, 95%CI) and 61 bp (13–105 bp, 95%CI), respectively), indicating that residual error in the polished reads may be contributing to a slight increase in predicted GI length and density at high nucleotide identity. However, these effect sizes are much smaller than those observed between long-read assemblies and short and hybrid assemblies across all identity cut-offs, suggesting that long reads do indeed improve the capture of genomic islands.</p>
            <fig id="fig-4">
               <object-id pub-id-type="doi">10.7717/peerj.6800/fig-4</object-id><label>Figure 4</label><caption>
                  <title>Long-read assemblies capture longer genomic islands than short-read methods.</title>
                  <p>Comparison of the (A) length of genomic islands (GI) and (B) normalised length of GI per kb of genome per contig captured on long-read assemblies of VirION reads compared to short-read only and hybrid assemblies of viral contigs from the Western English Channel. Genomic islands were identified by mapping reads back against contigs across a range of nucleotide percentage identities (92, 95, 98%) to account for residual error remaining in polished long-read assemblies. (C) and (D) represent pairwise significance calculated using a Wilcoxon Rank Sum Test, with <italic>p</italic>-values adjusted (Benjamini-Hochberg) for multiple testing, for (A) and (B), respectively. Effect sizes and 95% confidence intervals can be found in <xref ref-type="supplementary-material" rid="supp-10">Fig. S4</xref>.</p>
               </caption>
               <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/6800/fig-4.png"/>
            </fig>
            <p>Previous work to identify viral genomic islands by recruiting short-reads back against assembled contigs from fosmid libraries showed that nucleotide diversity within genomic islands was associated with a constant-diversity model, with under-recruiting islands containing proteins associated with host recognition and penetration, phage structure and DNA packaging structural proteins (<xref ref-type="bibr" rid="ref-51">Mizuno, Ghai &amp; Rodriguez-Valera, 2014</xref>). Here, we were able to gain further insight into viral genomic islands by investigating whether diversity also occurred at the functional level. VirION reads that spanned the full width of GIs were identified and their gene content was predicted at the nucleotide level. Reads spanning the same GI were compared to see if different proteins were encoded on different template strands prior to amplification.</p>
            <p>In total, 137 genomic islands on 84 viral contigs had at least 10 VirION reads spanning their full length. The 3,072 reads spanning these islands encoded 6,445 predicted proteins, of which 4,599 could be aligned to a protein within the NR database. Just 711 (15%) of aligned predicted proteins returned a hit with known function, indicating that genomic islands are an important source of ‘genetic dark matter’ (i.e., sequence of unknown function) in viral metagenomes (<xref ref-type="bibr" rid="ref-39">Krishnamurthy &amp; Wang, 2017</xref>). In total 66 genomic islands contained genes with an assigned function. These islands captured a range of functional proteins including those associated with nucleotide biosynthesis; DNA methylation; redirection of host machinery; structural proteins and associated chaperonins; endo and exonucleases and integrases (<xref ref-type="supplementary-material" rid="supp-6">Table S5</xref>). 35 of the genomic islands contained structural proteins (capsid, tail proteins, co-chaperonin GroES, YapH); proteins associated with membrane recognition (carbohydrate-binding module, lectin-binding proteins) or proteins associated with reconfiguring host metabolic machinery for viral synthesis or defence suppression (RNA polymerase sigma factor, methyltransferases, tRNA synthetases, anti-restriction protein), supporting the hypothesis that viral genomic islands are a hotspot for Constant-Diversity evolutionary dynamics (<xref ref-type="bibr" rid="ref-51">Mizuno, Ghai &amp; Rodriguez-Valera, 2014</xref>). Seven out of eight genomic islands containing a thymidylate synthase (an enzyme involved in pyrimidine metabolism) also encoded partial hits to ribonucleotide reductase (involved in both purine and pyrimidine metabolism). Ribonucleotide reductase has previously been identified as the nucleotide metabolism gene most frequently interrupted by self-splicing introns (<xref ref-type="bibr" rid="ref-22">Dwivedi et al., 2013</xref>). 
Similarly, thymidylate synthase has been found to contain self-splicing, group I introns in phage genomes (<xref ref-type="bibr" rid="ref-13">Chu et al., 1984</xref>; <xref ref-type="bibr" rid="ref-5">Bechhofer, Hue &amp; Shub, 1994</xref>), potentially identifying intron splicing activity as a source of regulatory/functional variability and/or as a mechanism to promote the movement of genetic material within the viral genomic islands in our data. Functional, putatively niche-defining metabolic genes were also identified in the genomic islands, including an ultraviolet light damage repair gene <italic>uvsE</italic> and genes associated with photosystem II (<italic>psbA</italic>). Twenty-five out of the 66 genomic islands showed evidence of alternative gene arrangements across their spanning reads, suggesting the content of genomic islands can vary within viral populations at the structural, functional and nucleotide level.</p>
         </sec>
         <sec>
            <title>Assembly of VirION reads captures important, microdiverse populations previously missed by short-read data</title>
            <p>It has been hypothesised that genomes assembled from short-read metagenomes may be biased away from microdiverse populations (<xref ref-type="bibr" rid="ref-45">Martinez-Hernandez et al., 2017</xref>; <xref ref-type="bibr" rid="ref-63">Roux et al., 2017</xref>). We reasoned that overlap layout consensus assembly of long reads, followed by error correction might better capture genomes with high levels of microdiversity by avoiding the unresolvable branches of De Bruijn Graph assemblies. We evaluated genome-level nucleotide diversity (<italic>π</italic>) (<xref ref-type="bibr" rid="ref-54">Nei &amp; Li, 1979</xref>) of both short-read assemblies and polished long-read assemblies from the Western English Channel virome. Median levels of <italic>π</italic> were significantly (3-fold) higher in polished long-read contigs than those derived from De Bruijn Graph assemblies (two-sided Mann–Whitney <italic>U</italic> test: <italic>W</italic> = 105,830, <italic>n</italic> <sub>1</sub> = 758, <italic>n</italic> <sub>2</sub> = 206, <italic>p</italic> = 4.81 × 10<sup>−15</sup>; <xref ref-type="supplementary-material" rid="supp-11">Fig. S5</xref>), consistent with the hypothesis that long-read assembly of VirION reads captured genomes previously lost due to failure to resolve assembly graphs as a consequence of microdiversity.</p>
         </sec>
         <sec>
            <title>Tig404—an example of how VirION reads improve viral metagenomics</title>
            <p>The benefit of using VirION reads for viral metagenomics is exemplified by a polished contig from long-read assemblies that showed high nucleotide similarity and shared gene content to the globally abundant pelagiphage HTVC010P (<xref ref-type="bibr" rid="ref-82">Zhao et al., 2013</xref>). This ecologically important virus and its closely associated phages contain numerous genomic islands that comprise ∼10% of their genome and a shared 5.3 kbp genomic island containing a putative ribonuclease, bounded by tail fibre proteins (<xref ref-type="bibr" rid="ref-51">Mizuno, Ghai &amp; Rodriguez-Valera, 2014</xref>). It has also been predicted to possess high microdiversity that challenges assembly from short-read data, leading to fragmentation and thus under-representation in short-read viral metagenomes, but is successfully captured using fosmid approaches and single-virus genomics (<xref ref-type="bibr" rid="ref-53">Mizuno et al., 2013</xref>; <xref ref-type="bibr" rid="ref-45">Martinez-Hernandez et al., 2017</xref>). Clustering of viral contigs from the WEC by shared-gene content using vContact2 (<xref ref-type="bibr" rid="ref-6">Bolduc et al., 2017</xref>) identified a virus called ‘tig404’ from long-read assembly of VirION reads that was 89% identical at the nucleotide level to HTVC010P. We mapped contigs from short-read only and hybrid assemblies against this genome at 95% nucleotide identity over 80% of the length to evaluate the success of short-read and hybrid assembly methods at capturing this genome, and identified its genomic islands as described above (<xref ref-type="fig" rid="fig-5">Fig. 5</xref>). Both short-read only and hybrid assemblies were highly fragmented across the genome. Median nucleotide diversity of tig404 was extremely high (<xref ref-type="supplementary-material" rid="supp-11">Fig. S5</xref>), providing supporting evidence that fragmentation may be a result of high microdiversity in this phage. 
In contrast, VirION reads successfully overlapped across the genome and enabled recovery of the genome through long-read assembly. Comparison of the genome of tig404 with that of HTVC010P identified a shared genomic island containing a putative ribonuclease protein and bounded by a tail fibre protein (<xref ref-type="fig" rid="fig-5">Fig. 5</xref>), similar to those observed in closely related taxa from fosmid libraries (<xref ref-type="bibr" rid="ref-51">Mizuno, Ghai &amp; Rodriguez-Valera, 2014</xref>).</p>
            <fig id="fig-5">
               <object-id pub-id-type="doi">10.7717/peerj.6800/fig-5</object-id><label>Figure 5</label><caption>
                  <title>Long-read sequencing resolves microdiversity and assembly issues across genomic islands in ecologically important viral taxa.</title>
                  <p>De Bruijn Graph (DBG) assembly of short reads, even with VirION reads for scaffolding, failed to assemble the genome of tig404, a virus closely related to the globally abundant pelagiphage HTVC010P. Only long-read assembly of VirION reads, followed by error correction with short read data, was able to capture the complete genome on a single 29.2 kbp contig. A 200 bp sliding window analysis was used to calculate (A) median coverage of the assembly and (B) maximum nucleotide diversity (<italic>π</italic>), revealing six genomic islands (GIs) (C) and high levels of nucleotide diversity. The impact of this on short-read (light brown) only and hybrid assembly (green) can be seen in (C), where the assemblies aligned to the long-read assembly are highly fragmented. Conversely, long VirION reads (dark brown) were capable of spanning these regions across the whole genome and thus enabling assembly (D). One genomic island on tig404 was conserved with that of HTVC010P (E). Thus, we were able to identify the genomic content of this island at the population level by mapping VirION reads to HTVC010P and identifying those that spanned the genomic island. Encoded function was then predicted using tBLASTx to overcome high sequencing error in uncorrected VirION reads.</p>
               </caption>
               <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/6800/fig-5.png"/>
            </fig>
            <p>In addition, we were able to exploit an additional benefit of long reads and use unpolished VirION reads to explore the contents of the shared genomic island across the tig404 population within the WEC virome. As each read is derived from a single DNA strand (excluding the low abundance of chimeric reads), variance in the content of the genomic island within a population would be captured on reads that align to the ends, or across, the genomic island. In total, 31 VirION reads extended from the boundaries into the genomic island (<xref ref-type="fig" rid="fig-5">Fig. 5</xref>). Of these, 17 had sufficient overlap to use for identifying functional genes. Those at the 5′ end of the genomic island all contained a putative ribonuclease, whilst those at the 3′ end all contained an internal virion protein thought to be associated with puncturing the cell membrane in T7-like phages (<xref ref-type="bibr" rid="ref-51">Mizuno, Ghai &amp; Rodriguez-Valera, 2014</xref>). Thus, it would appear that, for this shared genomic island at the population level, diversity occurs at the nucleotide level, rather than gene content level. The fact that a similar gene content has now been found in the Western English Channel (this study), the Sargasso Sea (<xref ref-type="bibr" rid="ref-82">Zhao et al., 2013</xref>) and the Mediterranean (<xref ref-type="bibr" rid="ref-51">Mizuno, Ghai &amp; Rodriguez-Valera, 2014</xref>) may indicate this is a conserved feature across the HTVC010P-like phages. The encoding of a ribonuclease within a genomic island offers an interesting glimpse into the host-virus interactions that occur during infection and suggests that degradation of RNA is an important feature of the arms-race in HTVC010P-like phages with their <italic>Pelagibacter</italic> hosts. 
Whether this is to shut down host metabolism, or to hijack host metabolism through manipulation of regulatory machinery enriched in riboswitches (<xref ref-type="bibr" rid="ref-49">Meyer et al., 2009</xref>) requires further investigation.</p>
            <p>In total, analysis of VirION reads using a strategy to combine short and long read assemblies (<xref ref-type="fig" rid="fig-2">Fig. 2</xref>) generated 2,645 putative viral contigs &gt;10 kbp from the Western English Channel. Of these, 2,279 were from the de-replicated short and hybrid De Bruijn Graph assemblies and 366 from polished long-read assemblies. Our dataset represents the first virome sequenced from the WEC and so we evaluated the global abundance of viral populations from the WEC by competitive mapping of 10 million subsampled short reads from both the WEC and the GOV dataset (<xref ref-type="bibr" rid="ref-62">Roux et al., 2016a</xref>). Representatives of viral populations from the WEC were then pooled with those &gt;10 kb from the GOV dataset and other marine virome datasets (<xref ref-type="supplementary-material" rid="supp-3">Table S2</xref>) to make a total dataset of 20,545 viral contigs. Following competitive read recruitment with FastViromeExplorer (<xref ref-type="bibr" rid="ref-73">Tithi et al., 2018</xref>), the top 50 most abundant viral genomes were identified in each of the WEC and GOV surface samples. Out of 1,598 contigs, 81 of the most abundant viral contigs were from long read assemblies of VirION reads from the WEC, representing a significant enrichment (hypergeometric test for enrichment, <italic>p</italic> = 6.6 ×10<sup>−19</sup>). WEC contigs from short-read only (42 contigs) and hybrid assemblies (77 contigs) were not significantly enriched in the most abundant viral contigs. Thus, it is likely that long-read assembly of VirION reads from the WEC captured important and globally abundant viral taxa previously missed in the GOV datasets. 
Examination of relative abundance of WEC contigs in surface water samples from the GOV showed that contigs from long-read assemblies of VirION reads recruited a large proportion of the recruited reads from global samples, particularly in the Southern Atlantic Ocean and waters off the Western coasts of Southern Africa and South America (<xref ref-type="fig" rid="fig-6">Fig. 6</xref>). In total, clustering VirION-derived contigs from the Western English Channel with contigs from previous studies (<xref ref-type="supplementary-material" rid="supp-3">Table S2</xref>) by shared protein content produced 668 statistically supported viral clusters. Of these, 202 contained contigs derived from long-read assembly of VirION reads, but just three of these were composed solely of these contigs. Thus, we are confident that previous findings suggesting viral diversity at the genus level in surface oceans has been largely documented (<xref ref-type="bibr" rid="ref-62">Roux et al., 2016a</xref>) are robust. Instead, we propose that long read assembly of VirION reads provides greater phylogenetic resolution of viral clusters by capturing members previously missed due to limitations in short-read assembly.</p>
            <fig id="fig-6">
               <object-id pub-id-type="doi">10.7717/peerj.6800/fig-6</object-id><label>Figure 6</label><caption>
                  <title>VirION-derived viral genomes from the Western English Channel are abundant in global marine viromes.</title>
                  <p>Relative abundances were calculated via competitive recruitment of 10 million sub-sampled reads from each of 42 samples from the Global Ocean Virome (<xref ref-type="bibr" rid="ref-62">Roux et al., 2016a</xref>). Short reads were recruited against a database comprising VirION-derived viral genomes (both scaffolded and un-scaffolded De Bruijn Graph (DBG) assemblies and those from long-read assembly of VirION long reads) and viral genomes obtained from other key viral metagenomic studies (including those which have employed short-read sequencing (‘GOV’; <xref ref-type="bibr" rid="ref-62">Roux et al., 2016a</xref>, and ‘Luo 2017’; <xref ref-type="bibr" rid="ref-43">Luo et al., 2017</xref>), and long-sequence recovery via Single-virus genomics (‘vSAG’; <xref ref-type="bibr" rid="ref-45">Martinez-Hernandez et al., 2017</xref>), and fosmid libraries (‘fosmid’; <xref ref-type="bibr" rid="ref-53">Mizuno et al., 2013</xref>; <xref ref-type="bibr" rid="ref-52">Mizuno et al., 2016</xref>), and viruses from the NCBI RefSeq database v. 8.4 (all detailed in <xref ref-type="supplementary-material" rid="supp-3">Table S2</xref>). The Western English Channel sample is indicated with a ‘*’.</p>
               </caption>
               <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/6800/fig-6.png"/>
            </fig>
            <p>The most globally abundant and ubiquitous (identified in at least 10% of samples) viral genome was a contig from a hybrid assembly, denoted H_NODE_1248 (<xref ref-type="fig" rid="fig-7">Fig. 7</xref>). This contig was 22.4 kbp in length and occupied a viral cluster (based on shared protein content) with 57 other members, including vSAG-37-F6 (9th most abundant ubiquitous virus and 13th most abundant across all samples), previously identified as the most globally abundant virus (<xref ref-type="bibr" rid="ref-45">Martinez-Hernandez et al., 2017</xref>; <xref ref-type="bibr" rid="ref-46">Martinez-Hernandez et al., 2018</xref>). The viral cluster also contained 10 other contigs from long-read assembly of VirION reads, ranging in size from 10 kbp to 27 kbp. Interestingly, pelagiphage HTVC010P, once thought to be the most abundant virus on Earth (<xref ref-type="bibr" rid="ref-82">Zhao et al., 2013</xref>) was ranked 128th in global abundance and did not meet the criteria of being both ubiquitous (identified in at least 10% of the samples) and abundant (in the top 100 most abundant viral taxa for each sample). Upon its discovery as the most abundant global virus we previously urged a cautious interpretation as any representative of a new viral clade will recruit reads from all similar viruses in the environment (<xref ref-type="bibr" rid="ref-82">Zhao et al., 2013</xref>). As new representatives of these clades are captured in metagenomic data it is likely that competitive recruitment will split reads between all clade members, reducing the estimated abundance of any one single member.</p>
            <fig id="fig-7">
               <object-id pub-id-type="doi">10.7717/peerj.6800/fig-7</object-id><label>Figure 7</label><caption>
                  <title>Ubiquity of VirION-derived Western English Channel viruses in Global Ocean surface waters.</title>
                  <p>Heatmap shows the top 50 most abundant and ubiquitous (appear in &gt;10% of samples) viral contigs in the surface samples of the Global Ocean Virome (<xref ref-type="bibr" rid="ref-62">Roux et al., 2016a</xref>). Competitive recruitment of 10 million subsampled short reads was performed using FastViromeExplorer (<xref ref-type="bibr" rid="ref-73">Tithi et al., 2018</xref>) against a contig database comprising: (1) viral population contigs from this study; (2) viral genomes derived from other key viral metagenomic studies (<xref ref-type="supplementary-material" rid="supp-3">Table S2</xref>); (3) viruses from the NCBI RefSeq database. Estimated abundances are calculated from the total number of reads mapped to a contig, with reads mapping to multiple contigs apportioned to a single contig via an expectation-maximization algorithm (<xref ref-type="bibr" rid="ref-73">Tithi et al., 2018</xref>). Matrix columns are ordered (left to right) by total number of mapped reads across all samples. The most abundant contig was H_NODE_1248, which is related at the genus level to the ubiquitous pelagiphage vSAG-37-F6. The Western English Channel sample is highlighted in a pink box, showing globally ubiquitous and abundant viruses from oceanic provinces were not particularly abundant in this coastal sample.</p>
               </caption>
               <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/6800/fig-7.png"/>
            </fig>
            <p>60% of the top 50 most abundant populations in the WEC were represented by a WEC contig derived from long-read assemblies of VirION reads (<xref ref-type="supplementary-material" rid="supp-13">Fig. S7</xref>). The viral community in the WEC sample was dominated by a 39,972 bp circular genome from a hybrid assembly. Denoted H_NODE_525, this contig recruited 3.28 times more reads than the next most abundant contig (<xref ref-type="supplementary-material" rid="supp-13">Fig. S7</xref>), but was not identified as globally abundant and ubiquitous (<xref ref-type="fig" rid="fig-7">Fig. 7</xref>). This virus shared a viral cluster with the siphovirus <italic>Pseudoalteromonas phage</italic> vB_PspS-H6/1 but we were not able to determine its putative host despite using a variety of tools (<xref ref-type="bibr" rid="ref-3">Ahlgren et al., 2016</xref>; <xref ref-type="bibr" rid="ref-25">Galiez et al., 2017</xref>) (<ext-link ext-link-type="uri" xlink:href="https://github.com/dutilh/CAT">https://github.com/dutilh/CAT</ext-link>). A viral contig from hybrid assembly, denoted H_NODE_6, was the longest complete viral genome identified in this study, with a 316 kbp genome. In the short read-only assembly, this genome was broken into two contiguous contigs of 204 kbp and 112 kbp, respectively (<xref ref-type="supplementary-material" rid="supp-14">Fig. S8</xref>). H_NODE_6 shared a viral cluster with the myoviruses <italic>Cronobacter sakazakii</italic> phage GAP32 and Enterobacter phage vB_KleM-RaK2. At 359 kb and 346 kb respectively (<xref ref-type="bibr" rid="ref-67">Šimoliūnas et al., 2012</xref>; <xref ref-type="bibr" rid="ref-1">Abbasifar et al., 2014</xref>), these are some of the largest phage genomes ever isolated. Recovery of this complete genome demonstrates the capacity for hybrid assembly with VirION reads to capture complete genomes of very large phages from complex communities on single contigs, which were fragmented using short-read only assemblies.</p>
         </sec>
      </sec>
      <sec sec-type="conclusions">
         <title>Conclusions</title>
         <p>In summary, this investigation represents the first use of long-read sequencing for viral metagenomics. We have shown that using long-reads to scaffold short read De Bruijn Graph assemblies improves recovery of complete viral genomes. Furthermore, overlap-layout consensus assembly of VirION reads, followed by error correction with short reads captures abundant and ubiquitous viral populations that are missed (possibly as a result of genome fragmentation) by current short-read metagenomic methods. By combining these two approaches, our proposed bioinformatics pipeline maximises the capture of viral diversity whilst minimising the impact of high error rates associated with long-read sequencing and represents a major addition to the viral metagenomics toolset. Improved capture of viral genomic islands will enable better understanding of mechanisms underpinning host–virus interactions, as demonstrated in our capture of a shared genomic island on the newly observed HTVC010P-like pelagiphage tig404. Importantly, long-read sequencing on the MinION platform is undergoing rapid improvements in terms of yield, with current technology providing at least an order of magnitude more sequencing data than that produced in this study, at a cost of &lt;$1000 per flowcell. Thus, our approach represents a significant advantage in terms of cost, yield and efficiency over fosmid and single-amplified genome approaches to capturing marine viruses that are otherwise challenging to assemble.</p>
         <p>As error rates associated with MinION technology continue to fall, we envisage less and less complementary short-read data being required for polishing. A recent update to basecalling methods has led to a significant reduction in indel errors and their associated impact on protein prediction (<xref ref-type="bibr" rid="ref-37">Koren et al., 2019</xref>). Furthermore, there is no technical reason to prevent our VirION approach being used in conjunction with PacBio sequencing to further reduce error rates using circular consensus sequencing. Such an approach would remove the need for short-read error correction (<xref ref-type="bibr" rid="ref-24">Frank et al., 2016</xref>) and avoid the remaining indel errors observed following polishing of MinION read assemblies with short-read data. Reductions in DNA input requirements and/or improvements in DNA polymerases for increasing VirION amplicon lengths will further increase its utility in recovering viral genomes from metagenomic samples.</p>
         <p>Ultimately, community efforts to align the input requirements of long-read sequencing with DNA recovery rates from viral communities will be rewarded by the ability to capture full-length viral genomes on single reads (<xref ref-type="bibr" rid="ref-27">Houldcroft, Beale &amp; Breuer, 2017</xref>), including all associated nucleotide modifications (<xref ref-type="bibr" rid="ref-76">Viehweger et al., 2018</xref>). Oxford Nanopore sequencing interprets single stranded nucleotides as they pass through the pore and there are significant efforts to develop protocols for direct sequencing of RNA, dsDNA and ssDNA viruses (<xref ref-type="bibr" rid="ref-35">Keller et al., 2018</xref>; <xref ref-type="bibr" rid="ref-76">Viehweger et al., 2018</xref>; <xref ref-type="bibr" rid="ref-47">McCabe et al., 2018</xref>). It is theoretically possible that with the right combination of ligases and optimised buffers, we will soon be able to sequence dsDNA, ssDNA and RNA viruses, with associated nucleotide modifications, within a single library preparation. We expect that the VirION approach could be readily adapted for use with ssDNA or RNA viruses, provided appropriate amplification of starting material could be achieved to meet the requirements for sequencing. Therefore, the approach described here provides a significant step towards capturing the full diversity of the viral community.</p>
         <p>VirION offers a framework for robust downstream bioinformatic approaches to maximise the benefits of long read sequencing, both now and as the technology continues to improve. Here, we have shown that VirION long-read metagenomics of dsDNA viral communities offers the potential to significantly improve our understanding of niche-differentiation, ecological speciation and the role of viruses in microbial communities within aquatic (<xref ref-type="bibr" rid="ref-62">Roux et al., 2016a</xref>) and soil (<xref ref-type="bibr" rid="ref-61">Pratama &amp; Van Elsas, 2018</xref>) environments, human health (<xref ref-type="bibr" rid="ref-50">Mirzaei &amp; Maurice, 2017</xref>; <xref ref-type="bibr" rid="ref-2">Aggarwala, Liang &amp; Bushman, 2017</xref>) and industrial settings.</p>
      </sec>
      <sec sec-type="supplementary-material" id="supplemental-information">
         <title>Supplemental Information</title>
         <supplementary-material id="supp-1" mimetype="application" mime-subtype="pdf" xlink:href="https://peerj.com/articles/6800/Supplementary_Methods_2019_04_04.pdf">
            <object-id pub-id-type="doi">10.7717/peerj.6800/supp-1</object-id><label>Supplementary Methods</label><caption>
               <title>Supplementary Methods</title>
               <p>Describes the quality control of sequence data and methods used to evaluate (1) relative abundances of contigs; (2) whether long read assembly captured more microdiverse genomes; (3) recovery of genomic islands and their predicted functional composition at the population level; (4) evaluation of functional variance of viral genomic islands spanned by VirION reads.</p>
            </caption>
         </supplementary-material>
         <supplementary-material id="supp-2" mimetype="application" mime-subtype="vnd.openxmlformats-officedocument.wordprocessingml.document" xlink:href="https://peerj.com/articles/6800/TableS1.docx">
            <object-id pub-id-type="doi">10.7717/peerj.6800/supp-2</object-id><label>Table S1</label><caption>
               <title>Mock viral community member characteristics</title>
               <p>Genomic characteristics of the six phages chosen for the mock viral community to develop and evaluate VirION protocols.</p>
            </caption>
         </supplementary-material>
         <supplementary-material id="supp-3" mimetype="application" mime-subtype="vnd.openxmlformats-officedocument.wordprocessingml.document" xlink:href="https://peerj.com/articles/6800/TableS2.docx">
            <object-id pub-id-type="doi">10.7717/peerj.6800/supp-3</object-id><label>Table S2</label><caption>
               <title>The numbers of phage genomes identified in this study using short, hybrid and error-corrected long read assembly of VirION reads, as identified by VirSorter (<xref ref-type="bibr" rid="ref-64">Roux et al., 2015</xref>)</title>
               <p>For comparison, phage genomes from important viral metagenomic studies (see references) and viruses from ‘RefSeq’ are also included. Prior to quantification of global relative abundances and (shared-protein) clustering, phage genomes were re-analysed using VirSorter to ensure uniformity of gene-calling, resulting in above classifications. Note: VirSorter Categories as follows: 1 and 4: “most confident” predictions (viral and lysogen, respectively); 2 and 5: “likely” predictions (viral and lysogen, respectively).</p>
            </caption>
         </supplementary-material>
         <supplementary-material id="supp-4" mimetype="application" mime-subtype="vnd.openxmlformats-officedocument.wordprocessingml.document" xlink:href="https://peerj.com/articles/6800/TableS3.docx">
            <object-id pub-id-type="doi">10.7717/peerj.6800/supp-4</object-id><label>Table S3</label><caption>
               <title>Student <italic>t</italic>-test results to identify significant differences between the number of circular viral contigs from short read only vs. hybrid assemblies</title>
               <p>Student <italic>t</italic>-test results to identify significant differences between the number of circular viral contigs (as identified by VirSorter (<xref ref-type="bibr" rid="ref-64">Roux et al., 2015</xref>)) from short read only vs. hybrid assemblies with VirION reads using metaSPAdes assemblies from triplicate random subsamples of short reads across different levels of sequencing depth. Significant differences are highlighted in bold.</p>
            </caption>
         </supplementary-material>
         <supplementary-material id="supp-5" mimetype="application" mime-subtype="vnd.openxmlformats-officedocument.wordprocessingml.document" xlink:href="https://peerj.com/articles/6800/TableS4.docx">
            <object-id pub-id-type="doi">10.7717/peerj.6800/supp-5</object-id><label>Table S4</label><caption>
               <title>Student <italic>t</italic>-test results to identify significant differences between the number of viral contigs from short read only vs. hybrid assemblies with VirION reads</title>
               <p>Student <italic>t</italic>-test results to identify significant differences between the number of viral contigs (as identified by VirSorter (<xref ref-type="bibr" rid="ref-64">Roux et al., 2015</xref>)) from short read only vs. hybrid assemblies with VirION reads using metaSPAdes assemblies from triplicate random subsamples of short reads across different levels of sequencing coverage. Significant differences are highlighted in bold.</p>
            </caption>
         </supplementary-material>
         <supplementary-material id="supp-6" mimetype="application" mime-subtype="vnd.openxmlformats-officedocument.spreadsheetml.sheet" xlink:href="https://peerj.com/articles/6800/TableS5.xlsx">
            <object-id pub-id-type="doi">10.7717/peerj.6800/supp-6</object-id><label>Table S5</label><caption>
               <title>Predicted genes located within 66 genomic islands spanned by VirION reads</title>
               <p>For each spanning read, putative start and stop codons were estimated by hierarchical clustering and used as queries in a BLASTx alignment against the NR database. Genes with unknown function were removed and the remaining putatively classified genes were used to assess functional variance within viral genomic islands.</p>
            </caption>
         </supplementary-material>
         <supplementary-material id="supp-7" mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/6800/FigureS1.png">
            <object-id pub-id-type="doi">10.7717/peerj.6800/supp-7</object-id><label>Figure S1</label><caption>
               <title>Fragment length of LASL-amplified VirION reads before and after sequencing</title>
               <p>(A) Bioanalyzer (Agilent) electropherogram showing the fragment length distribution of linker-amplified mock viral community DNA produced from 20 ng template DNA sheared to ∼8 kbp. Amplicon length peaked at ∼5.4 kbp, demonstrating PCR preference for amplification of shorter DNA fragments; (B) Read length distribution of VirION mock viral community amplicons (as shown in ‘A’; red dashed lines indicate approximate length of sheared template DNA); mean average read length was ∼4 kbp, likely due to preferential sequencing of shorter DNA fragments.</p>
            </caption>
         </supplementary-material>
         <supplementary-material id="supp-8" mimetype="application" mime-subtype="pdf" xlink:href="https://peerj.com/articles/6800/FigureS2.pdf">
            <object-id pub-id-type="doi">10.7717/peerj.6800/supp-8</object-id><label>Figure S2</label><caption>
               <title>Evaluation of error correction of long-read assemblies using short read data</title>
               <p>Impact of using short read sequencing to error correct overlap layout consensus-derived contigs with Pilon shows that approximate limits of the number of insertions and deletions that can be fixed is reached at ∼9 Gbp of short read data (median coverage of ∼70). Analysis was performed against the full contig set from overlap layout consensus assembly of VirION reads from the Western English Channel (<italic>n</italic> = 1,500).</p>
            </caption>
         </supplementary-material>
         <supplementary-material id="supp-9" mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/6800/FigureS3.png">
            <object-id pub-id-type="doi">10.7717/peerj.6800/supp-9</object-id><label>Figure S3</label><caption>
               <title>Difference and 95% CI of median predicted protein length of different assembly types to evaluate the impact of sequencing error and error correction of VirION reads with short-read data</title>
               <p>Median predicted protein length of 1,000 randomly selected proteins was calculated and compared to a similar treatment of proteins from a RefSeq v.8.4 Caudovirales database to measure effect size. This process was bootstrapped 1,000 times to provide 95% confidence intervals. The distributions on the graph represent distributions of differences in medians (<xref ref-type="bibr" rid="ref-15">Cumming, 2014</xref>). The median effect size (bold number) and the 95% CI boundaries (black line under each distribution, and numbers in brackets) are shown.</p>
            </caption>
         </supplementary-material>
         <supplementary-material id="supp-10" mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/6800/FigureS4.png">
            <object-id pub-id-type="doi">10.7717/peerj.6800/supp-10</object-id><label>Figure S4</label><caption>
               <title>Statistical significance of effects of assembly type on genomic island length and density</title>
               <p>Effect size and bootstrapped median 95% confidence intervals for impact of different assembly types on (A) genomic island length and (B) genomic island density (kbp of genomic island per kbp of genome). Values in boxes represent the median difference between 1,000 bootstrapped medians (95% CI). Green boxes represent significant (<italic>p</italic> &lt; 0.05) differences calculated with a Wilcoxon Rank Sum test.</p>
            </caption>
         </supplementary-material>
         <supplementary-material id="supp-11" mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/6800/FigureS5.png">
            <object-id pub-id-type="doi">10.7717/peerj.6800/supp-11</object-id><label>Figure S5</label><caption>
               <title>Evaluation of genome-wide nucleotide diversity</title>
               <p>The data point for long-read assembled contig tig404 (described in the main text) is highlighted; this virus belongs in the same viral cluster as pelagiphage HTVC010P, an abundant phage that fails to assemble in metagenomic datasets, potentially due to high microdiversity.</p>
            </caption>
         </supplementary-material>
         <supplementary-material id="supp-12" mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/6800/FigureS6.png">
            <object-id pub-id-type="doi">10.7717/peerj.6800/supp-12</object-id><label>Figure S6</label><caption>
               <title>Alignment of the genome of HTVC010P with tig404 assembled using the VirION pipeline</title>
               <p>Genomes were 89% identical at the nucleotide level in shared regions and both shared a conserved genomic island (green) bounded by structural proteins. Genome alignments were produced by Mauve (Darling et al., 2004) within the Geneious software (Kearse et al., 2012).</p>
            </caption>
         </supplementary-material>
         <supplementary-material id="supp-13" mimetype="application" mime-subtype="pdf" xlink:href="https://peerj.com/articles/6800/FigureS7.pdf">
            <object-id pub-id-type="doi">10.7717/peerj.6800/supp-13</object-id><label>Figure S7</label><caption>
               <title>Top 50 most abundant viral contigs in a Western English Channel virome</title>
               <p>Estimated relative abundances (number of recruited reads from the short-read dataset) of the Western English Channel viral contigs were calculated by competitive recruitment of short reads back to viral contigs derived from the VirION bioinformatics pipeline using FastViromeExplorer (<xref ref-type="bibr" rid="ref-73">Tithi et al., 2018</xref>). 60% of the top 50 most abundant viruses are detected only in the error-corrected overlap layout consensus assemblies.</p>
            </caption>
         </supplementary-material>
         <supplementary-material id="supp-14" mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/6800/FigureS8.png">
            <object-id pub-id-type="doi">10.7717/peerj.6800/supp-14</object-id><label>Figure S8</label><caption>
               <title>The longest complete viral genome from our study was 316 kbp in length</title>
               <p>H_NODE_6 was the longest recovered virus captured by scaffolding of a De Bruijn Graph assembly using VirION reads (red). Alignment of short read only contigs (blue) against the complete genome shows the full length is only captured by the scaffolding approach, whereas the short-read approach results in a breakage at ∼205 kbp (grey box). Coverage and Shannon Entropy are both shown as median values of a 200 bp sliding window, with 100 bp overlap.</p>
            </caption>
         </supplementary-material>
      </sec>
   </body>
   <back>
      <ack>
         <p>The authors thank the crew of the Plymouth Marine Laboratory vessel ‘Quest’ for collection of seawater samples, as well as Dr Simon Roux and Dr Benjamin Bolduc for guidance and advice on bioinformatic analyses. Portions of this research were conducted with high performance computing resources provided by Louisiana State University (<ext-link ext-link-type="uri" xlink:href="http://www.hpc.lsu.edu">http://www.hpc.lsu.edu</ext-link>), <xref ref-type="bibr" rid="ref-59">Ohio Supercomputer Center (1987)</xref>, and the HPC infrastructure at University of Exeter.</p>
      </ack>
      <sec sec-type="additional-information">
         <title>Additional Information and Declarations</title>
         <fn-group content-type="competing-interests">
            <title>Competing Interests</title><fn id="conflict-1" fn-type="conflict"><p>The authors declare there are no competing interests.</p></fn></fn-group>
         <fn-group content-type="author-contributions">
            <title>Author Contributions</title><fn id="contribution-1" fn-type="con"><p><xref ref-type="contrib" rid="author-1">Joanna Warwick-Dugdale</xref> performed the experiments, analyzed the data, prepared figures and/or tables, authored or reviewed drafts of the paper, approved the final draft.</p></fn><fn id="contribution-2" fn-type="con"><p><xref ref-type="contrib" rid="author-2">Natalie Solonenko</xref> performed the experiments.</p></fn><fn id="contribution-3" fn-type="con"><p><xref ref-type="contrib" rid="author-3">Karen Moore</xref> performed the experiments, contributed reagents/materials/analysis tools.</p></fn><fn id="contribution-4" fn-type="con"><p><xref ref-type="contrib" rid="author-4">Lauren Chittick</xref> performed the experiments.</p></fn><fn id="contribution-5" fn-type="con"><p><xref ref-type="contrib" rid="author-5">Ann C. Gregory</xref> analyzed the data.</p></fn><fn id="contribution-6" fn-type="con"><p><xref ref-type="contrib" rid="author-6">Michael J. Allen</xref> authored or reviewed drafts of the paper, approved the final draft.</p></fn><fn id="contribution-7" fn-type="con"><p><xref ref-type="contrib" rid="author-7">Matthew B. Sullivan</xref> contributed reagents/materials/analysis tools, authored or reviewed drafts of the paper, approved the final draft.</p></fn><fn id="contribution-8" fn-type="con"><p><xref ref-type="contrib" rid="author-8">Ben Temperton</xref> conceived and designed the experiments, analyzed the data, contributed reagents/materials/analysis tools, prepared figures and/or tables, authored or reviewed drafts of the paper, approved the final draft.</p></fn></fn-group>
         <fn-group content-type="other">
            <title>DNA Deposition</title><fn id="addinfo-1"><p>The following information was supplied regarding the deposition of DNA sequences:</p>
            <p>Sequencing data and assemblies are available at the European Nucleotide Archive under the project accession number <ext-link ext-link-type="uri" xlink:href="https://www.ebi.ac.uk/ena/data/search?query=PRJEB27181">PRJEB27181</ext-link>.</p></fn></fn-group>
         <fn-group content-type="other">
            <title>Data Availability</title><fn id="addinfo-2"><p>The following information was supplied regarding data availability:</p>
            <p>All code and analyses can be found in a GitHub repository: <ext-link ext-link-type="uri" xlink:href="https://github.com/btemperton/long_read_viromics">https://github.com/btemperton/long_read_viromics</ext-link>.</p></fn></fn-group>
      </sec>
      <ref-list content-type="authoryear">
         <title>References</title>
         <ref id="ref-1"><label>Abbasifar et al. (2014)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Abbasifar</surname>
                     <given-names>R</given-names>
                  </name>
                  <name>
                     <surname>Griffiths</surname>
                     <given-names>MW</given-names>
                  </name>
                  <name>
                     <surname>Sabour</surname>
                     <given-names>PM</given-names>
                  </name>
                  <name>
                     <surname>Ackermann</surname>
                     <given-names>H-W</given-names>
                  </name>
                  <name>
                     <surname>Vandersteegen</surname>
                     <given-names>K</given-names>
                  </name>
                  <name>
                     <surname>Lavigne</surname>
                     <given-names>R</given-names>
                  </name>
                  <name>
                     <surname>Noben</surname>
                     <given-names>J-P</given-names>
                  </name>
                  <name>
                     <surname>Alanis Villa</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Abbasifar</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Nash</surname>
                     <given-names>JHE</given-names>
                  </name>
                  <name>
                     <surname>Kropinski</surname>
                     <given-names>AM</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2014">2014</year>
               <article-title>Supersize me: <italic>Cronobacter sakazakii</italic> phage GAP32</article-title>
               <source>Virology</source>
               <volume>460–461</volume>
               <fpage>138</fpage>
               <lpage>146</lpage>
               <pub-id pub-id-type="doi">10.1016/j.virol.2014.05.003</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-2"><label>Aggarwala, Liang &amp; Bushman (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Aggarwala</surname>
                     <given-names>V</given-names>
                  </name>
                  <name>
                     <surname>Liang</surname>
                     <given-names>G</given-names>
                  </name>
                  <name>
                     <surname>Bushman</surname>
                     <given-names>FD</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Viral communities of the human gut: metagenomic analysis of composition and dynamics</article-title>
               <source>Mobile DNA</source>
               <volume>8</volume>
               <fpage>12</fpage>
               <pub-id pub-id-type="doi">10.1186/s13100-017-0095-y</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-3"><label>Ahlgren et al. (2016)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Ahlgren</surname>
                     <given-names>NA</given-names>
                  </name>
                  <name>
                     <surname>Ren</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Lu</surname>
                     <given-names>YY</given-names>
                  </name>
                  <name>
                     <surname>Fuhrman</surname>
                     <given-names>JA</given-names>
                  </name>
                  <name>
                     <surname>Sun</surname>
                     <given-names>F</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2016">2016</year>
               <article-title>Alignment-free oligonucleotide frequency dissimilarity measure improves prediction of hosts from metagenomically-derived viral sequences</article-title>
               <source>Nucleic Acids Research</source>
               <volume>45</volume>
               <fpage>39</fpage>
               <lpage>53</lpage>
               <pub-id pub-id-type="doi">10.1093/nar/gkw1002</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-4"><label>Ashton et al. (2015)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Ashton</surname>
                     <given-names>PM</given-names>
                  </name>
                  <name>
                     <surname>Nair</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Dallman</surname>
                     <given-names>T</given-names>
                  </name>
                  <name>
                     <surname>Rubino</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Rabsch</surname>
                     <given-names>W</given-names>
                  </name>
                  <name>
                     <surname>Mwaigwisya</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Wain</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>O’Grady</surname>
                     <given-names>J</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2015">2015</year>
               <article-title>MinION nanopore sequencing identifies the position and structure of a bacterial antibiotic resistance island</article-title>
               <source>Nature Biotechnology</source>
               <volume>33</volume>
               <fpage>296</fpage>
               <lpage>300</lpage>
               <pub-id pub-id-type="doi">10.1038/nbt.3103</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-5"><label>Bechhofer, Hue &amp; Shub (1994)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Bechhofer</surname>
                     <given-names>DH</given-names>
                  </name>
                  <name>
                     <surname>Hue</surname>
                     <given-names>KK</given-names>
                  </name>
                  <name>
                     <surname>Shub</surname>
                     <given-names>DA</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="1994">1994</year>
               <article-title>An intron in the thymidylate synthase gene of Bacillus bacteriophage beta 22: evidence for independent evolution of a gene, its group I intron, and the intron open reading frame</article-title>
               <source>Proceedings of the National Academy of Sciences of the United States of America</source>
               <volume>91</volume>
               <issue>24</issue>
               <fpage>11669</fpage>
               <lpage>11673</lpage>
               <pub-id pub-id-type="doi">10.1073/pnas.91.24.11669</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-6"><label>Bolduc et al. (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Bolduc</surname>
                     <given-names>B</given-names>
                  </name>
                  <name>
                     <surname>Jang</surname>
                     <given-names>HB</given-names>
                  </name>
                  <name>
                     <surname>Doulcier</surname>
                     <given-names>G</given-names>
                  </name>
                  <name>
                     <surname>You</surname>
                     <given-names>Z-Q</given-names>
                  </name>
                  <name>
                     <surname>Roux</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Sullivan</surname>
                     <given-names>MB</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>vConTACT: an iVirus tool to classify double-stranded DNA viruses that infect Archaea and Bacteria</article-title>
               <source>PeerJ</source>
               <volume>5</volume>
               <elocation-id>e3243</elocation-id>
               <pub-id pub-id-type="doi">10.7717/peerj.3243</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-7"><label>Bonilla et al. (2016)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Bonilla</surname>
                     <given-names>N</given-names>
                  </name>
                  <name>
                     <surname>Rojas</surname>
                     <given-names>MI</given-names>
                  </name>
                  <name>
                     <surname>Netto</surname>
                     <given-names>G</given-names>
                  </name>
                  <name>
                     <surname>Cruz</surname>
                     <given-names>F</given-names>
                  </name>
                  <name>
                     <surname>Hung</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Rohwer</surname>
                     <given-names>F</given-names>
                  </name>
                  <name>
                     <surname>Barr</surname>
                     <given-names>J</given-names>
                  </name>
               </person-group>
               <article-title>Phage on Tap—a quick and efficient protocol for the preparation of bacteriophage laboratory stocks</article-title>
               <source>PeerJ</source>
               <year iso-8601-date="2016">2016</year>
               <volume>4</volume>
               <elocation-id>e2261</elocation-id>
               <pub-id pub-id-type="doi">10.7717/peerj.2261</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-8"><label>Bray et al. (2016)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Bray</surname>
                     <given-names>NL</given-names>
                  </name>
                  <name>
                     <surname>Pimentel</surname>
                     <given-names>H</given-names>
                  </name>
                  <name>
                     <surname>Melsted</surname>
                     <given-names>P</given-names>
                  </name>
                  <name>
                     <surname>Pachter</surname>
                     <given-names>L</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2016">2016</year>
               <article-title>Near-optimal probabilistic RNA-seq quantification</article-title>
               <source>Nature Biotechnology</source>
               <volume>34</volume>
               <fpage>525</fpage>
               <lpage>527</lpage>
               <pub-id pub-id-type="doi">10.1038/nbt.3519</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-9"><label>Breitbart et al. (2007)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Breitbart</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Thompson</surname>
                     <given-names>LR</given-names>
                  </name>
                  <name>
                     <surname>Suttle</surname>
                     <given-names>CA</given-names>
                  </name>
                  <name>
                     <surname>Sullivan</surname>
                     <given-names>MB</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2007">2007</year>
               <article-title>Exploring the vast diversity of marine viruses</article-title>
               <source>Oceanography</source>
               <volume>20</volume>
               <fpage>135</fpage>
               <lpage>139</lpage>
               <pub-id pub-id-type="doi">10.5670/oceanog.2007.58</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-10"><label>Brocchieri &amp; Karlin (2005)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Brocchieri</surname>
                     <given-names>L</given-names>
                  </name>
                  <name>
                     <surname>Karlin</surname>
                     <given-names>S</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2005">2005</year>
               <article-title>Protein length in eukaryotic and prokaryotic proteomes</article-title>
               <source>Nucleic Acids Research</source>
               <volume>33</volume>
               <issue>10</issue>
               <fpage>3390</fpage>
               <lpage>3400</lpage>
               <pub-id pub-id-type="doi">10.1093/nar/gki615</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-11"><label>Brum et al. (2015)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Brum</surname>
                     <given-names>JR</given-names>
                  </name>
                  <name>
                     <surname>Ignacio-Espinoza</surname>
                     <given-names>JC</given-names>
                  </name>
                  <name>
                     <surname>Roux</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Doulcier</surname>
                     <given-names>G</given-names>
                  </name>
                  <name>
                     <surname>Acinas</surname>
                     <given-names>SG</given-names>
                  </name>
                  <name>
                     <surname>Alberti</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Chaffron</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Cruaud</surname>
                     <given-names>C</given-names>
                  </name>
                  <name>
                     <surname>De Vargas</surname>
                     <given-names>C</given-names>
                  </name>
                  <name>
                     <surname>Gasol</surname>
                     <given-names>JM</given-names>
                  </name>
                  <name>
                     <surname>Gorsky</surname>
                     <given-names>G</given-names>
                  </name>
                  <name>
                     <surname>Gregory</surname>
                     <given-names>AC</given-names>
                  </name>
                  <name>
                     <surname>Guidi</surname>
                     <given-names>L</given-names>
                  </name>
                  <name>
                     <surname>Hingamp</surname>
                     <given-names>P</given-names>
                  </name>
                  <name>
                     <surname>Iudicone</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Not</surname>
                     <given-names>F</given-names>
                  </name>
                  <name>
                     <surname>Ogata</surname>
                     <given-names>H</given-names>
                  </name>
                  <name>
                     <surname>Pesant</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Poulos</surname>
                     <given-names>BT</given-names>
                  </name>
                  <name>
                     <surname>Schwenck</surname>
                     <given-names>SM</given-names>
                  </name>
                  <name>
                     <surname>Speich</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Dimier</surname>
                     <given-names>C</given-names>
                  </name>
                  <name>
                     <surname>Kandels-Lewis</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Picheral</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Searson</surname>
                     <given-names>S</given-names>
                  </name>
                  <collab>Tara Oceans Coordinators</collab>
                  <name>
                     <surname>Bork</surname>
                     <given-names>P</given-names>
                  </name>
                  <name>
                     <surname>Bowler</surname>
                     <given-names>C</given-names>
                  </name>
                  <name>
                     <surname>Sunagawa</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Wincker</surname>
                     <given-names>P</given-names>
                  </name>
                  <name>
                     <surname>Karsenti</surname>
                     <given-names>E</given-names>
                  </name>
                  <name>
                     <surname>Sullivan</surname>
                     <given-names>MB</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2015">2015</year>
               <article-title>Ocean plankton. Patterns and ecological drivers of ocean viral communities</article-title>
               <source>Science</source>
               <volume>348</volume>
               <issue>6237</issue>
               <fpage>1261498</fpage>
               <pub-id pub-id-type="doi">10.1126/science.1261498</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-12"><label>Camacho et al. (2009)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Camacho</surname>
                     <given-names>C</given-names>
                  </name>
                  <name>
                     <surname>Coulouris</surname>
                     <given-names>G</given-names>
                  </name>
                  <name>
                     <surname>Avagyan</surname>
                     <given-names>V</given-names>
                  </name>
                  <name>
                     <surname>Ma</surname>
                     <given-names>N</given-names>
                  </name>
                  <name>
                     <surname>Papadopoulos</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Bealer</surname>
                     <given-names>K</given-names>
                  </name>
                  <name>
                     <surname>Madden</surname>
                     <given-names>TL</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2009">2009</year>
               <article-title>BLAST+: architecture and applications</article-title>
               <source>BMC Bioinformatics</source>
               <volume>10</volume>
               <fpage>421</fpage>
               <pub-id pub-id-type="doi">10.1186/1471-2105-10-421</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-13"><label>Chu et al. (1984)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Chu</surname>
                     <given-names>FK</given-names>
                  </name>
                  <name>
                     <surname>Maley</surname>
                     <given-names>GF</given-names>
                  </name>
                  <name>
                     <surname>Maley</surname>
                     <given-names>F</given-names>
                  </name>
                  <name>
                     <surname>Belfort</surname>
                     <given-names>M</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="1984">1984</year>
               <article-title>Intervening sequence in the thymidylate synthase gene of bacteriophage T4</article-title>
               <source>Proceedings of the National Academy of Sciences of the United States of America</source>
               <volume>81</volume>
               <fpage>3049</fpage>
               <lpage>3053</lpage>
               <pub-id pub-id-type="doi">10.1073/pnas.81.10.3049</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-14"><label>Coleman et al. (2006)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Coleman</surname>
                     <given-names>ML</given-names>
                  </name>
                  <name>
                     <surname>Sullivan</surname>
                     <given-names>MB</given-names>
                  </name>
                  <name>
                     <surname>Martiny</surname>
                     <given-names>AC</given-names>
                  </name>
                  <name>
                     <surname>Steglich</surname>
                     <given-names>C</given-names>
                  </name>
                  <name>
                     <surname>Barry</surname>
                     <given-names>K</given-names>
                  </name>
                  <name>
                     <surname>Delong</surname>
                     <given-names>EF</given-names>
                  </name>
                  <name>
                     <surname>Chisholm</surname>
                     <given-names>SW</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2006">2006</year>
               <article-title>Genomic islands and the ecology and evolution of <italic>Prochlorococcus</italic></article-title>
               <source>Science</source>
               <volume>311</volume>
               <issue>5768</issue>
               <fpage>1768</fpage>
               <lpage>1770</lpage>
               <pub-id pub-id-type="doi">10.1126/science.1122050</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-15"><label>Cumming (2014)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Cumming</surname>
                     <given-names>G</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2014">2014</year>
               <article-title>The new statistics: why and how</article-title>
               <source>Psychological Science</source>
               <volume>25</volume>
               <fpage>7</fpage>
               <lpage>29</lpage>
               <pub-id pub-id-type="doi">10.1177/0956797613504966</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-16"><label>Cunningham et al. (2015)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Cunningham</surname>
                     <given-names>BR</given-names>
                  </name>
                  <name>
                     <surname>Brum</surname>
                     <given-names>JR</given-names>
                  </name>
                  <name>
                     <surname>Schwenck</surname>
                     <given-names>SM</given-names>
                  </name>
                  <name>
                     <surname>Sullivan</surname>
                     <given-names>MB</given-names>
                  </name>
                  <name>
                     <surname>John</surname>
                     <given-names>SG</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2015">2015</year>
               <article-title>An inexpensive, accurate, and precise wet-mount method for enumerating aquatic viruses</article-title>
               <source>Applied and Environmental Microbiology</source>
               <volume>81</volume>
               <fpage>2995</fpage>
               <lpage>3000</lpage>
               <pub-id pub-id-type="doi">10.1128/AEM.03642-14</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-17"><label>Dabney, Storey &amp; Warnes (2015)</label><element-citation publication-type="software">
               <person-group person-group-type="author">
                  <name>
                     <surname>Dabney</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Storey</surname>
                     <given-names>JD</given-names>
                  </name>
                  <name>
                     <surname>Warnes</surname>
                     <given-names>GR</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2015">2015</year>
               <data-title>qvalue: Q-value estimation for false discovery rate control</data-title>
               <version designator="2.12.0">R package version 2.12.0</version>
               <uri>https://www.bioconductor.org/packages/release/bioc/html/qvalue.html</uri>
            </element-citation>
         </ref>
         <ref id="ref-18"><label>Delcher, Salzberg &amp; Phillippy (2003)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Delcher</surname>
                     <given-names>AL</given-names>
                  </name>
                  <name>
                     <surname>Salzberg</surname>
                     <given-names>SL</given-names>
                  </name>
                  <name>
                     <surname>Phillippy</surname>
                     <given-names>AM</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2003">2003</year>
               <article-title>Using MUMmer to identify similar regions in large sequence sets</article-title>
               <source>Current Protocols in Bioinformatics</source>
               <volume>Chapter 10</volume>
               <comment>Unit 10.3</comment>
            </element-citation>
         </ref>
         <ref id="ref-19"><label>Deng et al. (2014)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Deng</surname>
                     <given-names>L</given-names>
                  </name>
                  <name>
                     <surname>Ignacio-Espinoza</surname>
                     <given-names>JC</given-names>
                  </name>
                  <name>
                     <surname>Gregory</surname>
                     <given-names>AC</given-names>
                  </name>
                  <name>
                     <surname>Poulos</surname>
                     <given-names>BT</given-names>
                  </name>
                  <name>
                     <surname>Weitz</surname>
                     <given-names>JS</given-names>
                  </name>
                  <name>
                     <surname>Hugenholtz</surname>
                     <given-names>P</given-names>
                  </name>
                  <name>
                     <surname>Sullivan</surname>
                     <given-names>MB</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2014">2014</year>
               <article-title>Viral tagging reveals discrete populations in <italic>Synechococcus</italic> viral genome sequence space</article-title>
               <source>Nature</source>
               <volume>513</volume>
               <fpage>242</fpage>
               <lpage>245</lpage>
               <pub-id pub-id-type="doi">10.1038/nature13459</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-20"><label>Driscoll et al. (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Driscoll</surname>
                     <given-names>CB</given-names>
                  </name>
                  <name>
                     <surname>Otten</surname>
                     <given-names>TG</given-names>
                  </name>
                  <name>
                     <surname>Brown</surname>
                     <given-names>NM</given-names>
                  </name>
                  <name>
                     <surname>Dreher</surname>
                     <given-names>TW</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Towards long-read metagenomics: complete assembly of three novel genomes from bacteria dependent on a diazotrophic cyanobacterium in a freshwater lake co-culture</article-title>
               <source>Standards in Genomic Sciences</source>
               <volume>12</volume>
               <fpage>9</fpage>
               <pub-id pub-id-type="doi">10.1186/s40793-017-0224-8</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-21"><label>Duhaime et al. (2012)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Duhaime</surname>
                     <given-names>MB</given-names>
                  </name>
                  <name>
                     <surname>Deng</surname>
                     <given-names>L</given-names>
                  </name>
                  <name>
                     <surname>Poulos</surname>
                     <given-names>BT</given-names>
                  </name>
                  <name>
                     <surname>Sullivan</surname>
                     <given-names>MB</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2012">2012</year>
               <article-title>Towards quantitative metagenomics of wild viruses and other ultra-low concentration DNA samples: a rigorous assessment and optimization of the linker amplification method</article-title>
               <source>Environmental Microbiology</source>
               <volume>14</volume>
               <fpage>2526</fpage>
               <lpage>2537</lpage>
               <pub-id pub-id-type="doi">10.1111/j.1462-2920.2012.02791.x</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-22"><label>Dwivedi et al. (2013)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Dwivedi</surname>
                     <given-names>B</given-names>
                  </name>
                  <name>
                     <surname>Xue</surname>
                     <given-names>B</given-names>
                  </name>
                  <name>
                     <surname>Lundin</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Edwards</surname>
                     <given-names>RA</given-names>
                  </name>
                  <name>
                     <surname>Breitbart</surname>
                     <given-names>M</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2013">2013</year>
               <article-title>A bioinformatic analysis of ribonucleotide reductase genes in phage genomes and metagenomes</article-title>
               <source>BMC Evolutionary Biology</source>
               <volume>13</volume>
               <fpage>33</fpage>
               <pub-id pub-id-type="doi">10.1186/1471-2148-13-33</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-23"><label>Forterre (2013)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Forterre</surname>
                     <given-names>P</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2013">2013</year>
               <article-title>The virocell concept and environmental microbiology</article-title>
               <source>The ISME Journal</source>
               <volume>7</volume>
               <fpage>233</fpage>
               <lpage>236</lpage>
               <pub-id pub-id-type="doi">10.1038/ismej.2012.110</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-24"><label>Frank et al. (2016)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Frank</surname>
                     <given-names>JA</given-names>
                  </name>
                  <name>
                     <surname>Pan</surname>
                     <given-names>Y</given-names>
                  </name>
                  <name>
                     <surname>Tooming-Klunderud</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Eijsink</surname>
                     <given-names>VGH</given-names>
                  </name>
                  <name>
                     <surname>McHardy</surname>
                     <given-names>AC</given-names>
                  </name>
                  <name>
                     <surname>Nederbragt</surname>
                     <given-names>AJ</given-names>
                  </name>
                  <name>
                     <surname>Pope</surname>
                     <given-names>PB</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2016">2016</year>
               <article-title>Improved metagenome assemblies and taxonomic binning using long-read circular consensus sequence data</article-title>
               <source>Scientific Reports</source>
               <volume>6</volume>
               <fpage>25373</fpage>
               <pub-id pub-id-type="doi">10.1038/srep25373</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-25"><label>Galiez et al. (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Galiez</surname>
                     <given-names>C</given-names>
                  </name>
                  <name>
                     <surname>Siebert</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Enault</surname>
                     <given-names>F</given-names>
                  </name>
                  <name>
                     <surname>Vincent</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Söding</surname>
                     <given-names>J</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>WIsH: who is the host? Predicting prokaryotic hosts from metagenomic phage contigs</article-title>
               <source>Bioinformatics</source>
               <volume>33</volume>
               <fpage>3113</fpage>
               <lpage>3114</lpage>
               <pub-id pub-id-type="doi">10.1093/bioinformatics/btx383</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-26"><label>Giovannoni (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Giovannoni</surname>
                     <given-names>S</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>SAR11 bacteria: the most abundant plankton in the oceans</article-title>
               <source>Annual Review of Marine Science</source>
               <volume>9</volume>
               <fpage>231</fpage>
               <lpage>255</lpage>
               <pub-id pub-id-type="doi">10.1146/annurev-marine-010814-015934</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-27"><label>Houldcroft, Beale &amp; Breuer (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Houldcroft</surname>
                     <given-names>CJ</given-names>
                  </name>
                  <name>
                     <surname>Beale</surname>
                     <given-names>MA</given-names>
                  </name>
                  <name>
                     <surname>Breuer</surname>
                     <given-names>J</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Clinical and biological insights from viral genome sequencing</article-title>
               <source>Nature Reviews Microbiology</source>
               <volume>15</volume>
               <fpage>183</fpage>
               <lpage>192</lpage>
               <pub-id pub-id-type="doi">10.1038/nrmicro.2016.182</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-28"><label>Hurwitz et al. (2013)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Hurwitz</surname>
                     <given-names>BL</given-names>
                  </name>
                  <name>
                     <surname>Deng</surname>
                     <given-names>L</given-names>
                  </name>
                  <name>
                     <surname>Poulos</surname>
                     <given-names>BT</given-names>
                  </name>
                  <name>
                     <surname>Sullivan</surname>
                     <given-names>MB</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2013">2013</year>
               <article-title>Evaluation of methods to concentrate and purify ocean virus communities through comparative, replicated metagenomics</article-title>
               <source>Environmental Microbiology</source>
               <volume>15</volume>
               <fpage>1428</fpage>
               <lpage>1440</lpage>
               <pub-id pub-id-type="doi">10.1111/j.1462-2920.2012.02836.x</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-29"><label>Hurwitz, Hallam &amp; Sullivan (2013)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Hurwitz</surname>
                     <given-names>BL</given-names>
                  </name>
                  <name>
                     <surname>Hallam</surname>
                     <given-names>SJ</given-names>
                  </name>
                  <name>
                     <surname>Sullivan</surname>
                     <given-names>MB</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2013">2013</year>
               <article-title>Metabolic reprogramming by viruses in the sunlit and dark ocean</article-title>
               <source>Genome Biology</source>
               <volume>14</volume>
               <fpage>R123</fpage>
               <pub-id pub-id-type="doi">10.1186/gb-2013-14-11-r123</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-30"><label>Hurwitz &amp; Sullivan (2013)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Hurwitz</surname>
                     <given-names>BL</given-names>
                  </name>
                  <name>
                     <surname>Sullivan</surname>
                     <given-names>MB</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2013">2013</year>
               <article-title>The Pacific Ocean Virome (POV): a marine viral metagenomic dataset and associated protein clusters for quantitative viral ecology</article-title>
               <source>PLOS ONE</source>
               <volume>8</volume>
               <issue>2</issue>
               <elocation-id>e57355</elocation-id>
               <pub-id pub-id-type="doi">10.1371/journal.pone.0057355</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-31"><label>Hurwitz &amp; U’Ren (2016)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Hurwitz</surname>
                     <given-names>BL</given-names>
                  </name>
                  <name>
                     <surname>U’Ren</surname>
                     <given-names>JM</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2016">2016</year>
               <article-title>Viral metabolic reprogramming in marine ecosystems</article-title>
               <source>Current Opinion in Microbiology</source>
               <volume>31</volume>
               <fpage>161</fpage>
               <lpage>168</lpage>
               <pub-id pub-id-type="doi">10.1016/j.mib.2016.04.002</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-32"><label>Jain et al. (2015)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Jain</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Fiddes</surname>
                     <given-names>IT</given-names>
                  </name>
                  <name>
                     <surname>Miga</surname>
                     <given-names>KH</given-names>
                  </name>
                  <name>
                     <surname>Olsen</surname>
                     <given-names>HE</given-names>
                  </name>
                  <name>
                     <surname>Paten</surname>
                     <given-names>B</given-names>
                  </name>
                  <name>
                     <surname>Akeson</surname>
                     <given-names>M</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2015">2015</year>
               <article-title>Improved data analysis for the MinION nanopore sequencer</article-title>
               <source>Nature Methods</source>
               <volume>12</volume>
               <fpage>351</fpage>
               <lpage>356</lpage>
               <pub-id pub-id-type="doi">10.1038/nmeth.3290</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-33"><label>Jain et al. (2018)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Jain</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Koren</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Miga</surname>
                     <given-names>KH</given-names>
                  </name>
                  <name>
                     <surname>Quick</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Rand</surname>
                     <given-names>AC</given-names>
                  </name>
                  <name>
                     <surname>Sasani</surname>
                     <given-names>TA</given-names>
                  </name>
                  <name>
                     <surname>Tyson</surname>
                     <given-names>JR</given-names>
                  </name>
                  <name>
                     <surname>Beggs</surname>
                     <given-names>AD</given-names>
                  </name>
                  <name>
                     <surname>Dilthey</surname>
                     <given-names>AT</given-names>
                  </name>
                  <name>
                     <surname>Fiddes</surname>
                     <given-names>IT</given-names>
                  </name>
                  <name>
                     <surname>Malla</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Marriott</surname>
                     <given-names>H</given-names>
                  </name>
                  <name>
                     <surname>Nieto</surname>
                     <given-names>T</given-names>
                  </name>
                  <name>
                     <surname>O’Grady</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Olsen</surname>
                     <given-names>HE</given-names>
                  </name>
                  <name>
                     <surname>Pedersen</surname>
                     <given-names>BS</given-names>
                  </name>
                  <name>
                     <surname>Rhie</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Richardson</surname>
                     <given-names>H</given-names>
                  </name>
                  <name>
                     <surname>Quinlan</surname>
                     <given-names>AR</given-names>
                  </name>
                  <name>
                     <surname>Snutch</surname>
                     <given-names>TP</given-names>
                  </name>
                  <name>
                     <surname>Tee</surname>
                     <given-names>L</given-names>
                  </name>
                  <name>
                     <surname>Paten</surname>
                     <given-names>B</given-names>
                  </name>
                  <name>
                     <surname>Phillippy</surname>
                     <given-names>AM</given-names>
                  </name>
                  <name>
                     <surname>Simpson</surname>
                     <given-names>JT</given-names>
                  </name>
                  <name>
                     <surname>Loman</surname>
                     <given-names>NJ</given-names>
                  </name>
                  <name>
                     <surname>Loose</surname>
                     <given-names>M</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2018">2018</year>
               <article-title>Nanopore sequencing and assembly of a human genome with ultra-long reads</article-title>
               <source>Nature Biotechnology</source>
               <volume>36</volume>
               <fpage>338</fpage>
               <lpage>345</lpage>
               <pub-id pub-id-type="doi">10.1038/nbt.4060</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-34"><label>John et al. (2011)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>John</surname>
                     <given-names>SG</given-names>
                  </name>
                  <name>
                     <surname>Mendez</surname>
                     <given-names>CB</given-names>
                  </name>
                  <name>
                     <surname>Deng</surname>
                     <given-names>L</given-names>
                  </name>
                  <name>
                     <surname>Poulos</surname>
                     <given-names>B</given-names>
                  </name>
                  <name>
                     <surname>Kauffman</surname>
                     <given-names>AK</given-names>
                  </name>
                  <name>
                     <surname>Kern</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Brum</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Polz</surname>
                     <given-names>MF</given-names>
                  </name>
                  <name>
                     <surname>Boyle</surname>
                     <given-names>EA</given-names>
                  </name>
                  <name>
                     <surname>Sullivan</surname>
                     <given-names>MB</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2011">2011</year>
               <article-title>A simple and efficient method for concentration of ocean viruses by chemical flocculation</article-title>
               <source>Environmental Microbiology Reports</source>
               <volume>3</volume>
               <fpage>195</fpage>
               <lpage>202</lpage>
               <pub-id pub-id-type="doi">10.1111/j.1758-2229.2010.00208.x</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-35"><label>Keller et al. (2018)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Keller</surname>
                     <given-names>MW</given-names>
                  </name>
                  <name>
                     <surname>Rambo-Martin</surname>
                     <given-names>BL</given-names>
                  </name>
                  <name>
                     <surname>Wilson</surname>
                     <given-names>MM</given-names>
                  </name>
                  <name>
                     <surname>Ridenour</surname>
                     <given-names>CA</given-names>
                  </name>
                  <name>
                     <surname>Shepard</surname>
                     <given-names>SS</given-names>
                  </name>
                  <name>
                     <surname>Stark</surname>
                     <given-names>TJ</given-names>
                  </name>
                  <name>
                     <surname>Neuhaus</surname>
                     <given-names>EB</given-names>
                  </name>
                  <name>
                     <surname>Dugan</surname>
                     <given-names>VG</given-names>
                  </name>
                  <name>
                     <surname>Wentworth</surname>
                     <given-names>DE</given-names>
                  </name>
                  <name>
                     <surname>Barnes</surname>
                     <given-names>JR</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2018">2018</year>
               <article-title>Direct RNA sequencing of the coding complete influenza A virus genome</article-title>
               <source>Scientific Reports</source>
               <volume>8</volume>
               <fpage>14408</fpage>
               <pub-id pub-id-type="doi">10.1038/s41598-018-32615-8</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-36"><label>Koren &amp; Phillippy (2015)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Koren</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Phillippy</surname>
                     <given-names>AM</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2015">2015</year>
               <article-title>One chromosome, one contig: complete microbial genomes from long-read sequencing and assembly</article-title>
               <source>Current Opinion in Microbiology</source>
               <volume>23</volume>
               <fpage>110</fpage>
               <lpage>120</lpage>
               <pub-id pub-id-type="doi">10.1016/j.mib.2014.11.014</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-37"><label>Koren et al. (2019)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Koren</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Phillippy</surname>
                     <given-names>AM</given-names>
                  </name>
                  <name>
                     <surname>Simpson</surname>
                     <given-names>JT</given-names>
                  </name>
                  <name>
                     <surname>Loman</surname>
                     <given-names>NJ</given-names>
                  </name>
                  <name>
                     <surname>Loose</surname>
                     <given-names>M</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2019">2019</year>
               <article-title>Reply to ‘Errors in long-read assemblies can critically affect protein prediction’</article-title>
               <source>Nature Biotechnology</source>
               <volume>37</volume>
               <issue>2</issue>
               <fpage>127</fpage>
               <lpage>128</lpage>
               <pub-id pub-id-type="doi">10.1038/s41587-018-0005-y</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-38"><label>Koren et al. (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Koren</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Walenz</surname>
                     <given-names>BP</given-names>
                  </name>
                  <name>
                     <surname>Berlin</surname>
                     <given-names>K</given-names>
                  </name>
                  <name>
                     <surname>Miller</surname>
                     <given-names>JR</given-names>
                  </name>
                  <name>
                     <surname>Bergman</surname>
                     <given-names>NH</given-names>
                  </name>
                  <name>
                     <surname>Phillippy</surname>
                     <given-names>AM</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Canu: scalable and accurate long-read assembly via adaptive k-mer weighting and repeat separation</article-title>
               <source>Genome Research</source>
               <volume>27</volume>
               <fpage>722</fpage>
               <lpage>736</lpage>
               <pub-id pub-id-type="doi">10.1101/gr.215087.116</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-39"><label>Krishnamurthy &amp; Wang (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Krishnamurthy</surname>
                     <given-names>SR</given-names>
                  </name>
                  <name>
                     <surname>Wang</surname>
                     <given-names>D</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Origins and challenges of viral dark matter</article-title>
               <source>Virus Research</source>
               <volume>239</volume>
               <fpage>136</fpage>
               <lpage>142</lpage>
               <pub-id pub-id-type="doi">10.1016/j.virusres.2017.02.002</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-40"><label>Langmead &amp; Salzberg (2012)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Langmead</surname>
                     <given-names>B</given-names>
                  </name>
                  <name>
                     <surname>Salzberg</surname>
                     <given-names>SL</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2012">2012</year>
               <article-title>Fast gapped-read alignment with Bowtie 2</article-title>
               <source>Nature Methods</source>
               <volume>9</volume>
               <fpage>357</fpage>
               <lpage>359</lpage>
               <pub-id pub-id-type="doi">10.1038/nmeth.1923</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-41"><label>Laver et al. (2016)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Laver</surname>
                     <given-names>TW</given-names>
                  </name>
                  <name>
                     <surname>Caswell</surname>
                     <given-names>RC</given-names>
                  </name>
                  <name>
                     <surname>Moore</surname>
                     <given-names>KA</given-names>
                  </name>
                  <name>
                     <surname>Poschmann</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Johnson</surname>
                     <given-names>MB</given-names>
                  </name>
                  <name>
                     <surname>Owens</surname>
                     <given-names>MM</given-names>
                  </name>
                  <name>
                     <surname>Ellard</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Paszkiewicz</surname>
                     <given-names>KH</given-names>
                  </name>
                  <name>
                     <surname>Weedon</surname>
                     <given-names>MN</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2016">2016</year>
               <article-title>Pitfalls of haplotype phasing from amplicon-based long-read sequencing</article-title>
               <source>Scientific Reports</source>
               <volume>6</volume>
               <fpage>21746</fpage>
               <pub-id pub-id-type="doi">10.1038/srep21746</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-42"><label>Loman, Quick &amp; Simpson (2015)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Loman</surname>
                     <given-names>NJ</given-names>
                  </name>
                  <name>
                     <surname>Quick</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Simpson</surname>
                     <given-names>JT</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2015">2015</year>
               <article-title>A complete bacterial genome assembled de novo using only nanopore sequencing data</article-title>
               <source>Nature Methods</source>
               <volume>12</volume>
               <fpage>733</fpage>
               <lpage>735</lpage>
               <pub-id pub-id-type="doi">10.1038/nmeth.3444</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-43"><label>Luo et al. (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Luo</surname>
                     <given-names>E</given-names>
                  </name>
                  <name>
                     <surname>Aylward</surname>
                     <given-names>FO</given-names>
                  </name>
                  <name>
                     <surname>Mende</surname>
                     <given-names>DR</given-names>
                  </name>
                  <name>
                     <surname>DeLong</surname>
                     <given-names>EF</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Bacteriophage distributions and temporal variability in the ocean’s interior</article-title>
               <source>mBio</source>
               <volume>8</volume>
               <issue>6</issue>
               <elocation-id>e01903-17</elocation-id>
               <pub-id pub-id-type="doi">10.1128/mBio.01903-17</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-44"><label>Mahmoudabadi &amp; Phillips (2018)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Mahmoudabadi</surname>
                     <given-names>G</given-names>
                  </name>
                  <name>
                     <surname>Phillips</surname>
                     <given-names>R</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2018">2018</year>
               <article-title>A comprehensive and quantitative exploration of thousands of viral genomes</article-title>
               <source>eLife</source>
               <volume>7</volume>
               <elocation-id>e31955</elocation-id>
               <pub-id pub-id-type="doi">10.7554/eLife.31955</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-45"><label>Martinez-Hernandez et al. (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Martinez-Hernandez</surname>
                     <given-names>F</given-names>
                  </name>
                  <name>
                     <surname>Fornas</surname>
                     <given-names>O</given-names>
                  </name>
                  <name>
                     <surname>Lluesma Gomez</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Bolduc</surname>
                     <given-names>B</given-names>
                  </name>
                  <name>
                     <surname>De la Cruz Peña</surname>
                     <given-names>MJ</given-names>
                  </name>
                  <name>
                     <surname>Martínez</surname>
                     <given-names>JM</given-names>
                  </name>
                  <name>
                     <surname>Anton</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Gasol</surname>
                     <given-names>JM</given-names>
                  </name>
                  <name>
                     <surname>Rosselli</surname>
                     <given-names>R</given-names>
                  </name>
                  <name>
                     <surname>Rodriguez-Valera</surname>
                     <given-names>F</given-names>
                  </name>
                  <name>
                     <surname>Sullivan</surname>
                     <given-names>MB</given-names>
                  </name>
                  <name>
                     <surname>Acinas</surname>
                     <given-names>SG</given-names>
                  </name>
                  <name>
                     <surname>Martinez-Garcia</surname>
                     <given-names>M</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Single-virus genomics reveals hidden cosmopolitan and abundant viruses</article-title>
               <source>Nature Communications</source>
               <volume>8</volume>
               <fpage>15892</fpage>
               <pub-id pub-id-type="doi">10.1038/ncomms15892</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-46"><label>Martinez-Hernandez et al. (2018)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Martinez-Hernandez</surname>
                     <given-names>F</given-names>
                  </name>
                  <name>
                     <surname>Fornas</surname>
                     <given-names>Ò</given-names>
                  </name>
                  <name>
                     <surname>Lluesma Gomez</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Garcia-Heredia</surname>
                     <given-names>I</given-names>
                  </name>
                  <name>
                     <surname>Maestre-Carballa</surname>
                     <given-names>L</given-names>
                  </name>
                  <name>
                     <surname>López-Pérez</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Haro-Moreno</surname>
                     <given-names>JM</given-names>
                  </name>
                  <name>
                     <surname>Rodriguez-Valera</surname>
                     <given-names>F</given-names>
                  </name>
                  <name>
                     <surname>Martinez-Garcia</surname>
                     <given-names>M</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2018">2018</year>
               <article-title>Single-cell genomics uncover <italic>Pelagibacter</italic> as the putative host of the extremely abundant uncultured 37-F6 viral population in the ocean</article-title>
               <source>The ISME Journal</source>
               <volume>13</volume>
               <fpage>232</fpage>
               <lpage>236</lpage>
               <pub-id pub-id-type="doi">10.1038/s41396-018-0278-7</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-47"><label>McCabe et al. (2018)</label><element-citation publication-type="working-paper">
               <person-group person-group-type="author">
                  <name>
                     <surname>McCabe</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Cormican</surname>
                     <given-names>P</given-names>
                  </name>
                  <name>
                     <surname>Johnston</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Earley</surname>
                     <given-names>B</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2018">2018</year>
               <article-title>Simultaneous detection of DNA and RNA virus species involved in bovine respiratory disease by PCR-free rapid tagmentation-based library preparation and MinION nanopore sequencing</article-title>
               <source>bioRxiv</source>
               <pub-id pub-id-type="doi">10.1101/269936</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-48"><label>Merchant et al. (2016)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Merchant</surname>
                     <given-names>N</given-names>
                  </name>
                  <name>
                     <surname>Lyons</surname>
                     <given-names>E</given-names>
                  </name>
                  <name>
                     <surname>Goff</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Vaughn</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Ware</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Micklos</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Antin</surname>
                     <given-names>P</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2016">2016</year>
               <article-title>The iPlant collaborative: cyberinfrastructure for enabling data to discovery for the life sciences</article-title>
               <source>PLOS Biology</source>
               <volume>14</volume>
               <issue>1</issue>
               <elocation-id>e1002342</elocation-id>
               <pub-id pub-id-type="doi">10.1371/journal.pbio.1002342</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-49"><label>Meyer et al. (2009)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Meyer</surname>
                     <given-names>MM</given-names>
                  </name>
                  <name>
                     <surname>Ames</surname>
                     <given-names>TD</given-names>
                  </name>
                  <name>
                     <surname>Smith</surname>
                     <given-names>DP</given-names>
                  </name>
                  <name>
                     <surname>Weinberg</surname>
                     <given-names>Z</given-names>
                  </name>
                  <name>
                     <surname>Schwalbach</surname>
                     <given-names>MS</given-names>
                  </name>
                  <name>
                     <surname>Giovannoni</surname>
                     <given-names>SJ</given-names>
                  </name>
                  <name>
                     <surname>Breaker</surname>
                     <given-names>RR</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2009">2009</year>
               <article-title>Identification of candidate structured RNAs in the marine organism <italic>Candidatus</italic> Pelagibacter ubique</article-title>
               <source>BMC Genomics</source>
               <volume>10</volume>
               <fpage>268</fpage>
               <pub-id pub-id-type="doi">10.1186/1471-2164-10-268</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-50"><label>Mirzaei &amp; Maurice (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Mirzaei</surname>
                     <given-names>MK</given-names>
                  </name>
                  <name>
                     <surname>Maurice</surname>
                     <given-names>CF</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Ménage à trois in the human gut: interactions between host, bacteria and phages</article-title>
               <source>Nature Reviews Microbiology</source>
               <volume>15</volume>
               <issue>7</issue>
               <fpage>397</fpage>
               <lpage>408</lpage>
               <pub-id pub-id-type="doi">10.1038/nrmicro.2017.30</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-51"><label>Mizuno, Ghai &amp; Rodriguez-Valera (2014)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Mizuno</surname>
                     <given-names>CM</given-names>
                  </name>
                  <name>
                     <surname>Ghai</surname>
                     <given-names>R</given-names>
                  </name>
                  <name>
                     <surname>Rodriguez-Valera</surname>
                     <given-names>F</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2014">2014</year>
               <article-title>Evidence for metaviromic islands in marine phages</article-title>
               <source>Frontiers in Microbiology</source>
               <volume>5</volume>
               <elocation-id>27</elocation-id>
               <pub-id pub-id-type="doi">10.3389/fmicb.2014.00027</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-52"><label>Mizuno et al. (2016)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Mizuno</surname>
                     <given-names>CM</given-names>
                  </name>
                  <name>
                     <surname>Ghai</surname>
                     <given-names>R</given-names>
                  </name>
                  <name>
                     <surname>Saghaï</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>López-García</surname>
                     <given-names>P</given-names>
                  </name>
                  <name>
                     <surname>Rodriguez-Valera</surname>
                     <given-names>F</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2016">2016</year>
               <article-title>Genomes of abundant and widespread viruses from the deep ocean</article-title>
               <source>mBio</source>
               <volume>7</volume>
               <issue>4</issue>
               <elocation-id>e00805-16</elocation-id>
               <pub-id pub-id-type="doi">10.1128/mBio.00805-16</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-53"><label>Mizuno et al. (2013)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Mizuno</surname>
                     <given-names>CM</given-names>
                  </name>
                  <name>
                     <surname>Rodriguez-Valera</surname>
                     <given-names>F</given-names>
                  </name>
                  <name>
                     <surname>Kimes</surname>
                     <given-names>NE</given-names>
                  </name>
                  <name>
                     <surname>Ghai</surname>
                     <given-names>R</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2013">2013</year>
               <article-title>Expanding the marine virosphere using metagenomics</article-title>
               <source>PLOS Genetics</source>
               <volume>9</volume>
               <issue>12</issue>
               <elocation-id>e1003987</elocation-id>
               <pub-id pub-id-type="doi">10.1371/journal.pgen.1003987</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-54"><label>Nei &amp; Li (1979)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Nei</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Li</surname>
                     <given-names>WH</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="1979">1979</year>
               <article-title>Mathematical model for studying genetic variation in terms of restriction endonucleases</article-title>
               <source>Proceedings of the National Academy of Sciences of the United States of America</source>
               <volume>76</volume>
               <fpage>5269</fpage>
               <lpage>5273</lpage>
               <pub-id pub-id-type="doi">10.1073/pnas.76.10.5269</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-55"><label>Nepusz, Yu &amp; Paccanaro (2012)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Nepusz</surname>
                     <given-names>T</given-names>
                  </name>
                  <name>
                     <surname>Yu</surname>
                     <given-names>H</given-names>
                  </name>
                  <name>
                     <surname>Paccanaro</surname>
                     <given-names>A</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2012">2012</year>
               <article-title>Detecting overlapping protein complexes in protein-protein interaction networks</article-title>
               <source>Nature Methods</source>
               <volume>9</volume>
               <fpage>471</fpage>
               <lpage>472</lpage>
               <pub-id pub-id-type="doi">10.1038/nmeth.1938</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-56"><label>Noble (2001)</label><element-citation publication-type="book">
               <person-group person-group-type="author">
                  <name>
                     <surname>Noble</surname>
                     <given-names>RT</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2001">2001</year>
               <article-title>Enumeration of viruses</article-title>
               <source>Methods in microbiology</source>
               <publisher-name>Academic Press</publisher-name>
               <publisher-loc>Cambridge</publisher-loc>
               <fpage>43</fpage>
               <lpage>51</lpage>
            </element-citation>
         </ref>
         <ref id="ref-57"><label>Noguchi, Taniguchi &amp; Itoh (2008)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Noguchi</surname>
                     <given-names>H</given-names>
                  </name>
                  <name>
                     <surname>Taniguchi</surname>
                     <given-names>T</given-names>
                  </name>
                  <name>
                     <surname>Itoh</surname>
                     <given-names>T</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2008">2008</year>
               <article-title>MetaGeneAnnotator: detecting species-specific patterns of ribosomal binding site for precise gene prediction in anonymous prokaryotic and phage genomes</article-title>
               <source>DNA Research</source>
               <volume>15</volume>
               <issue>6</issue>
               <fpage>387</fpage>
               <lpage>396</lpage>
               <pub-id pub-id-type="doi">10.1093/dnares/dsn027</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-58"><label>Nurk et al. (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Nurk</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Meleshko</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Korobeynikov</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Pevzner</surname>
                     <given-names>PA</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>metaSPAdes: a new versatile metagenomic assembler</article-title>
               <source>Genome Research</source>
               <volume>27</volume>
               <fpage>824</fpage>
               <lpage>834</lpage>
               <pub-id pub-id-type="doi">10.1101/gr.213959.116</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-59"><label>Ohio Supercomputer Center (1987)</label><element-citation publication-type="book">
               <person-group person-group-type="author">
                  <collab>Ohio Supercomputer Center</collab>
               </person-group>
               <year iso-8601-date="1987">1987</year>
               <source>Ohio supercomputer center</source>
               <publisher-name>Ohio Supercomputer Center</publisher-name>
               <publisher-loc>Columbus</publisher-loc>
            </element-citation>
         </ref>
         <ref id="ref-60"><label>Olson et al. (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Olson</surname>
                     <given-names>ND</given-names>
                  </name>
                  <name>
                     <surname>Treangen</surname>
                     <given-names>TJ</given-names>
                  </name>
                  <name>
                     <surname>Hill</surname>
                     <given-names>CM</given-names>
                  </name>
                  <name>
                     <surname>Cepeda-Espinoza</surname>
                     <given-names>V</given-names>
                  </name>
                  <name>
                     <surname>Ghurye</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Koren</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Pop</surname>
                     <given-names>M</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Metagenomic assembly through the lens of validation: recent advances in assessing and improving the quality of genomes assembled from metagenomes</article-title>
               <source>Briefings in Bioinformatics</source>
               <comment>Epub ahead of print Aug 7 2017</comment>
               <pub-id pub-id-type="doi">10.1093/bib/bbx098</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-61"><label>Pratama &amp; Van Elsas (2018)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Pratama</surname>
                     <given-names>AA</given-names>
                  </name>
                  <name>
                     <surname>Van Elsas</surname>
                     <given-names>JD</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2018">2018</year>
               <article-title>The neglected soil virome—potential role and impact</article-title>
               <source>Trends in Microbiology</source>
               <volume>26</volume>
               <fpage>649</fpage>
               <lpage>662</lpage>
               <pub-id pub-id-type="doi">10.1016/j.tim.2017.12.004</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-62"><label>Roux et al. (2016a)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Roux</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Brum</surname>
                     <given-names>JR</given-names>
                  </name>
                  <name>
                     <surname>Dutilh</surname>
                     <given-names>BE</given-names>
                  </name>
                  <name>
                     <surname>Sunagawa</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Duhaime</surname>
                     <given-names>MB</given-names>
                  </name>
                  <name>
                     <surname>Loy</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Poulos</surname>
                     <given-names>BT</given-names>
                  </name>
                  <name>
                     <surname>Solonenko</surname>
                     <given-names>N</given-names>
                  </name>
                  <name>
                     <surname>Lara</surname>
                     <given-names>E</given-names>
                  </name>
                  <name>
                     <surname>Poulain</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Pesant</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Kandels-Lewis</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Dimier</surname>
                     <given-names>C</given-names>
                  </name>
                  <name>
                     <surname>Picheral</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Searson</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Cruaud</surname>
                     <given-names>C</given-names>
                  </name>
                  <name>
                     <surname>Alberti</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Duarte</surname>
                     <given-names>CM</given-names>
                  </name>
                  <name>
                     <surname>Gasol</surname>
                     <given-names>JM</given-names>
                  </name>
                  <name>
                     <surname>Vaque</surname>
                     <given-names>D</given-names>
                  </name>
                  <collab>Tara Oceans Coordinators</collab>
                  <name>
                     <surname>Bork</surname>
                     <given-names>P</given-names>
                  </name>
                  <name>
                     <surname>Acinas</surname>
                     <given-names>SG</given-names>
                  </name>
                  <name>
                     <surname>Wincker</surname>
                     <given-names>P</given-names>
                  </name>
                  <name>
                     <surname>Sullivan</surname>
                     <given-names>MB</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2016">2016a</year>
               <article-title>Ecogenomics and potential biogeochemical impacts of globally abundant ocean viruses</article-title>
               <source>Nature</source>
               <volume>537</volume>
               <fpage>689</fpage>
               <lpage>693</lpage>
               <pub-id pub-id-type="doi">10.1038/nature19366</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-63"><label>Roux et al. (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Roux</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Emerson</surname>
                     <given-names>JB</given-names>
                  </name>
                  <name>
                     <surname>Eloe-Fadrosh</surname>
                     <given-names>EA</given-names>
                  </name>
                  <name>
                     <surname>Sullivan</surname>
                     <given-names>MB</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Benchmarking viromics: an in silico evaluation of metagenome-enabled estimates of viral community composition and diversity</article-title>
               <source>PeerJ</source>
               <volume>5</volume>
               <elocation-id>e3817</elocation-id>
               <pub-id pub-id-type="doi">10.7717/peerj.3817</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-64"><label>Roux et al. (2015)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Roux</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Enault</surname>
                     <given-names>F</given-names>
                  </name>
                  <name>
                     <surname>Hurwitz</surname>
                     <given-names>BL</given-names>
                  </name>
                  <name>
                     <surname>Sullivan</surname>
                     <given-names>MB</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2015">2015</year>
               <article-title>VirSorter: mining viral signal from microbial genomic data</article-title>
               <source>PeerJ</source>
               <volume>3</volume>
               <elocation-id>e985</elocation-id>
               <pub-id pub-id-type="doi">10.7717/peerj.985</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-65"><label>Roux et al. (2016b)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Roux</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Solonenko</surname>
                     <given-names>NE</given-names>
                  </name>
                  <name>
                     <surname>Dang</surname>
                     <given-names>VT</given-names>
                  </name>
                  <name>
                     <surname>Poulos</surname>
                     <given-names>BT</given-names>
                  </name>
                  <name>
                     <surname>Schwenck</surname>
                     <given-names>SM</given-names>
                  </name>
                  <name>
                     <surname>Goldsmith</surname>
                     <given-names>DB</given-names>
                  </name>
                  <name>
                     <surname>Coleman</surname>
                     <given-names>ML</given-names>
                  </name>
                  <name>
                     <surname>Breitbart</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Sullivan</surname>
                     <given-names>MB</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2016">2016b</year>
               <article-title>Towards quantitative viromics for both double-stranded and single-stranded DNA viruses</article-title>
               <source>PeerJ</source>
               <volume>4</volume>
               <elocation-id>e2777</elocation-id>
               <pub-id pub-id-type="doi">10.7717/peerj.2777</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-66"><label>Shagin et al. (1999)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Shagin</surname>
                     <given-names>DA</given-names>
                  </name>
                  <name>
                     <surname>Lukyanov</surname>
                     <given-names>KA</given-names>
                  </name>
                  <name>
                     <surname>Vagner</surname>
                     <given-names>LL</given-names>
                  </name>
                  <name>
                     <surname>Matz</surname>
                     <given-names>MV</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="1999">1999</year>
               <article-title>Regulation of average length of complex PCR product</article-title>
               <source>Nucleic Acids Research</source>
               <volume>27</volume>
               <elocation-id>e23</elocation-id>
               <pub-id pub-id-type="doi">10.1093/nar/27.18.e23</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-67"><label>Šimoliūnas et al. (2012)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Šimoliūnas</surname>
                     <given-names>E</given-names>
                  </name>
                  <name>
                     <surname>Kaliniene</surname>
                     <given-names>L</given-names>
                  </name>
                  <name>
                     <surname>Truncaite</surname>
                     <given-names>L</given-names>
                  </name>
                  <name>
                     <surname>Klausa</surname>
                     <given-names>V</given-names>
                  </name>
                  <name>
                     <surname>Zajančkauskaite</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Meškys</surname>
                     <given-names>R</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2012">2012</year>
               <article-title>Genome of <italic>Klebsiella</italic> sp.-infecting bacteriophage vB_KleM_RaK2</article-title>
               <source>Journal of Virology</source>
               <volume>86</volume>
               <fpage>5406</fpage>
               <pub-id pub-id-type="doi">10.1128/JVI.00347-12</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-68"><label>Sullivan (2015)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Sullivan</surname>
                     <given-names>MB</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2015">2015</year>
               <article-title>Viromes, not gene markers, for studying double-stranded DNA virus communities</article-title>
               <source>Journal of Virology</source>
               <volume>89</volume>
               <fpage>2459</fpage>
               <lpage>2461</lpage>
               <pub-id pub-id-type="doi">10.1128/JVI.03289-14</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-69"><label>Suttle (2005)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Suttle</surname>
                     <given-names>CA</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2005">2005</year>
               <article-title>Viruses in the sea</article-title>
               <source>Nature</source>
               <volume>437</volume>
               <fpage>356</fpage>
               <lpage>361</lpage>
               <pub-id pub-id-type="doi">10.1038/nature04160</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-70"><label>Suttle (2007)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Suttle</surname>
                     <given-names>CA</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2007">2007</year>
               <article-title>Marine viruses—major players in the global ecosystem</article-title>
               <source>Nature Reviews. Microbiology</source>
               <volume>5</volume>
               <fpage>801</fpage>
               <lpage>812</lpage>
               <pub-id pub-id-type="doi">10.1038/nrmicro1750</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-71"><label>Temperton &amp; Giovannoni (2012)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Temperton</surname>
                     <given-names>B</given-names>
                  </name>
                  <name>
                     <surname>Giovannoni</surname>
                     <given-names>SJ</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2012">2012</year>
               <article-title>Metagenomics: microbial diversity through a scratched lens</article-title>
               <source>Current Opinion in Microbiology</source>
               <volume>15</volume>
               <issue>5</issue>
               <fpage>605</fpage>
               <lpage>612</lpage>
               <pub-id pub-id-type="doi">10.1016/j.mib.2012.07.001</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-72"><label>Thompson et al. (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Thompson</surname>
                     <given-names>LR</given-names>
                  </name>
                  <name>
                     <surname>Sanders</surname>
                     <given-names>JG</given-names>
                  </name>
                  <name>
                     <surname>McDonald</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Amir</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Ladau</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Locey</surname>
                     <given-names>KJ</given-names>
                  </name>
                  <name>
                     <surname>Prill</surname>
                     <given-names>RJ</given-names>
                  </name>
                  <name>
                     <surname>Tripathi</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Gibbons</surname>
                     <given-names>SM</given-names>
                  </name>
                  <name>
                     <surname>Ackermann</surname>
                     <given-names>G</given-names>
                  </name>
                  <name>
                     <surname>Navas-Molina</surname>
                     <given-names>JA</given-names>
                  </name>
                  <name>
                     <surname>Janssen</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Kopylova</surname>
                     <given-names>E</given-names>
                  </name>
                  <name>
                     <surname>Vázquez-Baeza</surname>
                     <given-names>Y</given-names>
                  </name>
                  <name>
                     <surname>González</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Morton</surname>
                     <given-names>JT</given-names>
                  </name>
                  <name>
                     <surname>Mirarab</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Zech Xu</surname>
                     <given-names>Z</given-names>
                  </name>
                  <name>
                     <surname>Jiang</surname>
                     <given-names>L</given-names>
                  </name>
                  <name>
                     <surname>Haroon</surname>
                     <given-names>MF</given-names>
                  </name>
                  <name>
                     <surname>Kanbar</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Zhu</surname>
                     <given-names>Q</given-names>
                  </name>
                  <name>
                     <surname>Jin Song</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Kosciolek</surname>
                     <given-names>T</given-names>
                  </name>
                  <name>
                     <surname>Bokulich</surname>
                     <given-names>NA</given-names>
                  </name>
                  <name>
                     <surname>Lefler</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Brislawn</surname>
                     <given-names>CJ</given-names>
                  </name>
                  <name>
                     <surname>Humphrey</surname>
                     <given-names>G</given-names>
                  </name>
                  <name>
                     <surname>Owens</surname>
                     <given-names>SM</given-names>
                  </name>
                  <name>
                     <surname>Hampton-Marcell</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Berg-Lyons</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>McKenzie</surname>
                     <given-names>V</given-names>
                  </name>
                  <name>
                     <surname>Fierer</surname>
                     <given-names>N</given-names>
                  </name>
                  <name>
                     <surname>Fuhrman</surname>
                     <given-names>JA</given-names>
                  </name>
                  <name>
                     <surname>Clauset</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Stevens</surname>
                     <given-names>RL</given-names>
                  </name>
                  <name>
                     <surname>Shade</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Pollard</surname>
                     <given-names>KS</given-names>
                  </name>
                  <name>
                     <surname>Goodwin</surname>
                     <given-names>KD</given-names>
                  </name>
                  <name>
                     <surname>Jansson</surname>
                     <given-names>JK</given-names>
                  </name>
                  <name>
                     <surname>Gilbert</surname>
                     <given-names>JA</given-names>
                  </name>
                  <name>
                     <surname>Knight</surname>
                     <given-names>R</given-names>
                  </name>
                  <collab>Earth Microbiome Project Consortium</collab>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>A communal catalogue reveals Earth’s multiscale microbial diversity</article-title>
               <source>Nature</source>
               <volume>551</volume>
               <fpage>457</fpage>
               <lpage>463</lpage>
               <pub-id pub-id-type="doi">10.1038/nature24621</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-73"><label>Tithi et al. (2018)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Tithi</surname>
                     <given-names>SS</given-names>
                  </name>
                  <name>
                     <surname>Aylward</surname>
                     <given-names>FO</given-names>
                  </name>
                  <name>
                     <surname>Jensen</surname>
                     <given-names>RV</given-names>
                  </name>
                  <name>
                     <surname>Zhang</surname>
                     <given-names>L</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2018">2018</year>
               <article-title>FastViromeExplorer: a pipeline for virus and phage identification and abundance profiling in metagenomics data</article-title>
               <source>PeerJ</source>
               <volume>6</volume>
               <elocation-id>e4227</elocation-id>
               <pub-id pub-id-type="doi">10.7717/peerj.4227</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-74"><label>Torsvik &amp; Øvreås (2002)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Torsvik</surname>
                     <given-names>V</given-names>
                  </name>
                  <name>
                     <surname>Øvreås</surname>
                     <given-names>L</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2002">2002</year>
               <article-title>Microbial diversity and function in soil: from genes to ecosystems</article-title>
               <source>Current Opinion in Microbiology</source>
               <volume>5</volume>
               <issue>3</issue>
               <fpage>240</fpage>
               <lpage>245</lpage>
               <pub-id pub-id-type="doi">10.1016/S1369-5274(02)00324-7</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-75"><label>Treusch et al. (2009)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Treusch</surname>
                     <given-names>AH</given-names>
                  </name>
                  <name>
                     <surname>Vergin</surname>
                     <given-names>KL</given-names>
                  </name>
                  <name>
                     <surname>Finlay</surname>
                     <given-names>LA</given-names>
                  </name>
                  <name>
                     <surname>Donatz</surname>
                     <given-names>MG</given-names>
                  </name>
                  <name>
                     <surname>Burton</surname>
                     <given-names>RM</given-names>
                  </name>
                  <name>
                     <surname>Carlson</surname>
                     <given-names>CA</given-names>
                  </name>
                  <name>
                     <surname>Giovannoni</surname>
                     <given-names>SJ</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2009">2009</year>
               <article-title>Seasonality and vertical structure of microbial communities in an ocean gyre</article-title>
               <source>The ISME Journal</source>
               <volume>3</volume>
               <fpage>1148</fpage>
               <lpage>1163</lpage>
               <pub-id pub-id-type="doi">10.1038/ismej.2009.60</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-76"><label>Viehweger et al. (2018)</label><element-citation publication-type="working-paper">
               <person-group person-group-type="author">
                  <name>
                     <surname>Viehweger</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Krautwurst</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Lamkiewicz</surname>
                     <given-names>K</given-names>
                  </name>
                  <name>
                     <surname>Madhugiri</surname>
                     <given-names>R</given-names>
                  </name>
                  <name>
                     <surname>Ziebuhr</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Hölzer</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Marz</surname>
                     <given-names>M</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2018">2018</year>
               <article-title>Nanopore direct RNA sequencing reveals modification in full-length coronavirus genomes</article-title>
               <source>bioRxiv preprint</source>
               <pub-id pub-id-type="doi">10.1101/483693</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-77"><label>Walker et al. (2014)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Walker</surname>
                     <given-names>BJ</given-names>
                  </name>
                  <name>
                     <surname>Abeel</surname>
                     <given-names>T</given-names>
                  </name>
                  <name>
                     <surname>Shea</surname>
                     <given-names>T</given-names>
                  </name>
                  <name>
                     <surname>Priest</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Abouelliel</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Sakthikumar</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Cuomo</surname>
                     <given-names>CA</given-names>
                  </name>
                  <name>
                     <surname>Zeng</surname>
                     <given-names>Q</given-names>
                  </name>
                  <name>
                     <surname>Wortman</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Young</surname>
                     <given-names>SK</given-names>
                  </name>
                  <name>
                     <surname>Earl</surname>
                     <given-names>AM</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2014">2014</year>
               <article-title>Pilon: an integrated tool for comprehensive microbial variant detection and genome assembly improvement</article-title>
               <source>PLOS ONE</source>
               <volume>9</volume>
               <issue>11</issue>
               <elocation-id>e112963</elocation-id>
               <pub-id pub-id-type="doi">10.1371/journal.pone.0112963</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-78"><label>Warr &amp; Watson (2019)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Warr</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Watson</surname>
                     <given-names>M</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2019">2019</year>
               <article-title>Errors in long-read assemblies can critically affect protein prediction</article-title>
               <source>Nature Biotechnology</source>
               <volume>37</volume>
               <fpage>124</fpage>
               <lpage>126</lpage>
               <pub-id pub-id-type="doi">10.1038/s41587-018-0004-z</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-79"><label>Weinbauer (2004)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Weinbauer</surname>
                     <given-names>MG</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2004">2004</year>
               <article-title>Ecology of prokaryotic viruses</article-title>
               <source>FEMS Microbiology Reviews</source>
               <volume>28</volume>
               <issue>2</issue>
               <fpage>127</fpage>
               <lpage>181</lpage>
               <pub-id pub-id-type="doi">10.1016/j.femsre.2003.08.001</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-80"><label>Weirather et al. (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Weirather</surname>
                     <given-names>JL</given-names>
                  </name>
                  <name>
                     <surname>De Cesare</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Wang</surname>
                     <given-names>Y</given-names>
                  </name>
                  <name>
                     <surname>Piazza</surname>
                     <given-names>P</given-names>
                  </name>
                  <name>
                     <surname>Sebastiano</surname>
                     <given-names>V</given-names>
                  </name>
                  <name>
                     <surname>Wang</surname>
                     <given-names>X-J</given-names>
                  </name>
                  <name>
                     <surname>Buck</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Au</surname>
                     <given-names>KF</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Comprehensive comparison of Pacific Biosciences and Oxford Nanopore Technologies and their applications to transcriptome analysis</article-title>
               <source>F1000Research</source>
               <volume>6</volume>
               <fpage>100</fpage>
               <pub-id pub-id-type="doi">10.12688/f1000research.10571.2</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-81"><label>Wick et al. (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Wick</surname>
                     <given-names>RR</given-names>
                  </name>
                  <name>
                     <surname>Judd</surname>
                     <given-names>LM</given-names>
                  </name>
                  <name>
                     <surname>Gorrie</surname>
                     <given-names>CL</given-names>
                  </name>
                  <name>
                     <surname>Holt</surname>
                     <given-names>KE</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Unicycler: resolving bacterial genome assemblies from short and long sequencing reads</article-title>
               <source>PLOS Computational Biology</source>
               <volume>13</volume>
               <issue>6</issue>
               <elocation-id>e1005595</elocation-id>
               <pub-id pub-id-type="doi">10.1371/journal.pcbi.1005595</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-82"><label>Zhao et al. (2013)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Zhao</surname>
                     <given-names>Y</given-names>
                  </name>
                  <name>
                     <surname>Temperton</surname>
                     <given-names>B</given-names>
                  </name>
                  <name>
                     <surname>Thrash</surname>
                     <given-names>JC</given-names>
                  </name>
                  <name>
                     <surname>Schwalbach</surname>
                     <given-names>MS</given-names>
                  </name>
                  <name>
                     <surname>Vergin</surname>
                     <given-names>KL</given-names>
                  </name>
                  <name>
                     <surname>Landry</surname>
                     <given-names>ZC</given-names>
                  </name>
                  <name>
                     <surname>Ellisman</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Deerinck</surname>
                     <given-names>T</given-names>
                  </name>
                  <name>
                     <surname>Sullivan</surname>
                     <given-names>MB</given-names>
                  </name>
                  <name>
                     <surname>Giovannoni</surname>
                     <given-names>SJ</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2013">2013</year>
               <article-title>Abundant SAR11 viruses in the ocean</article-title>
               <source>Nature</source>
               <volume>494</volume>
               <fpage>357</fpage>
               <lpage>360</lpage>
               <pub-id pub-id-type="doi">10.1038/nature11921</pub-id>
            </element-citation>
         </ref>
      </ref-list>
   </back>
</article>
