@@ -273,30 +273,18 @@ subsampling:
273273
274274 # Custom subsampling logic for region Asia over 1m
275275 # Previously grouped by division (the scheme name still reflects this)
276- # Separating three buckets for China, India and elsewhere
276+ # Grouping by country weighted by population size
277277 # 4375 total
278278 # 4:1 ratio of recent to early
279279 # 4:1 ratio of focal to context
280- # 3:2:2 proportions of Asia, China, India
281280 nextstrain_region_asia_grouped_by_division_1m :
282281 # Early focal samples for Asia
283282 asia_early :
284- group_by : " division year month"
285- max_sequences : 300
286- max_date : " --max-date 1M"
287- exclude : " --exclude-where 'region!=Asia' 'country=China' 'country=India'"
288- # Early focal samples for China
289- china_early :
290- group_by : " division year month"
291- max_sequences : 200
292- max_date : " --max-date 1M"
293- exclude : " --exclude-where 'country!=China'"
294- # Early focal samples for India
295- india_early :
296- group_by : " division year month"
297- max_sequences : 200
283+ group_by : " country year month"
284+ group_by_weights : " defaults/population_weights.tsv"
285+ max_sequences : 700
298286 max_date : " --max-date 1M"
299- exclude : " --exclude-where 'country!=India '"
287+ exclude : " --exclude-where 'region!=Asia '"
300288 # Early contextual samples from the rest of the world
301289 context_early :
302290 group_by : " country year month"
@@ -305,22 +293,11 @@ subsampling:
305293 exclude : " --exclude-where 'region=Asia'"
306294 # Recent focal samples for Asia
307295 asia_recent :
308- group_by : " division week"
309- max_sequences : 1200
296+ group_by : " country year month"
297+ group_by_weights : " defaults/population_weights.tsv"
298+ max_sequences : 2800
310299 min_date : " --min-date 1M"
311- exclude : " --exclude-where 'region!=Asia' 'country=China' 'country=India'"
312- # Recent focal samples for China
313- china_recent :
314- group_by : " division week"
315- max_sequences : 800
316- max_date : " --min-date 1M"
317- exclude : " --exclude-where 'country!=China'"
318- # Recent focal samples for India
319- india_recent :
320- group_by : " division week"
321- max_sequences : 800
322- max_date : " --min-date 1M"
323- exclude : " --exclude-where 'country!=India'"
300+ exclude : " --exclude-where 'region!=Asia'"
324301 # Recent contextual samples from the rest of the world
325302 context_recent :
326303 group_by : " country week"
@@ -330,30 +307,18 @@ subsampling:
330307
331308 # Custom subsampling logic for region Asia over 2m
332309 # Previously grouped by division (the scheme name still reflects this)
333- # Separating three buckets for China, India and elsewhere
310+ # Grouping by country weighted by population size
334311 # 4375 total
335312 # 4:1 ratio of recent to early
336313 # 4:1 ratio of focal to context
337- # 3:2:2 proportions of Asia, China, India
338314 nextstrain_region_asia_grouped_by_division_2m :
339315 # Early focal samples for Asia
340316 asia_early :
341- group_by : " division year month"
342- max_sequences : 300
343- max_date : " --max-date 2M"
344- exclude : " --exclude-where 'region!=Asia' 'country=China' 'country=India'"
345- # Early focal samples for China
346- china_early :
347- group_by : " division year month"
348- max_sequences : 200
349- max_date : " --max-date 2M"
350- exclude : " --exclude-where 'country!=China'"
351- # Early focal samples for India
352- india_early :
353- group_by : " division year month"
354- max_sequences : 200
317+ group_by : " country year month"
318+ group_by_weights : " defaults/population_weights.tsv"
319+ max_sequences : 700
355320 max_date : " --max-date 2M"
356- exclude : " --exclude-where 'country!=India '"
321+ exclude : " --exclude-where 'region!=Asia '"
357322 # Early contextual samples from the rest of the world
358323 context_early :
359324 group_by : " country year month"
@@ -362,22 +327,11 @@ subsampling:
362327 exclude : " --exclude-where 'region=Asia'"
363328 # Recent focal samples for Asia
364329 asia_recent :
365- group_by : " division week"
366- max_sequences : 1200
330+ group_by : " country year month"
331+ group_by_weights : " defaults/population_weights.tsv"
332+ max_sequences : 2800
367333 min_date : " --min-date 2M"
368- exclude : " --exclude-where 'region!=Asia' 'country=China' 'country=India'"
369- # Recent focal samples for China
370- china_recent :
371- group_by : " division week"
372- max_sequences : 800
373- max_date : " --min-date 2M"
374- exclude : " --exclude-where 'country!=China'"
375- # Recent focal samples for India
376- india_recent :
377- group_by : " division week"
378- max_sequences : 800
379- max_date : " --min-date 2M"
380- exclude : " --exclude-where 'country!=India'"
334+ exclude : " --exclude-where 'region!=Asia'"
381335 # Recent contextual samples from the rest of the world
382336 context_recent :
383337 group_by : " country week"
@@ -387,30 +341,18 @@ subsampling:
387341
388342 # Custom subsampling logic for region Asia over 6m
389343 # Previously grouped by division (the scheme name still reflects this)
390- # Separating three buckets for China, India and elsewhere
344+ # Grouping by country weighted by population size
391345 # 4375 total
392346 # 4:1 ratio of recent to early
393347 # 4:1 ratio of focal to context
394- # 3:2:2 proportions of Asia, China, India
395348 nextstrain_region_asia_grouped_by_division_6m :
396349 # Early focal samples for Asia
397350 asia_early :
398- group_by : " division year month"
399- max_sequences : 300
400- max_date : " --max-date 6M"
401- exclude : " --exclude-where 'region!=Asia' 'country=China' 'country=India'"
402- # Early focal samples for China
403- china_early :
404- group_by : " division year month"
405- max_sequences : 200
406- max_date : " --max-date 6M"
407- exclude : " --exclude-where 'country!=China'"
408- # Early focal samples for India
409- india_early :
410- group_by : " division year month"
411- max_sequences : 200
351+ group_by : " country year month"
352+ group_by_weights : " defaults/population_weights.tsv"
353+ max_sequences : 700
412354 max_date : " --max-date 6M"
413- exclude : " --exclude-where 'country!=India '"
355+ exclude : " --exclude-where 'region!=Asia '"
414356 # Early contextual samples from the rest of the world
415357 context_early :
416358 group_by : " country year month"
@@ -419,22 +361,11 @@ subsampling:
419361 exclude : " --exclude-where 'region=Asia'"
420362 # Recent focal samples for Asia
421363 asia_recent :
422- group_by : " division year month"
423- max_sequences : 1200
364+ group_by : " country year month"
365+ group_by_weights : " defaults/population_weights.tsv"
366+ max_sequences : 2800
424367 min_date : " --min-date 6M"
425- exclude : " --exclude-where 'region!=Asia' 'country=China' 'country=India'"
426- # Recent focal samples for China
427- china_recent :
428- group_by : " division year month"
429- max_sequences : 800
430- max_date : " --min-date 6M"
431- exclude : " --exclude-where 'country!=China'"
432- # Recent focal samples for India
433- india_recent :
434- group_by : " division year month"
435- max_sequences : 800
436- max_date : " --min-date 6M"
437- exclude : " --exclude-where 'country!=India'"
368+ exclude : " --exclude-where 'region!=Asia'"
438369 # Recent contextual samples from the rest of the world
439370 context_recent :
440371 group_by : " country year month"
@@ -443,27 +374,16 @@ subsampling:
443374 exclude : " --exclude-where 'region=Asia'"
444375
445376 # Custom subsampling logic for region Asia over all-time
446- # Grouping by division
447- # Separating three buckets for China, India and elsewhere
377+ # Grouping by country weighted by population size
448378 # 4375 total
449379 # 4:1 ratio of focal to context
450- # 3:2:2 proportions of Asia, China, India
451380 nextstrain_region_asia_grouped_by_division_all_time :
452381 # Focal samples for Asia
453382 asia :
454- group_by : " division year month"
455- max_sequences : 1500
456- exclude : " --exclude-where 'region!=Asia' 'country=China' 'country=India'"
457- # Focal samples for China
458- china :
459- group_by : " division year month"
460- max_sequences : 1000
461- exclude : " --exclude-where 'country!=China'"
462- # Focal samples for India
463- india :
464- group_by : " division year month"
465- max_sequences : 1000
466- exclude : " --exclude-where 'country!=India'"
383+ group_by : " country year month"
384+ group_by_weights : " defaults/population_weights.tsv"
385+ max_sequences : 3500
386+ exclude : " --exclude-where 'region!=Asia'"
467387 # Contextual samples from the rest of the world
468388 context :
469389 group_by : " country year month"
0 commit comments