Skip to content

Commit 900acbd

Browse files
authored
Merge branch 'main' into conroy/test-fsharp
2 parents b5a4c4b + bf35048 commit 900acbd

10 files changed

Lines changed: 333 additions & 11 deletions

File tree

packages/@aws-cdk/aws-glue/README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,31 @@ myTable.addPartitionIndex({
264264
});
265265
```
266266

267+
### Partition Filtering
268+
269+
If your table has a large number of partitions, and that number keeps growing over time, consider using AWS Glue partition indexing and filtering to keep query performance from degrading.
270+
271+
```ts
272+
declare const myDatabase: glue.Database;
273+
new glue.Table(this, 'MyTable', {
274+
database: myDatabase,
275+
tableName: 'my_table',
276+
columns: [{
277+
name: 'col1',
278+
type: glue.Schema.STRING,
279+
}],
280+
partitionKeys: [{
281+
name: 'year',
282+
type: glue.Schema.SMALL_INT,
283+
}, {
284+
name: 'month',
285+
type: glue.Schema.SMALL_INT,
286+
}],
287+
dataFormat: glue.DataFormat.JSON,
288+
enablePartitionFiltering: true,
289+
});
290+
```
291+
267292
## [Encryption](https://docs.aws.amazon.com/athena/latest/ug/encryption.html)
268293

269294
You can enable encryption on a Table's data:

packages/@aws-cdk/aws-glue/lib/table.ts

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,15 @@ export interface TableProps {
172172
* @default false
173173
*/
174174
readonly storedAsSubDirectories?: boolean;
175+
176+
/**
177+
* Enables partition filtering.
178+
*
179+
* @see https://docs.aws.amazon.com/athena/latest/ug/glue-best-practices.html#glue-best-practices-partition-index
180+
*
181+
* @default - The parameter is not defined
182+
*/
183+
readonly enablePartitionFiltering?: boolean;
175184
}
176185

177186
/**
@@ -302,8 +311,9 @@ export class Table extends Resource implements ITable {
302311
partitionKeys: renderColumns(props.partitionKeys),
303312

304313
parameters: {
305-
classification: props.dataFormat.classificationString?.value,
306-
has_encrypted_data: this.encryption !== TableEncryption.UNENCRYPTED,
314+
'classification': props.dataFormat.classificationString?.value,
315+
'has_encrypted_data': this.encryption !== TableEncryption.UNENCRYPTED,
316+
'partition_filtering.enabled': props.enablePartitionFiltering,
307317
},
308318
storageDescriptor: {
309319
location: `s3://${this.bucket.bucketName}/${this.s3Prefix}`,

packages/@aws-cdk/aws-glue/test/integ.table.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,14 @@ const encryptedTable = new glue.Table(stack, 'MyEncryptedTable', {
8787
encryptionKey: new kms.Key(stack, 'MyKey'),
8888
});
8989

90+
new glue.Table(stack, 'MyPartitionFilteredTable', {
91+
database,
92+
tableName: 'partition_filtered_table',
93+
columns,
94+
dataFormat: glue.DataFormat.JSON,
95+
enablePartitionFiltering: true,
96+
});
97+
9098
const user = new iam.User(stack, 'MyUser');
9199
csvTable.grantReadWrite(user);
92100
encryptedTable.grantReadWrite(user);

packages/@aws-cdk/aws-glue/test/table.integ.snapshot/aws-cdk-glue.assets.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
{
2-
"version": "17.0.0",
2+
"version": "20.0.0",
33
"files": {
4-
"92638b7a8efe38efd7c845883423f3767018a9e5bd3d67d8d638332f054d0d0f": {
4+
"419b39f03d496de4fb02e795181e9a2ab218fb90bf7a5c9354cf93baa6fea2cf": {
55
"source": {
66
"path": "aws-cdk-glue.template.json",
77
"packaging": "file"
88
},
99
"destinations": {
1010
"current_account-current_region": {
1111
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
12-
"objectKey": "92638b7a8efe38efd7c845883423f3767018a9e5bd3d67d8d638332f054d0d0f.json",
12+
"objectKey": "419b39f03d496de4fb02e795181e9a2ab218fb90bf7a5c9354cf93baa6fea2cf.json",
1313
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
1414
}
1515
}

packages/@aws-cdk/aws-glue/test/table.integ.snapshot/aws-cdk-glue.template.json

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,76 @@
423423
}
424424
}
425425
},
426+
"MyPartitionFilteredTableBucket6ACAA137": {
427+
"Type": "AWS::S3::Bucket",
428+
"UpdateReplacePolicy": "Retain",
429+
"DeletionPolicy": "Retain"
430+
},
431+
"MyPartitionFilteredTable324BA27A": {
432+
"Type": "AWS::Glue::Table",
433+
"Properties": {
434+
"CatalogId": {
435+
"Ref": "AWS::AccountId"
436+
},
437+
"DatabaseName": {
438+
"Ref": "MyDatabase1E2517DB"
439+
},
440+
"TableInput": {
441+
"Description": "partition_filtered_table generated by CDK",
442+
"Name": "partition_filtered_table",
443+
"Parameters": {
444+
"classification": "json",
445+
"has_encrypted_data": false,
446+
"partition_filtering.enabled": true
447+
},
448+
"StorageDescriptor": {
449+
"Columns": [
450+
{
451+
"Name": "col1",
452+
"Type": "string"
453+
},
454+
{
455+
"Comment": "col2 comment",
456+
"Name": "col2",
457+
"Type": "string"
458+
},
459+
{
460+
"Name": "col3",
461+
"Type": "array<string>"
462+
},
463+
{
464+
"Name": "col4",
465+
"Type": "map<string,string>"
466+
},
467+
{
468+
"Name": "col5",
469+
"Type": "struct<col1:string>"
470+
}
471+
],
472+
"Compressed": false,
473+
"InputFormat": "org.apache.hadoop.mapred.TextInputFormat",
474+
"Location": {
475+
"Fn::Join": [
476+
"",
477+
[
478+
"s3://",
479+
{
480+
"Ref": "MyPartitionFilteredTableBucket6ACAA137"
481+
},
482+
"/"
483+
]
484+
]
485+
},
486+
"OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
487+
"SerdeInfo": {
488+
"SerializationLibrary": "org.openx.data.jsonserde.JsonSerDe"
489+
},
490+
"StoredAsSubDirectories": false
491+
},
492+
"TableType": "EXTERNAL_TABLE"
493+
}
494+
}
495+
},
426496
"MyUserDC45028B": {
427497
"Type": "AWS::IAM::User"
428498
},
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"version":"17.0.0"}
1+
{"version":"20.0.0"}

packages/@aws-cdk/aws-glue/test/table.integ.snapshot/integ.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
2-
"version": "18.0.0",
2+
"version": "20.0.0",
33
"testCases": {
4-
"aws-glue/test/integ.table": {
4+
"integ.table": {
55
"stacks": [
66
"aws-cdk-glue"
77
],

packages/@aws-cdk/aws-glue/test/table.integ.snapshot/manifest.json

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"version": "17.0.0",
2+
"version": "20.0.0",
33
"artifacts": {
44
"Tree": {
55
"type": "cdk:tree",
@@ -69,6 +69,18 @@
6969
"data": "MyEncryptedTable981A88C6"
7070
}
7171
],
72+
"/aws-cdk-glue/MyPartitionFilteredTable/Bucket/Resource": [
73+
{
74+
"type": "aws:cdk:logicalId",
75+
"data": "MyPartitionFilteredTableBucket6ACAA137"
76+
}
77+
],
78+
"/aws-cdk-glue/MyPartitionFilteredTable/Table": [
79+
{
80+
"type": "aws:cdk:logicalId",
81+
"data": "MyPartitionFilteredTable324BA27A"
82+
}
83+
],
7284
"/aws-cdk-glue/MyUser/Resource": [
7385
{
7486
"type": "aws:cdk:logicalId",

packages/@aws-cdk/aws-glue/test/table.integ.snapshot/tree.json

Lines changed: 106 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"path": "Tree",
1010
"constructInfo": {
1111
"fqn": "constructs.Construct",
12-
"version": "10.0.9"
12+
"version": "10.1.33"
1313
}
1414
},
1515
"aws-cdk-glue": {
@@ -596,6 +596,111 @@
596596
"version": "0.0.0"
597597
}
598598
},
599+
"MyPartitionFilteredTable": {
600+
"id": "MyPartitionFilteredTable",
601+
"path": "aws-cdk-glue/MyPartitionFilteredTable",
602+
"children": {
603+
"Bucket": {
604+
"id": "Bucket",
605+
"path": "aws-cdk-glue/MyPartitionFilteredTable/Bucket",
606+
"children": {
607+
"Resource": {
608+
"id": "Resource",
609+
"path": "aws-cdk-glue/MyPartitionFilteredTable/Bucket/Resource",
610+
"attributes": {
611+
"aws:cdk:cloudformation:type": "AWS::S3::Bucket",
612+
"aws:cdk:cloudformation:props": {}
613+
},
614+
"constructInfo": {
615+
"fqn": "@aws-cdk/aws-s3.CfnBucket",
616+
"version": "0.0.0"
617+
}
618+
}
619+
},
620+
"constructInfo": {
621+
"fqn": "@aws-cdk/aws-s3.Bucket",
622+
"version": "0.0.0"
623+
}
624+
},
625+
"Table": {
626+
"id": "Table",
627+
"path": "aws-cdk-glue/MyPartitionFilteredTable/Table",
628+
"attributes": {
629+
"aws:cdk:cloudformation:type": "AWS::Glue::Table",
630+
"aws:cdk:cloudformation:props": {
631+
"catalogId": {
632+
"Ref": "AWS::AccountId"
633+
},
634+
"databaseName": {
635+
"Ref": "MyDatabase1E2517DB"
636+
},
637+
"tableInput": {
638+
"name": "partition_filtered_table",
639+
"description": "partition_filtered_table generated by CDK",
640+
"parameters": {
641+
"classification": "json",
642+
"has_encrypted_data": false,
643+
"partition_filtering.enabled": true
644+
},
645+
"storageDescriptor": {
646+
"location": {
647+
"Fn::Join": [
648+
"",
649+
[
650+
"s3://",
651+
{
652+
"Ref": "MyPartitionFilteredTableBucket6ACAA137"
653+
},
654+
"/"
655+
]
656+
]
657+
},
658+
"compressed": false,
659+
"storedAsSubDirectories": false,
660+
"columns": [
661+
{
662+
"name": "col1",
663+
"type": "string"
664+
},
665+
{
666+
"name": "col2",
667+
"type": "string",
668+
"comment": "col2 comment"
669+
},
670+
{
671+
"name": "col3",
672+
"type": "array<string>"
673+
},
674+
{
675+
"name": "col4",
676+
"type": "map<string,string>"
677+
},
678+
{
679+
"name": "col5",
680+
"type": "struct<col1:string>"
681+
}
682+
],
683+
"inputFormat": "org.apache.hadoop.mapred.TextInputFormat",
684+
"outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
685+
"serdeInfo": {
686+
"serializationLibrary": "org.openx.data.jsonserde.JsonSerDe"
687+
}
688+
},
689+
"tableType": "EXTERNAL_TABLE"
690+
}
691+
}
692+
},
693+
"constructInfo": {
694+
"fqn": "@aws-cdk/aws-glue.CfnTable",
695+
"version": "0.0.0"
696+
}
697+
}
698+
},
699+
"constructInfo": {
700+
"fqn": "@aws-cdk/aws-glue.Table",
701+
"version": "0.0.0"
702+
}
703+
},
599704
"MyUser": {
600705
"id": "MyUser",
601706
"path": "aws-cdk-glue/MyUser",

0 commit comments

Comments
 (0)