The disk creation was done with Terraform on rack2, using a TF plan that has worked for at least 6 months without problems. Here is the error I got when the system attempted to create 20 disks of 10 GiB each (TF uses 10 concurrent threads by default):
oxide_disk.boot[7]: Creating...
oxide_disk.boot[15]: Creating...
oxide_disk.boot[13]: Creating...
oxide_disk.boot[17]: Creating...
oxide_disk.boot[14]: Creating...
oxide_disk.boot[10]: Creating...
oxide_disk.boot[8]: Creating...
oxide_disk.boot[1]: Creating...
oxide_disk.boot[0]: Creating...
oxide_vpc_subnet.app: Creation complete after 1s [id=ac5d0381-aaa4-4189-8552-c6f4b734f29c]
oxide_disk.boot[11]: Creating...
oxide_disk.boot[18]: Creating...
oxide_disk.boot[4]: Creating...
oxide_disk.boot[6]: Creating...
oxide_disk.boot[12]: Creating...
oxide_disk.boot[5]: Creating...
oxide_disk.boot[2]: Creating...
oxide_disk.boot[16]: Creating...
oxide_disk.boot[9]: Creating...
oxide_disk.boot[3]: Creating...
oxide_disk.boot[19]: Creating...
oxide_disk.boot[5]: Creation complete after 4s [id=f2f26899-75be-417a-8bb6-592dd41d1335]
oxide_disk.boot[18]: Creation complete after 6s [id=471b62f8-5a17-4892-9320-99bdf1c2034e]
╷
│ Error: Error creating disk
│
│ with oxide_disk.boot[10],
│ on app.tf line 59, in resource "oxide_disk" "boot":
│ 59: resource "oxide_disk" "boot" {
│
│ API error: POST https://oxide.sys.rack2.eng.oxide.computer/v1/disks?project=5e49b6de-cb2d-438d-83af-95c415bbb901
│ ----------- RESPONSE -----------
│ Status: 500 Internal
│ Message: Internal Server Error
│ RequestID: 65ec3fc7-ee1c-4acc-95d2-525dc61aab78
│ ------- RESPONSE HEADERS -------
│ Content-Type: [application/json]
│ X-Request-Id: [65ec3fc7-ee1c-4acc-95d2-525dc61aab78]
│ Date: [Thu, 21 Mar 2024 04:43:12 GMT]
│ Content-Length: [124]
│
╵
╷
│ Error: Error creating disk
│
│ with oxide_disk.boot[11],
│ on app.tf line 59, in resource "oxide_disk" "boot":
│ 59: resource "oxide_disk" "boot" {
│
│ API error: POST https://oxide.sys.rack2.eng.oxide.computer/v1/disks?project=5e49b6de-cb2d-438d-83af-95c415bbb901
│ ----------- RESPONSE -----------
│ Status: 500 Internal
│ Message: Internal Server Error
│ RequestID: 9d553973-b41b-4ec7-b35d-3bd82ecea37f
│ ------- RESPONSE HEADERS -------
│ Content-Type: [application/json]
│ X-Request-Id: [9d553973-b41b-4ec7-b35d-3bd82ecea37f]
│ Date: [Thu, 21 Mar 2024 04:43:12 GMT]
│ Content-Length: [124]
│
The errors in the Nexus log all complain about the memory budget being exceeded, e.g.:
root@oxz_nexus_65a11c18:~# grep 'Internal Server' /var/svc/log/oxide-nexus\:default.log | looker
04:43:12.830Z INFO 65a11c18-7f59-41ac-b9e7-680627f996e7 (dropshot_external): request completed
error_message_external = Internal Server Error
error_message_internal = saga ACTION error at node "datasets_and_regions": unexpected database error: scan with start key /Table/434/2/"i\\xf0\\xb8c\\xf7?B\\xb2\\x98\\"\\xb2\u{2d9}\\xf0\\x90\\x03": root: memory budget exceeded: 133120 bytes requested, 134104377 currently allocated, 134217728 bytes in budget
file = /home/build/.cargo/git/checkouts/dropshot-a4a923d29dccc492/29ae98d/dropshot/src/server.rs:837
latency_us = 305395
local_addr = 172.30.2.5:443
method = POST
remote_addr = 172.20.17.42:60877
req_id = 18940d64-d8c1-40a8-9e97-01c03a5cf957
response_code = 500
uri = https://oxide.sys.rack2.eng.oxide.computer/v1/disks?project=5e49b6de-cb2d-438d-83af-95c415bbb901
04:43:12.830Z INFO 65a11c18-7f59-41ac-b9e7-680627f996e7 (dropshot_external): request completed
error_message_external = Internal Server Error
error_message_internal = saga ACTION error at node "datasets_and_regions": unexpected database error: scan with start key /Table/434/2/"\\x13\\x86c\\xad\\xa3\\x82E\\x95\\xba\\xf0\\b\\xf6\\xb0'jg": root: memory budget exceeded: 133120 bytes requested, 134104377 currently allocated, 134217728 bytes in budget
file = /home/build/.cargo/git/checkouts/dropshot-a4a923d29dccc492/29ae98d/dropshot/src/server.rs:837
latency_us = 304769
local_addr = 172.30.2.5:443
method = POST
remote_addr = 172.20.17.42:60877
req_id = 65ec3fc7-ee1c-4acc-95d2-525dc61aab78
response_code = 500
uri = https://oxide.sys.rack2.eng.oxide.computer/v1/disks?project=5e49b6de-cb2d-438d-83af-95c415bbb901
04:43:12.961Z INFO 65a11c18-7f59-41ac-b9e7-680627f996e7 (dropshot_external): request completed
error_message_external = Internal Server Error
error_message_internal = saga ACTION error at node "datasets_and_regions": unexpected database error: root: memory budget exceeded: 40960 bytes requested, 134190929 currently allocated, 134217728 bytes in budget
file = /home/build/.cargo/git/checkouts/dropshot-a4a923d29dccc492/29ae98d/dropshot/src/server.rs:837
latency_us = 435717
local_addr = 172.30.2.5:443
method = POST
remote_addr = 172.20.17.42:60877
req_id = 66546451-fac1-4c58-8a66-217afd0c71fc
response_code = 500
uri = https://oxide.sys.rack2.eng.oxide.computer/v1/disks?project=5e49b6de-cb2d-438d-83af-95c415bbb901
{
"block_size": 512,
"description": "cb6eb1e9-69fd-40ad-9373-83926f8b32d9 test instance ",
"device_path": "/mnt/prov-time-32c-64m",
"id": "0228bbb9-07b3-4fd0-80be-1f80546d7baf",
"image_id": "cb6eb1e9-69fd-40ad-9373-83926f8b32d9",
"name": "prov-time-32c-64m",
"project_id": "5e49b6de-cb2d-438d-83af-95c415bbb901",
"size": 68719476736,
"snapshot_id": null,
"state": {
"state": "creating"
},
"time_created": "2024-03-21T04:48:52.648048Z",
"time_modified": "2024-03-21T04:48:52.648048Z"
}
{
"block_size": 512,
"description": "cb6eb1e9-69fd-40ad-9373-83926f8b32d9 test instance ",
"device_path": "/mnt/prov-time-32c-96m",
"id": "fc8c8f39-bd1d-4d81-94a3-9400c020f554",
"image_id": "cb6eb1e9-69fd-40ad-9373-83926f8b32d9",
"name": "prov-time-32c-96m",
"project_id": "5e49b6de-cb2d-438d-83af-95c415bbb901",
"size": 103079215104,
"snapshot_id": null,
"state": {
"state": "creating"
},
"time_created": "2024-03-21T04:48:57.152238Z",
"time_modified": "2024-03-21T04:48:57.152238Z"
}
{
"block_size": 512,
"description": "cb6eb1e9-69fd-40ad-9373-83926f8b32d9 test instance ",
"device_path": "/mnt/prov-time-32c-128m",
"id": "12e6a8c6-7f46-4a27-a23d-b6536622e196",
"image_id": "cb6eb1e9-69fd-40ad-9373-83926f8b32d9",
"name": "prov-time-32c-128m",
"project_id": "5e49b6de-cb2d-438d-83af-95c415bbb901",
"size": 137438953472,
"snapshot_id": null,
"state": {
"state": "creating"
},
"time_created": "2024-03-21T04:49:02.273872Z",
"time_modified": "2024-03-21T04:49:02.273872Z"
}
{
"block_size": 512,
"description": "cb6eb1e9-69fd-40ad-9373-83926f8b32d9 test instance ",
"device_path": "/mnt/prov-time-32c-256m",
"id": "1dab86ec-e1e2-4d3f-a4f2-40b0718a8661",
"image_id": "cb6eb1e9-69fd-40ad-9373-83926f8b32d9",
"name": "prov-time-32c-256m",
"project_id": "5e49b6de-cb2d-438d-83af-95c415bbb901",
"size": 274877906944,
"snapshot_id": null,
"state": {
"state": "creating"
},
"time_created": "2024-03-21T04:49:09.296486Z",
"time_modified": "2024-03-21T04:49:09.296486Z"
}
The disk creation was done with Terraform on rack2, using a TF plan that has worked for at least 6 months without problems. Here is the error I got when the system attempted to create 20 disks of 10 GiB each (TF uses 10 concurrent threads by default):
The errors in the Nexus log all complain about the memory budget being exceeded, e.g.:
The error didn't occur when I created disks sequentially, even though those disks were much larger: