Skip to content

InlineFirehose does not work with index_parallel ingestion #8673

@vogievetsky

Description

@vogievetsky

Affected version: Druid 0.16.0.

As the title says, if you try to submit an `index_parallel` task that uses an `inline` firehose, such as:

{
  "type": "index_parallel",
  "spec": {
    "type": "index_parallel",
    "ioConfig": {
      "type": "index_parallel",
      "firehose": {
        "type": "inline",
        "data": "{\"name\":\"Vadim\"}"
      }
    },
    "tuningConfig": {
      "type": "index_parallel"
    },
    "dataSchema": {
      "dataSource": "some_data",
      "granularitySpec": {
        "type": "uniform",
        "queryGranularity": "HOUR",
        "rollup": true,
        "segmentGranularity": "DAY"
      },
      "parser": {
        "type": "string",
        "parseSpec": {
          "format": "json",
          "timestampSpec": {
            "column": "!!!_no_such_column_!!!",
            "missingValue": "2010-01-01T00:00:00Z"
          },
          "dimensionsSpec": {
            "dimensions": [
              "name"
            ]
          }
        }
      },
      "metricsSpec": [
        {
          "name": "count",
          "type": "count"
        }
      ]
    }
  }
}

You get the following error:

{"error":"Instantiation of [simple type, class org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexSupervisorTask] value failed: [InlineFirehoseFactory] should implement FiniteFirehoseFactory"}

However, changing the task type from `index_parallel` to plain `index`:

{
  "type": "index",
  "spec": {
    "type": "index",
    "ioConfig": {
      "type": "index",
      "firehose": {
        "type": "inline",
        "data": "{\"name\":\"Vadim\"}"
      }
    },
    "tuningConfig": {
      "type": "index"
    },
    "dataSchema": {
      "dataSource": "some_data",
      "granularitySpec": {
        "type": "uniform",
        "queryGranularity": "HOUR",
        "rollup": true,
        "segmentGranularity": "DAY"
      },
      "parser": {
        "type": "string",
        "parseSpec": {
          "format": "json",
          "timestampSpec": {
            "column": "!!!_no_such_column_!!!",
            "missingValue": "2010-01-01T00:00:00Z"
          },
          "dimensionsSpec": {
            "dimensions": [
              "name"
            ]
          }
        }
      },
      "metricsSpec": [
        {
          "name": "count",
          "type": "count"
        }
      ]
    }
  }
}

With that change, everything works.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions