Improve utility scripts with modern dependencies and enhanced features#7461
Improve utility scripts with modern dependencies and enhanced features#7461DennisOSRM merged 6 commits intomasterfrom
Conversation
- osm2cucumber.js: Switch from xml2js to fast-xml-parser for better performance - osrm-runner.js: Add verbose flag, URL path extraction, and query statistics (min/max/avg/median) - poly2req.js: Add CLI argument support with help, support multiple query types/regions, output CSV-quoted format Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
There was a problem hiding this comment.
Pull request overview
This PR modernizes and extends the repository’s Node.js utility scripts used for generating OSRM query inputs, running query benchmarks, and converting OSM extracts to Cucumber scenarios, alongside dependency/lockfile updates to support these changes.
Changes:
- Enhances
poly2req.jswith a real CLI (help/options), predefined bounding boxes, and CSV-quoted output intended to feedosrm-runner.js. - Extends
osrm-runner.jswith a--verbosemode, URL→path normalization for query files, and aggregate query timing statistics. - Updates
osm2cucumber.jsto parse OSM XML viafast-xml-parserinstead ofxml2jsand bumps various npm lockfile entries.
Reviewed changes
Copilot reviewed 3 out of 4 changed files in this pull request and generated 7 comments.
| File | Description |
|---|---|
| scripts/poly2req.js | Adds CLI parsing + bbox presets and emits CSV-quoted query URLs |
| scripts/osrm-runner.js | Adds verbose output toggle, extracts path from full URLs, and prints summary stats |
| scripts/osm2cucumber.js | Switches XML parsing implementation to fast-xml-parser |
| package-lock.json | Updates lockfile for dependency/version/engine changes |
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| function makeQuery(coords) { | ||
| let coords_string = coords.map((c) => coord_templates[VERSION].replace('{lon}', c[0]).replace('{lat}', c[1])).join(coords_separators[VERSION]); | ||
| return url_templates[VERSION].replace('{coords}', coords_string).replace('{port}', PORT); | ||
| const coords_string = coords.map((c) => coord_template.replace('{lon}', c[0]).replace('{lat}', c[1])).join(coords_separator); | ||
| const path = url_templates[options.type].replace('{coords}', coords_string); | ||
| return `http://${options.host}:${options.port}${path}`; | ||
| } | ||
|
|
||
| // Generate queries based on distance or waypoint count | ||
| if (axis == 'distance') | ||
| // Output in CSV format (quoted) for compatibility with osrm-runner.js | ||
| if (options.axis == 'distance') | ||
| { | ||
| for (let i = 0; i < NUM_REQUEST; ++i) | ||
| for (let i = 0; i < options.number; ++i) | ||
| { | ||
| let coords = []; | ||
| for (let j = 0; j < NUM_COORDS; ++j) | ||
| for (let j = 0; j < options.coords; ++j) | ||
| { | ||
| coords.push(getRandomCoordinate()); | ||
| } | ||
| console.log(makeQuery(coords)); | ||
| console.log(`"${makeQuery(coords)}"`); | ||
| } | ||
| } | ||
| else if (axis == 'waypoints') | ||
| else if (options.axis == 'waypoints') | ||
| { |
There was a problem hiding this comment.
options.type and options.axis are used as unchecked selectors (url_templates[options.type], branching on options.axis). If a user passes an unexpected value, this can throw (or silently produce no output). Consider validating these options against the allowed set and exiting with a clear error + usage.
| // Collect all results for statistics | ||
| const all_results = []; | ||
| let completed_queries = 0; | ||
|
|
||
| queries.forEach(query => { | ||
| run_query(query, options.filter, (query, code, ttfb, total, results) => { | ||
| const data = results ? JSON.stringify(results[0]) : ''; | ||
| const record = [[query, code, ttfb, total, data]]; | ||
| process.stdout.write(csv_stringify(record)); | ||
| const record = { | ||
| query, | ||
| code, | ||
| ttfb: ttfb ?? 0, | ||
| total: total ?? 0, | ||
| data | ||
| }; |
There was a problem hiding this comment.
Even when --verbose is not set, the runner collects and stores data (JSON string) for every query. For large query sets this can be a significant memory hit. Consider only serializing/storing data when options.verbose is true, and otherwise keep only the fields needed for statistics.
| queries.forEach(query => { | ||
| run_query(query, options.filter, (query, code, ttfb, total, results) => { | ||
| const data = results ? JSON.stringify(results[0]) : ''; | ||
| const record = [[query, code, ttfb, total, data]]; | ||
| process.stdout.write(csv_stringify(record)); | ||
| const record = { | ||
| query, | ||
| code, | ||
| ttfb: ttfb ?? 0, | ||
| total: total ?? 0, | ||
| data | ||
| }; | ||
| all_results.push(record); | ||
| completed_queries++; | ||
|
|
||
| // When all queries are done, print statistics and CSV output | ||
| if (completed_queries === queries.length) { | ||
| print_statistics(all_results); | ||
|
|
||
| // Output CSV data only if verbose flag is set | ||
| if (options.verbose) { | ||
| const csv_records = all_results.map(r => [r.query, r.code, r.ttfb, r.total, r.data]); | ||
| process.stdout.write(csv_stringify(csv_records)); | ||
| } | ||
| } | ||
| }); | ||
| }); | ||
| }); |
There was a problem hiding this comment.
If queries.length === 0 (empty/invalid queries file, or all rows filtered out), the forEach body never runs and statistics are never printed. Handle the empty case explicitly (print stats showing 0 total, and exit with a non-zero status if appropriate).
| // Load queries from file or generate random coordinate-based queries | ||
| let queries = []; | ||
| if (options.hasOwnProperty('queries-files')) { | ||
| queries = fs.readFileSync(options['queries-files']) | ||
| .toString() | ||
| .split('\n') | ||
| .map(r => { const match = /^"([^\"]+)"/.exec(r); return match ? match[1] : null; }) | ||
| .filter(q => q); | ||
| .filter(q => q) | ||
| .map(q => { | ||
| // If query is a full URL, extract just the path portion | ||
| const url_match = /^https?:\/\/[^\/]+(\/.*?)$/.exec(q); | ||
| return url_match ? url_match[1] : q; | ||
| }); |
There was a problem hiding this comment.
The --queries-files help text says "queries in the first row", but this parser expects one query per line and extracts the first quoted field on each line. Consider aligning the option description and/or supporting a real CSV parser if the input is intended to be CSV with multiple columns.
Allow passing a .poly file or named bbox as a positional argument, restoring the previous usage: scripts/poly2req.js ./test/data/monaco.poly Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
- Fix ne bounding box init to [-Infinity, -Infinity] to handle negative coords - Fix --path option description: v2 -> v1 to match actual default - Add fast-xml-parser to devDependencies for osm2cucumber.js Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Changes
osm2cucumber.js
.$to@_prefix formatosrm-runner.js
--verboseflag to optionally print CSV output (stats always shown)poly2req.js