Skip to content

Commit 66e3379

Browse files
Allow Configurable Converters on CSV (#8858)
Merge pull request 8858
1 parent d4e10d5 commit 66e3379

6 files changed

Lines changed: 119 additions & 7 deletions

File tree

.github/actions/spelling/patterns.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ themes\.googleusercontent\.com/static/fonts/[^/]+/v\d+/[^.]+.
4848
# google_site_verification:
4949
google_site_verification: [-a-zA-Z=;:/0-9+]*
5050

51+
# Ruby-doc.org
52+
https://ruby-doc\.org/.*
53+
5154
# Contributors
5255
alphabetical order.*:.*
5356
twitter_handle: .*

docs/_docs/datafiles.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,3 +148,30 @@ author: dave
148148
{% endraw %}
149149

150150
For information on how to build robust navigation for your site (especially if you have a documentation website or another type of Jekyll site with a lot of pages to organize), see [Navigation]({{ '/tutorials/navigation/' | relative_url }}).
151+
152+
## CSV/TSV Parse Options
153+
154+
The way Ruby parses CSV and TSV files can be customized with the `csv_reader` and `tsv_reader`
155+
configuration options. Each configuration key exposes the same options:
156+
157+
`csv_converters`: Which [CSV converters](https://ruby-doc.org/stdlib-2.5.0/libdoc/csv/rdoc/CSV.html#Converters) should be
158+
used when parsing the file. Available options are `integer`, `float`, `numeric`, `date`, `date_time` and
159+
`all`. By default, this list is empty.
160+
`encoding`: What encoding the files are in. Defaults to the site `encoding` configuration option.
161+
`headers`: Boolean field for whether to parse the first line of the file as headers. When `false`, it treats the
162+
first row as data. Defaults to `true`.
163+
164+
Examples:
165+
166+
```yaml
167+
csv_reader:
168+
  csv_converters:
169+
    - numeric
170+
    - date_time
171+
  headers: true
172+
  encoding: utf-8
173+
tsv_reader:
174+
  csv_converters:
175+
    - all
176+
  headers: false
177+
```

lib/jekyll/readers/data_reader.rb

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,9 @@ def read_data_file(path)
5959

6060
case File.extname(path).downcase
6161
when ".csv"
62-
CSV.read(path,
63-
:headers => true,
64-
:encoding => site.config["encoding"]).map(&:to_hash)
62+
CSV.read(path, **csv_config).map { |row| convert_row(row) }
6563
when ".tsv"
66-
CSV.read(path,
67-
:col_sep => "\t",
68-
:headers => true,
69-
:encoding => site.config["encoding"]).map(&:to_hash)
64+
CSV.read(path, **tsv_config).map { |row| convert_row(row) }
7065
else
7166
SafeYAML.load_file(path)
7267
end
@@ -76,5 +71,43 @@ def sanitize_filename(name)
7671
name.gsub(%r![^\w\s-]+|(?<=^|\b\s)\s+(?=$|\s?\b)!, "")
7772
.gsub(%r!\s+!, "_")
7873
end
74+
75+
private
76+
77+
# Options passed to CSV.read for ".csv" data files.
# Built from the "csv_reader" configuration key on first use, then cached.
#
# @return [Hash]
def csv_config
  return @csv_config if @csv_config

  @csv_config = read_config("csv_reader")
end
81+
82+
# Options passed to CSV.read for ".tsv" data files.
# Built from the "tsv_reader" configuration key on first use, then cached;
# the column separator is always forced to a tab.
#
# @return [Hash]
def tsv_config
  return @tsv_config if @tsv_config

  tab_separated = { :col_sep => "\t" }
  @tsv_config = read_config("tsv_reader", tab_separated)
end
86+
87+
# Builds the option hash for one reader from the site configuration.
#
# Looks up the "csv_converters", "headers" and "encoding" keys under
# +config_key+. Missing keys fall back to no converters, headers enabled
# and the top-level "encoding" setting respectively.
#
# @param config_key [String] configuration key holding the reader settings
# @param overrides [Hash] options merged over the computed ones; they win on conflict
# @return [Hash] options with :converters, :headers and :encoding, plus any overrides
# @see https://ruby-doc.org/stdlib-2.5.0/libdoc/csv/rdoc/CSV.html#Converters
def read_config(config_key, overrides = {})
  reader_config = config[config_key] || {}

  # Array() tolerates a key that is present but blank (nil) as well as a
  # single scalar converter name, both of which would break a bare `map`.
  converters = Array(reader_config.fetch("csv_converters", [])).map(&:to_sym)

  {
    :converters => converters,
    :headers    => reader_config.fetch("headers", true),
    :encoding   => reader_config.fetch("encoding", config["encoding"]),
  }.merge(overrides)
end
102+
103+
# The value of site.config, looked up once and cached.
def config
  return @config if @config

  @config = site.config
end
106+
107+
# Normalizes one parsed row: a CSV::Row becomes a plain Hash, and any other
# value (e.g. an Array) is returned untouched.
#
# @param row [Array, CSV::Row]
# @return [Array, Hash]
def convert_row(row)
  # is_a? rather than instance_of? so subclasses of CSV::Row are converted too
  row.is_a?(CSV::Row) ? row.to_hash : row
end
79112
end
80113
end

test/fixtures/sample.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
id,field_a
2+
1,"foo"
3+
2,"bar"

test/fixtures/sample.tsv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
id field_a
2+
1 "foo"
3+
2 "bar"

test/test_data_reader.rb

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,47 @@ class TestDataReader < JekyllUnitTest
1414
)
1515
end
1616
end
17+
18+
context "with no csv options set" do
19+
setup do
20+
@reader = DataReader.new(fixture_site)
21+
@parsed = [{ "id" => "1", "field_a" => "foo" }, { "id" => "2", "field_a" => "bar" }]
22+
end
23+
24+
should "parse CSV normally" do
25+
assert_equal @parsed, @reader.read_data_file(File.expand_path("fixtures/sample.csv", __dir__))
26+
end
27+
28+
should "parse TSV normally" do
29+
assert_equal @parsed, @reader.read_data_file(File.expand_path("fixtures/sample.tsv", __dir__))
30+
end
31+
end
32+
33+
context "with csv options set" do
34+
setup do
35+
reader_config = {
36+
"csv_converters" => [:numeric],
37+
"headers" => false,
38+
}
39+
40+
@reader = DataReader.new(
41+
fixture_site(
42+
{
43+
"csv_reader" => reader_config,
44+
"tsv_reader" => reader_config,
45+
}
46+
)
47+
)
48+
49+
@parsed = [%w(id field_a), [1, "foo"], [2, "bar"]]
50+
end
51+
52+
should "parse CSV with options" do
53+
assert_equal @parsed, @reader.read_data_file(File.expand_path("fixtures/sample.csv", __dir__))
54+
end
55+
56+
should "parse TSV with options" do
57+
assert_equal @parsed, @reader.read_data_file(File.expand_path("fixtures/sample.tsv", __dir__))
58+
end
59+
end
1760
end

0 commit comments

Comments
 (0)