From 2fc2c039518bab17950d003c8b3923238b4a937d Mon Sep 17 00:00:00 2001 From: Igor Fedoronchuk Date: Mon, 15 Jun 2026 12:58:45 +0200 Subject: [PATCH] fix: compose successive batch_slice_columns calls (#186) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit batch_slice_columns memoized a single index set for the whole import, so a second call reused the first call's column positions against already-sliced rows — corrupting values instead of narrowing further (e.g. the name column received the last_name). Reset the working headers to the parsed source set at the start of each batch, so each call computes indices from the current headers and successive calls narrow the previous call's result. @source_headers holds the complete parsed header row; @headers is its per-batch working copy. Cover successive calls across one and multiple batches. --- lib/active_admin_import/importer.rb | 40 ++++++++++------------ spec/fixtures/files/authors_many.csv | 6 ++++ spec/import_spec.rb | 50 ++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 22 deletions(-) create mode 100644 spec/fixtures/files/authors_many.csv diff --git a/lib/active_admin_import/importer.rb b/lib/active_admin_import/importer.rb index 4272476..47b38e6 100644 --- a/lib/active_admin_import/importer.rb +++ b/lib/active_admin_import/importer.rb @@ -82,23 +82,15 @@ def batch_replace(header_key, options) # end # def batch_slice_columns(slice_columns) - # Only set @use_indexes for the first batch so that @use_indexes are in correct - # position for subsequent batches - unless defined?(@use_indexes) - @use_indexes = [] - headers.values.each_with_index do |val, index| - @use_indexes << index if val.in?(slice_columns) - end - return csv_lines if @use_indexes.empty? - - # slice CSV headers - @headers = headers.to_a.values_at(*@use_indexes).to_h - end + columns = headers.values + indexes = columns.each_index.select { |i| columns[i].in?(slice_columns) } + return csv_lines if indexes.empty? - # slice CSV values - csv_lines.map! do |line| - line.values_at(*@use_indexes) - end + # @headers is reset to the full set at the start of every batch (see + # #batch_import), so each call narrows the previous call's result and every + # batch slices the same way — calling this more than once now composes (#186). + @headers = headers.to_a.values_at(*indexes).to_h + csv_lines.map! { |line| line.values_at(*indexes) } end def values_at(header_key) @@ -129,17 +121,18 @@ def process_file end def prepare_headers - headers = self.headers.present? ? self.headers : yield - blank_positions = headers.each_index.select { |i| headers[i].to_s.strip.empty? } + names = self.headers.present? ? self.headers : yield + blank_positions = names.each_index.select { |i| names[i].to_s.strip.empty? } unless blank_positions.empty? raise ActiveAdminImport::Exception, "blank column header at column #{blank_positions.map { |i| i + 1 }.join(', ')}" end - headers = headers.map(&:to_s) - @headers = Hash[headers.zip(headers.map { |el| el.underscore.gsub(/\s+/, '_') })].with_indifferent_access - @headers.merge!(options[:headers_rewrites].symbolize_keys.slice(*@headers.symbolize_keys.keys)) - @headers + names = names.map(&:to_s) + # @source_headers is the complete parsed header row; batch_import copies it + # into the per-batch working @headers (see #batch_import). + @source_headers = Hash[names.zip(names.map { |el| el.underscore.gsub(/\s+/, '_') })].with_indifferent_access + @source_headers.merge!(options[:headers_rewrites].symbolize_keys.slice(*@source_headers.symbolize_keys.keys)) end def run_callback(name) @@ -148,6 +141,9 @@ def run_callback(name) def batch_import batch_result = nil + # Every batch re-parses full-width rows, so restore the full header set + # before slicing; batch_slice_columns then narrows this working copy. + @headers = @source_headers.dup @resource.transaction do run_callback(:before_batch_import) batch_result = resource.import(headers.values, csv_lines, import_options) diff --git a/spec/fixtures/files/authors_many.csv b/spec/fixtures/files/authors_many.csv new file mode 100644 index 0000000..7f1e037 --- /dev/null +++ b/spec/fixtures/files/authors_many.csv @@ -0,0 +1,6 @@ +Birthday,Name,Last name +1986-05-01,John,Doe +1988-11-16,Jane,Roe +1990-01-01,Jack,Smith +1991-02-02,Jill,Jones +1992-03-03,Joe,Brown diff --git a/spec/import_spec.rb b/spec/import_spec.rb index f904cf8..e44a500 100644 --- a/spec/import_spec.rb +++ b/spec/import_spec.rb @@ -656,6 +656,56 @@ def upload_file!(name, ext = 'csv') end end + # Issue #186: each call must slice the result of the previous one, not re-apply + # the first call's indices to an already-sliced row. + context "with successive batch_slice_columns calls" do + before do + # validate: false isolates the slicing behaviour from the model's + # uniqueness validation, which would otherwise reject a later batch's + # author for sharing a NULL last_name with an earlier one. + add_author_resource template_object: ActiveAdminImport::Model.new, + validate: false, + before_batch_import: lambda { |importer| + importer.batch_slice_columns(%w(name last_name)) + importer.batch_slice_columns(%w(name)) + }, + batch_size: batch_size + visit "/admin/authors/import" + upload_file!(fixture) + end + + context "within a single batch" do + let(:batch_size) { 2 } # authors.csv has 2 rows -> 1 batch + let(:fixture) { :authors } + + it "narrows the columns progressively" do + expect(Author.pluck(:name, :last_name, :birthday)).to match_array( + [ + ["Jane", nil, nil], + ["John", nil, nil] + ] + ) + end + end + + context "across several batches" do + let(:batch_size) { 2 } # authors_many.csv has 5 rows -> 3 batches + let(:fixture) { :authors_many } + + it "narrows the columns progressively in every batch" do + expect(Author.pluck(:name, :last_name, :birthday)).to match_array( + [ + ["John", nil, nil], + ["Jane", nil, nil], + ["Jack", nil, nil], + ["Jill", nil, nil], + ["Joe", nil, nil] + ] + ) + end + end + end + context 'with invalid options' do let(:options) { { invalid_option: :invalid_value } }