diff --git a/lib/mindee/image/image_extractor.rb b/lib/mindee/image/image_extractor.rb index e8231a36..d26e20f9 100644 --- a/lib/mindee/image/image_extractor.rb +++ b/lib/mindee/image/image_extractor.rb @@ -37,35 +37,33 @@ def self.extract_multiple_images_from_source(input_source, page_id, polygons) new_stream = load_input_source_pdf_page_as_stringio(input_source, page_id) new_stream.seek(0) - extract_images_from_polygons(input_source, new_stream, page_id, polygons) + extract_images_from_polygons(input_source, page_id, polygons) end # Extracts images from their positions on a file (as polygons). # # @param [Input::Source::LocalInputSource] input_source Local input source. - # @param [StringIO] pdf_stream Buffer of the PDF. # @param [Integer] page_id Page ID. # @param [Array] polygons # @return [Array] Extracted Images. - def self.extract_images_from_polygons(input_source, pdf_stream, page_id, polygons) + def self.extract_images_from_polygons(input_source, page_id, polygons) extracted_elements = [] # @type var extracted_elements: Array[Image::ExtractedImage] + input_source.io_stream.rewind + pdf_stream = StringIO.new(input_source.io_stream.read.to_s) + input_source.io_stream.rewind polygons.each_with_index do |polygon, element_id| polygon = ImageUtils.normalize_polygon(polygon) page_content = ImageUtils.read_page_content(pdf_stream) + points = [ + polygon.top_left, + polygon.bottom_right, + polygon.top_right, + polygon.bottom_left, + ] - min_max_x = Geometry.get_min_max_x([ - polygon.top_left, - polygon.bottom_right, - polygon.top_right, - polygon.bottom_left, - ]) - min_max_y = Geometry.get_min_max_y([ - polygon.top_left, - polygon.bottom_right, - polygon.top_right, - polygon.bottom_left, - ]) + min_max_x = Geometry.get_min_max_x(points) + min_max_y = Geometry.get_min_max_y(points) file_extension = ImageUtils.determine_file_extension(input_source) cropped_image = ImageUtils.crop_image(page_content, min_max_x, min_max_y) if file_extension == 'pdf' diff --git a/sig/mindee/image/image_extractor.rbs b/sig/mindee/image/image_extractor.rbs index 5f57c512..c873379f 100644 --- a/sig/mindee/image/image_extractor.rbs +++ b/sig/mindee/image/image_extractor.rbs @@ -5,7 +5,7 @@ module Mindee def self.attach_image_as_new_file: (StringIO | File, ?format: String) -> Origami::PDF def self.to_blob: () -> String def self.extract_multiple_images_from_source: (Input::Source::LocalInputSource, Integer, Array[Array[Geometry::Point] |Geometry::Polygon | Geometry::Quadrilateral]) -> Array[ExtractedImage] - def self.extract_images_from_polygons: (Input::Source::LocalInputSource, StringIO | File, Integer, Array[Array[Geometry::Point] | Geometry::Polygon | Geometry::Quadrilateral]) -> Array[ExtractedImage] + def self.extract_images_from_polygons: (Input::Source::LocalInputSource, Integer, Array[Array[Geometry::Point] | Geometry::Polygon | Geometry::Quadrilateral]) -> Array[ExtractedImage] def self.create_extracted_image: (StringIO | File, String, Integer, Integer) -> ExtractedImage def self.load_input_source_pdf_page_as_stringio: (Input::Source::LocalInputSource, Integer) -> (StringIO | File) end