// chromium/src/third_party/blink/renderer/core/frame/smart_clip.cc - manifest_repos/chromium_src - Git at Google

 /*
  * Copyright (C) 2013 Google Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  *
  *     * Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
  *     * Neither the name of Google Inc. nor the names of its
  * contributors may be used to endorse or promote products derived from
  * this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 #include "third_party/blink/renderer/core/frame/smart_clip.h"

 #include "third_party/blink/renderer/core/dom/container_node.h"
 #include "third_party/blink/renderer/core/dom/document.h"
 #include "third_party/blink/renderer/core/dom/node_computed_style.h"
 #include "third_party/blink/renderer/core/dom/node_traversal.h"
 #include "third_party/blink/renderer/core/frame/local_dom_window.h"
 #include "third_party/blink/renderer/core/frame/local_frame_view.h"
 #include "third_party/blink/renderer/core/html/html_frame_owner_element.h"
 #include "third_party/blink/renderer/core/layout/layout_object.h"
 #include "third_party/blink/renderer/core/page/page.h"
 #include "third_party/blink/renderer/platform/wtf/text/string_builder.h"

 namespace blink {

 // Translates |rect_in_viewport| into |view|'s frame coordinates via
 // ViewportToFrame(). If a dimension was positive in the viewport but comes
 // back as zero after the translation, it is forced to 1 so that the dimension
 // does not collapse to zero.
 static IntRect ConvertToContentCoordinatesWithoutCollapsingToZero(
     const IntRect& rect_in_viewport,
     const LocalFrameView* view) {
   IntRect converted = view->ViewportToFrame(rect_in_viewport);

   const bool had_width = rect_in_viewport.Width() > 0;
   if (had_width && converted.Width() == 0)
     converted.SetWidth(1);

   const bool had_height = rect_in_viewport.Height() > 0;
   if (had_height && converted.Height() == 0)
     converted.SetHeight(1);

   return converted;
 }

 // Returns the content document of |node| when |node| is an
 // HTMLFrameOwnerElement, and nullptr for every other kind of node.
 static Node* NodeInsideFrame(Node* node) {
   auto* owner = DynamicTo<HTMLFrameOwnerElement>(node);
   if (!owner)
     return nullptr;
   return owner->contentDocument();
 }

 // Creates a SmartClip operating on |frame|; the pointer is stored in |frame_|
 // and dereferenced later by DataForRect() and PageScaleFactor().
 SmartClip::SmartClip(LocalFrame* frame) : frame_(frame) {}

 // Builds the clip data (a bounding rect converted with FrameToViewport() plus
 // the extracted text) for the content under |crop_rect_in_viewport|. Returns a
 // default-constructed SmartClipData when no suitable node is found.
 SmartClipData SmartClip::DataForRect(const IntRect& crop_rect_in_viewport) {
   Node* target =
       FindBestOverlappingNode(frame_->GetDocument(), crop_rect_in_viewport);
   if (!target)
     return SmartClipData();

   // FIXME: Only a single level of frame nesting is hit-tested here; nested
   // frames should probably be supported as well.
   Node* frame_root = NodeInsideFrame(target);
   if (frame_root) {
     Node* target_in_frame =
         FindBestOverlappingNode(frame_root, crop_rect_in_viewport);
     if (target_in_frame)
       target = target_in_frame;
   }

   HeapVector<Member<Node>> overlapping;
   CollectOverlappingChildNodes(target, crop_rect_in_viewport, overlapping);

   // When no child overlaps the crop rect, or every child does, clip the
   // target node itself instead of its individual children.
   const bool use_target_itself =
       overlapping.IsEmpty() || overlapping.size() == target->CountChildren();
   if (use_target_itself) {
     overlapping.clear();
     overlapping.push_back(target);
   }

   // Unite() does not work when starting from an empty rect, so the
   // accumulator is seeded with the first node's box.
   IntRect bounds = overlapping[0]->PixelSnappedBoundingBox();
   StringBuilder text;
   for (const auto& hit : overlapping) {
     text.Append(ExtractTextFromNode(hit));
     bounds.Unite(hit->PixelSnappedBoundingBox());
   }

   return SmartClipData(frame_->GetDocument()->View()->FrameToViewport(bounds),
                        text.ToString());
 }

 float SmartClip::PageScaleFactor() {
   return frame_->GetPage()->PageScaleFactor();
 }

 // Merges |new_node| into the running "best node" |min_node| and returns the
 // node that should be carried forward:
 //   - if either argument is null, the other one is returned;
 //   - if one node's bounding box contains the other's, the containing node is
 //     returned, except that when the contained node's grandparent is the
 //     containing node's parent, that shared ancestor is returned instead;
 //   - otherwise the closest inclusive ancestor of |min_node| that has a layout
 //     object and whose box contains |new_node|'s box is returned, or nullptr
 //     when no such ancestor exists.
 // The name is historical; a more descriptive one would be welcome.
 Node* SmartClip::MinNodeContainsNodes(Node* min_node, Node* new_node) {
   if (!new_node)
     return min_node;
   if (!min_node)
     return new_node;

   const IntRect min_box = min_node->PixelSnappedBoundingBox();
   const IntRect new_box = new_node->PixelSnappedBoundingBox();

   Node* const min_parent = min_node->parentNode();
   Node* const new_parent = new_node->parentNode();
   const bool both_have_parents = min_parent && new_parent;

   if (min_box.Contains(new_box)) {
     const bool shares_ancestor =
         both_have_parents && new_parent->parentNode() == min_parent;
     return shares_ancestor ? min_parent : min_node;
   }

   if (new_box.Contains(min_box)) {
     const bool shares_ancestor =
         both_have_parents && min_parent->parentNode() == new_parent;
     return shares_ancestor ? new_parent : new_node;
   }

   // Neither box contains the other: climb from |min_node| towards the root
   // looking for the first laid-out node that encloses |new_node|'s box. It
   // is unclear why that node is interesting; presumably this often ends up
   // being the documentElement.
   for (Node* ancestor = min_node; ancestor;
        ancestor = ancestor->parentNode()) {
     if (!ancestor->GetLayoutObject())
       continue;
     if (ancestor->PixelSnappedBoundingBox().Contains(new_box))
       return ancestor;
   }

   return nullptr;
 }

 // Traverses |root_node| and its descendants with NodeTraversal and folds every
 // candidate node that intersects the crop rect into a single result through
 // MinNodeContainsNodes(). Candidates are nodes with a layout object and a
 // non-empty box that are text, images, frame owners, or carry a background
 // image that ShouldSkipBackgroundImage() does not reject. Subtrees rooted at
 // an element with aria-hidden="true", or at a candidate that misses the crop
 // rect, are skipped. Returns nullptr when |root_node| is null or when the fold
 // ends up with no node.
 Node* SmartClip::FindBestOverlappingNode(Node* root_node,
                                          const IntRect& crop_rect_in_viewport) {
   if (!root_node)
     return nullptr;

   const IntRect crop_in_frame =
       ConvertToContentCoordinatesWithoutCollapsingToZero(
           crop_rect_in_viewport, root_node->GetDocument().View());

   Node* best = nullptr;
   Node* node = root_node;
   while (node) {
     const IntRect node_rect = node->PixelSnappedBoundingBox();

     auto* element = DynamicTo<Element>(node);
     const bool is_aria_hidden =
         element &&
         EqualIgnoringASCIICase(
             element->FastGetAttribute(html_names::kAriaHiddenAttr), "true");

     bool skip_subtree = is_aria_hidden;
     if (!skip_subtree) {
       LayoutObject* layout_object = node->GetLayoutObject();
       const bool has_box = layout_object && !node_rect.IsEmpty();
       const bool is_candidate =
           has_box &&
           (layout_object->IsText() || layout_object->IsLayoutImage() ||
            node->IsFrameOwnerElement() ||
            (layout_object->StyleRef().HasBackgroundImage() &&
             !ShouldSkipBackgroundImage(node)));
       if (is_candidate) {
         if (crop_in_frame.Intersects(node_rect))
           best = MinNodeContainsNodes(best, node);
         else
           skip_subtree = true;
       }
     }

     node = skip_subtree ? NodeTraversal::NextSkippingChildren(*node, root_node)
                         : NodeTraversal::Next(*node, root_node);
   }

   return best;
 }

 // Heuristic deciding whether |node|'s CSS background image should be left out
 // of the smart clip. The apparent intent is to keep sprites built from CSS
 // backgrounds while ignoring genuine page backgrounds. Returns true ("skip")
 // for anything that is not a <span> or <div>, and for laid-out nodes whose
 // logical width or height is auto.
 bool SmartClip::ShouldSkipBackgroundImage(Node* node) {
   DCHECK(node);

   // Only background images on spans and divs are considered at all.
   const bool is_span_or_div =
       IsA<HTMLSpanElement>(*node) || IsA<HTMLDivElement>(*node);
   if (!is_span_or_div)
     return true;

   LayoutObject* layout_object = node->GetLayoutObject();
   if (!layout_object)
     return false;

   // A sprite cut out of a CSS background usually has an explicit width or
   // height, whereas a real background image very likely leaves one of them
   // set to auto.
   const auto& style = layout_object->StyleRef();
   return style.LogicalHeight().IsAuto() || style.LogicalWidth().IsAuto();
 }

 // Appends to |hit_nodes| every direct child of |parent_node| whose bounding
 // box intersects the crop rect (after converting the crop rect to the frame
 // coordinates of |parent_node|'s document view). Does nothing when
 // |parent_node| is null.
 void SmartClip::CollectOverlappingChildNodes(
     Node* parent_node,
     const IntRect& crop_rect_in_viewport,
     HeapVector<Member<Node>>& hit_nodes) {
   if (!parent_node)
     return;

   const IntRect crop_in_frame =
       ConvertToContentCoordinatesWithoutCollapsingToZero(
           crop_rect_in_viewport, parent_node->GetDocument().View());

   Node* child = parent_node->firstChild();
   while (child) {
     if (crop_in_frame.Intersects(child->PixelSnappedBoundingBox()))
       hit_nodes.push_back(child);
     child = child->nextSibling();
   }
 }

 // Concatenates the values of the text nodes found in |node| and its
 // descendants. Nodes without a computed style or with user-select: none are
 // ignored; frame owner elements contribute the text of their content
 // document. A '\n' is inserted each time a text node's box has a different y
 // position from the previously appended text node.
 String SmartClip::ExtractTextFromNode(Node* node) {
   // A y position that no text node is expected to have, so that the first
   // text node encountered always starts a new line.
   const int kNoPreviousLine = -99999;
   int last_line_y = kNoPreviousLine;

   StringBuilder text;
   for (Node& descendant : NodeTraversal::InclusiveDescendantsOf(*node)) {
     const ComputedStyle* style = descendant.GetComputedStyle();
     const bool selectable = style && style->UserSelect() != EUserSelect::kNone;
     if (!selectable)
       continue;

     Node* frame_document = NodeInsideFrame(&descendant);
     if (frame_document)
       text.Append(ExtractTextFromNode(frame_document));

     const IntRect box = descendant.PixelSnappedBoundingBox();
     if (!descendant.GetLayoutObject() || box.IsEmpty() ||
         !descendant.IsTextNode()) {
       continue;
     }

     // A node whose whole value is "\n" contributes no characters of its own
     // (possibly an attempt to ignore <br>-like line breaks).
     String value = descendant.nodeValue();
     if (value == "\n")
       value = "";

     if (box.Y() != last_line_y) {
       last_line_y = box.Y();
       text.Append('\n');
     }

     text.Append(value);
   }

   return text.ToString();
 }

 }  // namespace blink
	/*
	* Copyright (C) 2013 Google Inc. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions are
	* met:
	*
	* * Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* * Redistributions in binary form must reproduce the above
	* copyright notice, this list of conditions and the following disclaimer
	* in the documentation and/or other materials provided with the
	* distribution.
	* * Neither the name of Google Inc. nor the names of its
	* contributors may be used to endorse or promote products derived from
	* this software without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/

	#include "third_party/blink/renderer/core/frame/smart_clip.h"

	#include "third_party/blink/renderer/core/dom/container_node.h"
	#include "third_party/blink/renderer/core/dom/document.h"
	#include "third_party/blink/renderer/core/dom/node_computed_style.h"
	#include "third_party/blink/renderer/core/dom/node_traversal.h"
	#include "third_party/blink/renderer/core/frame/local_dom_window.h"
	#include "third_party/blink/renderer/core/frame/local_frame_view.h"
	#include "third_party/blink/renderer/core/html/html_frame_owner_element.h"
	#include "third_party/blink/renderer/core/layout/layout_object.h"
	#include "third_party/blink/renderer/core/page/page.h"
	#include "third_party/blink/renderer/platform/wtf/text/string_builder.h"

	namespace blink {

	// Converts |rect_in_viewport| to the frame coordinates of |view|. A width or
	// height that was positive before the conversion but is zero afterwards is
	// reset to 1 so that the dimension does not vanish.
	static IntRect ConvertToContentCoordinatesWithoutCollapsingToZero(
	    const IntRect& rect_in_viewport,
	    const LocalFrameView* view) {
	  IntRect frame_rect = view->ViewportToFrame(rect_in_viewport);
	  if (rect_in_viewport.Width() > 0) {
	    if (frame_rect.Width() == 0)
	      frame_rect.SetWidth(1);
	  }
	  if (rect_in_viewport.Height() > 0) {
	    if (frame_rect.Height() == 0)
	      frame_rect.SetHeight(1);
	  }
	  return frame_rect;
	}

	// If |node| is an HTMLFrameOwnerElement, returns its content document;
	// otherwise returns nullptr.
	static Node* NodeInsideFrame(Node* node) {
	  auto* frame_owner = DynamicTo<HTMLFrameOwnerElement>(node);
	  if (frame_owner == nullptr)
	    return nullptr;
	  return frame_owner->contentDocument();
	}

	// Creates a SmartClip operating on |frame|; the pointer is stored in |frame_|
	// and dereferenced later by DataForRect() and PageScaleFactor().
	SmartClip::SmartClip(LocalFrame* frame) : frame_(frame) {}

	// Builds the clip data (a bounding rect converted with FrameToViewport() plus
	// the extracted text) for the content under |crop_rect_in_viewport|. Returns a
	// default-constructed SmartClipData when no suitable node is found.
	SmartClipData SmartClip::DataForRect(const IntRect& crop_rect_in_viewport) {
	  Node* best_node =
	      FindBestOverlappingNode(frame_->GetDocument(), crop_rect_in_viewport);
	  if (!best_node)
	    return SmartClipData();

	  if (Node* node_from_frame = NodeInsideFrame(best_node)) {
	    // FIXME: This code only hit-tests a single iframe. It seems like we ought
	    // to support nested frames.
	    if (Node* best_node_in_frame =
	            FindBestOverlappingNode(node_from_frame, crop_rect_in_viewport))
	      best_node = best_node_in_frame;
	  }

	  HeapVector<Member<Node>> hit_nodes;
	  CollectOverlappingChildNodes(best_node, crop_rect_in_viewport, hit_nodes);

	  // When no child overlaps the crop rect, or every child does, clip the best
	  // node itself instead of its individual children.
	  if (hit_nodes.IsEmpty() || hit_nodes.size() == best_node->CountChildren()) {
	    hit_nodes.clear();
	    hit_nodes.push_back(best_node);
	  }

	  // Unite won't work with the empty rect, so we initialize to the first rect.
	  IntRect united_rects = hit_nodes[0]->PixelSnappedBoundingBox();
	  StringBuilder collected_text;
	  for (wtf_size_t i = 0; i < hit_nodes.size(); ++i) {
	    collected_text.Append(ExtractTextFromNode(hit_nodes[i]));
	    united_rects.Unite(hit_nodes[i]->PixelSnappedBoundingBox());
	  }

	  return SmartClipData(
	      frame_->GetDocument()->View()->FrameToViewport(united_rects),
	      collected_text.ToString());
	}

	float SmartClip::PageScaleFactor() {
	return frame_->GetPage()->PageScaleFactor();
	}

	// Combines the current best node |min_node| with a newly found |new_node|:
	//   - a null argument yields the other argument;
	//   - when one box contains the other, the containing node wins, unless the
	//     contained node's grandparent equals the containing node's parent, in
	//     which case that common ancestor is returned;
	//   - otherwise the nearest inclusive ancestor of |min_node| that has a
	//     layout object and whose box contains |new_node|'s box is returned, or
	//     nullptr if there is none.
	// The name is historical; a more descriptive one would be welcome.
	Node* SmartClip::MinNodeContainsNodes(Node* min_node, Node* new_node) {
	  if (!new_node)
	    return min_node;
	  if (!min_node)
	    return new_node;

	  const IntRect current_rect = min_node->PixelSnappedBoundingBox();
	  const IntRect incoming_rect = new_node->PixelSnappedBoundingBox();

	  Node* current_parent = min_node->parentNode();
	  Node* incoming_parent = new_node->parentNode();

	  if (current_rect.Contains(incoming_rect)) {
	    if (!current_parent || !incoming_parent)
	      return min_node;
	    if (incoming_parent->parentNode() == current_parent)
	      return current_parent;
	    return min_node;
	  }

	  if (incoming_rect.Contains(current_rect)) {
	    if (!current_parent || !incoming_parent)
	      return new_node;
	    if (current_parent->parentNode() == incoming_parent)
	      return incoming_parent;
	    return new_node;
	  }

	  // No containment either way: walk up from |min_node| until a laid-out node
	  // encloses the incoming rect. It is unclear why this node is the
	  // interesting one; presumably it is often just the documentElement.
	  Node* candidate = min_node;
	  while (candidate) {
	    if (candidate->GetLayoutObject() &&
	        candidate->PixelSnappedBoundingBox().Contains(incoming_rect)) {
	      return candidate;
	    }
	    candidate = candidate->parentNode();
	  }

	  return nullptr;
	}

	// Traverses |root_node| and its descendants with NodeTraversal and folds every
	// candidate node that intersects the crop rect into a single result through
	// MinNodeContainsNodes(). Candidates are nodes with a layout object and a
	// non-empty box that are text, images, frame owners, or carry a background
	// image that ShouldSkipBackgroundImage() does not reject. Returns nullptr when
	// |root_node| is null or when the fold ends up with no node.
	Node* SmartClip::FindBestOverlappingNode(Node* root_node,
	                                         const IntRect& crop_rect_in_viewport) {
	  if (!root_node)
	    return nullptr;

	  IntRect resized_crop_rect =
	      ConvertToContentCoordinatesWithoutCollapsingToZero(
	          crop_rect_in_viewport, root_node->GetDocument().View());

	  Node* node = root_node;
	  Node* min_node = nullptr;

	  while (node) {
	    IntRect node_rect = node->PixelSnappedBoundingBox();
	    auto* element = DynamicTo<Element>(node);
	    // Elements marked aria-hidden="true" are ignored together with their
	    // whole subtree.
	    if (element &&
	        EqualIgnoringASCIICase(
	            element->FastGetAttribute(html_names::kAriaHiddenAttr), "true")) {
	      node = NodeTraversal::NextSkippingChildren(*node, root_node);
	      continue;
	    }

	    LayoutObject* layout_object = node->GetLayoutObject();
	    if (layout_object && !node_rect.IsEmpty()) {
	      if (layout_object->IsText() || layout_object->IsLayoutImage() ||
	          node->IsFrameOwnerElement() ||
	          (layout_object->StyleRef().HasBackgroundImage() &&
	           !ShouldSkipBackgroundImage(node))) {
	        if (resized_crop_rect.Intersects(node_rect)) {
	          min_node = MinNodeContainsNodes(min_node, node);
	        } else {
	          // A candidate that misses the crop rect is skipped along with its
	          // descendants.
	          node = NodeTraversal::NextSkippingChildren(*node, root_node);
	          continue;
	        }
	      }
	    }
	    node = NodeTraversal::Next(*node, root_node);
	  }

	  return min_node;
	}

	// This function appears to heuristically guess whether to include a background
	// image in the smart clip. It seems to want to include sprites created from
	// CSS background images but to skip actual backgrounds. Returns true when the
	// background image of |node| should be skipped.
	bool SmartClip::ShouldSkipBackgroundImage(Node* node) {
	  DCHECK(node);
	  // Apparently we're only interested in background images on spans and divs.
	  if (!IsA<HTMLSpanElement>(node) && !IsA<HTMLDivElement>(node))
	    return true;

	  // This check actually makes a bit of sense. If you're going to sprite an
	  // image out of a CSS background, you're probably going to specify a height
	  // or a width. On the other hand, if we've got a legit background image,
	  // it's very likely the height or the width will be set to auto.
	  LayoutObject* layout_object = node->GetLayoutObject();
	  if (layout_object && (layout_object->StyleRef().LogicalHeight().IsAuto() ||
	                        layout_object->StyleRef().LogicalWidth().IsAuto()))
	    return true;

	  return false;
	}

	// Pushes onto |hit_nodes| each direct child of |parent_node| whose bounding
	// box intersects the crop rect, after the crop rect has been converted to the
	// frame coordinates of |parent_node|'s document view. A null |parent_node|
	// leaves |hit_nodes| untouched.
	void SmartClip::CollectOverlappingChildNodes(
	    Node* parent_node,
	    const IntRect& crop_rect_in_viewport,
	    HeapVector<Member<Node>>& hit_nodes) {
	  if (!parent_node)
	    return;

	  const IntRect crop_in_frame =
	      ConvertToContentCoordinatesWithoutCollapsingToZero(
	          crop_rect_in_viewport, parent_node->GetDocument().View());

	  Node* current = parent_node->firstChild();
	  while (current) {
	    const IntRect current_rect = current->PixelSnappedBoundingBox();
	    if (crop_in_frame.Intersects(current_rect))
	      hit_nodes.push_back(current);
	    current = current->nextSibling();
	  }
	}

	// Concatenates the values of the text nodes found in |node| and its
	// descendants. Nodes without a computed style or with user-select: none are
	// ignored; frame owner elements contribute the text of their content
	// document. A '\n' is inserted each time a text node's box has a different y
	// position from the previously appended text node.
	String SmartClip::ExtractTextFromNode(Node* node) {
	  // Science has proven that no text nodes are ever positioned at y == -99999.
	  int prev_y_pos = -99999;

	  StringBuilder result;
	  for (Node& current_node : NodeTraversal::InclusiveDescendantsOf(*node)) {
	    const ComputedStyle* style = current_node.GetComputedStyle();
	    if (!style || style->UserSelect() == EUserSelect::kNone)
	      continue;

	    if (Node* node_from_frame = NodeInsideFrame(&current_node))
	      result.Append(ExtractTextFromNode(node_from_frame));

	    IntRect node_rect = current_node.PixelSnappedBoundingBox();
	    if (current_node.GetLayoutObject() && !node_rect.IsEmpty()) {
	      if (current_node.IsTextNode()) {
	        String node_value = current_node.nodeValue();

	        // It's unclear why we disallowed solitary "\n" node values.
	        // Maybe we're trying to ignore <br> tags somehow?
	        if (node_value == "\n")
	          node_value = "";

	        // A change in y position is treated as the start of a new line.
	        if (node_rect.Y() != prev_y_pos) {
	          prev_y_pos = node_rect.Y();
	          result.Append('\n');
	        }

	        result.Append(node_value);
	      }
	    }
	  }

	  return result.ToString();
	}

	} // namespace blink