| /* |
| * Copyright (C) 2013 Google Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: |
| * |
| * * Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * * Redistributions in binary form must reproduce the above |
| * copyright notice, this list of conditions and the following disclaimer |
| * in the documentation and/or other materials provided with the |
| * distribution. |
| * * Neither the name of Google Inc. nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "third_party/blink/renderer/core/frame/smart_clip.h" |
| |
| #include "third_party/blink/renderer/core/dom/container_node.h" |
| #include "third_party/blink/renderer/core/dom/document.h" |
| #include "third_party/blink/renderer/core/dom/node_computed_style.h" |
| #include "third_party/blink/renderer/core/dom/node_traversal.h" |
| #include "third_party/blink/renderer/core/frame/local_dom_window.h" |
| #include "third_party/blink/renderer/core/frame/local_frame_view.h" |
| #include "third_party/blink/renderer/core/html/html_frame_owner_element.h" |
| #include "third_party/blink/renderer/core/layout/layout_object.h" |
| #include "third_party/blink/renderer/core/page/page.h" |
| #include "third_party/blink/renderer/platform/wtf/text/string_builder.h" |
| |
| namespace blink { |
| |
| static IntRect ConvertToContentCoordinatesWithoutCollapsingToZero( |
| const IntRect& rect_in_viewport, |
| const LocalFrameView* view) { |
| IntRect rect_in_contents = view->ViewportToFrame(rect_in_viewport); |
| if (rect_in_viewport.Width() > 0 && !rect_in_contents.Width()) |
| rect_in_contents.SetWidth(1); |
| if (rect_in_viewport.Height() > 0 && !rect_in_contents.Height()) |
| rect_in_contents.SetHeight(1); |
| return rect_in_contents; |
| } |
| |
| static Node* NodeInsideFrame(Node* node) { |
| if (auto* frame_owner_element = DynamicTo<HTMLFrameOwnerElement>(node)) |
| return frame_owner_element->contentDocument(); |
| return nullptr; |
| } |
| |
| SmartClip::SmartClip(LocalFrame* frame) : frame_(frame) {} |
| |
| SmartClipData SmartClip::DataForRect(const IntRect& crop_rect_in_viewport) { |
| Node* best_node = |
| FindBestOverlappingNode(frame_->GetDocument(), crop_rect_in_viewport); |
| if (!best_node) |
| return SmartClipData(); |
| |
| if (Node* node_from_frame = NodeInsideFrame(best_node)) { |
| // FIXME: This code only hit-tests a single iframe. It seems like we ought |
| // support nested frames. |
| if (Node* best_node_in_frame = |
| FindBestOverlappingNode(node_from_frame, crop_rect_in_viewport)) |
| best_node = best_node_in_frame; |
| } |
| |
| HeapVector<Member<Node>> hit_nodes; |
| CollectOverlappingChildNodes(best_node, crop_rect_in_viewport, hit_nodes); |
| |
| if (hit_nodes.IsEmpty() || hit_nodes.size() == best_node->CountChildren()) { |
| hit_nodes.clear(); |
| hit_nodes.push_back(best_node); |
| } |
| |
| // Unite won't work with the empty rect, so we initialize to the first rect. |
| IntRect united_rects = hit_nodes[0]->PixelSnappedBoundingBox(); |
| StringBuilder collected_text; |
| for (wtf_size_t i = 0; i < hit_nodes.size(); ++i) { |
| collected_text.Append(ExtractTextFromNode(hit_nodes[i])); |
| united_rects.Unite(hit_nodes[i]->PixelSnappedBoundingBox()); |
| } |
| |
| return SmartClipData( |
| frame_->GetDocument()->View()->FrameToViewport(united_rects), |
| collected_text.ToString()); |
| } |
| |
| float SmartClip::PageScaleFactor() { |
| return frame_->GetPage()->PageScaleFactor(); |
| } |
| |
| // This function is a bit of a mystery. If you understand what it does, please |
| // consider adding a more descriptive name. |
| Node* SmartClip::MinNodeContainsNodes(Node* min_node, Node* new_node) { |
| if (!new_node) |
| return min_node; |
| if (!min_node) |
| return new_node; |
| |
| IntRect min_node_rect = min_node->PixelSnappedBoundingBox(); |
| IntRect new_node_rect = new_node->PixelSnappedBoundingBox(); |
| |
| Node* parent_min_node = min_node->parentNode(); |
| Node* parent_new_node = new_node->parentNode(); |
| |
| if (min_node_rect.Contains(new_node_rect)) { |
| if (parent_min_node && parent_new_node && |
| parent_new_node->parentNode() == parent_min_node) |
| return parent_min_node; |
| return min_node; |
| } |
| |
| if (new_node_rect.Contains(min_node_rect)) { |
| if (parent_min_node && parent_new_node && |
| parent_min_node->parentNode() == parent_new_node) |
| return parent_new_node; |
| return new_node; |
| } |
| |
| // This loop appears to find the nearest ancestor of minNode (in DOM order) |
| // that contains the newNodeRect. It's very unclear to me why that's an |
| // interesting node to find. Presumably this loop will often just return |
| // the documentElement. |
| Node* node = min_node; |
| while (node) { |
| if (node->GetLayoutObject()) { |
| IntRect node_rect = node->PixelSnappedBoundingBox(); |
| if (node_rect.Contains(new_node_rect)) { |
| return node; |
| } |
| } |
| node = node->parentNode(); |
| } |
| |
| return nullptr; |
| } |
| |
| Node* SmartClip::FindBestOverlappingNode(Node* root_node, |
| const IntRect& crop_rect_in_viewport) { |
| if (!root_node) |
| return nullptr; |
| |
| IntRect resized_crop_rect = |
| ConvertToContentCoordinatesWithoutCollapsingToZero( |
| crop_rect_in_viewport, root_node->GetDocument().View()); |
| |
| Node* node = root_node; |
| Node* min_node = nullptr; |
| |
| while (node) { |
| IntRect node_rect = node->PixelSnappedBoundingBox(); |
| auto* element = DynamicTo<Element>(node); |
| if (element && |
| EqualIgnoringASCIICase( |
| element->FastGetAttribute(html_names::kAriaHiddenAttr), "true")) { |
| node = NodeTraversal::NextSkippingChildren(*node, root_node); |
| continue; |
| } |
| |
| LayoutObject* layout_object = node->GetLayoutObject(); |
| if (layout_object && !node_rect.IsEmpty()) { |
| if (layout_object->IsText() || layout_object->IsLayoutImage() || |
| node->IsFrameOwnerElement() || |
| (layout_object->StyleRef().HasBackgroundImage() && |
| !ShouldSkipBackgroundImage(node))) { |
| if (resized_crop_rect.Intersects(node_rect)) { |
| min_node = MinNodeContainsNodes(min_node, node); |
| } else { |
| node = NodeTraversal::NextSkippingChildren(*node, root_node); |
| continue; |
| } |
| } |
| } |
| node = NodeTraversal::Next(*node, root_node); |
| } |
| |
| return min_node; |
| } |
| |
| // This function appears to heuristically guess whether to include a background |
| // image in the smart clip. It seems to want to include sprites created from |
| // CSS background images but to skip actual backgrounds. |
| bool SmartClip::ShouldSkipBackgroundImage(Node* node) { |
| DCHECK(node); |
| // Apparently we're only interested in background images on spans and divs. |
| if (!IsA<HTMLSpanElement>(*node) && !IsA<HTMLDivElement>(*node)) |
| return true; |
| |
| // This check actually makes a bit of sense. If you're going to sprite an |
| // image out of a CSS background, you're probably going to specify a height |
| // or a width. On the other hand, if we've got a legit background image, |
| // it's very likely the height or the width will be set to auto. |
| LayoutObject* layout_object = node->GetLayoutObject(); |
| if (layout_object && (layout_object->StyleRef().LogicalHeight().IsAuto() || |
| layout_object->StyleRef().LogicalWidth().IsAuto())) |
| return true; |
| |
| return false; |
| } |
| |
| void SmartClip::CollectOverlappingChildNodes( |
| Node* parent_node, |
| const IntRect& crop_rect_in_viewport, |
| HeapVector<Member<Node>>& hit_nodes) { |
| if (!parent_node) |
| return; |
| IntRect resized_crop_rect = |
| ConvertToContentCoordinatesWithoutCollapsingToZero( |
| crop_rect_in_viewport, parent_node->GetDocument().View()); |
| for (Node* child = parent_node->firstChild(); child; |
| child = child->nextSibling()) { |
| IntRect child_rect = child->PixelSnappedBoundingBox(); |
| if (resized_crop_rect.Intersects(child_rect)) |
| hit_nodes.push_back(child); |
| } |
| } |
| |
| String SmartClip::ExtractTextFromNode(Node* node) { |
| // Science has proven that no text nodes are ever positioned at y == -99999. |
| int prev_y_pos = -99999; |
| |
| StringBuilder result; |
| for (Node& current_node : NodeTraversal::InclusiveDescendantsOf(*node)) { |
| const ComputedStyle* style = current_node.GetComputedStyle(); |
| if (!style || style->UserSelect() == EUserSelect::kNone) |
| continue; |
| |
| if (Node* node_from_frame = NodeInsideFrame(¤t_node)) |
| result.Append(ExtractTextFromNode(node_from_frame)); |
| |
| IntRect node_rect = current_node.PixelSnappedBoundingBox(); |
| if (current_node.GetLayoutObject() && !node_rect.IsEmpty()) { |
| if (current_node.IsTextNode()) { |
| String node_value = current_node.nodeValue(); |
| |
| // It's unclear why we disallowed solitary "\n" node values. |
| // Maybe we're trying to ignore <br> tags somehow? |
| if (node_value == "\n") |
| node_value = ""; |
| |
| if (node_rect.Y() != prev_y_pos) { |
| prev_y_pos = node_rect.Y(); |
| result.Append('\n'); |
| } |
| |
| result.Append(node_value); |
| } |
| } |
| } |
| |
| return result.ToString(); |
| } |
| |
| } // namespace blink |