Project import
diff --git a/simpleperf/Android.mk b/simpleperf/Android.mk
new file mode 100644
index 0000000..f804736
--- /dev/null
+++ b/simpleperf/Android.mk
@@ -0,0 +1,345 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_PATH := $(call my-dir)
+# Abbreviated git commit of this directory, exported to the code as SIMPLEPERF_REVISION; empty if git fails.
+simpleperf_version :=  $(shell git -C $(LOCAL_PATH) rev-parse --short=12 HEAD 2>/dev/null)
+
+simpleperf_common_cppflags := -Wextra -Wunused -Wno-unknown-pragmas \
+                              -DSIMPLEPERF_REVISION='"$(simpleperf_version)"'
+
+simpleperf_cppflags_target := $(simpleperf_common_cppflags)
+
+simpleperf_cppflags_host := $(simpleperf_common_cppflags) \
+                            -DUSE_BIONIC_UAPI_HEADERS -I bionic/libc/kernel \
+
+simpleperf_cppflags_host_darwin := -I $(LOCAL_PATH)/nonlinux_support/include
+simpleperf_cppflags_host_windows := -I $(LOCAL_PATH)/nonlinux_support/include
+
+# llvm.mk presumably defines the LLVM_DEVICE_BUILD_MK / LLVM_HOST_BUILD_MK fragments included by the modules below.
+LLVM_ROOT_PATH := external/llvm
+include $(LLVM_ROOT_PATH)/llvm.mk
+
+simpleperf_static_libraries_target := \
+  libbacktrace_offline \
+  libbacktrace \
+  libunwind \
+  libziparchive \
+  libz \
+  libbase \
+  libcutils \
+  liblog \
+  libutils \
+  liblzma \
+  libLLVMObject \
+  libLLVMBitReader \
+  libLLVMMC \
+  libLLVMMCParser \
+  libLLVMCore \
+  libLLVMSupport \
+  libprotobuf-cpp-lite \
+  libevent \
+  libc \
+
+simpleperf_static_libraries_host := \
+  libziparchive-host \
+  libbase \
+  liblog \
+  liblzma \
+  libz \
+  libutils \
+  libLLVMObject \
+  libLLVMBitReader \
+  libLLVMMC \
+  libLLVMMCParser \
+  libLLVMCore \
+  libLLVMSupport \
+  libprotobuf-cpp-lite \
+
+simpleperf_static_libraries_host_linux := \
+  libbacktrace_offline \
+  libbacktrace \
+  libunwind \
+  libcutils \
+  libevent \
+
+simpleperf_ldlibs_host_linux := -lrt
+
+# libsimpleperf: sources common to every OS, followed by the per-OS additions
+# =========================================================
+libsimpleperf_src_files := \
+  cmd_dumprecord.cpp \
+  cmd_help.cpp \
+  cmd_kmem.cpp \
+  cmd_report.cpp \
+  cmd_report_sample.cpp \
+  command.cpp \
+  dso.cpp \
+  event_attr.cpp \
+  event_type.cpp \
+  perf_regs.cpp \
+  read_apk.cpp \
+  read_elf.cpp \
+  record.cpp \
+  record_file_reader.cpp \
+  report_sample.proto \
+  thread_tree.cpp \
+  tracing.cpp \
+  utils.cpp \
+
+libsimpleperf_src_files_linux := \
+  cmd_list.cpp \
+  cmd_record.cpp \
+  cmd_stat.cpp \
+  dwarf_unwind.cpp \
+  environment.cpp \
+  event_fd.cpp \
+  event_selection_set.cpp \
+  IOEventLoop.cpp \
+  record_file_writer.cpp \
+  workload.cpp \
+
+libsimpleperf_src_files_darwin := \
+  nonlinux_support/nonlinux_support.cpp \
+
+libsimpleperf_src_files_windows := \
+  nonlinux_support/nonlinux_support.cpp \
+
+# libsimpleperf target: device static library (common + linux sources), LOCAL_MULTILIB := both
+include $(CLEAR_VARS)
+LOCAL_CLANG := true
+LOCAL_MODULE := libsimpleperf
+LOCAL_MODULE_TAGS := debug
+LOCAL_MODULE_PATH := $(TARGET_OUT_OPTIONAL_EXECUTABLES)
+LOCAL_CPPFLAGS := $(simpleperf_cppflags_target)
+LOCAL_SRC_FILES := \
+  $(libsimpleperf_src_files) \
+  $(libsimpleperf_src_files_linux) \
+
+LOCAL_STATIC_LIBRARIES := $(simpleperf_static_libraries_target)
+LOCAL_MULTILIB := both
+LOCAL_PROTOC_OPTIMIZE_TYPE := lite-static
+include $(LLVM_DEVICE_BUILD_MK)
+include $(BUILD_STATIC_LIBRARY)
+
+# libsimpleperf host (darwin/linux/windows). NOTE(review): simpleperf_cppflags_host_linux is never set in this file.
+include $(CLEAR_VARS)
+#LOCAL_CLANG := true  # Comment it to build on windows.
+LOCAL_MODULE := libsimpleperf
+LOCAL_MODULE_HOST_OS := darwin linux windows
+LOCAL_CPPFLAGS := $(simpleperf_cppflags_host)
+LOCAL_CPPFLAGS_darwin := $(simpleperf_cppflags_host_darwin)
+LOCAL_CPPFLAGS_linux := $(simpleperf_cppflags_host_linux)
+LOCAL_CPPFLAGS_windows := $(simpleperf_cppflags_host_windows)
+LOCAL_SRC_FILES := $(libsimpleperf_src_files)
+LOCAL_SRC_FILES_darwin := $(libsimpleperf_src_files_darwin)
+LOCAL_SRC_FILES_linux := $(libsimpleperf_src_files_linux)
+LOCAL_SRC_FILES_windows := $(libsimpleperf_src_files_windows)
+LOCAL_STATIC_LIBRARIES := $(simpleperf_static_libraries_host)
+LOCAL_STATIC_LIBRARIES_linux := $(simpleperf_static_libraries_host_linux)
+LOCAL_LDLIBS_linux := $(simpleperf_ldlibs_host_linux)
+LOCAL_MULTILIB := first
+LOCAL_PROTOC_OPTIMIZE_TYPE := lite-static
+LOCAL_CXX_STL := libc++_static
+include $(LLVM_HOST_BUILD_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+
+# simpleperf: the command line executable, linked against libsimpleperf
+# =========================================================
+
+# simpleperf target: static device executable; with TARGET_2ND_ARCH the 32-bit build is named simpleperf32
+include $(CLEAR_VARS)
+LOCAL_CLANG := true
+LOCAL_MODULE := simpleperf
+LOCAL_MODULE_TAGS := debug
+LOCAL_MODULE_PATH := $(TARGET_OUT_OPTIONAL_EXECUTABLES)
+LOCAL_CPPFLAGS := $(simpleperf_cppflags_target)
+LOCAL_SRC_FILES := main.cpp
+LOCAL_STATIC_LIBRARIES := libsimpleperf $(simpleperf_static_libraries_target)
+ifdef TARGET_2ND_ARCH
+LOCAL_MULTILIB := both
+LOCAL_MODULE_STEM_32 := simpleperf32
+LOCAL_MODULE_STEM_64 := simpleperf
+endif
+LOCAL_FORCE_STATIC_EXECUTABLE := true
+include $(LLVM_DEVICE_BUILD_MK)
+include $(BUILD_EXECUTABLE)
+
+$(call dist-for-goals,sdk,$(ALL_MODULES.simpleperf.BUILT))
+ifdef TARGET_2ND_ARCH
+$(call dist-for-goals,sdk,$(ALL_MODULES.simpleperf$(TARGET_2ND_ARCH_MODULE_SUFFIX).BUILT))
+endif
+
+# simpleperf host: host executable; dist'ed as simpleperf_host for sdk and the host_cross build for win_sdk
+include $(CLEAR_VARS)
+LOCAL_MODULE := simpleperf
+LOCAL_MODULE_HOST_OS := darwin linux windows
+LOCAL_CPPFLAGS := $(simpleperf_cppflags_host)
+LOCAL_CPPFLAGS_darwin := $(simpleperf_cppflags_host_darwin)
+LOCAL_CPPFLAGS_linux := $(simpleperf_cppflags_host_linux)
+LOCAL_CPPFLAGS_windows := $(simpleperf_cppflags_host_windows)
+LOCAL_SRC_FILES := main.cpp
+LOCAL_STATIC_LIBRARIES := libsimpleperf $(simpleperf_static_libraries_host)
+LOCAL_STATIC_LIBRARIES_linux := $(simpleperf_static_libraries_host_linux)
+LOCAL_LDLIBS_linux := $(simpleperf_ldlibs_host_linux)
+LOCAL_MULTILIB := first
+LOCAL_CXX_STL := libc++_static
+include $(LLVM_HOST_BUILD_MK)
+include $(BUILD_HOST_EXECUTABLE)
+
+$(call dist-for-goals,sdk,$(LOCAL_BUILT_MODULE):simpleperf_host)
+$(call dist-for-goals,win_sdk,$(ALL_MODULES.host_cross_simpleperf.BUILT))
+
+# simpleperf report script: simpleperf_report.py as a prebuilt host executable, also dist'ed for the sdk goal
+include $(CLEAR_VARS)
+LOCAL_PREBUILT_EXECUTABLES := simpleperf_report.py
+include $(BUILD_HOST_PREBUILT)
+
+$(call dist-for-goals,sdk,$(ALL_MODULES.simpleperf_report.BUILT))
+
+# simpleperf_unit_test: test sources common to every OS, then the linux-only tests
+# =========================================================
+simpleperf_unit_test_src_files := \
+  cmd_kmem_test.cpp \
+  cmd_report_test.cpp \
+  cmd_report_sample_test.cpp \
+  command_test.cpp \
+  gtest_main.cpp \
+  read_apk_test.cpp \
+  read_elf_test.cpp \
+  record_test.cpp \
+  sample_tree_test.cpp \
+  utils_test.cpp \
+
+simpleperf_unit_test_src_files_linux := \
+  cmd_dumprecord_test.cpp \
+  cmd_list_test.cpp \
+  cmd_record_test.cpp \
+  cmd_stat_test.cpp \
+  environment_test.cpp \
+  IOEventLoop_test.cpp \
+  record_file_test.cpp \
+  workload_test.cpp \
+
+# simpleperf_unit_test target: device test binary; post-link step zips testdata/ into ELF section .testzipdata
+include $(CLEAR_VARS)
+LOCAL_CLANG := true
+LOCAL_MODULE := simpleperf_unit_test
+LOCAL_CPPFLAGS := $(simpleperf_cppflags_target)
+LOCAL_SRC_FILES := \
+  $(simpleperf_unit_test_src_files) \
+  $(simpleperf_unit_test_src_files_linux) \
+
+LOCAL_STATIC_LIBRARIES := libsimpleperf $(simpleperf_static_libraries_target)
+LOCAL_POST_LINK_CMD = \
+  TMP_FILE=`mktemp $(OUT_DIR)/simpleperf-post-link-XXXXXXXXXX` && \
+  (cd $(LOCAL_PATH)/testdata && zip - -0 -r .) > $$TMP_FILE && \
+  $($(LOCAL_2ND_ARCH_VAR_PREFIX)TARGET_OBJCOPY) --add-section .testzipdata=$$TMP_FILE $(linked_module) && \
+  rm -f $$TMP_FILE
+
+LOCAL_MULTILIB := first
+LOCAL_FORCE_STATIC_EXECUTABLE := true
+include $(LLVM_DEVICE_BUILD_MK)
+include $(BUILD_NATIVE_TEST)
+
+# simpleperf_unit_test host: host test binary; the linux-only tests are added through LOCAL_SRC_FILES_linux
+include $(CLEAR_VARS)
+LOCAL_MODULE := simpleperf_unit_test
+LOCAL_MODULE_HOST_OS := darwin linux windows
+LOCAL_CPPFLAGS := $(simpleperf_cppflags_host)
+LOCAL_CPPFLAGS_darwin := $(simpleperf_cppflags_host_darwin)
+LOCAL_CPPFLAGS_linux := $(simpleperf_cppflags_host_linux)
+LOCAL_CPPFLAGS_windows := $(simpleperf_cppflags_host_windows)
+LOCAL_SRC_FILES := $(simpleperf_unit_test_src_files)
+LOCAL_SRC_FILES_linux := $(simpleperf_unit_test_src_files_linux)
+LOCAL_STATIC_LIBRARIES := libsimpleperf $(simpleperf_static_libraries_host)
+LOCAL_STATIC_LIBRARIES_linux := $(simpleperf_static_libraries_host_linux)
+LOCAL_LDLIBS_linux := $(simpleperf_ldlibs_host_linux)
+LOCAL_MULTILIB := first
+include $(LLVM_HOST_BUILD_MK)
+include $(BUILD_HOST_NATIVE_TEST)
+
+
+# simpleperf_cpu_hotplug_test: a separate test binary built from cpu_hotplug_test.cpp only
+# =========================================================
+simpleperf_cpu_hotplug_test_src_files := \
+  cpu_hotplug_test.cpp \
+
+# simpleperf_cpu_hotplug_test target: static device test, LOCAL_MULTILIB := both
+include $(CLEAR_VARS)
+LOCAL_CLANG := true
+LOCAL_MODULE := simpleperf_cpu_hotplug_test
+LOCAL_CPPFLAGS := $(simpleperf_cppflags_target)
+LOCAL_SRC_FILES := $(simpleperf_cpu_hotplug_test_src_files)
+LOCAL_STATIC_LIBRARIES := libsimpleperf $(simpleperf_static_libraries_target)
+LOCAL_MULTILIB := both
+LOCAL_FORCE_STATIC_EXECUTABLE := true
+include $(LLVM_DEVICE_BUILD_MK)
+include $(BUILD_NATIVE_TEST)
+
+# simpleperf_cpu_hotplug_test linux host: LOCAL_MODULE_HOST_OS restricts this host test to linux
+include $(CLEAR_VARS)
+LOCAL_CLANG := true
+LOCAL_MODULE := simpleperf_cpu_hotplug_test
+LOCAL_MODULE_HOST_OS := linux
+LOCAL_CPPFLAGS := $(simpleperf_cppflags_host)
+LOCAL_CPPFLAGS_linux := $(simpleperf_cppflags_host_linux)
+LOCAL_SRC_FILES := $(simpleperf_cpu_hotplug_test_src_files)
+LOCAL_STATIC_LIBRARIES := libsimpleperf $(simpleperf_static_libraries_host)
+LOCAL_STATIC_LIBRARIES_linux := $(simpleperf_static_libraries_host_linux)
+LOCAL_LDLIBS_linux := $(simpleperf_ldlibs_host_linux)
+LOCAL_MULTILIB := first
+include $(LLVM_HOST_BUILD_MK)
+include $(BUILD_HOST_NATIVE_TEST)
+
+
+# libsimpleperf_cts_test: library sources and unit-test sources compiled together into one static test library
+# =========================================================
+libsimpleperf_cts_test_src_files := \
+  $(libsimpleperf_src_files) \
+  $(libsimpleperf_src_files_linux) \
+  $(simpleperf_unit_test_src_files) \
+  $(simpleperf_unit_test_src_files_linux) \
+
+# libsimpleperf_cts_test target: device static test library, LOCAL_MULTILIB := both
+include $(CLEAR_VARS)
+LOCAL_CLANG := true
+LOCAL_MODULE := libsimpleperf_cts_test
+LOCAL_CPPFLAGS := $(simpleperf_cppflags_target)
+LOCAL_SRC_FILES := $(libsimpleperf_cts_test_src_files)
+LOCAL_STATIC_LIBRARIES := $(simpleperf_static_libraries_target)
+LOCAL_MULTILIB := both
+LOCAL_FORCE_STATIC_EXECUTABLE := true
+include $(LLVM_DEVICE_BUILD_MK)
+include $(BUILD_STATIC_TEST_LIBRARY)
+
+# libsimpleperf_cts_test linux host: host static test library, linux only, LOCAL_MULTILIB := both
+include $(CLEAR_VARS)
+LOCAL_CLANG := true
+LOCAL_MODULE := libsimpleperf_cts_test
+LOCAL_MODULE_HOST_OS := linux
+LOCAL_CPPFLAGS := $(simpleperf_cppflags_host)
+LOCAL_CPPFLAGS_linux := $(simpleperf_cppflags_host_linux)
+LOCAL_SRC_FILES := $(libsimpleperf_cts_test_src_files)
+LOCAL_STATIC_LIBRARIES := $(simpleperf_static_libraries_host)
+LOCAL_STATIC_LIBRARIES_linux := $(simpleperf_static_libraries_host_linux)
+LOCAL_LDLIBS_linux := $(simpleperf_ldlibs_host_linux)
+LOCAL_MULTILIB := both
+include $(LLVM_HOST_BUILD_MK)
+include $(BUILD_HOST_STATIC_TEST_LIBRARY)
+# Presumably pulls in the Android.mk files found in subdirectories of this one - TODO confirm.
+include $(call first-makefiles-under,$(LOCAL_PATH))
diff --git a/simpleperf/IOEventLoop.cpp b/simpleperf/IOEventLoop.cpp
new file mode 100644
index 0000000..e583d73
--- /dev/null
+++ b/simpleperf/IOEventLoop.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IOEventLoop.h"
+
+#include <event2/event.h>
+#include <fcntl.h>
+
+#include <android-base/logging.h>
+
+// Bundles one libevent event with the loop that owns it and the user callback.
+struct IOEvent {
+  IOEventLoop* loop;               // loop this event was registered with
+  event* e;                        // libevent handle; null until AddEvent() sets it
+  std::function<bool()> callback;  // returning false makes the loop stop with an error
+
+  IOEvent(IOEventLoop* owner, const std::function<bool()>& cb)
+      : loop(owner), e(nullptr), callback(cb) {}
+
+  // Releases the libevent handle, if one was attached.
+  ~IOEvent() {
+    if (e) event_free(e);
+  }
+};
+
+// The libevent base is not created here; EnsureInit() creates it on first use.
+IOEventLoop::IOEventLoop() : ebase_(nullptr), has_error_(false) {}
+
+IOEventLoop::~IOEventLoop() {
+  events_.clear();  // run event_free() on every event while its base still exists
+  if (ebase_ != nullptr) event_base_free(ebase_);
+}
+
+// Creates the libevent base on first use; returns false if event_base_new() fails.
+bool IOEventLoop::EnsureInit() {
+  if (ebase_ != nullptr) {
+    return true;  // already created by an earlier call
+  }
+  ebase_ = event_base_new();
+  if (ebase_ != nullptr) return true;
+  LOG(ERROR) << "failed to call event_base_new()";
+  return false;
+}
+
+// libevent entry point; a user callback returning false fails and stops the loop.
+void IOEventLoop::EventCallbackFn(int, short, void* arg) {
+  IOEvent* io_event = static_cast<IOEvent*>(arg);
+  if (io_event->callback()) return;
+  io_event->loop->has_error_ = true;
+  io_event->loop->ExitLoop();
+}
+
+static bool MakeFdNonBlocking(int fd) {
+  const int old_flags = fcntl(fd, F_GETFL, 0);  // keep the flags already set
+  if (old_flags != -1 && fcntl(fd, F_SETFL, old_flags | O_NONBLOCK) != -1) {
+    return true;
+  }
+  PLOG(ERROR) << "fcntl() failed";
+  return false;
+}
+
+bool IOEventLoop::AddReadEvent(int fd, const std::function<bool()>& callback) {
+  if (!MakeFdNonBlocking(fd)) return false;  // [fd] is made non-blocking first
+  return AddEvent(fd, EV_READ | EV_PERSIST, nullptr, callback);
+}
+
+bool IOEventLoop::AddSignalEvent(int sig, const std::function<bool()>& callback) {
+  const short signal_flags = EV_SIGNAL | EV_PERSIST;  // persistent signal event
+  return AddEvent(sig, signal_flags, nullptr, callback);
+}
+
+// Registers [callback] for each signal in [sigs], stopping at the first failure.
+bool IOEventLoop::AddSignalEvents(std::vector<int> sigs,
+                                  const std::function<bool()>& callback) {
+  bool ok = true;
+  for (size_t i = 0; ok && i < sigs.size(); ++i) {
+    ok = AddSignalEvent(sigs[i], callback);
+  }
+  return ok;
+}
+
+bool IOEventLoop::AddPeriodicEvent(timeval duration, const std::function<bool()>& callback) {
+  timeval* period = &duration;  // fd -1: the event is driven by its timeout only
+  return AddEvent(-1, EV_PERSIST, period, callback);
+}
+
+// Creates a libevent event for [fd_or_sig], adds it and stores it in events_.
+bool IOEventLoop::AddEvent(int fd_or_sig, short events, timeval* timeout,
+                           const std::function<bool()>& callback) {
+  if (!EnsureInit()) return false;
+  std::unique_ptr<IOEvent> io_event(new IOEvent(this, callback));
+  event* raw = event_new(ebase_, fd_or_sig, events, EventCallbackFn, io_event.get());
+  io_event->e = raw;  // owned by io_event from here on
+  if (raw == nullptr) {
+    LOG(ERROR) << "event_new() failed";
+    return false;
+  }
+  if (event_add(raw, timeout) != 0) {
+    LOG(ERROR) << "event_add() failed";
+    return false;
+  }
+  events_.push_back(std::move(io_event));
+  return true;
+}
+
+// Dispatches events; returns false if libevent fails or a callback returned false.
+bool IOEventLoop::RunLoop() {
+  if (!EnsureInit()) return false;  // ebase_ is still null if no event was added
+  if (event_base_dispatch(ebase_) == -1) {
+    LOG(ERROR) << "event_base_dispatch() failed";
+    return false;
+  }
+  // has_error_ is set by EventCallbackFn() when a callback returns false.
+  return !has_error_;
+}
+
+bool IOEventLoop::ExitLoop() {
+  const bool stopped = event_base_loopbreak(ebase_) != -1;  // -1 means failure
+  if (!stopped) {
+    LOG(ERROR) << "event_base_loopbreak() failed";
+  }
+  return stopped;
+}
diff --git a/simpleperf/IOEventLoop.h b/simpleperf/IOEventLoop.h
new file mode 100644
index 0000000..4ae906a
--- /dev/null
+++ b/simpleperf/IOEventLoop.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_IOEVENT_LOOP_H_
+#define SIMPLE_PERF_IOEVENT_LOOP_H_
+
+#include <time.h>
+
+#include <functional>
+#include <memory>
+#include <vector>
+
+struct IOEvent;
+struct event_base;
+
+// IOEventLoop is a thin class wrapper around libevent. It monitors registered
+// events and calls the corresponding callbacks. Supported events are: fd ready
+// to read, signal delivered, periodic timer timeout.
+class IOEventLoop {
+ public:
+  IOEventLoop();
+  ~IOEventLoop();
+
+  // Register a read Event, so [callback] is called when [fd] can be read
+  // without blocking. As a side effect, [fd] is switched to O_NONBLOCK.
+  bool AddReadEvent(int fd, const std::function<bool()>& callback);
+
+  // Register a signal Event, so [callback] is called each time signal [sig]
+  // happens.
+  bool AddSignalEvent(int sig, const std::function<bool()>& callback);
+
+  // Register [callback] for every signal in [sigs]; stops at the first failure.
+  bool AddSignalEvents(std::vector<int> sigs,
+                       const std::function<bool()>& callback);
+
+  // Register a periodic Event, so [callback] is called periodically every
+  // [duration].
+  bool AddPeriodicEvent(timeval duration,
+                        const std::function<bool()>& callback);
+
+  // Run a loop polling for Events. A callback returning false stops the loop
+  // and makes RunLoop() return false; otherwise call ExitLoop() to leave it.
+  bool RunLoop();
+
+  // Exit the loop started by RunLoop(). Returns false if libevent reports an error.
+  bool ExitLoop();
+
+ private:
+  bool EnsureInit();  // creates ebase_ on first use
+  bool AddEvent(int fd_or_sig, short events, timeval* timeout,
+                const std::function<bool()>& callback);
+  static void EventCallbackFn(int, short, void*);  // callback handed to libevent
+
+  event_base* ebase_;                             // null until EnsureInit() succeeds
+  std::vector<std::unique_ptr<IOEvent>> events_;  // owns every registered event
+  bool has_error_;                                // set once a callback returns false
+};
+
+#endif  // SIMPLE_PERF_IOEVENT_LOOP_H_
diff --git a/simpleperf/IOEventLoop_test.cpp b/simpleperf/IOEventLoop_test.cpp
new file mode 100644
index 0000000..2957ce0
--- /dev/null
+++ b/simpleperf/IOEventLoop_test.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IOEventLoop.h"
+
+#include <gtest/gtest.h>
+
+#include <chrono>
+#include <thread>
+
+// A writer thread sends 100 bytes through a pipe; the read callback must see them all.
+TEST(IOEventLoop, read) {
+  int fd[2];
+  ASSERT_EQ(0, pipe(fd));
+  IOEventLoop loop;
+  static int count;
+  static int retry_count;
+  count = retry_count = 0;
+  ASSERT_TRUE(loop.AddReadEvent(fd[0], [&]() {
+    while (true) {
+      char c;
+      int ret = read(fd[0], &c, 1);
+      if (ret == 1) {
+        if (++count == 100) {
+          return loop.ExitLoop();
+        }
+      } else if (ret == -1 && errno == EAGAIN) {
+        retry_count++;
+        break;
+      } else {
+        return false;
+      }
+    }
+    return true;
+  }));
+  std::thread thread([&]() {
+    for (int i = 0; i < 100; ++i) {
+      usleep(1000);
+      const char c = 'x';  // initialized, so write() never reads an indeterminate byte
+      EXPECT_EQ(1, write(fd[1], &c, 1));
+    }
+  });
+  ASSERT_TRUE(loop.RunLoop());
+  thread.join();
+  ASSERT_EQ(100, count);
+  // Test retry_count to make sure we are not doing blocking read.
+  ASSERT_GT(retry_count, 0);
+  close(fd[0]);
+  close(fd[1]);
+}
+
+// Sends SIGINT to this process 100 times and expects 100 callback invocations.
+TEST(IOEventLoop, signal) {
+  IOEventLoop loop;
+  static int signals_seen;
+  signals_seen = 0;
+  ASSERT_TRUE(loop.AddSignalEvent(SIGINT, [&]() {
+    const bool last_signal = (++signals_seen == 100);
+    if (last_signal) loop.ExitLoop();
+    return true;
+  }));
+  std::thread sender([]() {
+    for (int sent = 0; sent < 100; ++sent) {
+      usleep(1000);
+      kill(getpid(), SIGINT);
+    }
+  });
+  ASSERT_TRUE(loop.RunLoop());
+  sender.join();
+  ASSERT_EQ(100, signals_seen);
+}
+
+// Runs a 1000 us periodic event 100 times and checks the total wall-clock time.
+TEST(IOEventLoop, periodic) {
+  timeval period;
+  period.tv_sec = 0;
+  period.tv_usec = 1000;
+  static int ticks;
+  ticks = 0;
+  IOEventLoop loop;
+  ASSERT_TRUE(loop.AddPeriodicEvent(period, [&]() {
+    if (++ticks == 100) {
+      loop.ExitLoop();
+    }
+    return true;
+  }));
+  const auto started = std::chrono::steady_clock::now();
+  ASSERT_TRUE(loop.RunLoop());
+  const auto finished = std::chrono::steady_clock::now();
+  ASSERT_EQ(100, ticks);
+  const std::chrono::duration<double> elapsed = finished - started;
+  const double time_used = elapsed.count();
+  // time_used is 0.1 if running precisely, and we accept small errors by using
+  // a range [0.1, 0.15).
+  ASSERT_GE(time_used, 0.1);
+  ASSERT_LT(time_used, 0.15);
+}
diff --git a/simpleperf/NOTICE b/simpleperf/NOTICE
new file mode 100644
index 0000000..8530865
--- /dev/null
+++ b/simpleperf/NOTICE
@@ -0,0 +1,190 @@
+
+   Copyright (c) 2015, The Android Open Source Project
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
diff --git a/simpleperf/SampleComparator.h b/simpleperf/SampleComparator.h
new file mode 100644
index 0000000..9eefeb4
--- /dev/null
+++ b/simpleperf/SampleComparator.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_SAMPLE_COMPARATOR_H_
+#define SIMPLE_PERF_SAMPLE_COMPARATOR_H_
+
+#include <string.h>
+
+#include <vector>
+
+// The compare functions below are used to compare two samples by their item
+// content.
+
+// Three-way compare: 0 unless a != b; otherwise -1 if a < b, else 1.
+template <typename T>
+int Compare(const T& a, const T& b) {
+  const bool differs = (a != b);
+  if (!differs) return 0;
+  return (a < b) ? -1 : 1;
+}
+
+#define BUILD_COMPARE_VALUE_FUNCTION(function_name, compare_part)  \
+  template <typename EntryT> /* ascending order on compare_part */ \
+  int function_name(const EntryT* lhs, const EntryT* rhs) {        \
+    return Compare(lhs->compare_part, rhs->compare_part);          \
+  }
+
+#define BUILD_COMPARE_VALUE_FUNCTION_REVERSE(function_name, compare_part) \
+  template <typename EntryT> /* descending order on compare_part */       \
+  int function_name(const EntryT* lhs, const EntryT* rhs) {               \
+    return Compare(rhs->compare_part, lhs->compare_part);                 \
+  }
+
+#define BUILD_COMPARE_STRING_FUNCTION(function_name, compare_part) \
+  template <typename EntryT> /* strcmp() order on compare_part */  \
+  int function_name(const EntryT* lhs, const EntryT* rhs) {        \
+    return strcmp(lhs->compare_part, rhs->compare_part);           \
+  }
+
+// Comparators over thread ids and sample count (larger counts order first).
+BUILD_COMPARE_VALUE_FUNCTION(ComparePid, thread->pid);
+BUILD_COMPARE_VALUE_FUNCTION(CompareTid, thread->tid);
+BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareSampleCount, sample_count);
+// Comparators that strcmp() a C string taken from each sample.
+BUILD_COMPARE_STRING_FUNCTION(CompareComm, thread_comm);
+BUILD_COMPARE_STRING_FUNCTION(CompareDso, map->dso->Path().c_str());
+BUILD_COMPARE_STRING_FUNCTION(CompareSymbol, symbol->DemangledName());
+BUILD_COMPARE_STRING_FUNCTION(CompareDsoFrom, branch_from.map->dso->Path().c_str());
+BUILD_COMPARE_STRING_FUNCTION(CompareSymbolFrom, branch_from.symbol->DemangledName());
+
+// Descending order on period + accumulated_period: sample2's total goes first.
+template <typename EntryT>
+int CompareTotalPeriod(const EntryT* sample1, const EntryT* sample2) {
+  const uint64_t p1 = sample1->period + sample1->accumulated_period;
+  return Compare<uint64_t>(sample2->period + sample2->accumulated_period, p1);
+}
+
+// SampleComparator is a class using a collection of compare functions to
+// compare two samples.
+
+template <typename EntryT>
+class SampleComparator {
+ public:
+  typedef int (*compare_sample_func_t)(const EntryT*, const EntryT*);
+
+  // Appends func as the next key; keys added earlier are consulted first.
+  void AddCompareFunction(compare_sample_func_t func) {
+    compare_v_.push_back(func);
+  }
+
+  // Appends all keys of other, in their order, after the keys already held.
+  void AddComparator(const SampleComparator<EntryT>& other) {
+    const auto& extra = other.compare_v_;
+    compare_v_.insert(compare_v_.end(), extra.begin(), extra.end());
+  }
+
+  // Less-than predicate: the first key returning non-zero decides the order.
+  bool operator()(const EntryT* sample1, const EntryT* sample2) const {
+    for (compare_sample_func_t key : compare_v_) {
+      const int order = key(sample1, sample2);
+      if (order != 0) return order < 0;
+    }
+    return false;
+  }
+
+  // True when every key returns 0 for the pair (also when there are no keys).
+  bool IsSameSample(const EntryT* sample1, const EntryT* sample2) const {
+    for (compare_sample_func_t key : compare_v_) {
+      if (key(sample1, sample2) != 0) return false;
+    }
+    return true;
+  }
+
+  bool empty() const { return compare_v_.empty(); }
+
+ private:
+  std::vector<compare_sample_func_t> compare_v_;  // keys in priority order
+};
+
+#endif  // SIMPLE_PERF_SAMPLE_COMPARATOR_H_
diff --git a/simpleperf/SampleDisplayer.h b/simpleperf/SampleDisplayer.h
new file mode 100644
index 0000000..606f639
--- /dev/null
+++ b/simpleperf/SampleDisplayer.h
@@ -0,0 +1,240 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_SAMPLE_DISPLAYER_H_
+#define SIMPLE_PERF_SAMPLE_DISPLAYER_H_
+
+#include <inttypes.h>
+
+#include <functional>
+#include <string>
+
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
+
+// The display functions below are used to show items in a sample.
+
+// Formats period + accumulated_period as a "%.2f%%" share of total_period.
+template <typename EntryT, typename InfoT>
+std::string DisplayAccumulatedOverhead(const EntryT* sample,
+                                       const InfoT* info) {
+  const uint64_t num = sample->period + sample->accumulated_period;
+  const uint64_t den = info->total_period;
+  return android::base::StringPrintf("%.2f%%", den ? 100.0 * num / den : 0.0);
+}
+
+// Formats sample->period as a "%.2f%%" share of info->total_period (0 if 0).
+template <typename EntryT, typename InfoT>
+std::string DisplaySelfOverhead(const EntryT* sample, const InfoT* info) {
+  const uint64_t num = sample->period;
+  const uint64_t den = info->total_period;
+  return android::base::StringPrintf("%.2f%%", den ? 100.0 * num / den : 0.0);
+}
+
+#define BUILD_DISPLAY_UINT64_FUNCTION(function_name, display_part)       \
+  template <typename EntryT> /* formats display_part as decimal */       \
+  std::string function_name(const EntryT* entry) {                       \
+    return android::base::StringPrintf("%" PRIu64, entry->display_part); \
+  }
+
+#define BUILD_DISPLAY_HEX64_FUNCTION(function_name, display_part)          \
+  template <typename EntryT> /* formats display_part as 0x-prefixed hex */ \
+  std::string function_name(const EntryT* entry) {                         \
+    return android::base::StringPrintf("0x%" PRIx64, entry->display_part); \
+  }
+
+BUILD_DISPLAY_UINT64_FUNCTION(DisplaySampleCount, sample_count);  // decimal sample_count
+
+template <typename EntryT>  // Formats sample->thread->pid with "%d".
+std::string DisplayPid(const EntryT* sample) {
+  return android::base::StringPrintf("%d", sample->thread->pid);
+}
+
+template <typename EntryT>  // Formats sample->thread->tid with "%d".
+std::string DisplayTid(const EntryT* sample) {
+  return android::base::StringPrintf("%d", sample->thread->tid);
+}
+
+template <typename EntryT>  // Returns sample->thread_comm as a std::string.
+std::string DisplayComm(const EntryT* sample) {
+  return sample->thread_comm;
+}
+
+template <typename EntryT>  // Returns Path() of the dso referenced by sample->map.
+std::string DisplayDso(const EntryT* sample) {
+  return sample->map->dso->Path();
+}
+
+template <typename EntryT>  // Returns DemangledName() of sample->symbol.
+std::string DisplaySymbol(const EntryT* sample) {
+  return sample->symbol->DemangledName();
+}
+
+template <typename EntryT>  // Returns Path() of the dso referenced by branch_from.map.
+std::string DisplayDsoFrom(const EntryT* sample) {
+  return sample->branch_from.map->dso->Path();
+}
+
+template <typename EntryT>  // Returns DemangledName() of branch_from.symbol.
+std::string DisplaySymbolFrom(const EntryT* sample) {
+  return sample->branch_from.symbol->DemangledName();
+}
+
+// Prints a sample's callchain tree to a FILE* as an indented ASCII graph.
+template <typename SampleT, typename CallChainNodeT>
+class CallgraphDisplayer {
+ public:
+  virtual ~CallgraphDisplayer() {}
+
+  void operator()(FILE* fp, const SampleT* sample) {
+    std::string prefix = "       ";
+    fprintf(fp, "%s|\n", prefix.c_str());
+    fprintf(fp, "%s-- %s\n", prefix.c_str(), PrintSampleName(sample).c_str());
+    prefix.append(3, ' ');
+    const auto& top = sample->callchain;
+    for (size_t i = 0; i < top.children.size(); ++i) {
+      DisplayCallGraphEntry(fp, 1, prefix, top.children[i],
+                            top.children_period, i + 1 == top.children.size());
+    }
+  }
+
+  // Recursively prints node and its subtree; entries deeper than 20 are dropped.
+  void DisplayCallGraphEntry(FILE* fp, size_t depth, std::string prefix,
+                             const std::unique_ptr<CallChainNodeT>& node,
+                             uint64_t parent_period, bool last) {
+    if (depth > 20) {
+      LOG(WARNING) << "truncated callgraph at depth " << depth;
+      return;
+    }
+    prefix += "|";
+    fprintf(fp, "%s\n", prefix.c_str());
+    if (last) prefix.back() = ' ';
+    const auto node_total = node->period + node->children_period;
+    std::string percentage_s = "-- ";
+    if (node_total != parent_period) {
+      percentage_s = android::base::StringPrintf(
+          "--%.2f%%-- ", 100.0 * node_total / parent_period);
+    }
+    fprintf(fp, "%s%s%s\n", prefix.c_str(), percentage_s.c_str(),
+            PrintSampleName(node->chain[0]).c_str());
+    prefix.append(percentage_s.size(), ' ');
+    for (size_t i = 1; i < node->chain.size(); ++i) {
+      fprintf(fp, "%s%s\n", prefix.c_str(),
+              PrintSampleName(node->chain[i]).c_str());
+    }
+    const size_t child_count = node->children.size();
+    for (size_t i = 0; i < child_count; ++i) {
+      DisplayCallGraphEntry(fp, depth + 1, prefix, node->children[i],
+                            node->children_period, i + 1 == child_count);
+    }
+  }
+
+ protected:
+  virtual std::string PrintSampleName(const SampleT* sample) {
+    return sample->symbol->DemangledName();
+  }
+};
+
+// SampleDisplayer is a class using a collections of display functions to show a
+// sample.
+
+template <typename EntryT, typename InfoT>
+class SampleDisplayer {
+ public:
+  typedef std::string (*display_sample_func_t)(const EntryT*);
+  typedef std::string (*display_sample_with_info_func_t)(const EntryT*,
+                                                         const InfoT*);
+  using exclusive_display_sample_func_t =
+      std::function<void(FILE*, const EntryT*)>;
+
+ private:
+  // One output column; the callback that was not supplied is left null.
+  struct Item {
+    std::string name;
+    size_t width;
+    display_sample_func_t func;
+    display_sample_with_info_func_t func_with_info;
+  };
+
+ public:
+  // Sets the info object handed to every display function that takes one.
+  void SetInfo(const InfoT* info) { info_ = info; }
+
+  // Adds a column titled name whose cell text is produced by func(sample).
+  void AddDisplayFunction(const std::string& name, display_sample_func_t func) {
+    display_v_.push_back(Item{name, name.size(), func, nullptr});
+  }
+
+  // Adds a column titled name whose cell text is func_with_info(sample, info).
+  void AddDisplayFunction(const std::string& name,
+                          display_sample_with_info_func_t func_with_info) {
+    display_v_.push_back(Item{name, name.size(), nullptr, func_with_info});
+  }
+
+  // Adds a callback run after the columns of each sample have been printed.
+  void AddExclusiveDisplayFunction(exclusive_display_sample_func_t func) {
+    exclusive_display_v_.push_back(func);
+  }
+
+  // Widens every column so that it can hold this sample's cell text.
+  void AdjustWidth(const EntryT* sample) {
+    for (auto& item : display_v_) {
+      item.width = std::max(item.width, CellText(item, sample).size());
+    }
+  }
+
+  // Prints the header row: names left-aligned to the column widths and
+  // separated by two spaces; the last name is printed unpadded.
+  void PrintNames(FILE* fp) {
+    for (size_t i = 0; i < display_v_.size(); ++i) {
+      const Item& item = display_v_[i];
+      if (i + 1 == display_v_.size()) {
+        fprintf(fp, "%s\n", item.name.c_str());
+      } else {
+        fprintf(fp, "%-*s  ", static_cast<int>(item.width), item.name.c_str());
+      }
+    }
+  }
+
+  // Prints one row for sample in the same layout as PrintNames(), then runs
+  // the exclusive display functions in the order they were added.
+  void PrintSample(FILE* fp, const EntryT* sample) {
+    for (size_t i = 0; i < display_v_.size(); ++i) {
+      const Item& item = display_v_[i];
+      const std::string data = CellText(item, sample);
+      if (i + 1 == display_v_.size()) {
+        fprintf(fp, "%s\n", data.c_str());
+      } else {
+        fprintf(fp, "%-*s  ", static_cast<int>(item.width), data.c_str());
+      }
+    }
+    for (auto& func : exclusive_display_v_) func(fp, sample);
+  }
+
+ private:
+  // Returns the text shown for sample in the given column.
+  std::string CellText(const Item& item, const EntryT* sample) const {
+    return (item.func != nullptr) ? item.func(sample)
+                                  : item.func_with_info(sample, info_);
+  }
+
+  // Null until SetInfo() is called, rather than an indeterminate pointer.
+  const InfoT* info_ = nullptr;
+  std::vector<Item> display_v_;
+  std::vector<exclusive_display_sample_func_t> exclusive_display_v_;
+};
+
+#endif  // SIMPLE_PERF_SAMPLE_DISPLAYER_H_
diff --git a/simpleperf/build_id.h b/simpleperf/build_id.h
new file mode 100644
index 0000000..9f360bd
--- /dev/null
+++ b/simpleperf/build_id.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_BUILD_ID_H_
+#define SIMPLE_PERF_BUILD_ID_H_
+
+#include <string.h>
+#include <algorithm>
+#include <android-base/stringprintf.h>
+
+constexpr size_t BUILD_ID_SIZE = 20;
+
+// Shared libraries can have a section called .note.gnu.build-id, containing
+// a ~20 bytes unique id. Build id is used to compare if two shared libraries
+// are actually the same. BuildId class is the representation of build id in
+// memory.
+class BuildId {
+ public:
+  // Number of bytes stored in a BuildId.
+  static size_t Size() { return BUILD_ID_SIZE; }
+
+  // Creates an all-zero build id.
+  BuildId() { memset(data_, '\0', BUILD_ID_SIZE); }
+
+  // Copy build id from a byte array, like {0x76, 0x00, 0x32,...}. At most
+  // BUILD_ID_SIZE bytes are copied; any remaining bytes stay zero.
+  BuildId(const void* data, size_t len) : BuildId() {
+    const size_t copy_len = std::min(len, BUILD_ID_SIZE);
+    memcpy(data_, data, copy_len);
+  }
+
+  // Read build id from a hex string, like "7600329e31058e12b145d153ef27cd40e1a5f7b9".
+  // Characters that are not hex digits contribute a zero nibble.
+  explicit BuildId(const std::string& s) : BuildId() {
+    for (size_t i = 0; i < s.size() && i < BUILD_ID_SIZE * 2; i += 2) {
+      unsigned char byte = 0;
+      for (size_t j = i; j < i + 2; ++j) {
+        const char c = s[j];
+        unsigned char nibble = 0;
+        if (c >= '0' && c <= '9') {
+          nibble = c - '0';
+        } else if (c >= 'a' && c <= 'f') {
+          nibble = c - 'a' + 10;
+        } else if (c >= 'A' && c <= 'F') {
+          nibble = c - 'A' + 10;
+        }
+        byte = static_cast<unsigned char>((byte << 4) | nibble);
+      }
+      data_[i / 2] = byte;
+    }
+  }
+
+  // Pointer to the BUILD_ID_SIZE stored bytes.
+  const unsigned char* Data() const { return data_; }
+
+  // Renders the id as "0x" followed by two lowercase hex digits per byte.
+  std::string ToString() const {
+    std::string hex("0x");
+    for (unsigned char byte : data_) {
+      hex += android::base::StringPrintf("%02x", byte);
+    }
+    return hex;
+  }
+
+  // Byte-wise equality over all BUILD_ID_SIZE bytes.
+  bool operator==(const BuildId& build_id) const {
+    return memcmp(data_, build_id.data_, BUILD_ID_SIZE) == 0;
+  }
+
+  bool operator!=(const BuildId& build_id) const { return !(*this == build_id); }
+
+  // True when this id equals a default-constructed (all-zero) id.
+  bool IsEmpty() const { return *this == BuildId(); }
+
+ private:
+  unsigned char data_[BUILD_ID_SIZE];
+};
+
+#endif  // SIMPLE_PERF_BUILD_ID_H_
diff --git a/simpleperf/callchain.h b/simpleperf/callchain.h
new file mode 100644
index 0000000..2267fec
--- /dev/null
+++ b/simpleperf/callchain.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_CALLCHAIN_H_
+#define SIMPLE_PERF_CALLCHAIN_H_
+
+#include <string.h>
+
+#include <algorithm>
+#include <functional>
+#include <memory>
+#include <queue>
+#include <vector>
+
+#include <android-base/logging.h>
+
+template <typename EntryT>
+struct CallChainNode {
+  uint64_t period;           // period of call chains that end exactly at this node
+  uint64_t children_period;  // period of call chains that continue into children
+  std::vector<EntryT*> chain;  // consecutive call chain entries held by this node
+  std::vector<std::unique_ptr<CallChainNode>> children;  // continuations of chain
+};
+
+template <typename EntryT>
+struct CallChainRoot {  // Root of a tree of CallChainNode built by AddCallChain().
+  typedef CallChainNode<EntryT> NodeT;
+  uint64_t children_period;  // sum of the periods passed to AddCallChain()
+  std::vector<std::unique_ptr<NodeT>> children;  // top-level nodes
+
+  CallChainRoot() : children_period(0) {}
+
+  void AddCallChain(  // Merges one call chain with the given period into the tree.
+      const std::vector<EntryT*>& callchain, uint64_t period,
+      std::function<bool(const EntryT*, const EntryT*)> is_same_sample) {
+    children_period += period;
+    NodeT* p = FindMatchingNode(children, callchain[0], is_same_sample);  // callchain[0] is read unchecked
+    if (p == nullptr) {  // No top-level node starts like this chain: add it whole.
+      std::unique_ptr<NodeT> new_node = AllocateNode(callchain, 0, period, 0);
+      children.push_back(std::move(new_node));
+      return;
+    }
+    size_t callchain_pos = 0;  // index of the first callchain entry not yet matched
+    while (true) {
+      size_t match_length =
+          GetMatchingLengthInNode(p, callchain, callchain_pos, is_same_sample);
+      CHECK_GT(match_length, 0u);  // p was selected because its first entry matches
+      callchain_pos += match_length;
+      bool find_child = true;
+      if (match_length < p->chain.size()) {  // only a prefix of p->chain matched
+        SplitNode(p, match_length);
+        find_child = false;  // No need to find matching node in p->children.
+      }
+      if (callchain_pos == callchain.size()) {  // the call chain ends at p
+        p->period += period;
+        return;
+      }
+      p->children_period += period;  // the call chain continues below p
+      if (find_child) {
+        NodeT* np = FindMatchingNode(p->children, callchain[callchain_pos],
+                                     is_same_sample);
+        if (np != nullptr) {
+          p = np;  // descend and keep matching from callchain_pos
+          continue;
+        }
+      }
+      std::unique_ptr<NodeT> new_node =  // the unmatched rest becomes a new child of p
+          AllocateNode(callchain, callchain_pos, period, 0);
+      p->children.push_back(std::move(new_node));
+      break;
+    }
+  }
+
+  void SortByPeriod() {  // Sorts every children vector in the tree with CompareNodeByPeriod.
+    std::queue<std::vector<std::unique_ptr<NodeT>>*> queue;  // vectors still to be sorted
+    queue.push(&children);
+    while (!queue.empty()) {
+      std::vector<std::unique_ptr<NodeT>>* v = queue.front();
+      queue.pop();
+      std::sort(v->begin(), v->end(), CallChainRoot::CompareNodeByPeriod);
+      for (auto& node : *v) {
+        if (!node->children.empty()) {
+          queue.push(&node->children);
+        }
+      }
+    }
+  }
+
+ private:
+  NodeT* FindMatchingNode(  // Returns the first node whose first chain entry matches sample, or nullptr.
+      const std::vector<std::unique_ptr<NodeT>>& nodes, const EntryT* sample,
+      std::function<bool(const EntryT*, const EntryT*)> is_same_sample) {
+    for (auto& node : nodes) {
+      if (is_same_sample(node->chain.front(), sample)) {
+        return node.get();
+      }
+    }
+    return nullptr;
+  }
+
+  size_t GetMatchingLengthInNode(  // Counts leading entries of node->chain matching chain[chain_start...].
+      NodeT* node, const std::vector<EntryT*>& chain, size_t chain_start,
+      std::function<bool(const EntryT*, const EntryT*)> is_same_sample) {
+    size_t i, j;
+    for (i = 0, j = chain_start; i < node->chain.size() && j < chain.size();
+         ++i, ++j) {
+      if (!is_same_sample(node->chain[i], chain[j])) {
+        break;
+      }
+    }
+    return i;
+  }
+
+  void SplitNode(NodeT* parent, size_t parent_length) {  // Keeps parent_length entries in parent; the rest moves to a new only child.
+    std::unique_ptr<NodeT> child = AllocateNode(
+        parent->chain, parent_length, parent->period, parent->children_period);
+    child->children = std::move(parent->children);  // the child inherits the old subtree
+    parent->period = 0;
+    parent->children_period = child->period + child->children_period;
+    parent->chain.resize(parent_length);
+    parent->children.clear();
+    parent->children.push_back(std::move(child));
+  }
+
+  std::unique_ptr<NodeT> AllocateNode(const std::vector<EntryT*>& chain,  // New node holding chain[chain_start...].
+                                      size_t chain_start, uint64_t period,
+                                      uint64_t children_period) {
+    std::unique_ptr<NodeT> node(new NodeT);
+    for (size_t i = chain_start; i < chain.size(); ++i) {
+      node->chain.push_back(chain[i]);
+    }
+    node->period = period;
+    node->children_period = children_period;
+    return node;
+  }
+
+  static bool CompareNodeByPeriod(const std::unique_ptr<NodeT>& n1,  // True when n1's total period is larger.
+                                  const std::unique_ptr<NodeT>& n2) {
+    uint64_t period1 = n1->period + n1->children_period;
+    uint64_t period2 = n2->period + n2->children_period;
+    return period1 > period2;
+  }
+};
+
+#endif  // SIMPLE_PERF_CALLCHAIN_H_
diff --git a/simpleperf/cmd_dumprecord.cpp b/simpleperf/cmd_dumprecord.cpp
new file mode 100644
index 0000000..3e89b82
--- /dev/null
+++ b/simpleperf/cmd_dumprecord.cpp
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <inttypes.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
+#include <android-base/strings.h>
+
+#include "command.h"
+#include "event_attr.h"
+#include "perf_regs.h"
+#include "record.h"
+#include "record_file.h"
+#include "utils.h"
+
+using namespace PerfFileFormat;
+
+// The "dump" command: prints the sections of a perf record file (see Run()).
+class DumpRecordCommand : public Command {
+ public:
+  DumpRecordCommand()
+      : Command("dump", "dump perf record file",
+                "Usage: simpleperf dump [options] [perf_record_file]\n"
+                "    Dump different parts of a perf record file. Default file is perf.data.\n"),
+        record_filename_("perf.data"), record_file_arch_(GetBuildArch()) {}
+
+  bool Run(const std::vector<std::string>& args) override;
+
+ private:
+  bool ParseOptions(const std::vector<std::string>& args);
+  void DumpFileHeader();
+  void DumpAttrSection();
+  void DumpDataSection();
+  void DumpFeatureSection();
+
+  std::string record_filename_;  // file to dump; replaced by the single argument
+  std::unique_ptr<RecordFileReader> record_file_reader_;  // created in Run()
+  ArchType record_file_arch_;  // build arch unless the file stores an arch string
+};
+
+// Opens the record file chosen by args and dumps its header, attr, data and
+// feature sections in that order. Returns false when the arguments are
+// rejected, no reader is created, or the file names an unsupported arch.
+bool DumpRecordCommand::Run(const std::vector<std::string>& args) {
+  if (!ParseOptions(args)) return false;
+
+  record_file_reader_ = RecordFileReader::CreateInstance(record_filename_);
+  if (!record_file_reader_) return false;
+
+  // An arch string recorded in the file overrides the default build arch.
+  const std::string file_arch = record_file_reader_->ReadFeatureString(FEAT_ARCH);
+  if (!file_arch.empty()) {
+    record_file_arch_ = GetArchType(file_arch);
+    if (record_file_arch_ == ARCH_UNSUPPORTED) return false;
+  }
+
+  ScopedCurrentArch scoped_arch(record_file_arch_);
+  DumpFileHeader();
+  DumpAttrSection();
+  DumpDataSection();
+  DumpFeatureSection();
+  return true;
+}
+
+// Accepts at most one argument, the record file name; extra ones are reported.
+bool DumpRecordCommand::ParseOptions(const std::vector<std::string>& args) {
+  if (args.size() > 1) {
+    ReportUnknownOption(args, 1);
+    return false;
+  }
+  if (!args.empty()) record_filename_ = args.front();
+  return true;
+}
+
+static const std::string GetFeatureName(int feature);  // defined after DumpFileHeader()
+
+// Prints the file header: the 8 magic bytes, header and attr sizes (warning
+// when they differ from the sizes this build expects), the three section
+// descriptors, and the name of every feature set in the feature bitmap.
+void DumpRecordCommand::DumpFileHeader() {
+  const FileHeader& header = record_file_reader_->FileHeader();
+  printf("magic: ");
+  for (size_t i = 0; i < 8; ++i) {
+    printf("%c", header.magic[i]);
+  }
+  printf("\n");
+  printf("header_size: %" PRId64 "\n", header.header_size);
+  if (header.header_size != sizeof(header)) {
+    // LOG, not PLOG: a size mismatch is not an errno failure, so appending
+    // strerror(errno) to the message would only mislead.
+    LOG(WARNING) << "record file header size " << header.header_size
+                 << " doesn't match expected header size " << sizeof(header);
+  }
+  printf("attr_size: %" PRId64 "\n", header.attr_size);
+  if (header.attr_size != sizeof(FileAttr)) {
+    LOG(WARNING) << "record file attr size " << header.attr_size
+                 << " doesn't match expected attr size " << sizeof(FileAttr);
+  }
+  printf("attrs[file section]: offset %" PRId64 ", size %" PRId64 "\n", header.attrs.offset,
+         header.attrs.size);
+  printf("data[file section]: offset %" PRId64 ", size %" PRId64 "\n", header.data.offset,
+         header.data.size);
+  printf("event_types[file section]: offset %" PRId64 ", size %" PRId64 "\n",
+         header.event_types.offset, header.event_types.size);
+
+  // Feature i is flagged by bit (i % 8) of byte (i / 8) of header.features.
+  for (size_t i = 0; i < FEAT_MAX_NUM; ++i) {
+    if ((header.features[i / 8] & (1 << (i % 8))) != 0) {
+      printf("feature: %s\n", GetFeatureName(static_cast<int>(i)).c_str());
+    }
+  }
+}
+
+// Maps a FEAT_* id to its printable name; ids without an entry are rendered
+// as "unknown_feature(<id>)".
+static const std::string GetFeatureName(int feature) {
+  static const std::map<int, std::string> kFeatureNames = {
+      {FEAT_TRACING_DATA, "tracing_data"},
+      {FEAT_BUILD_ID, "build_id"},
+      {FEAT_HOSTNAME, "hostname"},
+      {FEAT_OSRELEASE, "osrelease"},
+      {FEAT_VERSION, "version"},
+      {FEAT_ARCH, "arch"},
+      {FEAT_NRCPUS, "nrcpus"},
+      {FEAT_CPUDESC, "cpudesc"},
+      {FEAT_CPUID, "cpuid"},
+      {FEAT_TOTAL_MEM, "total_mem"},
+      {FEAT_CMDLINE, "cmdline"},
+      {FEAT_EVENT_DESC, "event_desc"},
+      {FEAT_CPU_TOPOLOGY, "cpu_topology"},
+      {FEAT_NUMA_TOPOLOGY, "numa_topology"},
+      {FEAT_BRANCH_STACK, "branch_stack"},
+      {FEAT_PMU_MAPPINGS, "pmu_mappings"},
+      {FEAT_GROUP_DESC, "group_desc"},
+  };
+  const auto found = kFeatureNames.find(feature);
+  if (found != kFeatureNames.end()) return found->second;
+  return android::base::StringPrintf("unknown_feature(%d)", feature);
+}
+
+// Prints each attr (numbered from 1) followed by its ids, if it has any.
+void DumpRecordCommand::DumpAttrSection() {
+  const std::vector<AttrWithId> attrs = record_file_reader_->AttrSection();
+  size_t attr_number = 0;
+  for (const AttrWithId& entry : attrs) {
+    printf("attr %zu:\n", ++attr_number);
+    DumpPerfEventAttr(*entry.attr, 1);
+    if (entry.ids.empty()) continue;
+    printf("  ids:");
+    for (const auto& id : entry.ids) {
+      printf(" %" PRId64, id);
+    }
+    printf("\n");
+  }
+}
+
+// Calls Dump() on every record handed over by ReadDataSection().
+// NOTE(review): the meaning of the trailing `false` is defined by ReadDataSection().
+void DumpRecordCommand::DumpDataSection() {
+  auto dump_record = [](std::unique_ptr<Record> record) { record->Dump(); return true; };
+  record_file_reader_->ReadDataSection(dump_record, false);
+}
+
+// Prints the location of every feature section, plus the decoded content of
+// the build_id, osrelease, arch and cmdline features.
+void DumpRecordCommand::DumpFeatureSection() {
+  const std::map<int, SectionDesc> sections = record_file_reader_->FeatureSectionDescriptors();
+  for (const auto& entry : sections) {
+    const int feature = entry.first;
+    printf("feature section for %s: offset %" PRId64 ", size %" PRId64 "\n",
+           GetFeatureName(feature).c_str(), entry.second.offset, entry.second.size);
+    // Features without a branch below only get the location line above.
+    if (feature == FEAT_BUILD_ID) {
+      std::vector<BuildIdRecord> build_ids = record_file_reader_->ReadBuildIdFeature();
+      for (auto& build_id : build_ids) build_id.Dump(1);
+    } else if (feature == FEAT_OSRELEASE) {
+      const std::string osrelease = record_file_reader_->ReadFeatureString(feature);
+      PrintIndented(1, "osrelease: %s\n", osrelease.c_str());
+    } else if (feature == FEAT_ARCH) {
+      const std::string arch = record_file_reader_->ReadFeatureString(feature);
+      PrintIndented(1, "arch: %s\n", arch.c_str());
+    } else if (feature == FEAT_CMDLINE) {
+      const std::vector<std::string> cmdline = record_file_reader_->ReadCmdlineFeature();
+      PrintIndented(1, "cmdline: %s\n", android::base::Join(cmdline, ' ').c_str());
+    }
+  }
+}
+
+void RegisterDumpRecordCommand() {  // Registers a factory for the "dump" command.
+  RegisterCommand("dump", [] { return std::unique_ptr<Command>(new DumpRecordCommand); });
+}
diff --git a/simpleperf/cmd_dumprecord_test.cpp b/simpleperf/cmd_dumprecord_test.cpp
new file mode 100644
index 0000000..441851f
--- /dev/null
+++ b/simpleperf/cmd_dumprecord_test.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "command.h"
+#include "get_test_data.h"
+
+static std::unique_ptr<Command> DumpCmd() {  // Creates an instance of the "dump" command.
+  return CreateCommandInstance("dump");
+}
+
+TEST(cmd_dump, record_file_option) {  // "dump" must succeed on the perf.data test file
+  ASSERT_TRUE(DumpCmd()->Run({GetTestData("perf.data")}));
+}
diff --git a/simpleperf/cmd_help.cpp b/simpleperf/cmd_help.cpp
new file mode 100644
index 0000000..7054e65
--- /dev/null
+++ b/simpleperf/cmd_help.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <string>
+#include <vector>
+
+#include <android-base/logging.h>
+
+#include "command.h"
+
+class HelpCommand : public Command {  // The "help" command: short or per-subcommand help.
+ public:
+  HelpCommand()
+      : Command("help", "print help information for simpleperf",
+                // clang-format off
+"Usage: simpleperf help [subcommand]\n"
+"    Without subcommand, print short help string for every subcommand.\n"
+"    With subcommand, print long help string for the subcommand.\n\n"
+                // clang-format on
+                ) {}
+
+  bool Run(const std::vector<std::string>& args) override;  // args[0], if present, names a subcommand
+
+ private:
+  void PrintShortHelp();  // usage text plus one line per registered command
+  void PrintLongHelpForOneCommand(const Command& cmd);  // prints cmd's long help string
+};
+
+// Prints short help for no args, else long help for args[0]; false if unknown.
+bool HelpCommand::Run(const std::vector<std::string>& args) {
+  if (args.empty()) {
+    PrintShortHelp();
+    return true;
+  }
+  const std::unique_ptr<Command> target = CreateCommandInstance(args[0]);
+  if (target != nullptr) {
+    PrintLongHelpForOneCommand(*target);
+    return true;
+  }
+  LOG(ERROR) << "malformed command line: can't find help string for "
+                "unknown command "
+             << args[0];
+  LOG(ERROR) << "try using \"--help\"";
+  return false;
+}
+
+void HelpCommand::PrintShortHelp() {  // usage text, then one line per command
+  printf(
+      // clang-format off
+"Usage: simpleperf [common options] subcommand [args_for_subcommand]\n"
+"common options:\n"
+"    -h/--help     Print this help information.\n"
+"    --log <severity> Set the minimum severity of logging. Possible severities\n"
+"                     include verbose, debug, warning, info, error, fatal.\n"
+"                     Default is info.\n"
+"    --version     Print version of simpleperf.\n"
+      "subcommands:\n"
+      // clang-format on
+      );
+  for (const auto& name : GetAllCommandNames()) {
+    const std::unique_ptr<Command> command = CreateCommandInstance(name);
+    printf("    %-20s%s\n", name.c_str(), command->ShortHelpString().c_str());
+  }
+}
+
+void HelpCommand::PrintLongHelpForOneCommand(const Command& command) {  // long help + newline
+  printf("%s\n", command.LongHelpString().c_str());
+}
+
+// Registers the factory that creates the "help" command.
+void RegisterHelpCommand() {
+  RegisterCommand("help", [] { return std::unique_ptr<Command>(new HelpCommand); });
+}
diff --git a/simpleperf/cmd_kmem.cpp b/simpleperf/cmd_kmem.cpp
new file mode 100644
index 0000000..16559a2
--- /dev/null
+++ b/simpleperf/cmd_kmem.cpp
@@ -0,0 +1,711 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command.h"
+
+#include <unordered_map>
+
+#include <android-base/logging.h>
+#include <android-base/strings.h>
+
+#include "callchain.h"
+#include "event_attr.h"
+#include "event_type.h"
+#include "record_file.h"
+#include "sample_tree.h"
+#include "tracing.h"
+#include "utils.h"
+
+namespace {
+
+// One entry of the slab report. It is created for a single allocation record
+// and accumulates bytes_req, bytes_alloc and sample_count when other samples
+// are merged into it (see SlabSampleTreeBuilder::MergeSample).
+struct SlabSample {
+  const Symbol* symbol;            // the function making allocation
+  uint64_t ptr;                    // the start address of the allocated space
+  uint64_t bytes_req;              // requested space size
+  uint64_t bytes_alloc;            // allocated space size
+  uint64_t sample_count;           // count of allocations
+  uint64_t gfp_flags;              // flags used for allocation
+  uint64_t cross_cpu_allocations;  // count of allocations freed not on the
+                                   // cpu allocating them
+  CallChainRoot<SlabSample> callchain;  // a callchain tree representing all
+                                        // callchains in this sample
+  // Initializes every scalar field from the argument of the same name;
+  // callchain is default constructed.
+  SlabSample(const Symbol* symbol, uint64_t ptr, uint64_t bytes_req,
+             uint64_t bytes_alloc, uint64_t sample_count, uint64_t gfp_flags,
+             uint64_t cross_cpu_allocations)
+      : symbol(symbol),
+        ptr(ptr),
+        bytes_req(bytes_req),
+        bytes_alloc(bytes_alloc),
+        sample_count(sample_count),
+        gfp_flags(gfp_flags),
+        cross_cpu_allocations(cross_cpu_allocations) {}
+};
+
+// Per-record values that SlabSampleTreeBuilder::CreateSample() fills in and
+// CreateCallChainSample() copies into the samples created for callchain
+// entries.
+struct SlabAccumulateInfo {
+  uint64_t bytes_req;    // requested size of the allocation record
+  uint64_t bytes_alloc;  // allocated size of the allocation record
+};
+
+// Comparison functions for single SlabSample fields, generated by helper
+// macros defined outside this file.
+// NOTE(review): the _REVERSE variants presumably order larger values first;
+// confirm against the macro definitions.
+BUILD_COMPARE_VALUE_FUNCTION(ComparePtr, ptr);
+BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesReq, bytes_req);
+BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesAlloc, bytes_alloc);
+BUILD_COMPARE_VALUE_FUNCTION(CompareGfpFlags, gfp_flags);
+BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareCrossCpuAllocations,
+                                     cross_cpu_allocations);
+
+// Display functions for single SlabSample fields, used as report columns.
+// NOTE(review): HEX64/UINT64 presumably select hexadecimal vs. decimal
+// formatting; confirm against the macro definitions.
+BUILD_DISPLAY_HEX64_FUNCTION(DisplayPtr, ptr);
+BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesReq, bytes_req);
+BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesAlloc, bytes_alloc);
+BUILD_DISPLAY_HEX64_FUNCTION(DisplayGfpFlags, gfp_flags);
+BUILD_DISPLAY_UINT64_FUNCTION(DisplayCrossCpuAllocations,
+                              cross_cpu_allocations);
+
+// Compares two samples by fragment size (bytes_alloc - bytes_req). The
+// fragment of sample2 is passed to Compare() first, i.e. the operands are
+// swapped relative to the parameter order.
+static int CompareFragment(const SlabSample* sample1,
+                           const SlabSample* sample2) {
+  const uint64_t first_fragment = sample1->bytes_alloc - sample1->bytes_req;
+  const uint64_t second_fragment = sample2->bytes_alloc - sample2->bytes_req;
+  return Compare(second_fragment, first_fragment);
+}
+
+// Formats the fragment size (bytes_alloc - bytes_req) of |sample| as an
+// unsigned decimal string.
+static std::string DisplayFragment(const SlabSample* sample) {
+  const uint64_t fragment = sample->bytes_alloc - sample->bytes_req;
+  return android::base::StringPrintf("%" PRIu64, fragment);
+}
+
+// Result handed out by SlabSampleTreeBuilder::GetSampleTree(): the samples
+// plus the summary counters printed at the top of the slab report.
+struct SlabSampleTree {
+  std::vector<SlabSample*> samples;   // samples returned by GetSamples()
+  uint64_t total_requested_bytes;     // sum of bytes_req of counted samples
+  uint64_t total_allocated_bytes;     // sum of bytes_alloc of counted samples
+  uint64_t nr_allocations;            // number of counted allocation samples
+  uint64_t nr_frees;                  // number of processed free records
+  uint64_t nr_cross_cpu_allocations;  // frees seen on a cpu different from
+                                      // the one recorded for the allocation
+};
+
+// Describes one kmem tracepoint event: whether it is an allocation or a free
+// event, and where its fields are located in the raw data of a sample.
+// For KMEM_FREE formats only call_site and ptr are filled in by
+// KmemCommand::ProcessTracingData().
+struct SlabFormat {
+  enum {
+    KMEM_ALLOC,
+    KMEM_FREE,
+  } type;
+  TracingFieldPlace call_site;
+  TracingFieldPlace ptr;
+  TracingFieldPlace bytes_req;
+  TracingFieldPlace bytes_alloc;
+  TracingFieldPlace gfp_flags;
+};
+
+// Builds the tree of SlabSample entries from the sample records of kmem
+// tracepoint events, and keeps the summary counters shown in the report.
+class SlabSampleTreeBuilder
+    : public SampleTreeBuilder<SlabSample, SlabAccumulateInfo> {
+ public:
+  // |sample_comparator| is forwarded to the SampleTreeBuilder base class.
+  // |thread_tree| is used to look up kernel symbols and is not owned.
+  SlabSampleTreeBuilder(SampleComparator<SlabSample> sample_comparator,
+                        ThreadTree* thread_tree)
+      : SampleTreeBuilder(sample_comparator),
+        thread_tree_(thread_tree),
+        total_requested_bytes_(0),
+        total_allocated_bytes_(0),
+        nr_allocations_(0),
+        // nr_frees_ is incremented in CreateSample() and reported by
+        // GetSampleTree(), so it must start from a defined value.
+        nr_frees_(0),
+        nr_cross_cpu_allocations_(0) {}
+
+  // Returns the samples built so far together with the summary counters.
+  SlabSampleTree GetSampleTree() const {
+    SlabSampleTree sample_tree;
+    sample_tree.samples = GetSamples();
+    sample_tree.total_requested_bytes = total_requested_bytes_;
+    sample_tree.total_allocated_bytes = total_allocated_bytes_;
+    sample_tree.nr_allocations = nr_allocations_;
+    sample_tree.nr_frees = nr_frees_;
+    sample_tree.nr_cross_cpu_allocations = nr_cross_cpu_allocations_;
+    return sample_tree;
+  }
+
+  // Stores a copy of |format| and maps every id in |event_ids| to that copy,
+  // replacing any format previously mapped to the same id.
+  void AddSlabFormat(const std::vector<uint64_t>& event_ids,
+                     SlabFormat format) {
+    std::unique_ptr<SlabFormat> p(new SlabFormat(format));
+    for (auto id : event_ids) {
+      event_id_to_format_map_[id] = p.get();
+    }
+    formats_.push_back(std::move(p));
+  }
+
+ protected:
+  // Handles one sample record. For an allocation event a new sample is
+  // inserted and returned, and |acc_info| receives the requested/allocated
+  // sizes. For a free event the matching allocation (if any) is looked up to
+  // count cross cpu frees, and nullptr is returned. Records whose event id
+  // has no registered format are ignored.
+  SlabSample* CreateSample(const SampleRecord& r, bool in_kernel,
+                           SlabAccumulateInfo* acc_info) override {
+    if (!in_kernel) {
+      // Normally we don't parse records in user space because tracepoint
+      // events all happen in kernel. But if r.ip_data.ip == 0, it may be
+      // a kernel record failed to dump ip register and is still useful.
+      if (r.ip_data.ip == 0) {
+        // It seems we are on a kernel can't dump regset for tracepoint events
+        // because of lacking perf_arch_fetch_caller_regs(). We can't get
+        // callchain, but we can still do a normal report.
+        // Warn only once, and only when callchains were asked for.
+        static bool first = true;
+        if (first) {
+          first = false;
+          if (accumulate_callchain_) {
+            LOG(WARNING) << "simpleperf may not get callchains for tracepoint"
+                         << " events because of lacking kernel support.";
+          }
+        }
+      } else {
+        return nullptr;
+      }
+    }
+    uint64_t id = r.id_data.id;
+    auto it = event_id_to_format_map_.find(id);
+    if (it == event_id_to_format_map_.end()) {
+      return nullptr;
+    }
+    const char* raw_data = r.raw_data.data;
+    SlabFormat* format = it->second;
+    if (format->type == SlabFormat::KMEM_ALLOC) {
+      uint64_t call_site = format->call_site.ReadFromData(raw_data);
+      const Symbol* symbol = thread_tree_->FindKernelSymbol(call_site);
+      uint64_t ptr = format->ptr.ReadFromData(raw_data);
+      uint64_t bytes_req = format->bytes_req.ReadFromData(raw_data);
+      uint64_t bytes_alloc = format->bytes_alloc.ReadFromData(raw_data);
+      uint64_t gfp_flags = format->gfp_flags.ReadFromData(raw_data);
+      SlabSample* sample =
+          InsertSample(std::unique_ptr<SlabSample>(new SlabSample(
+              symbol, ptr, bytes_req, bytes_alloc, 1, gfp_flags, 0)));
+      // Remember which cpu allocated |ptr|. insert() keeps an existing entry,
+      // so an address whose free was not seen keeps its first allocation.
+      alloc_cpu_record_map_.insert(
+          std::make_pair(ptr, std::make_pair(r.cpu_data.cpu, sample)));
+      acc_info->bytes_req = bytes_req;
+      acc_info->bytes_alloc = bytes_alloc;
+      return sample;
+    } else if (format->type == SlabFormat::KMEM_FREE) {
+      uint64_t ptr = format->ptr.ReadFromData(raw_data);
+      auto alloc_it = alloc_cpu_record_map_.find(ptr);
+      if (alloc_it != alloc_cpu_record_map_.end()) {
+        SlabSample* sample = alloc_it->second.second;
+        if (r.cpu_data.cpu != alloc_it->second.first) {
+          sample->cross_cpu_allocations++;
+          nr_cross_cpu_allocations_++;
+        }
+        alloc_cpu_record_map_.erase(alloc_it);
+      }
+      // Every free record is counted, whether or not its allocation was seen.
+      nr_frees_++;
+    }
+    return nullptr;
+  }
+
+  // Branch stack samples are not used for the slab report.
+  SlabSample* CreateBranchSample(const SampleRecord&,
+                                 const BranchStackItemType&) override {
+    return nullptr;
+  }
+
+  // Creates the sample for one kernel callchain entry at |ip|, reusing the
+  // ptr and gfp_flags of |sample| and the sizes stored in |acc_info|.
+  // User space entries are skipped.
+  SlabSample* CreateCallChainSample(
+      const SlabSample* sample, uint64_t ip, bool in_kernel,
+      const std::vector<SlabSample*>& callchain,
+      const SlabAccumulateInfo& acc_info) override {
+    if (!in_kernel) {
+      return nullptr;
+    }
+    const Symbol* symbol = thread_tree_->FindKernelSymbol(ip);
+    return InsertCallChainSample(
+        std::unique_ptr<SlabSample>(
+            new SlabSample(symbol, sample->ptr, acc_info.bytes_req,
+                           acc_info.bytes_alloc, 1, sample->gfp_flags, 0)),
+        callchain);
+  }
+
+  // Slab samples are not associated with a thread.
+  const ThreadEntry* GetThreadOfSample(SlabSample*) override { return nullptr; }
+
+  uint64_t GetPeriodForCallChain(const SlabAccumulateInfo&) override {
+    // Decide the percentage of callchain by the sample_count, so use 1 as the
+    // period when calling AddCallChain().
+    return 1;
+  }
+
+  // Adds the sizes of |sample| to the totals and counts one allocation.
+  void UpdateSummary(const SlabSample* sample) override {
+    total_requested_bytes_ += sample->bytes_req;
+    total_allocated_bytes_ += sample->bytes_alloc;
+    nr_allocations_++;
+  }
+
+  // Accumulates the sizes and the hit count of |sample2| into |sample1|.
+  void MergeSample(SlabSample* sample1, SlabSample* sample2) override {
+    sample1->bytes_req += sample2->bytes_req;
+    sample1->bytes_alloc += sample2->bytes_alloc;
+    sample1->sample_count += sample2->sample_count;
+  }
+
+ private:
+  ThreadTree* thread_tree_;
+  uint64_t total_requested_bytes_;
+  uint64_t total_allocated_bytes_;
+  uint64_t nr_allocations_;
+  uint64_t nr_frees_;
+  uint64_t nr_cross_cpu_allocations_;
+
+  // Maps an event id to its format; the formats are owned by formats_.
+  std::unordered_map<uint64_t, SlabFormat*> event_id_to_format_map_;
+  std::vector<std::unique_ptr<SlabFormat>> formats_;
+  // Maps an allocated address to (allocating cpu, sample) until the address
+  // is freed.
+  std::unordered_map<uint64_t, std::pair<uint32_t, SlabSample*>>
+      alloc_cpu_record_map_;
+};
+
+using SlabSampleTreeSorter = SampleTreeSorter<SlabSample>;
+using SlabSampleTreeDisplayer = SampleTreeDisplayer<SlabSample, SlabSampleTree>;
+using SlabSampleCallgraphDisplayer =
+    CallgraphDisplayer<SlabSample, CallChainNode<SlabSample>>;
+
+// A perf_event_attr read from the record file, together with the event name
+// shown in the report and the event ids stored with the attr.
+struct EventAttrWithName {
+  perf_event_attr attr;
+  std::string name;
+  std::vector<uint64_t> event_ids;
+};
+
+// Implements `simpleperf kmem`: `kmem record` forwards to the record command
+// with the kmem trace events selected, `kmem report` reads a record file and
+// prints slab allocation information.
+class KmemCommand : public Command {
+ public:
+  KmemCommand()
+      : Command(
+            "kmem", "collect kernel memory allocation information",
+            // clang-format off
+"Usage: kmem (record [record options] | report [report options])\n"
+"kmem record\n"
+"-g        Enable call graph recording. Same as '--call-graph fp'.\n"
+"--slab    Collect slab allocation information. Default option.\n"
+"Other record options provided by simpleperf record command are also available.\n"
+"kmem report\n"
+"--children  Print the accumulated allocation info appeared in the callchain.\n"
+"            Can be used on perf.data recorded with `--call-graph fp` option.\n"
+"-g [callee|caller]  Print call graph for perf.data recorded with\n"
+"                    `--call-graph fp` option. If callee mode is used, the graph\n"
+"                     shows how functions are called from others. Otherwise, the\n"
+"                     graph shows how functions call others. Default is callee\n"
+"                     mode. The percentage shown in the graph is determined by\n"
+"                     the hit count of the callchain.\n"
+"-i          Specify path of record file, default is perf.data\n"
+"-o report_file_name  Set report file name, default is stdout.\n"
+"--slab      Report slab allocation information. Default option.\n"
+"--slab-sort key1,key2,...\n"
+"            Select the keys to sort and print slab allocation information.\n"
+"            Should be used with --slab option. Possible keys include:\n"
+"              hit         -- the allocation count.\n"
+"              caller      -- the function calling allocation.\n"
+"              ptr         -- the address of the allocated space.\n"
+"              bytes_req   -- the total requested space size.\n"
+"              bytes_alloc -- the total allocated space size.\n"
+"              fragment    -- the extra allocated space size\n"
+"                             (bytes_alloc - bytes_req).\n"
+"              gfp_flags   -- the flags used for allocation.\n"
+"              pingpong    -- the count of allocations that are freed not on\n"
+"                             the cpu allocating them.\n"
+"            The default slab sort keys are:\n"
+"              hit,caller,bytes_req,bytes_alloc,fragment,pingpong.\n"
+            // clang-format on
+            ),
+        is_record_(false),
+        use_slab_(false),
+        accumulate_callchain_(false),
+        print_callgraph_(false),
+        callgraph_show_callee_(false),
+        record_filename_("perf.data"),
+        record_file_arch_(GetBuildArch()) {}
+
+  // Parses |args| and runs either the record or the report subcommand.
+  // Marked override like the Run() of the other commands, so a signature
+  // mismatch with Command::Run is caught at compile time.
+  bool Run(const std::vector<std::string>& args) override;
+
+ private:
+  bool ParseOptions(const std::vector<std::string>& args,
+                    std::vector<std::string>* left_args);
+  bool RecordKmemInfo(const std::vector<std::string>& record_args);
+  bool ReportKmemInfo();
+  bool PrepareToBuildSampleTree();
+  void ReadEventAttrsFromRecordFile();
+  bool ReadFeaturesFromRecordFile();
+  bool ReadSampleTreeFromRecordFile();
+  bool ProcessRecord(std::unique_ptr<Record> record);
+  void ProcessTracingData(const std::vector<char>& data);
+  bool PrintReport();
+  void PrintReportContext(FILE* fp);
+  void PrintSlabReportContext(FILE* fp);
+
+  // Options filled in by ParseOptions().
+  bool is_record_;  // true for `kmem record`, false for `kmem report`
+  bool use_slab_;
+  std::vector<std::string> slab_sort_keys_;
+  bool accumulate_callchain_;
+  bool print_callgraph_;
+  bool callgraph_show_callee_;
+
+  // State read from the record file when reporting.
+  std::string record_filename_;
+  std::unique_ptr<RecordFileReader> record_file_reader_;
+  std::vector<EventAttrWithName> event_attrs_;
+  std::string record_cmdline_;
+  ArchType record_file_arch_;
+
+  ThreadTree thread_tree_;
+  SlabSampleTree slab_sample_tree_;
+  std::unique_ptr<SlabSampleTreeBuilder> slab_sample_tree_builder_;
+  std::unique_ptr<SlabSampleTreeSorter> slab_sample_tree_sorter_;
+  std::unique_ptr<SlabSampleTreeDisplayer> slab_sample_tree_displayer_;
+
+  // Report output path; the report goes to stdout when it is empty.
+  std::string report_filename_;
+};
+
+// Parses the command line, then dispatches to recording or reporting.
+// Returns false if parsing fails or the selected action fails.
+bool KmemCommand::Run(const std::vector<std::string>& args) {
+  std::vector<std::string> left_args;
+  if (!ParseOptions(args, &left_args)) {
+    return false;
+  }
+  // Slab information is always enabled, whether or not --slab was given.
+  use_slab_ = true;
+  return is_record_ ? RecordKmemInfo(left_args) : ReportKmemInfo();
+}
+
+// Parses the arguments of `kmem record` or `kmem report`.
+// For record, the leading options (arguments starting with '-') are scanned:
+// -g is translated to "--call-graph fp", --slab is consumed, and everything
+// else, including all arguments after the leading options, is appended to
+// |left_args| for the record command. For report, the options are stored in
+// the corresponding members.
+// Returns false on a missing/unknown subcommand, an unknown report option, a
+// missing option value, or when recording without root privilege.
+bool KmemCommand::ParseOptions(const std::vector<std::string>& args,
+                               std::vector<std::string>* left_args) {
+  if (args.empty()) {
+    LOG(ERROR) << "No subcommand specified";
+    return false;
+  }
+  const std::string& subcommand = args[0];
+  if (subcommand != "record" && subcommand != "report") {
+    LOG(ERROR) << "Unknown subcommand for " << Name() << ": " << subcommand
+               << ". Try `simpleperf help " << Name() << "`";
+    return false;
+  }
+
+  if (subcommand == "record") {
+    if (!IsRoot()) {
+      LOG(ERROR) << "simpleperf kmem record command needs root privilege";
+      return false;
+    }
+    is_record_ = true;
+    size_t pos = 1;
+    while (pos < args.size() && !args[pos].empty() && args[pos][0] == '-') {
+      const std::string& option = args[pos];
+      if (option == "-g") {
+        left_args->push_back("--call-graph");
+        left_args->push_back("fp");
+      } else if (option == "--slab") {
+        use_slab_ = true;
+      } else {
+        left_args->push_back(option);
+      }
+      ++pos;
+    }
+    // Everything from the first non-option argument on is passed through.
+    left_args->insert(left_args->end(), args.begin() + pos, args.end());
+    return true;
+  }
+
+  // subcommand == "report"
+  is_record_ = false;
+  for (size_t i = 1; i < args.size(); ++i) {
+    const std::string& option = args[i];
+    if (option == "--children") {
+      accumulate_callchain_ = true;
+    } else if (option == "--slab") {
+      use_slab_ = true;
+    } else if (option == "-g") {
+      print_callgraph_ = true;
+      accumulate_callchain_ = true;
+      callgraph_show_callee_ = true;
+      // An optional mode follows -g unless the next argument is an option.
+      const bool has_mode = i + 1 < args.size() && args[i + 1][0] != '-';
+      if (has_mode) {
+        const std::string& mode = args[++i];
+        if (mode == "caller") {
+          callgraph_show_callee_ = false;
+        } else if (mode != "callee") {
+          LOG(ERROR) << "Unknown argument with -g option: " << mode;
+          return false;
+        }
+      }
+    } else if (option == "-i" || option == "-o" || option == "--slab-sort") {
+      // These options take a value; |option| still names the option itself
+      // after i has been advanced to the value.
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      const std::string& value = args[i];
+      if (option == "-i") {
+        record_filename_ = value;
+      } else if (option == "-o") {
+        report_filename_ = value;
+      } else {
+        slab_sort_keys_ = android::base::Split(value, ",");
+      }
+    } else {
+      ReportUnknownOption(args, i);
+      return false;
+    }
+  }
+  return true;
+}
+
+// Runs the record command system wide (-a) with an "-e <event>" pair for
+// every slab trace event accepted by ParseEventType(), followed by
+// |record_args|. Returns false if no event is accepted, if the record command
+// isn't available, or if the record command itself fails.
+bool KmemCommand::RecordKmemInfo(const std::vector<std::string>& record_args) {
+  std::vector<std::string> args;
+  if (use_slab_) {
+    const std::vector<std::string> slab_events = {
+        "kmem:kmalloc",      "kmem:kmem_cache_alloc",
+        "kmem:kmalloc_node", "kmem:kmem_cache_alloc_node",
+        "kmem:kfree",        "kmem:kmem_cache_free"};
+    for (const auto& event_name : slab_events) {
+      if (!ParseEventType(event_name)) {
+        continue;
+      }
+      args.push_back("-e");
+      args.push_back(event_name);
+    }
+  }
+  if (args.empty()) {
+    LOG(ERROR) << "Kernel allocation related trace events are not supported.";
+    return false;
+  }
+  args.push_back("-a");
+  for (const auto& arg : record_args) {
+    args.push_back(arg);
+  }
+  std::unique_ptr<Command> recorder = CreateCommandInstance("record");
+  if (recorder == nullptr) {
+    LOG(ERROR) << "record command isn't available";
+    return false;
+  }
+  return recorder->Run(args);
+}
+
+// Runs the report pipeline: prepare the sample tree helpers, open the record
+// file, read attrs, features and samples, then print the report. Stops and
+// returns false at the first failing step.
+bool KmemCommand::ReportKmemInfo() {
+  if (!PrepareToBuildSampleTree()) {
+    return false;
+  }
+  record_file_reader_ = RecordFileReader::CreateInstance(record_filename_);
+  if (record_file_reader_ == nullptr) {
+    return false;
+  }
+  ReadEventAttrsFromRecordFile();
+  return ReadFeaturesFromRecordFile() && ReadSampleTreeFromRecordFile() &&
+         PrintReport();
+}
+
+// Translates slab_sort_keys_ (or the default keys when none were given) into
+// the comparator handed to the sample tree builder, the comparator handed to
+// the sorter and the displayer columns, then creates the builder, sorter and
+// displayer. Returns false if an unknown sort key is found.
+bool KmemCommand::PrepareToBuildSampleTree() {
+  if (!use_slab_) {
+    return true;
+  }
+  if (slab_sort_keys_.empty()) {
+    slab_sort_keys_ = {"hit",         "caller",   "bytes_req",
+                       "bytes_alloc", "fragment", "pingpong"};
+  }
+  // |comparator| goes to the tree builder; |sort_comparator| goes to the
+  // sorter and gets the functions of |comparator| appended at the end.
+  SampleComparator<SlabSample> comparator;
+  SampleComparator<SlabSample> sort_comparator;
+  SampleDisplayer<SlabSample, SlabSampleTree> displayer;
+  std::string accumulated_name = accumulate_callchain_ ? "Accumulated_" : "";
+
+  if (print_callgraph_) {
+    displayer.AddExclusiveDisplayFunction(SlabSampleCallgraphDisplayer());
+  }
+
+  for (const auto& key : slab_sort_keys_) {
+    if (key == "hit") {
+      sort_comparator.AddCompareFunction(CompareSampleCount);
+      displayer.AddDisplayFunction(accumulated_name + "Hit",
+                                   DisplaySampleCount);
+    } else if (key == "caller") {
+      comparator.AddCompareFunction(CompareSymbol);
+      displayer.AddDisplayFunction("Caller", DisplaySymbol);
+    } else if (key == "ptr") {
+      comparator.AddCompareFunction(ComparePtr);
+      displayer.AddDisplayFunction("Ptr", DisplayPtr);
+    } else if (key == "bytes_req") {
+      sort_comparator.AddCompareFunction(CompareBytesReq);
+      displayer.AddDisplayFunction(accumulated_name + "BytesReq",
+                                   DisplayBytesReq);
+    } else if (key == "bytes_alloc") {
+      sort_comparator.AddCompareFunction(CompareBytesAlloc);
+      displayer.AddDisplayFunction(accumulated_name + "BytesAlloc",
+                                   DisplayBytesAlloc);
+    } else if (key == "fragment") {
+      sort_comparator.AddCompareFunction(CompareFragment);
+      displayer.AddDisplayFunction(accumulated_name + "Fragment",
+                                   DisplayFragment);
+    } else if (key == "gfp_flags") {
+      comparator.AddCompareFunction(CompareGfpFlags);
+      displayer.AddDisplayFunction("GfpFlags", DisplayGfpFlags);
+    } else if (key == "pingpong") {
+      sort_comparator.AddCompareFunction(CompareCrossCpuAllocations);
+      displayer.AddDisplayFunction("Pingpong", DisplayCrossCpuAllocations);
+    } else {
+      LOG(ERROR) << "Unknown sort key for slab allocation: " << key;
+      return false;
+    }
+  }
+
+  // Create the helpers once, after all keys are processed. Doing this inside
+  // the loop rebuilt them for every key and appended |comparator| to
+  // |sort_comparator| on each iteration, so its functions ended up duplicated
+  // and placed in front of sort keys that were parsed later.
+  slab_sample_tree_builder_.reset(
+      new SlabSampleTreeBuilder(comparator, &thread_tree_));
+  slab_sample_tree_builder_->SetCallChainSampleOptions(
+      accumulate_callchain_, print_callgraph_, !callgraph_show_callee_, false);
+  sort_comparator.AddComparator(comparator);
+  slab_sample_tree_sorter_.reset(new SlabSampleTreeSorter(sort_comparator));
+  slab_sample_tree_displayer_.reset(new SlabSampleTreeDisplayer(displayer));
+  return true;
+}
+
+// Appends one EventAttrWithName to event_attrs_ for every entry of the record
+// file's attr section, naming it via GetEventNameByAttr().
+void KmemCommand::ReadEventAttrsFromRecordFile() {
+  for (const auto& file_attr : record_file_reader_->AttrSection()) {
+    event_attrs_.emplace_back();
+    EventAttrWithName& entry = event_attrs_.back();
+    entry.attr = *file_attr.attr;
+    entry.event_ids = file_attr.ids;
+    entry.name = GetEventNameByAttr(entry.attr);
+  }
+}
+
+// Reads the arch, cmdline and tracing data features of the record file.
+// A missing arch or cmdline feature keeps the current value. Returns false if
+// the recorded arch is unsupported or the tracing data section can't be read.
+bool KmemCommand::ReadFeaturesFromRecordFile() {
+  const std::string arch_name =
+      record_file_reader_->ReadFeatureString(PerfFileFormat::FEAT_ARCH);
+  if (!arch_name.empty()) {
+    record_file_arch_ = GetArchType(arch_name);
+    if (record_file_arch_ == ARCH_UNSUPPORTED) {
+      return false;
+    }
+  }
+
+  const std::vector<std::string> cmdline_args =
+      record_file_reader_->ReadCmdlineFeature();
+  if (!cmdline_args.empty()) {
+    record_cmdline_ = android::base::Join(cmdline_args, ' ');
+  }
+
+  if (!record_file_reader_->HasFeature(PerfFileFormat::FEAT_TRACING_DATA)) {
+    return true;
+  }
+  std::vector<char> tracing_bytes;
+  if (!record_file_reader_->ReadFeatureSection(
+          PerfFileFormat::FEAT_TRACING_DATA, &tracing_bytes)) {
+    return false;
+  }
+  ProcessTracingData(tracing_bytes);
+  return true;
+}
+
+// Feeds every record of the data section to ProcessRecord(), then fetches and
+// sorts the slab sample tree. Returns false if reading the data section
+// fails.
+bool KmemCommand::ReadSampleTreeFromRecordFile() {
+  auto on_record = [this](std::unique_ptr<Record> record) {
+    return ProcessRecord(std::move(record));
+  };
+  if (!record_file_reader_->ReadDataSection(on_record)) {
+    return false;
+  }
+  if (!use_slab_) {
+    return true;
+  }
+  slab_sample_tree_ = slab_sample_tree_builder_->GetSampleTree();
+  slab_sample_tree_sorter_->Sort(slab_sample_tree_.samples, print_callgraph_);
+  return true;
+}
+
+// Updates the thread tree with |record|, then hands sample records to the
+// slab sample tree builder and tracing data records to ProcessTracingData().
+// Always returns true.
+bool KmemCommand::ProcessRecord(std::unique_ptr<Record> record) {
+  thread_tree_.Update(*record);
+  const auto record_type = record->type();
+  if (record_type == PERF_RECORD_TRACING_DATA) {
+    const auto& tracing_record =
+        *static_cast<TracingDataRecord*>(record.get());
+    const std::vector<char> tracing_bytes(
+        tracing_record.data, tracing_record.data + tracing_record.data_size);
+    ProcessTracingData(tracing_bytes);
+  } else if (record_type == PERF_RECORD_SAMPLE && use_slab_) {
+    const auto& sample_record =
+        *static_cast<const SampleRecord*>(record.get());
+    slab_sample_tree_builder_->ProcessSampleRecord(sample_record);
+  }
+  return true;
+}
+
+// Parses |data| as tracing data. For every tracepoint attr the event name is
+// updated, and, when slab reporting is enabled, the field locations of the
+// known kmem allocation/free events are registered with the sample tree
+// builder under the attr's event ids.
+void KmemCommand::ProcessTracingData(const std::vector<char>& data) {
+  Tracing tracing(data);
+  for (auto& event : event_attrs_) {
+    if (event.attr.type != PERF_TYPE_TRACEPOINT) {
+      continue;
+    }
+    const uint64_t trace_event_id = event.attr.config;
+    event.name = tracing.GetTracingEventNameHavingId(trace_event_id);
+    TracingFormat format = tracing.GetTracingFormatHavingId(trace_event_id);
+    if (!use_slab_) {
+      continue;
+    }
+    const auto& event_name = format.name;
+    const bool is_alloc_event =
+        event_name == "kmalloc" || event_name == "kmem_cache_alloc" ||
+        event_name == "kmalloc_node" || event_name == "kmem_cache_alloc_node";
+    const bool is_free_event =
+        event_name == "kfree" || event_name == "kmem_cache_free";
+    if (is_alloc_event) {
+      SlabFormat slab_format;
+      slab_format.type = SlabFormat::KMEM_ALLOC;
+      format.GetField("call_site", slab_format.call_site);
+      format.GetField("ptr", slab_format.ptr);
+      format.GetField("bytes_req", slab_format.bytes_req);
+      format.GetField("bytes_alloc", slab_format.bytes_alloc);
+      format.GetField("gfp_flags", slab_format.gfp_flags);
+      slab_sample_tree_builder_->AddSlabFormat(event.event_ids, slab_format);
+    } else if (is_free_event) {
+      // Free events only provide the call site and the freed address.
+      SlabFormat slab_format;
+      slab_format.type = SlabFormat::KMEM_FREE;
+      format.GetField("call_site", slab_format.call_site);
+      format.GetField("ptr", slab_format.ptr);
+      slab_sample_tree_builder_->AddSlabFormat(event.event_ids, slab_format);
+    }
+  }
+}
+
+// Writes the report to report_filename_, or to stdout when no file name was
+// given. Returns false if the report file can't be opened.
+bool KmemCommand::PrintReport() {
+  FILE* report_fp = stdout;
+  // Closes the report file on return; stays empty when printing to stdout.
+  std::unique_ptr<FILE, decltype(&fclose)> file_closer(nullptr, fclose);
+  if (!report_filename_.empty()) {
+    report_fp = fopen(report_filename_.c_str(), "w");
+    if (report_fp == nullptr) {
+      PLOG(ERROR) << "failed to open " << report_filename_;
+      return false;
+    }
+    file_closer.reset(report_fp);
+  }
+  PrintReportContext(report_fp);
+  if (!use_slab_) {
+    return true;
+  }
+  fprintf(report_fp, "\n\n");
+  PrintSlabReportContext(report_fp);
+  slab_sample_tree_displayer_->DisplaySamples(
+      report_fp, slab_sample_tree_.samples, &slab_sample_tree_);
+  return true;
+}
+
+// Prints the recording command line (if known), the architecture and one
+// line per event attr to |fp|.
+void KmemCommand::PrintReportContext(FILE* fp) {
+  if (!record_cmdline_.empty()) {
+    fprintf(fp, "Cmdline: %s\n", record_cmdline_.c_str());
+  }
+  const std::string arch_name = GetArchString(record_file_arch_);
+  fprintf(fp, "Arch: %s\n", arch_name.c_str());
+  for (const EventAttrWithName& event : event_attrs_) {
+    fprintf(fp, "Event: %s (type %u, config %llu)\n", event.name.c_str(),
+            event.attr.type, event.attr.config);
+  }
+}
+
+// Prints the slab summary to |fp|: total requested and allocated bytes, the
+// fragment (allocated - requested) with its share of the allocated bytes,
+// the allocation and free counts, and the cross cpu count with its share of
+// the allocations. A percentage is reported as 0 when its divisor is zero.
+void KmemCommand::PrintSlabReportContext(FILE* fp) {
+  const SlabSampleTree& tree = slab_sample_tree_;
+  const uint64_t fragment =
+      tree.total_allocated_bytes - tree.total_requested_bytes;
+  const double fragment_percentage =
+      tree.total_allocated_bytes == 0
+          ? 0.0
+          : 100.0 * fragment / tree.total_allocated_bytes;
+  const double cross_cpu_percentage =
+      tree.nr_allocations == 0
+          ? 0.0
+          : 100.0 * tree.nr_cross_cpu_allocations / tree.nr_allocations;
+
+  fprintf(fp, "Slab allocation information:\n");
+  fprintf(fp, "Total requested bytes: %" PRIu64 "\n",
+          tree.total_requested_bytes);
+  fprintf(fp, "Total allocated bytes: %" PRIu64 "\n",
+          tree.total_allocated_bytes);
+  fprintf(fp, "Total fragment: %" PRIu64 ", %f%%\n", fragment,
+          fragment_percentage);
+  fprintf(fp, "Total allocations: %" PRIu64 "\n", tree.nr_allocations);
+  fprintf(fp, "Total frees: %" PRIu64 "\n", tree.nr_frees);
+  fprintf(fp, "Total cross cpu allocation/free: %" PRIu64 ", %f%%\n",
+          tree.nr_cross_cpu_allocations, cross_cpu_percentage);
+  fprintf(fp, "\n");
+}
+
+}  // namespace
+
+// Registers the "kmem" subcommand together with a factory that creates a new
+// KmemCommand on each call.
+void RegisterKmemCommand() {
+  auto create_kmem_command = []() -> std::unique_ptr<Command> {
+    return std::unique_ptr<Command>(new KmemCommand());
+  };
+  RegisterCommand("kmem", create_kmem_command);
+}
diff --git a/simpleperf/cmd_kmem_test.cpp b/simpleperf/cmd_kmem_test.cpp
new file mode 100644
index 0000000..dd18858
--- /dev/null
+++ b/simpleperf/cmd_kmem_test.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <android-base/file.h>
+#include <android-base/strings.h>
+#include <android-base/test_utils.h>
+
+#include <memory>
+
+#include "command.h"
+#include "environment.h"
+#include "event_selection_set.h"
+#include "get_test_data.h"
+#include "record.h"
+#include "record_file.h"
+#include "test_util.h"
+
+// Creates a fresh instance of the "kmem" command.
+static std::unique_ptr<Command> KmemCmd() {
+  std::unique_ptr<Command> kmem_command = CreateCommandInstance("kmem");
+  return kmem_command;
+}
+
+// Outcome of running `kmem report` in a test.
+struct ReportResult {
+  bool success;         // set to true by KmemReportRawFile() only after all of
+                        // its assertions passed
+  std::string content;  // the whole report text
+  std::vector<std::string> lines;  // the trimmed, non-empty report lines
+};
+
+// Runs `kmem report -i <perf_data> -o <temporary file>` followed by
+// |additional_args|, then stores the report text and its trimmed, non-empty
+// lines in |result|. result->success stays false if any assertion fails.
+static void KmemReportRawFile(const std::string& perf_data,
+                              const std::vector<std::string>& additional_args,
+                              ReportResult* result) {
+  result->success = false;
+  TemporaryFile tmp_file;
+  std::vector<std::string> args;
+  args.push_back("report");
+  args.push_back("-i");
+  args.push_back(perf_data);
+  args.push_back("-o");
+  args.push_back(tmp_file.path);
+  for (const auto& extra_arg : additional_args) {
+    args.push_back(extra_arg);
+  }
+  ASSERT_TRUE(KmemCmd()->Run(args));
+  ASSERT_TRUE(android::base::ReadFileToString(tmp_file.path, &result->content));
+  ASSERT_TRUE(!result->content.empty());
+  result->lines.clear();
+  for (const auto& raw_line : android::base::Split(result->content, "\n")) {
+    std::string trimmed = android::base::Trim(raw_line);
+    if (trimmed.empty()) {
+      continue;
+    }
+    result->lines.push_back(trimmed);
+  }
+  ASSERT_GE(result->lines.size(), 2u);
+  result->success = true;
+}
+
+// Same as KmemReportRawFile(), but |perf_data| names a test data file that is
+// resolved through GetTestData().
+static void KmemReportFile(const std::string& perf_data,
+                           const std::vector<std::string>& additional_args,
+                           ReportResult* result) {
+  const std::string test_data_path = GetTestData(perf_data);
+  KmemReportRawFile(test_data_path, additional_args, result);
+}
+
+#if defined(__linux__)
+
+// Runs `kmem record <v...> -o <file> sleep SLEEP_SEC` and returns its result.
+// The output goes to |output_file|, or to a temporary file that lives until
+// this function returns when |output_file| is nullptr.
+static bool RunKmemRecordCmd(std::vector<std::string> v,
+                             const char* output_file = nullptr) {
+  std::unique_ptr<TemporaryFile> tmpfile;
+  std::string out_file;
+  if (output_file == nullptr) {
+    tmpfile.reset(new TemporaryFile);
+    out_file = tmpfile->path;
+  } else {
+    out_file = output_file;
+  }
+  std::vector<std::string> record_args = {"record"};
+  record_args.insert(record_args.end(), v.begin(), v.end());
+  record_args.insert(record_args.end(), {"-o", out_file, "sleep", SLEEP_SEC});
+  return KmemCmd()->Run(record_args);
+}
+
+// Checks that `kmem record --slab` succeeds.
+// NOTE(review): TEST_IN_ROOT presumably runs its argument only with root
+// privilege; confirm against its definition.
+TEST(kmem_cmd, record_slab) {
+  TEST_IN_ROOT(ASSERT_TRUE(RunKmemRecordCmd({"--slab"})));
+}
+
+// Checks that recording succeeds with both spellings of frame pointer call
+// graph recording: -g and --call-graph fp.
+TEST(kmem_cmd, record_fp_callchain_sampling) {
+  TEST_IN_ROOT(ASSERT_TRUE(RunKmemRecordCmd({"--slab", "-g"})));
+  TEST_IN_ROOT(ASSERT_TRUE(RunKmemRecordCmd({"--slab", "--call-graph", "fp"})));
+}
+
+// Records slab information into a temporary file and checks that the file
+// can be reported.
+TEST(kmem_cmd, record_and_report) {
+  TemporaryFile tmp_file;
+  TEST_IN_ROOT({
+    ASSERT_TRUE(RunKmemRecordCmd({"--slab"}, tmp_file.path));
+    ReportResult result;
+    KmemReportRawFile(tmp_file.path, {}, &result);
+    ASSERT_TRUE(result.success);
+  });
+}
+
+// Records slab information with call graphs into a temporary file and checks
+// that the file can be reported with -g.
+TEST(kmem_cmd, record_and_report_callgraph) {
+  TemporaryFile tmp_file;
+  TEST_IN_ROOT({
+    ASSERT_TRUE(RunKmemRecordCmd({"--slab", "-g"}, tmp_file.path));
+    ReportResult result;
+    KmemReportRawFile(tmp_file.path, {"-g"}, &result);
+    ASSERT_TRUE(result.success);
+  });
+}
+
+#endif
+
+// Reports the checked-in test record file with default options and checks
+// that an event name and a caller symbol appear in the output.
+TEST(kmem_cmd, report) {
+  ReportResult result;
+  KmemReportFile(PERF_DATA_WITH_KMEM_SLAB_CALLGRAPH_RECORD, {}, &result);
+  ASSERT_TRUE(result.success);
+  ASSERT_NE(result.content.find("kmem:kmalloc"), std::string::npos);
+  ASSERT_NE(result.content.find("__alloc_skb"), std::string::npos);
+}
+
+// Reports with every supported --slab-sort key and checks that the columns
+// missing from the default key set (Ptr, GfpFlags) are printed.
+TEST(kmem_cmd, report_all_sort_options) {
+  ReportResult result;
+  KmemReportFile(
+      PERF_DATA_WITH_KMEM_SLAB_CALLGRAPH_RECORD,
+      {"--slab-sort",
+       "hit,caller,ptr,bytes_req,bytes_alloc,fragment,gfp_flags,pingpong"},
+      &result);
+  ASSERT_TRUE(result.success);
+  ASSERT_NE(result.content.find("Ptr"), std::string::npos);
+  ASSERT_NE(result.content.find("GfpFlags"), std::string::npos);
+}
+
+// Reports the test record file with -g and checks that, in addition to the
+// event name and a caller symbol, a symbol expected in the call graph output
+// (system_call_fastpath) is printed.
+TEST(kmem_cmd, report_callgraph) {
+  ReportResult result;
+  KmemReportFile(PERF_DATA_WITH_KMEM_SLAB_CALLGRAPH_RECORD, {"-g"}, &result);
+  ASSERT_TRUE(result.success);
+  ASSERT_NE(result.content.find("kmem:kmalloc"), std::string::npos);
+  ASSERT_NE(result.content.find("__alloc_skb"), std::string::npos);
+  ASSERT_NE(result.content.find("system_call_fastpath"), std::string::npos);
+}
diff --git a/simpleperf/cmd_list.cpp b/simpleperf/cmd_list.cpp
new file mode 100644
index 0000000..273a803
--- /dev/null
+++ b/simpleperf/cmd_list.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <map>
+#include <string>
+#include <vector>
+
+#include <android-base/logging.h>
+
+#include "command.h"
+#include "environment.h"
+#include "event_attr.h"
+#include "event_fd.h"
+#include "event_type.h"
+
+static void PrintEventTypesOfType(uint32_t type, const std::string& type_name,
+                                  const std::vector<EventType>& event_types) {
+  printf("List of %s:\n", type_name.c_str());
+  for (const auto& candidate : event_types) {
+    if (candidate.type != type) {
+      continue;
+    }
+    // Exclude kernel so events are listed even if perf_event_paranoid is 2.
+    perf_event_attr probe_attr = CreateDefaultPerfEventAttr(candidate);
+    probe_attr.exclude_kernel = 1;
+    if (IsEventAttrSupportedByKernel(probe_attr)) {
+      printf("  %s\n", candidate.name.c_str());
+    }
+  }
+  printf("\n");
+}
+
+class ListCommand : public Command {  // The `simpleperf list` command.
+ public:
+  ListCommand()
+      : Command("list", "list available event types",
+                "Usage: simpleperf list [hw|sw|cache|tracepoint]\n"
+                "    List all available perf events on this machine.\n") {
+  }
+  // Lists events of the categories named in args, or of all when args is empty.
+  bool Run(const std::vector<std::string>& args) override;
+};
+
+bool ListCommand::Run(const std::vector<std::string>& args) {
+  if (!CheckPerfEventLimit()) {
+    return false;
+  }
+
+  static std::map<std::string, std::pair<int, std::string>> type_map = {
+      {"hw", {PERF_TYPE_HARDWARE, "hardware events"}},
+      {"sw", {PERF_TYPE_SOFTWARE, "software events"}},
+      {"cache", {PERF_TYPE_HW_CACHE, "hw-cache events"}},
+      {"tracepoint", {PERF_TYPE_TRACEPOINT, "tracepoint events"}},
+  };
+
+  // Validate every requested category before printing anything. With no
+  // arguments, every category in type_map is listed, in key order.
+  std::vector<std::string> categories;
+  for (auto& arg : args) {
+    if (type_map.count(arg) == 0) {
+      LOG(ERROR) << "unknown event type category: " << arg << ", try using \"help list\"";
+      return false;
+    }
+    categories.push_back(arg);
+  }
+  if (args.empty()) {
+    for (auto& entry : type_map) {
+      categories.push_back(entry.first);
+    }
+  }
+
+  auto& event_types = GetAllEventTypes();
+
+  for (auto& category : categories) {
+    const auto& info = type_map.find(category)->second;
+    PrintEventTypesOfType(info.first, info.second, event_types);
+  }
+  return true;
+}
+
+void RegisterListCommand() {  // Registers a factory creating ListCommand under "list".
+  RegisterCommand("list", [] { return std::unique_ptr<Command>(new ListCommand); });
+}
diff --git a/simpleperf/cmd_list_test.cpp b/simpleperf/cmd_list_test.cpp
new file mode 100644
index 0000000..2bc6421
--- /dev/null
+++ b/simpleperf/cmd_list_test.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "command.h"
+
+class ListCommandTest : public ::testing::Test {  // Fixture holding a "list" command.
+ protected:
+  void SetUp() override {  // `override`: a signature mismatch now fails to compile.
+    list_cmd = CreateCommandInstance("list");
+    ASSERT_TRUE(list_cmd != nullptr);
+  }
+
+  std::unique_ptr<Command> list_cmd;  // Assigned in SetUp() for each test.
+};
+
+TEST_F(ListCommandTest, no_options) {  // Run() with no category arguments must succeed.
+  ASSERT_TRUE(list_cmd->Run({}));
+}
+
+TEST_F(ListCommandTest, one_option) {  // A single category argument must succeed.
+  ASSERT_TRUE(list_cmd->Run({"sw"}));
+}
+
+TEST_F(ListCommandTest, multiple_options) {  // Several category arguments must succeed.
+  ASSERT_TRUE(list_cmd->Run({"hw", "tracepoint"}));
+}
diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp
new file mode 100644
index 0000000..7d65341
--- /dev/null
+++ b/simpleperf/cmd_record.cpp
@@ -0,0 +1,1121 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <libgen.h>
+#include <signal.h>
+#include <sys/prctl.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include <android-base/logging.h>
+#include <android-base/file.h>
+#include <android-base/parseint.h>
+#include <android-base/strings.h>
+
+#include "command.h"
+#include "dwarf_unwind.h"
+#include "environment.h"
+#include "event_selection_set.h"
+#include "event_type.h"
+#include "IOEventLoop.h"
+#include "read_apk.h"
+#include "read_elf.h"
+#include "record.h"
+#include "record_file.h"
+#include "thread_tree.h"
+#include "tracing.h"
+#include "utils.h"
+#include "workload.h"
+
+static std::string default_measured_event_type = "cpu-cycles";  // Used when no event is selected.
+
+static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = {  // -j filter names.
+    {"u", PERF_SAMPLE_BRANCH_USER},
+    {"k", PERF_SAMPLE_BRANCH_KERNEL},
+    {"any", PERF_SAMPLE_BRANCH_ANY},
+    {"any_call", PERF_SAMPLE_BRANCH_ANY_CALL},
+    {"any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN},
+    {"ind_call", PERF_SAMPLE_BRANCH_IND_CALL},
+};
+
+constexpr uint64_t DEFAULT_SAMPLE_FREQ_FOR_NONTRACEPOINT_EVENT = 4000;
+constexpr uint64_t DEFAULT_SAMPLE_PERIOD_FOR_TRACEPOINT_EVENT = 1;
+
+// The max size of records dumped by kernel is 65535, and dump stack size
+// should be a multiple of 8, so MAX_DUMP_STACK_SIZE is 65528.
+constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528;
+
+// The max allowed pages in mapped buffer is decided by rlimit(RLIMIT_MEMLOCK).
+// Here 1024 is a desired value for pages in mapped buffer. If mapped
+// successfully, the buffer size = 1024 * 4K (page size) = 4M.
+constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024;
+
+constexpr double PERIOD_TO_DETECT_CPU_HOTPLUG_EVENTS_IN_SEC = 0.5;  // Period of the hotplug check.
+
+class RecordCommand : public Command {  // The `simpleperf record` command.
+ public:
+  RecordCommand()
+      : Command(
+            "record", "record sampling info in perf.data",
+            // clang-format off
+"Usage: simpleperf record [options] [command [command-args]]\n"
+"       Gather sampling information of running [command]. And -a/-p/-t option\n"
+"       can be used to change target of sampling information.\n"
+"-a     System-wide collection.\n"
+"-b     Enable take branch stack sampling. Same as '-j any'\n"
+"-c count     Set event sample period. It means recording one sample when\n"
+"             [count] events happen. Can't be used with -f/-F option.\n"
+"             For tracepoint events, the default option is -c 1.\n"
+"--call-graph fp | dwarf[,<dump_stack_size>]\n"
+"             Enable call graph recording. Use frame pointer or dwarf debug\n"
+"             frame as the method to parse call graph in stack.\n"
+"             Default is dwarf,65528.\n"
+"--cpu cpu_item1,cpu_item2,...\n"
+"             Collect samples only on the selected cpus. cpu_item can be cpu\n"
+"             number like 1, or cpu range like 0-3.\n"
+"--dump-symbols  Dump symbols in perf.data. By default perf.data doesn't contain\n"
+"                symbol information for samples. This option is used when there\n"
+"                is no symbol information in report environment.\n"
+"--duration time_in_sec  Monitor for time_in_sec seconds instead of running\n"
+"                        [command]. Here time_in_sec may be any positive\n"
+"                        floating point number.\n"
+"-e event1[:modifier1],event2[:modifier2],...\n"
+"             Select the event list to sample. Use `simpleperf list` to find\n"
+"             all possible event names. Modifiers can be added to define how\n"
+"             the event should be monitored.\n"
+"             Possible modifiers are:\n"
+"                u - monitor user space events only\n"
+"                k - monitor kernel space events only\n"
+"-f freq      Set event sample frequency. It means recording at most [freq]\n"
+"             samples every second. For non-tracepoint events, the default\n"
+"             option is -f 4000.\n"
+"-F freq      Same as '-f freq'.\n"
+"-g           Same as '--call-graph dwarf'.\n"
+"--group event1[:modifier],event2[:modifier2],...\n"
+"             Similar to -e option. But events specified in the same --group\n"
+"             option are monitored as a group, and scheduled in and out at the\n"
+"             same time.\n"
+"-j branch_filter1,branch_filter2,...\n"
+"             Enable taken branch stack sampling. Each sample captures a series\n"
+"             of consecutive taken branches.\n"
+"             The following filters are defined:\n"
+"                any: any type of branch\n"
+"                any_call: any function call or system call\n"
+"                any_ret: any function return or system call return\n"
+"                ind_call: any indirect branch\n"
+"                u: only when the branch target is at the user level\n"
+"                k: only when the branch target is in the kernel\n"
+"             This option requires at least one branch type among any, any_call,\n"
+"             any_ret, ind_call.\n"
+"-m mmap_pages   Set the size of the buffer used to receiving sample data from\n"
+"                the kernel. It should be a power of 2. If not set, the max\n"
+"                possible value <= 1024 will be used.\n"
+"--no-dump-kernel-symbols  Don't dump kernel symbols in perf.data. By default\n"
+"                          kernel symbols will be dumped when needed.\n"
+"--no-inherit  Don't record created child threads/processes.\n"
+"--no-unwind   If `--call-graph dwarf` option is used, then the user's stack\n"
+"              will be unwound by default. Use this option to disable the\n"
+"              unwinding of the user's stack.\n"
+"-o record_file_name    Set record file name, default is perf.data.\n"
+"-p pid1,pid2,...       Record events on existing processes. Mutually exclusive\n"
+"                       with -a.\n"
+"--post-unwind  If `--call-graph dwarf` option is used, then the user's stack\n"
+"               will be unwound while recording by default. But it may lose\n"
+"               records as stacking unwinding can be time consuming. Use this\n"
+"               option to unwind the user's stack after recording.\n"
+"--symfs <dir>    Look for files with symbols relative to this directory.\n"
+"                 This option is used to provide files with symbol table and\n"
+"                 debug information, which are used by --dump-symbols and -g.\n"
+"-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n"
+            // clang-format on
+            ),
+        use_sample_freq_(false),
+        sample_freq_(0),
+        use_sample_period_(false),
+        sample_period_(0),
+        system_wide_collection_(false),
+        branch_sampling_(0),
+        fp_callchain_sampling_(false),
+        dwarf_callchain_sampling_(false),
+        dump_stack_size_in_dwarf_sampling_(MAX_DUMP_STACK_SIZE),
+        unwind_dwarf_callchain_(true),
+        post_unwind_(false),
+        child_inherit_(true),
+        duration_in_sec_(0),
+        can_dump_kernel_symbols_(true),
+        dump_symbols_(false),
+        mmap_page_range_(std::make_pair(1, DESIRED_PAGES_IN_MAPPED_BUFFER)),
+        record_filename_("perf.data"),
+        sample_record_count_(0),
+        lost_record_count_(0) {
+    // Die if parent exits.
+    prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
+  }
+  // Parses args, records into record_filename_; returns false on any failure.
+  bool Run(const std::vector<std::string>& args) override;
+
+ private:
+  bool ParseOptions(const std::vector<std::string>& args,
+                    std::vector<std::string>* non_option_args);
+  bool SetEventSelectionFlags();
+  bool CreateAndInitRecordFile();
+  std::unique_ptr<RecordFileWriter> CreateRecordFile(
+      const std::string& filename);
+  bool DumpKernelSymbol();
+  bool DumpTracingData();
+  bool DumpKernelAndModuleMmaps(const perf_event_attr& attr, uint64_t event_id);
+  bool DumpThreadCommAndMmaps(const perf_event_attr& attr, uint64_t event_id,
+                              bool all_threads,
+                              const std::vector<pid_t>& selected_threads);
+  bool ProcessRecord(Record* record);
+  bool DumpSymbolForRecord(const SampleRecord& r, bool for_callchain);
+  void UpdateRecordForEmbeddedElfPath(Record* record);
+  bool UnwindRecord(Record* record);
+  bool PostUnwind(const std::vector<std::string>& args);
+  bool DumpAdditionalFeatures(const std::vector<std::string>& args);
+  bool DumpBuildIdFeature();
+  void CollectHitFileInfo(Record* record);
+  bool DetectCpuHotplugEvents();
+
+  bool use_sample_freq_;  // Set by -f/-F.
+  uint64_t sample_freq_;  // Sample 'sample_freq_' times per second.
+  bool use_sample_period_;  // Set by -c.
+  uint64_t sample_period_;  // Sample once when 'sample_period_' events occur.
+
+  bool system_wide_collection_;  // Set by -a.
+  uint64_t branch_sampling_;  // PERF_SAMPLE_BRANCH_* bits from -b/-j.
+  bool fp_callchain_sampling_;
+  bool dwarf_callchain_sampling_;
+  uint32_t dump_stack_size_in_dwarf_sampling_;
+  bool unwind_dwarf_callchain_;  // Cleared by --no-unwind, or when dwarf is off.
+  bool post_unwind_;  // Set by --post-unwind.
+  bool child_inherit_;  // Cleared by --no-inherit.
+  double duration_in_sec_;  // 0 unless --duration was given.
+  bool can_dump_kernel_symbols_;
+  bool dump_symbols_;
+  std::vector<pid_t> monitored_threads_;  // From -p/-t, or the workload's pid.
+  std::vector<int> cpus_;  // From --cpu.
+  EventSelectionSet event_selection_set_;
+
+  std::pair<size_t, size_t> mmap_page_range_;  // (first, second) go to MmapEventFiles().
+
+  ThreadTree thread_tree_;
+  std::string record_filename_;  // Set by -o.
+  std::unique_ptr<RecordFileWriter> record_file_writer_;
+
+  std::set<std::string> hit_kernel_modules_;
+  std::set<std::string> hit_user_files_;
+
+  uint64_t sample_record_count_;
+  uint64_t lost_record_count_;
+
+  std::vector<int> online_cpus_;  // Result of GetOnlineCpus(), taken in Run().
+};
+
+bool RecordCommand::Run(const std::vector<std::string>& args) {
+  if (!CheckPerfEventLimit()) {
+    return false;
+  }
+  // The numbered steps below run in order; a failing step returns false.
+  // 1. Parse options, and use default measured event type if not given.
+  std::vector<std::string> workload_args;
+  if (!ParseOptions(args, &workload_args)) {
+    return false;
+  }
+  if (event_selection_set_.empty()) {
+    if (!event_selection_set_.AddEventType(default_measured_event_type)) {
+      return false;
+    }
+  }
+  if (!SetEventSelectionFlags()) {
+    return false;
+  }
+
+  // 2. Create workload.
+  std::unique_ptr<Workload> workload;
+  if (!workload_args.empty()) {
+    workload = Workload::CreateWorkload(workload_args);
+    if (workload == nullptr) {
+      return false;
+    }
+  }
+  if (!system_wide_collection_ && monitored_threads_.empty()) {
+    if (workload != nullptr) {
+      monitored_threads_.push_back(workload->GetPid());
+      event_selection_set_.SetEnableOnExec(true);
+    } else {
+      LOG(ERROR)
+          << "No threads to monitor. Try `simpleperf help record` for help";
+      return false;
+    }
+  }
+
+  // 3. Open perf_event_files, create mapped buffers for perf_event_files.
+  if (system_wide_collection_) {
+    if (!event_selection_set_.OpenEventFilesForCpus(cpus_)) {
+      return false;
+    }
+  } else {
+    if (!event_selection_set_.OpenEventFilesForThreadsOnCpus(monitored_threads_,
+                                                             cpus_)) {
+      return false;
+    }
+  }
+  if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first,
+                                           mmap_page_range_.second)) {
+    return false;
+  }
+
+  // 4. Create perf.data.
+  if (!CreateAndInitRecordFile()) {
+    return false;
+  }
+
+  // 5. Create IOEventLoop and add read/signal/periodic Events.
+  IOEventLoop loop;
+  auto callback =
+      std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1);
+  if (!event_selection_set_.PrepareToReadMmapEventData(loop, callback)) {
+    return false;
+  }
+  if (!loop.AddSignalEvents({SIGCHLD, SIGINT, SIGTERM},
+                            [&]() { return loop.ExitLoop(); })) {
+    return false;
+  }
+  if (duration_in_sec_ != 0) {  // Nonzero only when --duration was given.
+    if (!loop.AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
+                               [&]() { return loop.ExitLoop(); })) {
+      return false;
+    }
+  }
+  online_cpus_ = GetOnlineCpus();
+  if (!loop.AddPeriodicEvent(
+          SecondToTimeval(PERIOD_TO_DETECT_CPU_HOTPLUG_EVENTS_IN_SEC),
+          [&]() { return DetectCpuHotplugEvents(); })) {
+    return false;
+  }
+
+  // 6. Write records in mapped buffers of perf_event_files to output file while
+  //    workload is running.
+  if (workload != nullptr && !workload->Start()) {
+    return false;
+  }
+  if (!loop.RunLoop()) {
+    return false;
+  }
+  if (!event_selection_set_.FinishReadMmapEventData()) {
+    return false;
+  }
+  if (!record_file_writer_->SortDataSection()) {
+    return false;
+  }
+
+  // 7. Dump additional features, and close record file.
+  if (!DumpAdditionalFeatures(args)) {
+    return false;
+  }
+  if (!record_file_writer_->Close()) {
+    return false;
+  }
+
+  // 8. Unwind dwarf callchain.
+  if (post_unwind_) {
+    if (!PostUnwind(args)) {
+      return false;
+    }
+  }
+
+  // 9. Show brief record result.
+  LOG(INFO) << "Samples recorded: " << sample_record_count_
+            << ". Samples lost: " << lost_record_count_ << ".";
+  if (sample_record_count_ + lost_record_count_ != 0) {
+    double lost_percent = static_cast<double>(lost_record_count_) /
+                          (lost_record_count_ + sample_record_count_);
+    constexpr double LOST_PERCENT_WARNING_BAR = 0.1;  // A fraction: warn at >= 10% lost.
+    if (lost_percent >= LOST_PERCENT_WARNING_BAR) {
+      LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, "
+                   << "consider increasing mmap_pages(-m), "
+                   << "or decreasing sample frequency(-f), "
+                   << "or increasing sample period(-c).";
+    }
+  }
+  return true;
+}
+
+bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
+                                 std::vector<std::string>* non_option_args) {
+  std::set<pid_t> tid_set;  // Threads collected from -p and -t.
+  size_t i;  // Outlives the loop: afterwards it indexes the first workload arg.
+  for (i = 0; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) {
+    if (args[i] == "-a") {
+      system_wide_collection_ = true;
+    } else if (args[i] == "-b") {
+      branch_sampling_ = branch_sampling_type_map["any"];
+    } else if (args[i] == "-c") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      char* endptr;
+      sample_period_ = strtoull(args[i].c_str(), &endptr, 0);
+      if (*endptr != '\0' || sample_period_ == 0) {
+        LOG(ERROR) << "Invalid sample period: '" << args[i] << "'";
+        return false;
+      }
+      use_sample_period_ = true;
+    } else if (args[i] == "--call-graph") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      std::vector<std::string> strs = android::base::Split(args[i], ",");
+      if (strs[0] == "fp") {
+        fp_callchain_sampling_ = true;
+        dwarf_callchain_sampling_ = false;
+      } else if (strs[0] == "dwarf") {
+        fp_callchain_sampling_ = false;
+        dwarf_callchain_sampling_ = true;
+        if (strs.size() > 1) {
+          char* endptr;
+          uint64_t size = strtoull(strs[1].c_str(), &endptr, 0);
+          if (strs[1].empty() || *endptr != '\0' || size > UINT_MAX) {
+            LOG(ERROR) << "invalid dump stack size in --call-graph option: "
+                       << strs[1];
+            return false;
+          }
+          if ((size & 7) != 0) {
+            LOG(ERROR) << "dump stack size " << size
+                       << " is not 8-byte aligned.";
+            return false;
+          }
+          if (size > MAX_DUMP_STACK_SIZE) {  // The max itself (the default) is allowed.
+            LOG(ERROR) << "dump stack size " << size
+                       << " is bigger than max allowed size "
+                       << MAX_DUMP_STACK_SIZE << ".";
+            return false;
+          }
+          dump_stack_size_in_dwarf_sampling_ = static_cast<uint32_t>(size);
+        }
+      } else {
+        LOG(ERROR) << "unexpected argument for --call-graph option: "
+                   << args[i];
+        return false;
+      }
+    } else if (args[i] == "--cpu") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      cpus_ = GetCpusFromString(args[i]);
+    } else if (args[i] == "--dump-symbols") {
+      dump_symbols_ = true;
+    } else if (args[i] == "--duration") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      errno = 0;
+      char* endptr;
+      duration_in_sec_ = strtod(args[i].c_str(), &endptr);
+      if (duration_in_sec_ <= 0 || *endptr != '\0' || errno == ERANGE) {
+        LOG(ERROR) << "Invalid duration: " << args[i].c_str();
+        return false;
+      }
+    } else if (args[i] == "-e") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      std::vector<std::string> event_types = android::base::Split(args[i], ",");
+      for (auto& event_type : event_types) {
+        if (!event_selection_set_.AddEventType(event_type)) {
+          return false;
+        }
+      }
+    } else if (args[i] == "-f" || args[i] == "-F") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      if (!android::base::ParseUint(args[i].c_str(), &sample_freq_)) {
+        LOG(ERROR) << "Invalid sample frequency: " << args[i];
+        return false;
+      }
+      if (!CheckSampleFrequency(sample_freq_)) {
+        return false;
+      }
+      use_sample_freq_ = true;
+    } else if (args[i] == "-g") {
+      fp_callchain_sampling_ = false;
+      dwarf_callchain_sampling_ = true;
+    } else if (args[i] == "--group") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      std::vector<std::string> event_types = android::base::Split(args[i], ",");
+      if (!event_selection_set_.AddEventGroup(event_types)) {
+        return false;
+      }
+    } else if (args[i] == "-j") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      std::vector<std::string> branch_sampling_types =
+          android::base::Split(args[i], ",");
+      for (auto& type : branch_sampling_types) {
+        auto it = branch_sampling_type_map.find(type);
+        if (it == branch_sampling_type_map.end()) {
+          LOG(ERROR) << "unrecognized branch sampling filter: " << type;
+          return false;
+        }
+        branch_sampling_ |= it->second;
+      }
+    } else if (args[i] == "-m") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      char* endptr;
+      uint64_t pages = strtoull(args[i].c_str(), &endptr, 0);
+      if (*endptr != '\0' || !IsPowerOfTwo(pages)) {
+        LOG(ERROR) << "Invalid mmap_pages: '" << args[i] << "'";
+        return false;
+      }
+      mmap_page_range_.first = mmap_page_range_.second = pages;
+    } else if (args[i] == "--no-dump-kernel-symbols") {
+      can_dump_kernel_symbols_ = false;
+    } else if (args[i] == "--no-inherit") {
+      child_inherit_ = false;
+    } else if (args[i] == "--no-unwind") {
+      unwind_dwarf_callchain_ = false;
+    } else if (args[i] == "-o") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      record_filename_ = args[i];
+    } else if (args[i] == "-p") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      if (!GetValidThreadsFromProcessString(args[i], &tid_set)) {
+        return false;
+      }
+    } else if (args[i] == "--post-unwind") {
+      post_unwind_ = true;
+    } else if (args[i] == "--symfs") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      if (!Dso::SetSymFsDir(args[i])) {
+        return false;
+      }
+    } else if (args[i] == "-t") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      if (!GetValidThreadsFromThreadString(args[i], &tid_set)) {
+        return false;
+      }
+    } else {
+      ReportUnknownOption(args, i);
+      return false;
+    }
+  }
+
+  if (use_sample_freq_ && use_sample_period_) {
+    LOG(ERROR) << "-f option can't be used with -c option.";
+    return false;
+  }
+
+  if (!dwarf_callchain_sampling_) {
+    if (!unwind_dwarf_callchain_) {
+      LOG(ERROR)
+          << "--no-unwind is only used with `--call-graph dwarf` option.";
+      return false;
+    }
+    unwind_dwarf_callchain_ = false;  // Nothing to unwind without dwarf call chains.
+  }
+  if (post_unwind_) {
+    if (!dwarf_callchain_sampling_) {
+      LOG(ERROR)
+          << "--post-unwind is only used with `--call-graph dwarf` option.";
+      return false;
+    }
+    if (!unwind_dwarf_callchain_) {
+      LOG(ERROR) << "--post-unwind can't be used with `--no-unwind` option.";
+      return false;
+    }
+  }
+
+  monitored_threads_.insert(monitored_threads_.end(), tid_set.begin(),
+                            tid_set.end());
+  if (system_wide_collection_ && !monitored_threads_.empty()) {
+    LOG(ERROR) << "Record system wide and existing processes/threads can't be "
+                  "used at the same time.";
+    return false;
+  }
+
+  if (system_wide_collection_ && !IsRoot()) {
+    LOG(ERROR) << "System wide profiling needs root privilege.";
+    return false;
+  }
+
+  if (dump_symbols_ && can_dump_kernel_symbols_) {
+    // No need to dump kernel symbols as we will dump all required symbols.
+    can_dump_kernel_symbols_ = false;
+  }
+
+  non_option_args->clear();  // Everything from args[i] on is the workload command.
+  for (; i < args.size(); ++i) {
+    non_option_args->push_back(args[i]);
+  }
+  return true;
+}
+
+bool RecordCommand::SetEventSelectionFlags() {
+  // An explicit -f/-c wins; else tracepoints use a period, others a frequency.
+  for (const auto& event_group : event_selection_set_.groups()) {
+    for (const auto& sel : event_group) {
+      const bool is_tracepoint =
+          sel.event_type_modifier.event_type.type == PERF_TYPE_TRACEPOINT;
+      if (use_sample_freq_) {
+        event_selection_set_.SetSampleFreq(sel, sample_freq_);
+      } else if (use_sample_period_) {
+        event_selection_set_.SetSamplePeriod(sel, sample_period_);
+      } else if (is_tracepoint) {
+        event_selection_set_.SetSamplePeriod(
+            sel, DEFAULT_SAMPLE_PERIOD_FOR_TRACEPOINT_EVENT);
+      } else {
+        event_selection_set_.SetSampleFreq(
+            sel, DEFAULT_SAMPLE_FREQ_FOR_NONTRACEPOINT_EVENT);
+      }
+    }
+  }
+  event_selection_set_.SampleIdAll();
+  if (!event_selection_set_.SetBranchSampling(branch_sampling_)) {
+    return false;
+  }
+  // Frame-pointer call chains are checked first; dwarf is the alternative.
+  if (fp_callchain_sampling_) {
+    event_selection_set_.EnableFpCallChainSampling();
+  } else if (dwarf_callchain_sampling_ &&
+             !event_selection_set_.EnableDwarfCallChainSampling(
+                 dump_stack_size_in_dwarf_sampling_)) {
+    return false;
+  }
+  event_selection_set_.SetInherit(child_inherit_);
+  // When unwinding while recording, records are used before being sorted.
+  // With a low watermark, records are almost sorted when read from kernel.
+  if (dwarf_callchain_sampling_ && unwind_dwarf_callchain_ && !post_unwind_) {
+    event_selection_set_.SetLowWatermark();
+  }
+  return true;
+}
+
+bool RecordCommand::CreateAndInitRecordFile() {
+  record_file_writer_ = CreateRecordFile(record_filename_);
+  if (record_file_writer_ == nullptr) {
+    return false;
+  }
+
+  // The mmap and comm records dumped below all carry the attr of the first
+  // event selection and the id of its first event file.
+  const EventSelection& first = event_selection_set_.groups()[0][0];
+  const perf_event_attr& attr = first.event_attr;
+  const uint64_t event_id = first.event_fds[0]->Id();
+  // Dump in a fixed order and stop at the first failure: kernel symbols,
+  // tracing data, kernel/module mmaps, then thread comms and mmaps.
+  if (!DumpKernelSymbol()) {
+    return false;
+  }
+  if (!DumpTracingData()) {
+    return false;
+  }
+  if (!DumpKernelAndModuleMmaps(attr, event_id)) {
+    return false;
+  }
+  return DumpThreadCommAndMmaps(attr, event_id, system_wide_collection_,
+                                monitored_threads_);
+}
+
+std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile(
+    const std::string& filename) {
+  std::unique_ptr<RecordFileWriter> writer =
+      RecordFileWriter::CreateInstance(filename);
+  if (writer == nullptr) {
+    return nullptr;
+  }
+
+  // Pair each selected event's attr with the ids of all its event files.
+  std::vector<AttrWithId> attrs_with_ids;
+  for (const auto& event_group : event_selection_set_.groups()) {
+    for (const auto& sel : event_group) {
+      AttrWithId entry;
+      entry.attr = &sel.event_attr;
+      CHECK(entry.attr != nullptr);
+      for (const auto& event_fd : sel.event_fds) {
+        entry.ids.push_back(event_fd->Id());
+      }
+      attrs_with_ids.push_back(entry);
+    }
+  }
+  if (writer->WriteAttrSection(attrs_with_ids)) {
+    return writer;
+  }
+  return nullptr;
+}
+
+bool RecordCommand::DumpKernelSymbol() {
+  if (!can_dump_kernel_symbols_) {
+    return true;
+  }
+  // /proc/kallsyms is only read when at least one selected event does not
+  // exclude the kernel; otherwise the record is written with empty content.
+  bool monitors_kernel = false;
+  for (const auto& event_group : event_selection_set_.groups()) {
+    for (const auto& sel : event_group) {
+      if (!sel.event_type_modifier.exclude_kernel) {
+        monitors_kernel = true;
+      }
+    }
+  }
+  std::string kallsyms;
+  if (monitors_kernel && CheckKernelSymbolAddresses() &&
+      !android::base::ReadFileToString("/proc/kallsyms", &kallsyms)) {
+    PLOG(ERROR) << "failed to read /proc/kallsyms";
+    return false;
+  }
+
+  KernelSymbolRecord symbol_record(kallsyms);
+  return ProcessRecord(&symbol_record);
+}
+
+bool RecordCommand::DumpTracingData() {
+  // Collect the event types of all selected tracepoint events.
+  std::vector<const EventType*> tracepoint_types;
+  for (const auto& event_group : event_selection_set_.groups()) {
+    for (const auto& sel : event_group) {
+      const auto& event_type = sel.event_type_modifier.event_type;
+      if (event_type.type == PERF_TYPE_TRACEPOINT) {
+        tracepoint_types.push_back(&event_type);
+      }
+    }
+  }
+
+  if (tracepoint_types.empty()) {
+    return true;  // No need to dump tracing data.
+  }
+
+  std::vector<char> tracing_data;
+  if (!GetTracingData(tracepoint_types, &tracing_data)) {
+    return false;
+  }
+  // Wrap the tracing data in a record and hand it to ProcessRecord().
+  TracingDataRecord tracing_record(tracing_data);
+  return ProcessRecord(&tracing_record);
+}
+
+bool RecordCommand::DumpKernelAndModuleMmaps(const perf_event_attr& attr,
+                                             uint64_t event_id) {
+  KernelMmap kernel;
+  std::vector<KernelMmap> modules;
+  GetKernelAndModuleMmaps(&kernel, &modules);
+
+  // Builds one mmap record for `map` and passes it to ProcessRecord().
+  auto dump_mmap = [&](KernelMmap& map) {
+    MmapRecord record(attr, true, UINT_MAX, 0, map.start_addr, map.len, 0,
+                      map.filepath, event_id);
+    return ProcessRecord(&record);
+  };
+
+  // Kernel first, then each module; stop at the first failure.
+  bool ok = dump_mmap(kernel);
+  for (size_t idx = 0; ok && idx < modules.size(); ++idx) {
+    ok = dump_mmap(modules[idx]);
+  }
+  return ok;
+}
+
+// Synthesizes records for threads that already exist when recording starts:
+// a comm record plus executable mmap records for each dumped process, then
+// a fork and a comm record for each dumped non-main thread. With
+// |all_threads| everything GetThreadComms() reports is dumped; otherwise
+// only |selected_threads| and the processes they belong to.
+bool RecordCommand::DumpThreadCommAndMmaps(
+    const perf_event_attr& attr, uint64_t event_id, bool all_threads,
+    const std::vector<pid_t>& selected_threads) {
+  std::vector<ThreadComm> thread_comms;
+  if (!GetThreadComms(&thread_comms)) {
+    return false;
+  }
+  // Selected threads, and the processes owning at least one of them.
+  const std::set<pid_t> wanted_tids(selected_threads.begin(),
+                                    selected_threads.end());
+  std::set<pid_t> wanted_pids;
+  for (const auto& entry : thread_comms) {
+    if (wanted_tids.count(entry.tid) != 0) {
+      wanted_pids.insert(entry.pid);
+    }
+  }
+
+  // First pass: main threads (pid == tid) stand for their process.
+  for (auto& entry : thread_comms) {
+    const bool is_main_thread = (entry.pid == entry.tid);
+    if (!is_main_thread ||
+        (!all_threads && wanted_pids.count(entry.pid) == 0)) {
+      continue;
+    }
+    CommRecord comm_record(attr, entry.pid, entry.tid, entry.comm, event_id);
+    if (!ProcessRecord(&comm_record)) {
+      return false;
+    }
+    std::vector<ThreadMmap> maps;
+    if (!GetThreadMmapsInProcess(entry.pid, &maps)) {
+      continue;  // The thread may exit before we get its info.
+    }
+    for (auto& map : maps) {
+      if (map.executable == 0) {
+        continue;  // No need to dump non-executable mmap info.
+      }
+      MmapRecord mmap_record(attr, false, entry.pid, entry.tid, map.start_addr,
+                             map.len, map.pgoff, map.name, event_id);
+      if (!ProcessRecord(&mmap_record)) {
+        return false;
+      }
+    }
+  }
+
+  // Second pass: every other thread gets a fork record naming its process
+  // id, followed by a comm record.
+  for (auto& entry : thread_comms) {
+    if (entry.pid == entry.tid) {
+      continue;
+    }
+    if (!all_threads && wanted_tids.count(entry.tid) == 0) {
+      continue;
+    }
+    ForkRecord fork_record(attr, entry.pid, entry.tid, entry.pid, entry.pid,
+                           event_id);
+    if (!ProcessRecord(&fork_record)) {
+      return false;
+    }
+    CommRecord comm_record(attr, entry.pid, entry.tid, entry.comm, event_id);
+    if (!ProcessRecord(&comm_record)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Runs a record through every per-record step and then writes it out: APK
+// path rewriting, thread tree update, hit-file bookkeeping, in-place dwarf
+// unwinding (unless deferred), sample/lost counting and symbol dumping.
+bool RecordCommand::ProcessRecord(Record* record) {
+  UpdateRecordForEmbeddedElfPath(record);
+  thread_tree_.Update(*record);
+  CollectHitFileInfo(record);
+  const bool unwind_now = unwind_dwarf_callchain_ && !post_unwind_;
+  if (unwind_now && !UnwindRecord(record)) {
+    return false;
+  }
+  const auto record_type = record->type();
+  if (record_type == PERF_RECORD_LOST) {
+    lost_record_count_ += static_cast<LostRecord*>(record)->lost;
+  } else if (record_type == PERF_RECORD_SAMPLE) {
+    sample_record_count_++;
+    if (dump_symbols_) {
+      auto& sample = *static_cast<SampleRecord*>(record);
+      if (!DumpSymbolForRecord(sample, false)) {
+        return false;
+      }
+      if (fp_callchain_sampling_ && !DumpSymbolForRecord(sample, true)) {
+        return false;
+      }
+    }
+  }
+  return record_file_writer_->WriteRecord(*record);
+}
+
+// Writes the not-yet-dumped dso/symbol records for the sample ip of |r|, or
+// for its callchain ips, where PERF_CONTEXT_* markers pick kernel/user space.
+bool RecordCommand::DumpSymbolForRecord(const SampleRecord& r,
+                                        bool for_callchain) {
+  const ThreadEntry* thread =
+      thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
+  uint64_t ip_nr = for_callchain ? r.callchain_data.ip_nr : 1;
+  const uint64_t* ips = for_callchain ? r.callchain_data.ips : &r.ip_data.ip;
+  bool in_kernel = r.InKernel();
+  for (uint64_t i = 0; i < ip_nr; ++i) {
+    if (for_callchain && ips[i] >= PERF_CONTEXT_MAX) {
+      if (ips[i] == PERF_CONTEXT_KERNEL) in_kernel = true;
+      if (ips[i] == PERF_CONTEXT_USER) in_kernel = false;
+      continue;  // A context marker is not an instruction address.
+    }
+    const MapEntry* map = thread_tree_.FindMap(thread, ips[i], in_kernel);
+    const Symbol* symbol = thread_tree_.FindSymbol(map, ips[i], nullptr);
+    if (symbol == thread_tree_.UnknownSymbol()) {
+      continue;
+    }
+    if (!map->dso->HasDumped()) {
+      map->dso->SetDumped();
+      DsoRecord dso_record(map->dso->type(), map->dso->id(), map->dso->Path(),
+                           map->dso->MinVirtualAddress());
+      if (!record_file_writer_->WriteRecord(dso_record)) return false;
+    }
+    if (!symbol->HasDumped()) {
+      symbol->SetDumped();
+      SymbolRecord symbol_record(symbol->addr, symbol->len, symbol->Name(),
+                                 map->dso->id());
+      if (!record_file_writer_->WriteRecord(symbol_record)) return false;
+    }
+  }
+  return true;
+}
+
+// For a shared library "foobar.so" embedded inside an APK, rewrites the
+// original MMAP from ["path.apk" offset=X] to ["path.apk!/foobar.so"
+// offset=W] so that the library name is explicit. W is relative to the
+// start of foobar.so, not to the start of path.apk. The rewrite happens at
+// record time rather than on the host at report time, so the correct
+// library name is reported even if the APK is not present on the host.
+template <class RecordType>
+void UpdateMmapRecordForEmbeddedElfPath(RecordType* record) {
+  RecordType& rec = *record;
+  if (rec.InKernel() || rec.data->pgoff == 0) {
+    return;  // Only user-space maps with a non-zero file offset qualify.
+  }
+  EmbeddedElf* elf =
+      ApkInspector::FindElfInApkByOffset(rec.filename, rec.data->pgoff);
+  if (elf == nullptr) {
+    return;  // No embedded ELF was found for this file and offset.
+  }
+
+  // Compute new offset relative to start of elf in APK.
+  auto rebased = *rec.data;
+  rebased.pgoff -= elf->entry_offset();
+  rec.SetDataAndFilename(rebased,
+                         GetUrlInApk(rec.filename, elf->entry_name()));
+}
+
+void RecordCommand::UpdateRecordForEmbeddedElfPath(Record* record) {
+  if (record->type() == PERF_RECORD_MMAP) {  // Handled as an MmapRecord.
+    UpdateMmapRecordForEmbeddedElfPath(static_cast<MmapRecord*>(record));
+  } else if (record->type() == PERF_RECORD_MMAP2) {  // As a Mmap2Record.
+    UpdateMmapRecordForEmbeddedElfPath(static_cast<Mmap2Record*>(record));
+  }  // Records of any other type are left untouched.
+}
+
+// Unwinds the user stack of a sample that carries a callchain, user
+// registers and user stack data, replacing the latter two by the result.
+bool RecordCommand::UnwindRecord(Record* record) {
+  if (record->type() != PERF_RECORD_SAMPLE) {
+    return true;
+  }
+  SampleRecord& sample = *static_cast<SampleRecord*>(record);
+  const bool can_unwind = (sample.sample_type & PERF_SAMPLE_CALLCHAIN) &&
+                          (sample.sample_type & PERF_SAMPLE_REGS_USER) &&
+                          (sample.regs_user_data.reg_mask != 0) &&
+                          (sample.sample_type & PERF_SAMPLE_STACK_USER) &&
+                          (sample.GetValidStackSize() > 0);
+  if (!can_unwind) {
+    return true;
+  }
+  ThreadEntry* thread =
+      thread_tree_.FindThreadOrNew(sample.tid_data.pid, sample.tid_data.tid);
+  RegSet regs = CreateRegSet(sample.regs_user_data.reg_mask,
+                             sample.regs_user_data.regs);
+  ArchType arch = GetArchForAbi(GetBuildArch(), sample.regs_user_data.abi);
+  // Normally do strict arch check when unwinding stack. But allow unwinding
+  // 32-bit processes on 64-bit devices for system wide profiling.
+  const bool strict_arch_check = !system_wide_collection_;
+  std::vector<uint64_t> unwind_ips =
+      UnwindCallChain(arch, *thread, regs, sample.stack_user_data.data,
+                      sample.GetValidStackSize(), strict_arch_check);
+  sample.ReplaceRegAndStackWithCallChain(unwind_ips);
+  return !dump_symbols_ || DumpSymbolForRecord(sample, true);
+}
+
+// Re-reads the recorded file, passes every record through UnwindRecord() and
+// writes the result to a temporary file that then replaces the original.
+bool RecordCommand::PostUnwind(const std::vector<std::string>& args) {
+  thread_tree_.ClearThreadAndMap();
+  std::unique_ptr<RecordFileReader> reader =
+      RecordFileReader::CreateInstance(record_filename_);
+  if (reader == nullptr) {
+    return false;
+  }
+  std::string tmp_filename = record_filename_ + ".tmp";
+  record_file_writer_ = CreateRecordFile(tmp_filename);
+  if (record_file_writer_ == nullptr) {
+    return false;
+  }
+  bool result = reader->ReadDataSection(
+      [this](std::unique_ptr<Record> record) {
+        thread_tree_.Update(*record);
+        if (!UnwindRecord(record.get())) {
+          return false;
+        }
+        return record_file_writer_->WriteRecord(*record);
+      },
+      false);
+  if (!result) {
+    return false;
+  }
+  if (!DumpAdditionalFeatures(args)) {
+    return false;
+  }
+  if (!record_file_writer_->Close()) {
+    return false;
+  }
+
+  // rename() atomically replaces the destination, so no unlink() comes first:
+  // if the rename fails, the original recording is still in place.
+  if (rename(tmp_filename.c_str(), record_filename_.c_str()) != 0) {
+    PLOG(ERROR) << "failed to rename " << tmp_filename << " to "
+                << record_filename_;
+    return false;
+  }
+  return true;
+}
+
+// Writes the feature sections: build ids, OS release, architecture, the
+// command line and, when branch sampling is enabled, the branch stack feature.
+bool RecordCommand::DumpAdditionalFeatures(
+    const std::vector<std::string>& args) {
+  const bool has_branch_stack = (branch_sampling_ != 0);
+  // Four features are always written; branch stack is the optional fifth.
+  const size_t feature_count = has_branch_stack ? 5 : 4;
+  if (!record_file_writer_->WriteFeatureHeader(feature_count)) {
+    return false;
+  }
+  if (!DumpBuildIdFeature()) {
+    return false;
+  }
+  utsname uname_buf;
+  if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) {
+    PLOG(ERROR) << "uname() failed";
+    return false;
+  }
+  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_OSRELEASE,
+                                               uname_buf.release) ||
+      !record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_ARCH,
+                                               uname_buf.machine)) {
+    return false;
+  }
+  // The recorded command line is "<exec path> record <args...>"; the exec
+  // path stays "simpleperf" unless GetExecPath() replaces it.
+  std::string exec_path = "simpleperf";
+  GetExecPath(&exec_path);
+  std::vector<std::string> cmdline = {exec_path, "record"};
+  cmdline.insert(cmdline.end(), args.begin(), args.end());
+  if (!record_file_writer_->WriteCmdlineFeature(cmdline)) {
+    return false;
+  }
+  if (has_branch_stack && !record_file_writer_->WriteBranchStackFeature()) {
+    return false;
+  }
+  return true;
+}
+
+// Writes the build id feature, holding one BuildIdRecord per kernel, kernel
+// module or user file hit by a sample. Files whose build id can't be read
+// are left out instead of failing the whole dump.
+bool RecordCommand::DumpBuildIdFeature() {
+  std::vector<BuildIdRecord> build_id_records;
+  BuildId build_id;
+
+  // Add build_ids for kernel/modules.
+  for (const auto& filename : hit_kernel_modules_) {
+    if (filename == DEFAULT_KERNEL_FILENAME_FOR_BUILD_ID) {
+      if (GetKernelBuildId(&build_id)) {
+        build_id_records.push_back(BuildIdRecord(
+            true, UINT_MAX, build_id, DEFAULT_KERNEL_FILENAME_FOR_BUILD_ID));
+      }
+      continue;
+    }
+    // The build id is looked up by module name: the file's base name
+    // without a trailing ".ko". basename() gets a copy since it takes a
+    // non-const char pointer.
+    std::string path = filename;
+    std::string module_name = basename(&path[0]);
+    if (android::base::EndsWith(module_name, ".ko")) {
+      module_name = module_name.substr(0, module_name.size() - 3);
+    }
+    if (!GetModuleBuildId(module_name, &build_id)) {
+      LOG(DEBUG) << "can't read build_id for module " << module_name;
+      continue;
+    }
+    build_id_records.push_back(
+        BuildIdRecord(true, UINT_MAX, build_id, filename));
+  }
+
+  // Add build_ids for user elf files.
+  for (const auto& filename : hit_user_files_) {
+    if (filename == DEFAULT_EXECNAME_FOR_THREAD_MMAP) {
+      continue;
+    }
+    // Files that split as an apk url are read through the apk.
+    auto tuple = SplitUrlInApk(filename);
+    const bool in_apk = std::get<0>(tuple);
+    ElfStatus result =
+        in_apk ? GetBuildIdFromApkFile(std::get<1>(tuple), std::get<2>(tuple),
+                                       &build_id)
+               : GetBuildIdFromElfFile(filename, &build_id);
+    if (result != ElfStatus::NO_ERROR) {
+      LOG(DEBUG) << "can't read build_id from file " << filename << ": "
+                 << result;
+      continue;
+    }
+    build_id_records.push_back(
+        BuildIdRecord(false, UINT_MAX, build_id, filename));
+  }
+
+  return record_file_writer_->WriteBuildIdFeature(build_id_records);
+}
+
+// Remembers the path of the dso hit by a sample's ip (kernel or user side).
+void RecordCommand::CollectHitFileInfo(Record* record) {
+  if (record->type() != PERF_RECORD_SAMPLE) return;
+  const auto& s = *static_cast<SampleRecord*>(record);
+  const bool in_kernel = s.InKernel();
+  const ThreadEntry* thread =
+      thread_tree_.FindThreadOrNew(s.tid_data.pid, s.tid_data.tid);
+  const MapEntry* map = thread_tree_.FindMap(thread, s.ip_data.ip, in_kernel);
+  if (in_kernel) {
+    hit_kernel_modules_.insert(map->dso->Path());
+  } else {
+    hit_user_files_.insert(map->dso->Path());
+  }
+}
+
+// Logs cpu hotplug changes since the last call; always returns true.
+bool RecordCommand::DetectCpuHotplugEvents() {
+  const std::vector<int> current_cpus = GetOnlineCpus();
+  for (const auto& cpu : online_cpus_) {
+    if (std::count(current_cpus.begin(), current_cpus.end(), cpu) == 0) {
+      LOG(INFO) << "Cpu " << cpu << " is offlined";
+    }
+  }
+  for (const auto& cpu : current_cpus) {
+    if (std::count(online_cpus_.begin(), online_cpus_.end(), cpu) == 0) {
+      LOG(INFO) << "Cpu " << cpu << " is onlined";
+    }
+  }
+  online_cpus_ = current_cpus;
+  return true;
+}
+
+void RegisterRecordCommand() {
+  RegisterCommand("record",  // Factory creating a new RecordCommand.
+                  [] { return std::unique_ptr<Command>(new RecordCommand()); });
+}
diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp
new file mode 100644
index 0000000..96098ae
--- /dev/null
+++ b/simpleperf/cmd_record_test.cpp
@@ -0,0 +1,318 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <android-base/stringprintf.h>
+#include <android-base/test_utils.h>
+
+#include <map>
+#include <memory>
+
+#include "command.h"
+#include "environment.h"
+#include "event_selection_set.h"
+#include "get_test_data.h"
+#include "record.h"
+#include "record_file.h"
+#include "test_util.h"
+
+using namespace PerfFileFormat;
+
+static std::unique_ptr<Command> RecordCmd() {  // New "record" command.
+  return CreateCommandInstance("record");
+}
+
+// Runs "record" on "sleep SLEEP_SEC" with |v| as extra options. Output goes
+// to |output_file|, or to a temporary file when none is given.
+static bool RunRecordCmd(std::vector<std::string> v,
+                         const char* output_file = nullptr) {
+  std::unique_ptr<TemporaryFile> scratch;
+  if (output_file == nullptr) {
+    scratch.reset(new TemporaryFile);
+    output_file = scratch->path;
+  }
+  const std::string out_file = output_file;
+  v.insert(v.end(), {"-o", out_file, "sleep", SLEEP_SEC});
+  return RecordCmd()->Run(v);
+}
+
+TEST(record_cmd, no_options) { ASSERT_TRUE(RunRecordCmd({})); }  // Defaults.
+
+TEST(record_cmd, system_wide_option) {  // -a, wrapped in TEST_IN_ROOT.
+  TEST_IN_ROOT(ASSERT_TRUE(RunRecordCmd({"-a"})));
+}
+
+TEST(record_cmd, sample_period_option) {  // -c <sample period>.
+  ASSERT_TRUE(RunRecordCmd({"-c", "100000"}));
+}
+
+TEST(record_cmd, event_option) {  // -e <event name>.
+  ASSERT_TRUE(RunRecordCmd({"-e", "cpu-clock"}));
+}
+
+TEST(record_cmd, freq_option) {  // Both -f and -F must be accepted.
+  ASSERT_TRUE(RunRecordCmd({"-f", "99"}));
+  ASSERT_TRUE(RunRecordCmd({"-F", "99"}));
+}
+
+TEST(record_cmd, output_file_option) {  // -o passed directly to Run().
+  TemporaryFile tmpfile;
+  ASSERT_TRUE(RecordCmd()->Run({"-o", tmpfile.path, "sleep", SLEEP_SEC}));
+}
+
+TEST(record_cmd, dump_kernel_mmap) {  // Expects an mmap record of the kernel.
+  TemporaryFile tmpfile;
+  ASSERT_TRUE(RunRecordCmd({}, tmpfile.path));
+  std::unique_ptr<RecordFileReader> reader =
+      RecordFileReader::CreateInstance(tmpfile.path);
+  ASSERT_TRUE(reader != nullptr);
+  std::vector<std::unique_ptr<Record>> records = reader->DataSection();
+  ASSERT_GT(records.size(), 0U);
+  bool have_kernel_mmap = false;
+  for (auto& record : records) {
+    if (record->type() == PERF_RECORD_MMAP) {
+      const MmapRecord* mmap_record =
+          static_cast<const MmapRecord*>(record.get());
+      if (strcmp(mmap_record->filename, DEFAULT_KERNEL_MMAP_NAME) == 0) {
+        have_kernel_mmap = true;  // One match is enough.
+        break;
+      }
+    }
+  }
+  ASSERT_TRUE(have_kernel_mmap);
+}
+
+TEST(record_cmd, dump_build_id_feature) {
+  TemporaryFile tmpfile;
+  ASSERT_TRUE(RunRecordCmd({}, tmpfile.path));
+  std::unique_ptr<RecordFileReader> reader =
+      RecordFileReader::CreateInstance(tmpfile.path);
+  ASSERT_TRUE(reader != nullptr);
+  const FileHeader& file_header = reader->FileHeader();  // Has feature bits.
+  ASSERT_TRUE(file_header.features[FEAT_BUILD_ID / 8] &
+              (1 << (FEAT_BUILD_ID % 8)));
+  ASSERT_GT(reader->FeatureSectionDescriptors().size(), 0u);
+}
+
+TEST(record_cmd, tracepoint_event) {  // System-wide tracepoint recording.
+  TEST_IN_ROOT(ASSERT_TRUE(RunRecordCmd({"-a", "-e", "sched:sched_switch"})));
+}
+
+TEST(record_cmd, branch_sampling) {
+  if (IsBranchSamplingSupported()) {
+    ASSERT_TRUE(RunRecordCmd({"-b"}));
+    ASSERT_TRUE(RunRecordCmd({"-j", "any,any_call,any_ret,ind_call"}));
+    ASSERT_TRUE(RunRecordCmd({"-j", "any,k"}));
+    ASSERT_TRUE(RunRecordCmd({"-j", "any,u"}));
+    ASSERT_FALSE(RunRecordCmd({"-j", "u"}));  // "u" alone must be rejected.
+  } else {
+    GTEST_LOG_(INFO) << "This test does nothing as branch stack sampling is "
+                        "not supported on this device.";
+  }
+}
+
+TEST(record_cmd, event_modifier) {  // ":u" suffix on the event name.
+  ASSERT_TRUE(RunRecordCmd({"-e", "cpu-cycles:u"}));
+}
+
+TEST(record_cmd, fp_callchain_sampling) {  // --call-graph fp.
+  ASSERT_TRUE(RunRecordCmd({"--call-graph", "fp"}));
+}
+
+TEST(record_cmd, system_wide_fp_callchain_sampling) {  // Same, with -a.
+  TEST_IN_ROOT(ASSERT_TRUE(RunRecordCmd({"-a", "--call-graph", "fp"})));
+}
+
+TEST(record_cmd, dwarf_callchain_sampling) {
+  if (IsDwarfCallChainSamplingSupported()) {
+    ASSERT_TRUE(RunRecordCmd({"--call-graph", "dwarf"}));
+    ASSERT_TRUE(RunRecordCmd({"--call-graph", "dwarf,16384"}));
+    ASSERT_FALSE(RunRecordCmd({"--call-graph", "dwarf,65536"}));  // Rejected.
+    ASSERT_TRUE(RunRecordCmd({"-g"}));
+  } else {
+    GTEST_LOG_(INFO) << "This test does nothing as dwarf callchain sampling is "
+                        "not supported on this device.";
+  }
+}
+
+TEST(record_cmd, system_wide_dwarf_callchain_sampling) {
+  if (IsDwarfCallChainSamplingSupported()) {  // Otherwise only logs a note.
+    TEST_IN_ROOT(ASSERT_TRUE(RunRecordCmd({"-a", "--call-graph", "dwarf"})));
+  } else {
+    GTEST_LOG_(INFO) << "This test does nothing as dwarf callchain sampling is "
+                        "not supported on this device.";
+  }
+}
+
+TEST(record_cmd, no_unwind_option) {
+  if (IsDwarfCallChainSamplingSupported()) {
+    ASSERT_TRUE(RunRecordCmd({"--call-graph", "dwarf", "--no-unwind"}));
+  } else {
+    GTEST_LOG_(INFO) << "This test does nothing as dwarf callchain sampling is "
+                        "not supported on this device.";
+  }
+  ASSERT_FALSE(RunRecordCmd({"--no-unwind"}));  // Invalid without dwarf.
+}
+
+TEST(record_cmd, post_unwind_option) {
+  if (IsDwarfCallChainSamplingSupported()) {
+    ASSERT_TRUE(RunRecordCmd({"--call-graph", "dwarf", "--post-unwind"}));
+  } else {
+    GTEST_LOG_(INFO) << "This test does nothing as dwarf callchain sampling is "
+                        "not supported on this device.";
+  }
+  ASSERT_FALSE(RunRecordCmd({"--post-unwind"}));  // Invalid without dwarf.
+  ASSERT_FALSE(  // --no-unwind and --post-unwind together must be rejected.
+      RunRecordCmd({"--call-graph", "dwarf", "--no-unwind", "--post-unwind"}));
+}
+
+TEST(record_cmd, existing_processes) {  // -p with two created processes.
+  std::vector<std::unique_ptr<Workload>> workloads;
+  CreateProcesses(2, &workloads);
+  std::string pid_list = android::base::StringPrintf(
+      "%d,%d", workloads[0]->GetPid(), workloads[1]->GetPid());
+  TemporaryFile tmpfile;
+  ASSERT_TRUE(RecordCmd()->Run({"-p", pid_list, "-o", tmpfile.path}));
+}
+
+TEST(record_cmd, existing_threads) {  // -t with the same kind of ids.
+  std::vector<std::unique_ptr<Workload>> workloads;
+  CreateProcesses(2, &workloads);
+  // Process id can also be used as thread id in linux.
+  std::string tid_list = android::base::StringPrintf(
+      "%d,%d", workloads[0]->GetPid(), workloads[1]->GetPid());
+  TemporaryFile tmpfile;
+  ASSERT_TRUE(RecordCmd()->Run({"-t", tid_list, "-o", tmpfile.path}));
+}
+
+TEST(record_cmd, no_monitored_threads) { ASSERT_FALSE(RecordCmd()->Run({""})); }
+
+TEST(record_cmd, more_than_one_event_types) {  // One -e list or several -e.
+  ASSERT_TRUE(RunRecordCmd({"-e", "cpu-cycles,cpu-clock"}));
+  ASSERT_TRUE(RunRecordCmd({"-e", "cpu-cycles", "-e", "cpu-clock"}));
+}
+
+TEST(record_cmd, mmap_page_option) {  // Of 1, 0 and 7 only 1 is accepted.
+  ASSERT_TRUE(RunRecordCmd({"-m", "1"}));
+  ASSERT_FALSE(RunRecordCmd({"-m", "0"}));
+  ASSERT_FALSE(RunRecordCmd({"-m", "7"}));
+}
+
+static void CheckKernelSymbol(const std::string& path, bool need_kallsyms,
+                              bool* success) {
+  *success = false;  // Stays false if an ASSERT_* below returns early.
+  std::unique_ptr<RecordFileReader> reader =
+      RecordFileReader::CreateInstance(path);
+  ASSERT_TRUE(reader != nullptr);
+  std::vector<std::unique_ptr<Record>> records = reader->DataSection();
+  bool has_kernel_symbol_records = false;
+  for (const auto& record : records) {
+    if (record->type() == SIMPLE_PERF_RECORD_KERNEL_SYMBOL) {
+      has_kernel_symbol_records = true;
+    }
+  }
+  ASSERT_EQ(need_kallsyms, has_kernel_symbol_records);  // Present iff needed.
+  *success = true;
+}
+
+TEST(record_cmd, kernel_symbol) {
+  TemporaryFile tmpfile;
+  ASSERT_TRUE(RunRecordCmd({}, tmpfile.path));
+  bool success;
+  CheckKernelSymbol(tmpfile.path, true, &success);  // Expected by default.
+  ASSERT_TRUE(success);
+  ASSERT_TRUE(RunRecordCmd({"--no-dump-kernel-symbols"}, tmpfile.path));
+  CheckKernelSymbol(tmpfile.path, false, &success);  // Not when disabled.
+  ASSERT_TRUE(success);
+}
+
+// Check if the dso/symbol records in perf.data match our expectation.
+static void CheckDsoSymbolRecords(const std::string& path,
+                                  bool can_have_dso_symbol_records,
+                                  bool* success) {
+  *success = false;  // Stays false if an ASSERT_* below returns early.
+  std::unique_ptr<RecordFileReader> reader =
+      RecordFileReader::CreateInstance(path);
+  ASSERT_TRUE(reader != nullptr);
+  std::vector<std::unique_ptr<Record>> records = reader->DataSection();
+  bool has_dso_record = false;
+  bool has_symbol_record = false;
+  std::map<uint64_t, bool> dso_hit_map;  // dso id -> has a symbol record.
+  for (const auto& record : records) {
+    if (record->type() == SIMPLE_PERF_RECORD_DSO) {
+      has_dso_record = true;
+      uint64_t dso_id = static_cast<const DsoRecord*>(record.get())->dso_id;
+      ASSERT_EQ(dso_hit_map.end(), dso_hit_map.find(dso_id));  // No duplicate.
+      dso_hit_map.insert(std::make_pair(dso_id, false));
+    } else if (record->type() == SIMPLE_PERF_RECORD_SYMBOL) {
+      has_symbol_record = true;
+      uint64_t dso_id = static_cast<const SymbolRecord*>(record.get())->dso_id;
+      auto it = dso_hit_map.find(dso_id);
+      ASSERT_NE(dso_hit_map.end(), it);  // Its dso record must come first.
+      it->second = true;
+    }
+  }
+  if (can_have_dso_symbol_records) {
+    // It is possible that there are no samples hitting functions having symbol.
+    // In that case, there are no dso/symbol records.
+    ASSERT_EQ(has_dso_record, has_symbol_record);
+    for (auto& pair : dso_hit_map) {
+      ASSERT_TRUE(pair.second);  // Every dumped dso has at least one symbol.
+    }
+  } else {
+    ASSERT_FALSE(has_dso_record);
+    ASSERT_FALSE(has_symbol_record);
+  }
+  *success = true;
+}
+
+TEST(record_cmd, dump_symbols) {
+  TemporaryFile tmpfile;
+  ASSERT_TRUE(RunRecordCmd({}, tmpfile.path));
+  bool success = false;
+  CheckDsoSymbolRecords(tmpfile.path, false, &success);
+  ASSERT_TRUE(success);
+  ASSERT_TRUE(RunRecordCmd({"--dump-symbols"}, tmpfile.path));
+  CheckDsoSymbolRecords(tmpfile.path, true, &success);
+  ASSERT_TRUE(success);
+  if (IsDwarfCallChainSamplingSupported()) {
+    // Repeat both checks with -g, reusing |success| instead of shadowing it.
+    ASSERT_TRUE(RunRecordCmd({"-g"}, tmpfile.path));
+    CheckDsoSymbolRecords(tmpfile.path, false, &success);
+    ASSERT_TRUE(success);
+    ASSERT_TRUE(RunRecordCmd({"-g", "--dump-symbols"}, tmpfile.path));
+    CheckDsoSymbolRecords(tmpfile.path, true, &success);
+    ASSERT_TRUE(success);
+  }
+}
+
+TEST(record_cmd, group_option) {  // One or several --group event lists.
+  ASSERT_TRUE(RunRecordCmd({"--group", "cpu-cycles,cpu-clock", "-m", "16"}));
+  ASSERT_TRUE(RunRecordCmd({"--group", "cpu-cycles,cpu-clock", "--group",
+                            "cpu-cycles:u,cpu-clock:u", "--group",
+                            "cpu-cycles:k,cpu-clock:k", "-m", "16"}));
+}
+
+TEST(record_cmd, symfs_option) { ASSERT_TRUE(RunRecordCmd({"--symfs", "/"})); }
+
+TEST(record_cmd, duration_option) {
+  TemporaryFile tmpfile;
+  ASSERT_TRUE(RecordCmd()->Run({"--duration", "1.2", "-p",  // Own process.
+                                std::to_string(getpid()), "-o", tmpfile.path}));
+  ASSERT_TRUE(  // --duration 1 combined with a "sleep 2" workload.
+      RecordCmd()->Run({"--duration", "1", "-o", tmpfile.path, "sleep", "2"}));
+}
diff --git a/simpleperf/cmd_report.cpp b/simpleperf/cmd_report.cpp
new file mode 100644
index 0000000..c2f95d2
--- /dev/null
+++ b/simpleperf/cmd_report.cpp
@@ -0,0 +1,746 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <inttypes.h>
+#include <algorithm>
+#include <functional>
+#include <map>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include <android-base/logging.h>
+#include <android-base/parseint.h>
+#include <android-base/stringprintf.h>
+#include <android-base/strings.h>
+
+#include "command.h"
+#include "dwarf_unwind.h"
+#include "environment.h"
+#include "event_attr.h"
+#include "event_type.h"
+#include "perf_regs.h"
+#include "record.h"
+#include "record_file.h"
+#include "sample_tree.h"
+#include "thread_tree.h"
+#include "tracing.h"
+#include "utils.h"
+
+namespace {
+
+static std::set<std::string> branch_sort_keys = {
+    "dso_from", "dso_to", "symbol_from", "symbol_to",  // Branch from/to keys.
+};
+struct BranchFromEntry {  // "From" side of a branch sample.
+  const MapEntry* map;
+  const Symbol* symbol;
+  uint64_t vaddr_in_file;
+  uint64_t flags;  // Copied from the branch stack item's flags.
+
+  BranchFromEntry()
+      : map(nullptr), symbol(nullptr), vaddr_in_file(0), flags(0) {}
+};
+
+struct SampleEntry {  // One entry of the report command's sample tree.
+  uint64_t time;
+  uint64_t period;
+  // accumulated when appearing in other sample's callchain
+  uint64_t accumulated_period;
+  uint64_t sample_count;
+  const ThreadEntry* thread;
+  const char* thread_comm;  // thread->comm as seen at construction time
+  const MapEntry* map;
+  const Symbol* symbol;
+  uint64_t vaddr_in_file;
+  BranchFromEntry branch_from;  // default-constructed unless set afterwards
+  // a callchain tree representing all callchains in the sample
+  CallChainRoot<SampleEntry> callchain;
+
+  SampleEntry(uint64_t time, uint64_t period, uint64_t accumulated_period,
+              uint64_t sample_count, const ThreadEntry* thread,
+              const MapEntry* map, const Symbol* symbol, uint64_t vaddr_in_file)
+      : time(time),
+        period(period),
+        accumulated_period(accumulated_period),
+        sample_count(sample_count),
+        thread(thread),
+        thread_comm(thread->comm),
+        map(map),
+        symbol(symbol),
+        vaddr_in_file(vaddr_in_file) {}
+
+  // The data member 'callchain' can only move, not copy.
+  SampleEntry(SampleEntry&&) = default;
+  SampleEntry(const SampleEntry&) = delete;
+};
+
+struct SampleTree {  // Result of GetSampleTree().
+  std::vector<SampleEntry*> samples;
+  uint64_t total_samples;  // Sum of sample_count over summarized samples.
+  uint64_t total_period;   // Sum of period over summarized samples.
+};
+
+BUILD_COMPARE_VALUE_FUNCTION(CompareVaddrInFile, vaddr_in_file);
+BUILD_DISPLAY_HEX64_FUNCTION(DisplayVaddrInFile, vaddr_in_file);
+
+class ReportCmdSampleTreeBuilder
+    : public SampleTreeBuilder<SampleEntry, uint64_t> {
+ public:
+  ReportCmdSampleTreeBuilder(SampleComparator<SampleEntry> sample_comparator,
+                             ThreadTree* thread_tree)
+      : SampleTreeBuilder(sample_comparator),
+        thread_tree_(thread_tree),
+        total_samples_(0),
+        total_period_(0) {}
+
+  void SetFilters(const std::unordered_set<int>& pid_filter,
+                  const std::unordered_set<int>& tid_filter,
+                  const std::unordered_set<std::string>& comm_filter,
+                  const std::unordered_set<std::string>& dso_filter,
+                  const std::unordered_set<std::string>& symbol_filter) {
+    pid_filter_ = pid_filter;
+    tid_filter_ = tid_filter;
+    comm_filter_ = comm_filter;
+    dso_filter_ = dso_filter;
+    symbol_filter_ = symbol_filter;
+  }
+
+  SampleTree GetSampleTree() const {
+    SampleTree sample_tree;
+    sample_tree.samples = GetSamples();
+    sample_tree.total_samples = total_samples_;
+    sample_tree.total_period = total_period_;
+    return sample_tree;
+  }
+
+ protected:
+  SampleEntry* CreateSample(const SampleRecord& r, bool in_kernel,
+                            uint64_t* acc_info) override {
+    const ThreadEntry* thread =
+        thread_tree_->FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
+    const MapEntry* map =
+        thread_tree_->FindMap(thread, r.ip_data.ip, in_kernel);
+    uint64_t vaddr_in_file;
+    const Symbol* symbol =
+        thread_tree_->FindSymbol(map, r.ip_data.ip, &vaddr_in_file);
+    *acc_info = r.period_data.period;
+    return InsertSample(std::unique_ptr<SampleEntry>(
+        new SampleEntry(r.time_data.time, r.period_data.period, 0, 1, thread,
+                        map, symbol, vaddr_in_file)));
+  }
+
+  SampleEntry* CreateBranchSample(const SampleRecord& r,
+                                  const BranchStackItemType& item) override {
+    const ThreadEntry* thread =
+        thread_tree_->FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
+    const MapEntry* from_map = thread_tree_->FindMap(thread, item.from);
+    uint64_t from_vaddr_in_file;
+    const Symbol* from_symbol =
+        thread_tree_->FindSymbol(from_map, item.from, &from_vaddr_in_file);
+    const MapEntry* to_map = thread_tree_->FindMap(thread, item.to);
+    uint64_t to_vaddr_in_file;
+    const Symbol* to_symbol =
+        thread_tree_->FindSymbol(to_map, item.to, &to_vaddr_in_file);
+    std::unique_ptr<SampleEntry> sample(
+        new SampleEntry(r.time_data.time, r.period_data.period, 0, 1, thread,
+                        to_map, to_symbol, to_vaddr_in_file));
+    sample->branch_from.map = from_map;
+    sample->branch_from.symbol = from_symbol;
+    sample->branch_from.vaddr_in_file = from_vaddr_in_file;
+    sample->branch_from.flags = item.flags;
+    return InsertSample(std::move(sample));
+  }
+
+  SampleEntry* CreateCallChainSample(const SampleEntry* sample, uint64_t ip,
+                                     bool in_kernel,
+                                     const std::vector<SampleEntry*>& callchain,
+                                     const uint64_t& acc_info) override {
+    const ThreadEntry* thread = sample->thread;
+    const MapEntry* map = thread_tree_->FindMap(thread, ip, in_kernel);
+    uint64_t vaddr_in_file;
+    const Symbol* symbol = thread_tree_->FindSymbol(map, ip, &vaddr_in_file);
+    std::unique_ptr<SampleEntry> callchain_sample(new SampleEntry(
+        sample->time, 0, acc_info, 0, thread, map, symbol, vaddr_in_file));
+    return InsertCallChainSample(std::move(callchain_sample), callchain);
+  }
+
+  const ThreadEntry* GetThreadOfSample(SampleEntry* sample) override {
+    return sample->thread;
+  }
+
+  uint64_t GetPeriodForCallChain(const uint64_t& acc_info) override {
+    return acc_info;
+  }
+
+  bool FilterSample(const SampleEntry* sample) override {
+    if (!pid_filter_.empty() &&
+        pid_filter_.find(sample->thread->pid) == pid_filter_.end()) {
+      return false;
+    }
+    if (!tid_filter_.empty() &&
+        tid_filter_.find(sample->thread->tid) == tid_filter_.end()) {
+      return false;
+    }
+    if (!comm_filter_.empty() &&
+        comm_filter_.find(sample->thread_comm) == comm_filter_.end()) {
+      return false;
+    }
+    if (!dso_filter_.empty() &&
+        dso_filter_.find(sample->map->dso->Path()) == dso_filter_.end()) {
+      return false;
+    }
+    if (!symbol_filter_.empty() &&
+        symbol_filter_.find(sample->symbol->DemangledName()) ==
+            symbol_filter_.end()) {
+      return false;
+    }
+    return true;
+  }
+
+  // Adds the period and sample count of |sample| to the running totals.
+  void UpdateSummary(const SampleEntry* sample) override {
+    total_period_ += sample->period;
+    total_samples_ += sample->sample_count;
+  }
+
+  // Folds the counters of |sample2| into |sample1|; |sample2| is not modified.
+  void MergeSample(SampleEntry* sample1, SampleEntry* sample2) override {
+    sample1->sample_count += sample2->sample_count;
+    sample1->accumulated_period += sample2->accumulated_period;
+    sample1->period += sample2->period;
+  }
+
+ private:
+  ThreadTree* thread_tree_;  // Used by CreateCallChainSample() to look up the map and symbol of an ip.
+
+  std::unordered_set<int> pid_filter_;  // FilterSample(): accepted pids; empty accepts all.
+  std::unordered_set<int> tid_filter_;  // FilterSample(): accepted tids; empty accepts all.
+  std::unordered_set<std::string> comm_filter_;  // FilterSample(): accepted thread comms; empty accepts all.
+  std::unordered_set<std::string> dso_filter_;  // FilterSample(): accepted dso paths; empty accepts all.
+  std::unordered_set<std::string> symbol_filter_;  // FilterSample(): accepted demangled symbol names; empty accepts all.
+
+  uint64_t total_samples_;  // Sum of sample_count over samples passed to UpdateSummary().
+  uint64_t total_period_;  // Sum of period over samples passed to UpdateSummary().
+};
+
+using ReportCmdSampleTreeSorter = SampleTreeSorter<SampleEntry>;  // Type of ReportCommand::sample_tree_sorter_.
+using ReportCmdSampleTreeDisplayer =  // Type of ReportCommand::sample_tree_displayer_.
+    SampleTreeDisplayer<SampleEntry, SampleTree>;
+
+using ReportCmdCallgraphDisplayer =  // Callgraph displayer registered by ParseOptions() when -g is used with the "symbol" sort key.
+    CallgraphDisplayer<SampleEntry, CallChainNode<SampleEntry>>;
+
+// Callgraph displayer whose sample names are the demangled symbol name
+// followed by the sample's vaddr_in_file in hex, e.g. "name [+0x1234]".
+class ReportCmdCallgraphDisplayerWithVaddrInFile
+    : public ReportCmdCallgraphDisplayer {
+ protected:
+  std::string PrintSampleName(const SampleEntry* sample) override {
+    const auto symbol_name = sample->symbol->DemangledName();
+    const auto file_vaddr = sample->vaddr_in_file;
+    return android::base::StringPrintf("%s [+0x%" PRIx64 "]", symbol_name,
+                                       file_vaddr);
+  }
+};
+
+struct EventAttrWithName {  // One event attr of the record file, as stored in ReportCommand::event_attrs_.
+  perf_event_attr attr;  // Copied from the record file's attr section.
+  std::string name;  // From GetEventNameByAttr(); replaced for tracepoint events by ProcessTracingData().
+  std::vector<uint64_t> event_ids;  // Ids copied from the attr section entry.
+};
+
+// Implements `simpleperf report`: reads a record file, builds a sample tree
+// from it and prints the tree as a text report to stdout or a file.
+class ReportCommand : public Command {
+ public:
+  ReportCommand()
+      : Command(
+            "report", "report sampling information in perf.data",
+            // clang-format off
+"Usage: simpleperf report [options]\n"
+"-b    Use the branch-to addresses in sampled take branches instead of the\n"
+"      instruction addresses. Only valid for perf.data recorded with -b/-j\n"
+"      option.\n"
+"--children    Print the overhead accumulated by appearing in the callchain.\n"
+"--comms comm1,comm2,...   Report only for selected comms.\n"
+"--dsos dso1,dso2,...      Report only for selected dsos.\n"
+"-g [callee|caller]    Print call graph. If callee mode is used, the graph\n"
+"                      shows how functions are called from others. Otherwise,\n"
+"                      the graph shows how functions call others.\n"
+"                      Default is caller mode.\n"
+"-i <file>  Specify path of record file, default is perf.data.\n"
+"-n         Print the sample count for each item.\n"
+"--no-demangle         Don't demangle symbol names.\n"
+"--no-show-ip          Don't show vaddr in file for unknown symbols.\n"
+"-o report_file_name   Set report file name, default is stdout.\n"
+"--pids pid1,pid2,...  Report only for selected pids.\n"
+"--sort key1,key2,...  Select keys used to sort and print the report. The\n"
+"                      appearance order of keys decides the order of keys used\n"
+"                      to sort and print the report.\n"
+"                      Possible keys include:\n"
+"                        pid             -- process id\n"
+"                        tid             -- thread id\n"
+"                        comm            -- thread name (can be changed during\n"
+"                                           the lifetime of a thread)\n"
+"                        dso             -- shared library\n"
+"                        symbol          -- function name in the shared library\n"
+"                        vaddr_in_file   -- virtual address in the shared\n"
+"                                           library\n"
+"                      Keys can only be used with -b option:\n"
+"                        dso_from        -- shared library branched from\n"
+"                        dso_to          -- shared library branched to\n"
+"                        symbol_from     -- name of function branched from\n"
+"                        symbol_to       -- name of function branched to\n"
+"                      The default sort keys are:\n"
+"                        comm,pid,tid,dso,symbol\n"
+"--symbols symbol1;symbol2;...    Report only for selected symbols.\n"
+"--symfs <dir>         Look for files with symbols relative to this directory.\n"
+"--tids tid1,tid2,...  Report only for selected tids.\n"
+"--vmlinux <file>      Parse kernel symbols from <file>.\n"
+            // clang-format on
+            ),
+        record_filename_("perf.data"),
+        record_file_arch_(GetBuildArch()),
+        use_branch_address_(false),
+        system_wide_collection_(false),
+        accumulate_callchain_(false),
+        print_callgraph_(false),
+        callgraph_show_callee_(false) {}
+
+  // Parses |args|, reads the record file and prints the report. Returns false
+  // as soon as any step fails. Marked override so a signature drift from
+  // Command::Run is caught at compile time, matching ReportSampleCommand.
+  bool Run(const std::vector<std::string>& args) override;
+
+ private:
+  bool ParseOptions(const std::vector<std::string>& args);
+  bool ReadEventAttrFromRecordFile();
+  bool ReadFeaturesFromRecordFile();
+  bool ReadSampleTreeFromRecordFile();
+  bool ProcessRecord(std::unique_ptr<Record> record);
+  bool ProcessTracingData(const std::vector<char>& data);
+  bool PrintReport();
+  void PrintReportContext(FILE* fp);
+
+  // Path of the record file to read (-i), "perf.data" by default.
+  std::string record_filename_;
+  // Starts as the build arch; replaced by the arch stored in the record file
+  // when that feature is present.
+  ArchType record_file_arch_;
+  std::unique_ptr<RecordFileReader> record_file_reader_;
+  // Event attrs read from the record file, with their display names.
+  std::vector<EventAttrWithName> event_attrs_;
+  ThreadTree thread_tree_;
+  // Result of sample_tree_builder_, filled by ReadSampleTreeFromRecordFile().
+  SampleTree sample_tree_;
+  // The builder, sorter and displayer are created by ParseOptions().
+  std::unique_ptr<ReportCmdSampleTreeBuilder> sample_tree_builder_;
+  std::unique_ptr<ReportCmdSampleTreeSorter> sample_tree_sorter_;
+  std::unique_ptr<ReportCmdSampleTreeDisplayer> sample_tree_displayer_;
+  // Set by -b.
+  bool use_branch_address_;
+  // Recording command line joined with spaces; empty when not recorded.
+  std::string record_cmdline_;
+  // True when "-a" is found in the recording command line.
+  bool system_wide_collection_;
+  // Set by --children and by -g.
+  bool accumulate_callchain_;
+  // Set by -g.
+  bool print_callgraph_;
+  // True for "-g callee", false for "-g caller" or plain "-g".
+  bool callgraph_show_callee_;
+
+  // Report output path (-o); empty means stdout.
+  std::string report_filename_;
+};
+
+// Entry point of the report command: parse options, load the record file into
+// a sample tree, then print the report. Returns false at the first failure.
+bool ReportCommand::Run(const std::vector<std::string>& args) {
+  // 1. Parse options.
+  if (!ParseOptions(args)) {
+    return false;
+  }
+
+  // 2. Read record file and build SampleTree.
+  record_file_reader_ = RecordFileReader::CreateInstance(record_filename_);
+  if (record_file_reader_ == nullptr) {
+    return false;
+  }
+  // Features are read before the samples to prepare the build ids used when
+  // building SampleTree.
+  const bool metadata_ok =
+      ReadEventAttrFromRecordFile() && ReadFeaturesFromRecordFile();
+  if (!metadata_ok) {
+    return false;
+  }
+  // Stays alive while the samples are read and the report is printed.
+  ScopedCurrentArch scoped_arch(record_file_arch_);
+
+  // 3. Build the tree, then show collected information.
+  return ReadSampleTreeFromRecordFile() && PrintReport();
+}
+
+bool ReportCommand::ParseOptions(const std::vector<std::string>& args) {  // Parses report options, configures Dso and thread_tree_, and creates the sample tree builder, sorter and displayer. Returns false on an invalid option.
+  bool demangle = true;
+  bool show_ip_for_unknown_symbol = true;
+  std::string symfs_dir;
+  std::string vmlinux;
+  bool print_sample_count = false;
+  std::vector<std::string> sort_keys = {"comm", "pid", "tid", "dso", "symbol"};  // Defaults; replaced entirely by --sort.
+  std::unordered_set<std::string> comm_filter;
+  std::unordered_set<std::string> dso_filter;
+  std::unordered_set<std::string> symbol_filter;
+  std::unordered_set<int> pid_filter;
+  std::unordered_set<int> tid_filter;
+
+  for (size_t i = 0; i < args.size(); ++i) {
+    if (args[i] == "-b") {
+      use_branch_address_ = true;
+    } else if (args[i] == "--children") {
+      accumulate_callchain_ = true;
+    } else if (args[i] == "--comms" || args[i] == "--dsos") {
+      std::unordered_set<std::string>& filter =  // Chosen from the option name before i is moved to the value.
+          (args[i] == "--comms" ? comm_filter : dso_filter);
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      std::vector<std::string> strs = android::base::Split(args[i], ",");
+      filter.insert(strs.begin(), strs.end());
+
+    } else if (args[i] == "-g") {
+      print_callgraph_ = true;
+      accumulate_callchain_ = true;  // -g also turns on callchain accumulation, like --children.
+      if (i + 1 < args.size() && args[i + 1][0] != '-') {  // The mode is optional: consume the next word only if it doesn't start with '-'.
+        ++i;
+        if (args[i] == "callee") {
+          callgraph_show_callee_ = true;
+        } else if (args[i] == "caller") {
+          callgraph_show_callee_ = false;
+        } else {
+          LOG(ERROR) << "Unknown argument with -g option: " << args[i];
+          return false;
+        }
+      }
+    } else if (args[i] == "-i") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      record_filename_ = args[i];
+
+    } else if (args[i] == "-n") {
+      print_sample_count = true;
+
+    } else if (args[i] == "--no-demangle") {
+      demangle = false;
+    } else if (args[i] == "--no-show-ip") {
+      show_ip_for_unknown_symbol = false;
+    } else if (args[i] == "-o") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      report_filename_ = args[i];
+
+    } else if (args[i] == "--pids" || args[i] == "--tids") {
+      const std::string& option = args[i];  // Kept for the error message; still names the option after i moves to the value.
+      std::unordered_set<int>& filter =
+          (option == "--pids" ? pid_filter : tid_filter);
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      std::vector<std::string> strs = android::base::Split(args[i], ",");
+      for (const auto& s : strs) {
+        int id;
+        if (!android::base::ParseInt(s.c_str(), &id, 0)) {
+          LOG(ERROR) << "invalid id in " << option << " option: " << s;
+          return false;
+        }
+        filter.insert(id);
+      }
+
+    } else if (args[i] == "--sort") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      sort_keys = android::base::Split(args[i], ",");
+    } else if (args[i] == "--symbols") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      std::vector<std::string> strs = android::base::Split(args[i], ";");  // Symbols are ';'-separated; the other list options use ','.
+      symbol_filter.insert(strs.begin(), strs.end());
+    } else if (args[i] == "--symfs") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      symfs_dir = args[i];
+
+    } else if (args[i] == "--vmlinux") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      vmlinux = args[i];
+    } else {
+      ReportUnknownOption(args, i);
+      return false;
+    }
+  }
+
+  Dso::SetDemangle(demangle);
+  if (!Dso::SetSymFsDir(symfs_dir)) {
+    return false;
+  }
+  if (!vmlinux.empty()) {
+    Dso::SetVmlinux(vmlinux);
+  }
+
+  if (show_ip_for_unknown_symbol) {
+    thread_tree_.ShowIpForUnknownSymbol();
+  }
+
+  SampleDisplayer<SampleEntry, SampleTree> displayer;
+  SampleComparator<SampleEntry> comparator;
+
+  if (accumulate_callchain_) {  // Overhead columns are registered first, before the sort-key columns.
+    displayer.AddDisplayFunction("Children", DisplayAccumulatedOverhead);
+    displayer.AddDisplayFunction("Self", DisplaySelfOverhead);
+  } else {
+    displayer.AddDisplayFunction("Overhead", DisplaySelfOverhead);
+  }
+  if (print_sample_count) {
+    displayer.AddDisplayFunction("Sample", DisplaySampleCount);
+  }
+
+  for (auto& key : sort_keys) {  // Each key registers one compare function and one display column, in the order given.
+    if (!use_branch_address_ &&
+        branch_sort_keys.find(key) != branch_sort_keys.end()) {
+      LOG(ERROR) << "sort key '" << key << "' can only be used with -b option.";
+      return false;
+    }
+    if (key == "pid") {
+      comparator.AddCompareFunction(ComparePid);
+      displayer.AddDisplayFunction("Pid", DisplayPid);
+    } else if (key == "tid") {
+      comparator.AddCompareFunction(CompareTid);
+      displayer.AddDisplayFunction("Tid", DisplayTid);
+    } else if (key == "comm") {
+      comparator.AddCompareFunction(CompareComm);
+      displayer.AddDisplayFunction("Command", DisplayComm);
+    } else if (key == "dso") {
+      comparator.AddCompareFunction(CompareDso);
+      displayer.AddDisplayFunction("Shared Object", DisplayDso);
+    } else if (key == "symbol") {
+      comparator.AddCompareFunction(CompareSymbol);
+      displayer.AddDisplayFunction("Symbol", DisplaySymbol);
+    } else if (key == "vaddr_in_file") {
+      comparator.AddCompareFunction(CompareVaddrInFile);
+      displayer.AddDisplayFunction("VaddrInFile", DisplayVaddrInFile);
+    } else if (key == "dso_from") {
+      comparator.AddCompareFunction(CompareDsoFrom);
+      displayer.AddDisplayFunction("Source Shared Object", DisplayDsoFrom);
+    } else if (key == "dso_to") {  // Uses the same functions as "dso"; only the column title differs.
+      comparator.AddCompareFunction(CompareDso);
+      displayer.AddDisplayFunction("Target Shared Object", DisplayDso);
+    } else if (key == "symbol_from") {
+      comparator.AddCompareFunction(CompareSymbolFrom);
+      displayer.AddDisplayFunction("Source Symbol", DisplaySymbolFrom);
+    } else if (key == "symbol_to") {  // Uses the same functions as "symbol"; only the column title differs.
+      comparator.AddCompareFunction(CompareSymbol);
+      displayer.AddDisplayFunction("Target Symbol", DisplaySymbol);
+    } else {
+      LOG(ERROR) << "Unknown sort key: " << key;
+      return false;
+    }
+  }
+  if (print_callgraph_) {  // A callgraph displayer is added only when "symbol" is among the sort keys.
+    bool has_symbol_key = false;
+    bool has_vaddr_in_file_key = false;
+    for (const auto& key : sort_keys) {
+      if (key == "symbol") {
+        has_symbol_key = true;
+      } else if (key == "vaddr_in_file") {
+        has_vaddr_in_file_key = true;
+      }
+    }
+    if (has_symbol_key) {
+      if (has_vaddr_in_file_key) {
+        displayer.AddExclusiveDisplayFunction(
+            ReportCmdCallgraphDisplayerWithVaddrInFile());
+      } else {
+        displayer.AddExclusiveDisplayFunction(ReportCmdCallgraphDisplayer());
+      }
+    }
+  }
+
+  sample_tree_builder_.reset(
+      new ReportCmdSampleTreeBuilder(comparator, &thread_tree_));
+  sample_tree_builder_->SetFilters(pid_filter, tid_filter, comm_filter,
+                                   dso_filter, symbol_filter);
+
+  SampleComparator<SampleEntry> sort_comparator;  // Sorter comparator: CompareTotalPeriod first, then the sort-key comparator.
+  sort_comparator.AddCompareFunction(CompareTotalPeriod);
+  sort_comparator.AddComparator(comparator);
+  sample_tree_sorter_.reset(new ReportCmdSampleTreeSorter(sort_comparator));
+  sample_tree_displayer_.reset(new ReportCmdSampleTreeDisplayer(displayer));
+  return true;
+}
+
+// Copies every attr of the record file's attr section into event_attrs_ with
+// its ids and event name. When -b is in effect, returns false unless all
+// attrs were recorded with PERF_SAMPLE_BRANCH_STACK.
+bool ReportCommand::ReadEventAttrFromRecordFile() {
+  std::vector<AttrWithId> file_attrs = record_file_reader_->AttrSection();
+  for (const auto& file_attr : file_attrs) {
+    EventAttrWithName entry;
+    entry.attr = *file_attr.attr;
+    entry.event_ids = file_attr.ids;
+    entry.name = GetEventNameByAttr(entry.attr);
+    event_attrs_.push_back(entry);
+  }
+  if (!use_branch_address_) {
+    return true;
+  }
+  for (const auto& event : event_attrs_) {
+    const bool has_branch_stack =
+        (event.attr.sample_type & PERF_SAMPLE_BRANCH_STACK) != 0;
+    if (!has_branch_stack) {
+      LOG(ERROR) << record_filename_
+                 << " is not recorded with branch stack sampling option.";
+      return false;
+    }
+  }
+  return true;
+}
+
+// Loads the feature sections of the record file: build ids, recorded arch,
+// recording command line and tracing data. Returns false when the recorded
+// arch is unsupported or the tracing data can't be read or processed.
+bool ReportCommand::ReadFeaturesFromRecordFile() {
+  // Pass the recorded (filename, build id) pairs to Dso.
+  std::vector<std::pair<std::string, BuildId>> build_ids;
+  std::vector<BuildIdRecord> build_id_records =
+      record_file_reader_->ReadBuildIdFeature();
+  for (auto& build_id_record : build_id_records) {
+    build_ids.push_back(
+        std::make_pair(build_id_record.filename, build_id_record.build_id));
+  }
+  Dso::SetBuildIds(build_ids);
+
+  // An arch stored in the file replaces the current record_file_arch_.
+  std::string arch_name =
+      record_file_reader_->ReadFeatureString(PerfFileFormat::FEAT_ARCH);
+  if (!arch_name.empty()) {
+    record_file_arch_ = GetArchType(arch_name);
+    if (record_file_arch_ == ARCH_UNSUPPORTED) {
+      return false;
+    }
+  }
+
+  std::vector<std::string> cmdline = record_file_reader_->ReadCmdlineFeature();
+  if (!cmdline.empty()) {
+    record_cmdline_ = android::base::Join(cmdline, ' ');
+    // TODO: the code to detect system wide collection option is fragile, remove
+    // it once we can do cross unwinding.
+    size_t pos = 0;
+    while (pos < cmdline.size()) {
+      std::string& word = cmdline[pos];
+      if (word == "-a") {
+        system_wide_collection_ = true;
+        break;
+      }
+      const bool takes_value =
+          word == "--call-graph" || word == "--cpu" || word == "-e" ||
+          word == "-f" || word == "-F" || word == "-j" || word == "-m" ||
+          word == "-o" || word == "-p" || word == "-t";
+      if (takes_value) {
+        // Step over the option and its value.
+        pos += 2;
+        continue;
+      }
+      if (!word.empty() && word[0] != '-') {
+        // First word that is not an option: stop scanning.
+        break;
+      }
+      ++pos;
+    }
+  }
+
+  if (!record_file_reader_->HasFeature(PerfFileFormat::FEAT_TRACING_DATA)) {
+    return true;
+  }
+  std::vector<char> tracing_data;
+  if (!record_file_reader_->ReadFeatureSection(
+          PerfFileFormat::FEAT_TRACING_DATA, &tracing_data)) {
+    return false;
+  }
+  return ProcessTracingData(tracing_data);
+}
+
+// Feeds every record of the data section through ProcessRecord(), then stores
+// the resulting sample tree and sorts its samples. Returns false when reading
+// the data section fails.
+bool ReportCommand::ReadSampleTreeFromRecordFile() {
+  thread_tree_.AddThread(0, 0, "swapper");
+  sample_tree_builder_->SetBranchSampleOption(use_branch_address_);
+  // Normally do strict arch check when unwinding stack. But allow unwinding
+  // 32-bit processes on 64-bit devices for system wide profiling.
+  const bool strict_unwind_arch_check = !system_wide_collection_;
+  const bool callgraph_not_callee = !callgraph_show_callee_;
+  sample_tree_builder_->SetCallChainSampleOptions(
+      accumulate_callchain_, print_callgraph_, callgraph_not_callee,
+      strict_unwind_arch_check);
+
+  auto on_record = [this](std::unique_ptr<Record> record) {
+    return ProcessRecord(std::move(record));
+  };
+  if (!record_file_reader_->ReadDataSection(on_record)) {
+    return false;
+  }
+
+  sample_tree_ = sample_tree_builder_->GetSampleTree();
+  sample_tree_sorter_->Sort(sample_tree_.samples, print_callgraph_);
+  return true;
+}
+
+// Handles one record from the data section. Every record updates the thread
+// tree; sample records also go to the sample tree builder, and tracing data
+// records refresh the tracepoint event names. Returns false only when
+// processing tracing data fails.
+bool ReportCommand::ProcessRecord(std::unique_ptr<Record> record) {
+  thread_tree_.Update(*record);
+  if (record->type() == PERF_RECORD_SAMPLE) {
+    const SampleRecord* sample = static_cast<const SampleRecord*>(record.get());
+    sample_tree_builder_->ProcessSampleRecord(*sample);
+    return true;
+  }
+  if (record->type() != PERF_RECORD_TRACING_DATA) {
+    return true;
+  }
+  const auto& tracing_record = *static_cast<TracingDataRecord*>(record.get());
+  std::vector<char> payload(tracing_record.data,
+                            tracing_record.data + tracing_record.data_size);
+  return ProcessTracingData(payload);
+}
+
+// Parses |data| as tracing data and renames every tracepoint event in
+// event_attrs_ to the tracing event name matching its config id. Always
+// returns true.
+bool ReportCommand::ProcessTracingData(const std::vector<char>& data) {
+  Tracing tracing(data);
+  for (auto& event : event_attrs_) {
+    if (event.attr.type != PERF_TYPE_TRACEPOINT) {
+      continue;
+    }
+    const uint64_t trace_event_id = event.attr.config;
+    event.name = tracing.GetTracingEventNameHavingId(trace_event_id);
+  }
+  return true;
+}
+
+// Writes the report context and the samples to stdout, or to
+// report_filename_ when one was given. Returns false when the file can't be
+// opened or the stream reports an error after flushing.
+bool ReportCommand::PrintReport() {
+  FILE* out = stdout;
+  // Closes the report file on every return path; stays empty for stdout.
+  std::unique_ptr<FILE, decltype(&fclose)> owned_file(nullptr, fclose);
+  if (!report_filename_.empty()) {
+    out = fopen(report_filename_.c_str(), "w");
+    if (out == nullptr) {
+      PLOG(ERROR) << "failed to open file " << report_filename_;
+      return false;
+    }
+    owned_file.reset(out);
+  }
+
+  PrintReportContext(out);
+  sample_tree_displayer_->DisplaySamples(out, sample_tree_.samples,
+                                         &sample_tree_);
+  fflush(out);
+  if (ferror(out) == 0) {
+    return true;
+  }
+  PLOG(ERROR) << "print report failed";
+  return false;
+}
+
+// Prints the report header to |report_fp|: the recording command line (when
+// known), the arch, one line per event, and the sample and event totals.
+void ReportCommand::PrintReportContext(FILE* report_fp) {
+  const bool has_cmdline = !record_cmdline_.empty();
+  if (has_cmdline) {
+    fprintf(report_fp, "Cmdline: %s\n", record_cmdline_.c_str());
+  }
+  const auto& arch_name = GetArchString(record_file_arch_);
+  fprintf(report_fp, "Arch: %s\n", arch_name.c_str());
+  for (const auto& event : event_attrs_) {
+    fprintf(report_fp, "Event: %s (type %u, config %llu)\n", event.name.c_str(),
+            event.attr.type, event.attr.config);
+  }
+  fprintf(report_fp, "Samples: %" PRIu64 "\n", sample_tree_.total_samples);
+  fprintf(report_fp, "Event count: %" PRIu64 "\n\n", sample_tree_.total_period);
+}
+
+}  // namespace
+
+// Registers the factory that creates the "report" command.
+void RegisterReportCommand() {
+  auto create_report_command = []() -> std::unique_ptr<Command> {
+    return std::unique_ptr<Command>(new ReportCommand());
+  };
+  RegisterCommand("report", create_report_command);
+}
diff --git a/simpleperf/cmd_report_sample.cpp b/simpleperf/cmd_report_sample.cpp
new file mode 100644
index 0000000..5a7599a
--- /dev/null
+++ b/simpleperf/cmd_report_sample.cpp
@@ -0,0 +1,419 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <inttypes.h>
+
+#include <memory>
+
+#include "system/extras/simpleperf/report_sample.pb.h"
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
+
+#include "command.h"
+#include "record_file.h"
+#include "thread_tree.h"
+#include "utils.h"
+
+namespace proto = simpleperf_report_proto;
+
+namespace {
+
+// Adapts a stdio FILE to protobuf's CopyingOutputStream. The class never
+// closes the FILE it is given.
+class ProtobufFileWriter : public google::protobuf::io::CopyingOutputStream {
+ public:
+  explicit ProtobufFileWriter(FILE* out_fp) : out_fp_(out_fp) {}
+
+  // Writes |size| bytes from |buffer| as one item; returns true only when the
+  // whole item was written.
+  bool Write(const void* buffer, int size) override {
+    const size_t items_written = fwrite(buffer, size, 1, out_fp_);
+    return items_written == 1;
+  }
+
+ private:
+  FILE* out_fp_;
+};
+
+// Adapts a stdio FILE to protobuf's CopyingInputStream. The class never
+// closes the FILE it is given.
+class ProtobufFileReader : public google::protobuf::io::CopyingInputStream {
+ public:
+  explicit ProtobufFileReader(FILE* in_fp) : in_fp_(in_fp) {}
+
+  // Reads up to |size| bytes into |buffer|. Returns the number of bytes read,
+  // 0 at end of file, or -1 when nothing could be read because the stream is
+  // in an error state. Without the -1 a read error would be indistinguishable
+  // from end of file for the adaptor layered on top of this class.
+  int Read(void* buffer, int size) override {
+    const size_t bytes_read = fread(buffer, 1, size, in_fp_);
+    if (bytes_read == 0 && ferror(in_fp_) != 0) {
+      return -1;
+    }
+    return static_cast<int>(bytes_read);
+  }
+
+ private:
+  FILE* in_fp_;
+};
+
+class ReportSampleCommand : public Command {  // Implements `simpleperf report-sample`: prints each sample of a record file as text or protobuf, or dumps a protobuf report.
+ public:
+  ReportSampleCommand()
+      : Command(
+            "report-sample", "report raw sample information in perf.data",
+            // clang-format off
+"Usage: simpleperf report-sample [options]\n"
+"--dump-protobuf-report  <file>\n"
+"           Dump report file generated by\n"
+"           `simpleperf report-sample --protobuf -o <file>`.\n"
+"-i <file>  Specify path of record file, default is perf.data.\n"
+"-o report_file_name  Set report file name, default is stdout.\n"
+"--protobuf  Use protobuf format in report_sample.proto to output samples.\n"
+"            Need to set a report_file_name when using this option.\n"
+"--show-callchain  Print callchain samples.\n"
+            // clang-format on
+            ),
+        record_filename_("perf.data"),
+        show_callchain_(false),
+        use_protobuf_(false),
+        report_fp_(nullptr),
+        coded_os_(nullptr),
+        sample_count_(0),
+        lost_count_(0) {
+    thread_tree_.ShowMarkForUnknownSymbol();
+    thread_tree_.ShowIpForUnknownSymbol();
+  }
+
+  bool Run(const std::vector<std::string>& args) override;
+
+ private:
+  bool ParseOptions(const std::vector<std::string>& args);
+  bool DumpProtobufReport(const std::string& filename);
+  bool ProcessRecord(std::unique_ptr<Record> record);
+  bool PrintSampleRecordInProtobuf(const SampleRecord& record);
+  bool PrintLostSituationInProtobuf();
+  bool PrintSampleRecord(const SampleRecord& record);
+  void PrintLostSituation();
+
+  std::string record_filename_;  // Record file to read (-i); "perf.data" by default.
+  std::unique_ptr<RecordFileReader> record_file_reader_;
+  std::string dump_protobuf_report_file_;  // Set by --dump-protobuf-report; when non-empty Run() only dumps this file.
+  bool show_callchain_;  // Set by --show-callchain.
+  bool use_protobuf_;  // Set by --protobuf.
+  ThreadTree thread_tree_;
+  std::string report_filename_;  // Output path (-o); empty means stdout.
+  FILE* report_fp_;  // Output stream, assigned in Run(); not owned by this member.
+  google::protobuf::io::CodedOutputStream* coded_os_;  // Points at a stream owned by a local in Run(); only set with --protobuf.
+  size_t sample_count_;  // Number of sample records seen by ProcessRecord().
+  size_t lost_count_;  // Sum of the lost field over PERF_RECORD_LOST records.
+};
+
+// Entry point of the report-sample command. With --dump-protobuf-report it
+// only dumps that file. Otherwise it reads the record file and prints every
+// sample, followed by the lost situation, as text or protobuf. Returns false
+// at the first failure, including write errors on the report stream.
+bool ReportSampleCommand::Run(const std::vector<std::string>& args) {
+  // 1. Parse options.
+  if (!ParseOptions(args)) {
+    return false;
+  }
+  if (!dump_protobuf_report_file_.empty()) {
+    return DumpProtobufReport(dump_protobuf_report_file_);
+  }
+  if (use_protobuf_) {
+    GOOGLE_PROTOBUF_VERIFY_VERSION;
+  }
+
+  // 2. Open record file.
+  record_file_reader_ = RecordFileReader::CreateInstance(record_filename_);
+  if (record_file_reader_ == nullptr) {
+    return false;
+  }
+
+  // 3. Prepare report output stream.
+  report_fp_ = stdout;
+  std::unique_ptr<FILE, decltype(&fclose)> fp(nullptr, fclose);
+  std::unique_ptr<ProtobufFileWriter> protobuf_writer;
+  std::unique_ptr<google::protobuf::io::CopyingOutputStreamAdaptor> protobuf_os;
+  std::unique_ptr<google::protobuf::io::CodedOutputStream> protobuf_coded_os;
+  if (!report_filename_.empty()) {
+    fp.reset(fopen(report_filename_.c_str(), use_protobuf_ ? "wb" : "w"));
+    if (fp == nullptr) {
+      PLOG(ERROR) << "failed to open " << report_filename_;
+      return false;
+    }
+    report_fp_ = fp.get();
+  }
+  if (use_protobuf_) {
+    protobuf_writer.reset(new ProtobufFileWriter(report_fp_));
+    protobuf_os.reset(new google::protobuf::io::CopyingOutputStreamAdaptor(
+        protobuf_writer.get()));
+    protobuf_coded_os.reset(
+        new google::protobuf::io::CodedOutputStream(protobuf_os.get()));
+    coded_os_ = protobuf_coded_os.get();
+  }
+
+  // 4. Read record file, and print samples online.
+  if (!record_file_reader_->ReadDataSection(
+          [this](std::unique_ptr<Record> record) {
+            return ProcessRecord(std::move(record));
+          })) {
+    return false;
+  }
+
+  if (use_protobuf_) {
+    if (!PrintLostSituationInProtobuf()) {
+      return false;
+    }
+    // A zero size marks the end of the record list for DumpProtobufReport().
+    coded_os_->WriteLittleEndian32(0);
+    if (coded_os_->HadError()) {
+      LOG(ERROR) << "print protobuf report failed";
+      return false;
+    }
+    // The coded stream is about to be destroyed; don't keep a pointer to it.
+    coded_os_ = nullptr;
+    protobuf_coded_os.reset(nullptr);
+    // Push the adaptor's buffered bytes into the FILE now, so that a failed
+    // write is reported here instead of being dropped silently when the
+    // adaptor is destroyed after the error check below.
+    if (!protobuf_os->Flush()) {
+      LOG(ERROR) << "print protobuf report failed";
+      return false;
+    }
+    google::protobuf::ShutdownProtobufLibrary();
+  } else {
+    PrintLostSituation();
+  }
+  // Flush in both modes so ferror() also sees errors from buffered data.
+  fflush(report_fp_);
+  if (ferror(report_fp_) != 0) {
+    PLOG(ERROR) << "print report failed";
+    return false;
+  }
+  return true;
+}
+
+// Parses the report-sample options into the matching members. Returns false
+// on an unknown option, a missing option value, or --protobuf without -o.
+bool ReportSampleCommand::ParseOptions(const std::vector<std::string>& args) {
+  for (size_t i = 0; i < args.size(); ++i) {
+    const std::string& option = args[i];
+
+    // Options that take a value store it in one of these members.
+    std::string* value_target = nullptr;
+    if (option == "--dump-protobuf-report") {
+      value_target = &dump_protobuf_report_file_;
+    } else if (option == "-i") {
+      value_target = &record_filename_;
+    } else if (option == "-o") {
+      value_target = &report_filename_;
+    }
+    if (value_target != nullptr) {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      *value_target = args[i];
+      continue;
+    }
+
+    // Flags without a value.
+    if (option == "--protobuf") {
+      use_protobuf_ = true;
+    } else if (option == "--show-callchain") {
+      show_callchain_ = true;
+    } else {
+      ReportUnknownOption(args, i);
+      return false;
+    }
+  }
+
+  const bool protobuf_without_file = use_protobuf_ && report_filename_.empty();
+  if (protobuf_without_file) {
+    LOG(ERROR) << "please specify a report filename to write protobuf data";
+    return false;
+  }
+  return true;
+}
+
+// Prints the content of a protobuf report file as indented text. The file is
+// read as a sequence of records, each preceded by its size as a little-endian
+// 32-bit value, and ends at a zero size. Returns false when the file can't be
+// opened, read or parsed, or when it holds an unexpected record type.
+bool ReportSampleCommand::DumpProtobufReport(const std::string& filename) {
+  GOOGLE_PROTOBUF_VERIFY_VERSION;
+  std::unique_ptr<FILE, decltype(&fclose)> fp(fopen(filename.c_str(), "rb"),
+                                              fclose);
+  if (fp == nullptr) {
+    PLOG(ERROR) << "failed to open " << filename;
+    return false;
+  }
+  ProtobufFileReader protobuf_reader(fp.get());
+  google::protobuf::io::CopyingInputStreamAdaptor adaptor(&protobuf_reader);
+  google::protobuf::io::CodedInputStream coded_is(&adaptor);
+  // Numbering of the printed samples. Kept local so that every dump starts
+  // again at "sample 1", even when several dumps run in the same process.
+  size_t sample_count = 0;
+  while (true) {
+    uint32_t size;
+    if (!coded_is.ReadLittleEndian32(&size)) {
+      PLOG(ERROR) << "failed to read " << filename;
+      return false;
+    }
+    if (size == 0) {
+      break;
+    }
+    // Restrict parsing to the bytes of this record.
+    auto limit = coded_is.PushLimit(size);
+    proto::Record proto_record;
+    if (!proto_record.ParseFromCodedStream(&coded_is)) {
+      PLOG(ERROR) << "failed to read " << filename;
+      return false;
+    }
+    coded_is.PopLimit(limit);
+    if (proto_record.type() == proto::Record_Type_SAMPLE) {
+      auto& sample = proto_record.sample();
+      PrintIndented(0, "sample %zu:\n", ++sample_count);
+      PrintIndented(1, "time: %" PRIu64 "\n", sample.time());
+      PrintIndented(1, "thread_id: %d\n", sample.thread_id());
+      PrintIndented(1, "callchain:\n");
+      for (int j = 0; j < sample.callchain_size(); ++j) {
+        const proto::Sample_CallChainEntry& callchain = sample.callchain(j);
+        PrintIndented(2, "ip: %" PRIx64 "\n", callchain.ip());
+        PrintIndented(2, "dso: %s\n", callchain.file().c_str());
+        PrintIndented(2, "symbol: %s\n", callchain.symbol().c_str());
+      }
+    } else if (proto_record.type() == proto::Record_Type_LOST_SITUATION) {
+      auto& lost = proto_record.lost();
+      PrintIndented(0, "lost_situation:\n");
+      PrintIndented(1, "sample_count: %" PRIu64 "\n", lost.sample_count());
+      PrintIndented(1, "lost_count: %" PRIu64 "\n", lost.lost_count());
+    } else {
+      LOG(ERROR) << "unexpected record type " << proto_record.type();
+      return false;
+    }
+  }
+  google::protobuf::ShutdownProtobufLibrary();
+  return true;
+}
+
+// Handles one record from the data section. Every record updates the thread
+// tree. Sample records are counted and printed in the selected format; lost
+// records add to lost_count_. Returns false only when printing a sample fails.
+bool ReportSampleCommand::ProcessRecord(std::unique_ptr<Record> record) {
+  thread_tree_.Update(*record);
+  if (record->type() == PERF_RECORD_SAMPLE) {
+    ++sample_count_;
+    const SampleRecord& sample =
+        *static_cast<const SampleRecord*>(record.get());
+    return use_protobuf_ ? PrintSampleRecordInProtobuf(sample)
+                         : PrintSampleRecord(sample);
+  }
+  if (record->type() == PERF_RECORD_LOST) {
+    const LostRecord* lost = static_cast<const LostRecord*>(record.get());
+    lost_count_ += lost->lost;
+  }
+  return true;
+}
+
+// Writes |r| to coded_os_ as a size-prefixed proto::Record of type SAMPLE.
+// The sampled ip is always the first callchain entry; with --show-callchain
+// the ips of the recorded callchain follow. Returns false when serialization
+// fails.
+bool ReportSampleCommand::PrintSampleRecordInProtobuf(const SampleRecord& r) {
+  proto::Record proto_record;
+  proto_record.set_type(proto::Record_Type_SAMPLE);
+  proto::Sample* proto_sample = proto_record.mutable_sample();
+  proto_sample->set_time(r.time_data.time);
+  proto_sample->set_thread_id(r.tid_data.tid);
+  proto::Sample_CallChainEntry* entry = proto_sample->add_callchain();
+  entry->set_ip(r.ip_data.ip);
+
+  bool in_kernel = r.InKernel();
+  const ThreadEntry* thread =
+      thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
+  const MapEntry* sample_map =
+      thread_tree_.FindMap(thread, r.ip_data.ip, in_kernel);
+  const Symbol* sample_symbol =
+      thread_tree_.FindSymbol(sample_map, r.ip_data.ip, nullptr);
+  entry->set_symbol(sample_symbol->DemangledName());
+  entry->set_file(sample_map->dso->Path());
+
+  if (show_callchain_) {
+    bool seen_first_ip = false;
+    for (uint64_t i = 0; i < r.callchain_data.ip_nr; ++i) {
+      const uint64_t ip = r.callchain_data.ips[i];
+      if (ip >= PERF_CONTEXT_MAX) {
+        // Context markers switch between kernel and user lookups for the ips
+        // that follow; they are not frames themselves.
+        if (ip == PERF_CONTEXT_KERNEL) {
+          in_kernel = true;
+        } else if (ip == PERF_CONTEXT_USER) {
+          in_kernel = false;
+        } else {
+          LOG(DEBUG) << "Unexpected perf_context in callchain: " << std::hex
+                     << ip << std::dec;
+        }
+        continue;
+      }
+      if (!seen_first_ip) {
+        seen_first_ip = true;
+        // Remove duplication with sample ip.
+        if (ip == r.ip_data.ip) {
+          continue;
+        }
+      }
+      const MapEntry* frame_map = thread_tree_.FindMap(thread, ip, in_kernel);
+      const Symbol* frame_symbol =
+          thread_tree_.FindSymbol(frame_map, ip, nullptr);
+      entry = proto_sample->add_callchain();
+      entry->set_ip(ip);
+      entry->set_symbol(frame_symbol->DemangledName());
+      entry->set_file(frame_map->dso->Path());
+    }
+  }
+
+  coded_os_->WriteLittleEndian32(proto_record.ByteSize());
+  const bool serialized = proto_record.SerializeToCodedStream(coded_os_);
+  if (!serialized) {
+    LOG(ERROR) << "failed to write sample to protobuf";
+  }
+  return serialized;
+}
+
+// Writes the sample and lost counters to coded_os_ as a size-prefixed
+// proto::Record of type LOST_SITUATION. Returns false when serialization
+// fails.
+bool ReportSampleCommand::PrintLostSituationInProtobuf() {
+  proto::Record proto_record;
+  proto_record.set_type(proto::Record_Type_LOST_SITUATION);
+  proto::LostSituation* situation = proto_record.mutable_lost();
+  situation->set_sample_count(sample_count_);
+  situation->set_lost_count(lost_count_);
+
+  coded_os_->WriteLittleEndian32(proto_record.ByteSize());
+  const bool serialized = proto_record.SerializeToCodedStream(coded_os_);
+  if (!serialized) {
+    LOG(ERROR) << "failed to write lost situation to protobuf";
+  }
+  return serialized;
+}
+
+// Prints |r| to report_fp_ as indented text: time, thread id, ip, dso and
+// symbol of the sample, followed by its callchain when --show-callchain is
+// set. Always returns true.
+bool ReportSampleCommand::PrintSampleRecord(const SampleRecord& r) {
+  bool in_kernel = r.InKernel();
+  const ThreadEntry* thread =
+      thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
+  const MapEntry* sample_map =
+      thread_tree_.FindMap(thread, r.ip_data.ip, in_kernel);
+  const Symbol* sample_symbol =
+      thread_tree_.FindSymbol(sample_map, r.ip_data.ip, nullptr);
+  FprintIndented(report_fp_, 0, "sample:\n");
+  FprintIndented(report_fp_, 1, "time: %" PRIu64 "\n", r.time_data.time);
+  FprintIndented(report_fp_, 1, "thread_id: %d\n", r.tid_data.tid);
+  FprintIndented(report_fp_, 1, "ip: %" PRIx64 "\n", r.ip_data.ip);
+  FprintIndented(report_fp_, 1, "dso: %s\n", sample_map->dso->Path().c_str());
+  FprintIndented(report_fp_, 1, "symbol: %s\n", sample_symbol->DemangledName());
+
+  if (!show_callchain_) {
+    return true;
+  }
+
+  FprintIndented(report_fp_, 1, "callchain:\n");
+  bool seen_first_ip = false;
+  for (uint64_t i = 0; i < r.callchain_data.ip_nr; ++i) {
+    const uint64_t ip = r.callchain_data.ips[i];
+    if (ip >= PERF_CONTEXT_MAX) {
+      // Context markers switch between kernel and user lookups for the ips
+      // that follow; they are not printed.
+      if (ip == PERF_CONTEXT_KERNEL) {
+        in_kernel = true;
+      } else if (ip == PERF_CONTEXT_USER) {
+        in_kernel = false;
+      } else {
+        LOG(DEBUG) << "Unexpected perf_context in callchain: " << std::hex
+                   << ip;
+      }
+      continue;
+    }
+    if (!seen_first_ip) {
+      seen_first_ip = true;
+      // Remove duplication with sample ip.
+      if (ip == r.ip_data.ip) {
+        continue;
+      }
+    }
+    const MapEntry* frame_map = thread_tree_.FindMap(thread, ip, in_kernel);
+    const Symbol* frame_symbol =
+        thread_tree_.FindSymbol(frame_map, ip, nullptr);
+    FprintIndented(report_fp_, 2, "ip: %" PRIx64 "\n", ip);
+    FprintIndented(report_fp_, 2, "dso: %s\n", frame_map->dso->Path().c_str());
+    FprintIndented(report_fp_, 2, "symbol: %s\n",
+                   frame_symbol->DemangledName());
+  }
+  return true;
+}
+
+// Prints the number of processed samples and the number of lost samples to
+// report_fp_. Both counters are size_t, hence the %zu conversions.
+void ReportSampleCommand::PrintLostSituation() {
+  FprintIndented(report_fp_, 0, "lost_situation:\n");
+  FprintIndented(report_fp_, 1, "sample_count: %zu\n", sample_count_);
+  // Report the lost counter here, not the sample counter a second time.
+  FprintIndented(report_fp_, 1, "lost_count: %zu\n", lost_count_);
+}
+
+}  // namespace
+
+// Registers the factory that creates the "report-sample" command.
+void RegisterReportSampleCommand() {
+  auto create_report_sample_command = []() -> std::unique_ptr<Command> {
+    return std::unique_ptr<Command>(new ReportSampleCommand());
+  };
+  RegisterCommand("report-sample", create_report_sample_command);
+}
diff --git a/simpleperf/cmd_report_sample_test.cpp b/simpleperf/cmd_report_sample_test.cpp
new file mode 100644
index 0000000..42df179
--- /dev/null
+++ b/simpleperf/cmd_report_sample_test.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <android-base/test_utils.h>
+
+#include "command.h"
+#include "get_test_data.h"
+
+// Returns a new instance of the "report-sample" command for a test to run.
+static std::unique_ptr<Command> ReportSampleCmd() {
+  std::unique_ptr<Command> cmd = CreateCommandInstance("report-sample");
+  return cmd;
+}
+
+// report-sample must succeed when given only an input file.
+TEST(cmd_report_sample, text) {
+  const std::vector<std::string> args = {"-i",
+                                         GetTestData(PERF_DATA_WITH_SYMBOLS)};
+  ASSERT_TRUE(ReportSampleCmd()->Run(args));
+}
+
+// report-sample must succeed when -o redirects the report to a file.
+TEST(cmd_report_sample, output_option) {
+  TemporaryFile tmpfile;
+  const std::vector<std::string> args = {
+      "-i", GetTestData(PERF_DATA_WITH_SYMBOLS), "-o", tmpfile.path};
+  ASSERT_TRUE(ReportSampleCmd()->Run(args));
+}
+
+// report-sample must succeed with --show-callchain on call graph data.
+TEST(cmd_report_sample, show_callchain_option) {
+  TemporaryFile tmpfile;
+  const std::vector<std::string> args = {
+      "-i", GetTestData(CALLGRAPH_FP_PERF_DATA), "-o", tmpfile.path,
+      "--show-callchain"};
+  ASSERT_TRUE(ReportSampleCmd()->Run(args));
+}
+
+// Writes a report with --protobuf, then checks that the produced file can be
+// read back with --dump-protobuf-report.
+TEST(cmd_report_sample, protobuf_option) {
+  TemporaryFile tmpfile;
+
+  const std::vector<std::string> write_args = {
+      "-i", GetTestData(PERF_DATA_WITH_SYMBOLS), "-o", tmpfile.path,
+      "--protobuf"};
+  ASSERT_TRUE(ReportSampleCmd()->Run(write_args));
+
+  const std::vector<std::string> dump_args = {"--dump-protobuf-report",
+                                              tmpfile.path};
+  ASSERT_TRUE(ReportSampleCmd()->Run(dump_args));
+}
diff --git a/simpleperf/cmd_report_test.cpp b/simpleperf/cmd_report_test.cpp
new file mode 100644
index 0000000..a64ee18
--- /dev/null
+++ b/simpleperf/cmd_report_test.cpp
@@ -0,0 +1,449 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <set>
+#include <unordered_map>
+
+#include <android-base/file.h>
+#include <android-base/strings.h>
+#include <android-base/test_utils.h>
+
+#include "command.h"
+#include "event_selection_set.h"
+#include "get_test_data.h"
+#include "perf_regs.h"
+#include "read_apk.h"
+#include "test_util.h"
+
+// Returns a new instance of the "report" command for a test to run.
+static std::unique_ptr<Command> ReportCmd() {
+  std::unique_ptr<Command> cmd = CreateCommandInstance("report");
+  return cmd;
+}
+
+// Fixture that runs the "report" command and captures its output.
+//
+// After Report()/ReportRaw() returns, `success` says whether every step
+// passed, `content` holds the report text read back from tmp_file, and `lines`
+// holds the trimmed, non-empty lines of that text.
+class ReportCommandTest : public ::testing::Test {
+ protected:
+  // Like ReportRaw(), but perf_data is first resolved through GetTestData().
+  void Report(
+      const std::string& perf_data,
+      const std::vector<std::string>& add_args = std::vector<std::string>()) {
+    ReportRaw(GetTestData(perf_data), add_args);
+  }
+
+  // Runs the report command on perf_data with the report written to tmp_file,
+  // then loads the report into `content` and `lines`. add_args are appended
+  // after the default arguments.
+  void ReportRaw(
+      const std::string& perf_data,
+      const std::vector<std::string>& add_args = std::vector<std::string>()) {
+    // Stays false when any ASSERT below returns from this function early.
+    success = false;
+
+    std::vector<std::string> cmd_args;
+    cmd_args.push_back("-i");
+    cmd_args.push_back(perf_data);
+    cmd_args.push_back("--symfs");
+    cmd_args.push_back(GetTestDataDir());
+    cmd_args.push_back("-o");
+    cmd_args.push_back(tmp_file.path);
+    cmd_args.insert(cmd_args.end(), add_args.begin(), add_args.end());
+
+    ASSERT_TRUE(ReportCmd()->Run(cmd_args));
+    ASSERT_TRUE(android::base::ReadFileToString(tmp_file.path, &content));
+    ASSERT_TRUE(!content.empty());
+
+    // Keep only the lines that still have text after trimming.
+    lines.clear();
+    for (const std::string& piece : android::base::Split(content, "\n")) {
+      std::string trimmed = android::base::Trim(piece);
+      if (trimmed.empty()) {
+        continue;
+      }
+      lines.push_back(trimmed);
+    }
+    ASSERT_GE(lines.size(), 2u);
+    success = true;
+  }
+
+  TemporaryFile tmp_file;
+  std::string content;
+  std::vector<std::string> lines;
+  bool success;
+};
+
+// With no extra options the report must mention the GlobalFunc symbol.
+TEST_F(ReportCommandTest, no_option) {
+  Report(PERF_DATA);
+  ASSERT_TRUE(success);
+  const bool has_global_func = content.find("GlobalFunc") != std::string::npos;
+  ASSERT_TRUE(has_global_func);
+}
+
+// The GlobalFunc symbol must also show up for the mini-debug-info test data.
+TEST_F(ReportCommandTest, report_symbol_from_elf_file_with_mini_debug_info) {
+  Report(PERF_DATA_WITH_MINI_DEBUG_INFO);
+  ASSERT_TRUE(success);
+  const bool has_global_func = content.find("GlobalFunc") != std::string::npos;
+  ASSERT_TRUE(has_global_func);
+}
+
+// Sorting by pid must produce a "Pid" header followed by at least two lines.
+TEST_F(ReportCommandTest, sort_option_pid) {
+  Report(PERF_DATA, {"--sort", "pid"});
+  ASSERT_TRUE(success);
+  // Index of the first line mentioning "Pid", or lines.size() if there is none.
+  size_t header = 0;
+  for (; header < lines.size(); ++header) {
+    if (lines[header].find("Pid") != std::string::npos) {
+      break;
+    }
+  }
+  ASSERT_LT(header + 2, lines.size());
+}
+
+// Sorting by several keys must yield a header line with exactly the requested
+// columns: Command, Pid, Shared Object and Symbol, but no Tid.
+TEST_F(ReportCommandTest, sort_option_more_than_one) {
+  Report(PERF_DATA, {"--sort", "comm,pid,dso,symbol"});
+  ASSERT_TRUE(success);
+  // Index of the first line mentioning "Overhead", i.e. the header line.
+  size_t header = 0;
+  for (; header < lines.size(); ++header) {
+    if (lines[header].find("Overhead") != std::string::npos) {
+      break;
+    }
+  }
+  ASSERT_LT(header + 1, lines.size());
+  const std::string& header_line = lines[header];
+  for (const char* column : {"Command", "Pid", "Shared Object", "Symbol"}) {
+    ASSERT_NE(header_line.find(column), std::string::npos);
+  }
+  ASSERT_EQ(header_line.find("Tid"), std::string::npos);
+}
+
+// Checks the two percentage columns printed with --children for the
+// GlobalFunc and main symbols.
+// NOTE(review): presumably the first column is overhead including callees and
+// the second is self overhead -- confirm against the report format.
+TEST_F(ReportCommandTest, children_option) {
+  Report(CALLGRAPH_FP_PERF_DATA, {"--children", "--sort", "symbol"});
+  ASSERT_TRUE(success);
+  // Maps symbol name -> (first percentage, second percentage).
+  std::unordered_map<std::string, std::pair<double, double>> map;
+  for (size_t i = 0; i < lines.size(); ++i) {
+    char name[1024];
+    std::pair<double, double> pair;
+    // The %s width is capped at sizeof(name) - 1 so that an unexpectedly long
+    // token in the report cannot overflow the buffer.
+    if (sscanf(lines[i].c_str(), "%lf%%%lf%%%1023s", &pair.first, &pair.second,
+               name) == 3) {
+      map.insert(std::make_pair(name, pair));
+    }
+  }
+  ASSERT_NE(map.find("GlobalFunc"), map.end());
+  ASSERT_NE(map.find("main"), map.end());
+  auto func_pair = map["GlobalFunc"];
+  auto main_pair = map["main"];
+  ASSERT_GE(main_pair.first, func_pair.first);
+  ASSERT_GE(func_pair.first, func_pair.second);
+  ASSERT_GE(func_pair.second, main_pair.second);
+}
+
+// Returns true if some three consecutive lines contain, in this order,
+// "GlobalFunc", a '|' character and "main".
+static bool CheckCalleeMode(std::vector<std::string>& lines) {
+  for (size_t first = 0; first + 2 < lines.size(); ++first) {
+    if (lines[first].find("GlobalFunc") == std::string::npos) {
+      continue;
+    }
+    if (lines[first + 1].find('|') == std::string::npos) {
+      continue;
+    }
+    if (lines[first + 2].find("main") == std::string::npos) {
+      continue;
+    }
+    return true;
+  }
+  return false;
+}
+
+// Returns true if some three consecutive lines contain, in this order,
+// "main", a '|' character and "GlobalFunc".
+static bool CheckCallerMode(std::vector<std::string>& lines) {
+  for (size_t first = 0; first + 2 < lines.size(); ++first) {
+    if (lines[first].find("main") == std::string::npos) {
+      continue;
+    }
+    if (lines[first + 1].find('|') == std::string::npos) {
+      continue;
+    }
+    if (lines[first + 2].find("GlobalFunc") == std::string::npos) {
+      continue;
+    }
+    return true;
+  }
+  return false;
+}
+
+// -g alone must print the call graph in caller layout, and the explicit
+// "callee"/"caller" arguments must select the matching layout.
+TEST_F(ReportCommandTest, callgraph_option) {
+  // No argument: caller layout.
+  Report(CALLGRAPH_FP_PERF_DATA, {"-g"});
+  ASSERT_TRUE(success);
+  const bool caller_by_default = CheckCallerMode(lines);
+  ASSERT_TRUE(caller_by_default);
+
+  // Explicit callee layout.
+  Report(CALLGRAPH_FP_PERF_DATA, {"-g", "callee"});
+  ASSERT_TRUE(success);
+  const bool callee_on_request = CheckCalleeMode(lines);
+  ASSERT_TRUE(callee_on_request);
+
+  // Explicit caller layout.
+  Report(CALLGRAPH_FP_PERF_DATA, {"-g", "caller"});
+  ASSERT_TRUE(success);
+  const bool caller_on_request = CheckCallerMode(lines);
+  ASSERT_TRUE(caller_on_request);
+}
+
+// Returns true if every line after the first line containing "Overhead"
+// contains at least one of strs. Returns false when there is no such header
+// line or when nothing follows it.
+static bool AllItemsWithString(std::vector<std::string>& lines,
+                               const std::vector<std::string>& strs) {
+  // Locate the header; header == lines.size() means it was not found.
+  size_t header = 0;
+  for (; header < lines.size(); ++header) {
+    if (lines[header].find("Overhead") != std::string::npos) {
+      break;
+    }
+  }
+  if (header + 1 >= lines.size()) {
+    return false;
+  }
+  for (size_t item = header + 1; item < lines.size(); ++item) {
+    bool matched = false;
+    for (size_t k = 0; k < strs.size() && !matched; ++k) {
+      matched = lines[item].find(strs[k]) != std::string::npos;
+    }
+    if (!matched) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// --pids must restrict the report to the listed processes, and must filter by
+// process rather than by thread.
+TEST_F(ReportCommandTest, pid_filter_option) {
+  const std::vector<std::string> one_pid = {"17441"};
+  const std::vector<std::string> two_pids = {"17441", "17443"};
+
+  // Without a filter, other pids appear as well.
+  Report(PERF_DATA_WITH_MULTIPLE_PIDS_AND_TIDS, {"--sort", "pid"});
+  ASSERT_TRUE(success);
+  ASSERT_FALSE(AllItemsWithString(lines, one_pid));
+  ASSERT_FALSE(AllItemsWithString(lines, two_pids));
+
+  // A single pid.
+  Report(PERF_DATA_WITH_MULTIPLE_PIDS_AND_TIDS,
+         {"--sort", "pid", "--pids", "17441"});
+  ASSERT_TRUE(success);
+  ASSERT_TRUE(AllItemsWithString(lines, one_pid));
+
+  // Two pids.
+  Report(PERF_DATA_WITH_MULTIPLE_PIDS_AND_TIDS,
+         {"--sort", "pid", "--pids", "17441,17443"});
+  ASSERT_TRUE(success);
+  ASSERT_TRUE(AllItemsWithString(lines, two_pids));
+
+  // --pids is not the same as --tids: threads 17445 and 17441 both belong to
+  // process 17441, so both must be reported.
+  Report(PERF_DATA_WITH_MULTIPLE_PIDS_AND_TIDS,
+         {"--sort", "tid", "--pids", "17441"});
+  ASSERT_TRUE(success);
+  for (const char* tid : {"17441", "17445"}) {
+    ASSERT_NE(content.find(tid), std::string::npos);
+  }
+}
+
+// A non-numeric id in --pids must make the report fail (exit code 1 from the
+// death-test child) and produce the matching error message.
+TEST_F(ReportCommandTest, wrong_pid_filter_option) {
+  ASSERT_EXIT(
+      {
+        Report(PERF_DATA_WITH_MULTIPLE_PIDS_AND_TIDS, {"--pids", "2,bogus"});
+        if (success) {
+          exit(0);
+        }
+        exit(1);
+      },
+      testing::ExitedWithCode(1), "invalid id in --pids option: bogus");
+}
+
+// --tids must restrict the report to the listed threads.
+TEST_F(ReportCommandTest, tid_filter_option) {
+  const std::vector<std::string> one_tid = {"17441"};
+  const std::vector<std::string> two_tids = {"17441", "17445"};
+
+  // Without a filter, other tids appear as well.
+  Report(PERF_DATA_WITH_MULTIPLE_PIDS_AND_TIDS, {"--sort", "tid"});
+  ASSERT_TRUE(success);
+  ASSERT_FALSE(AllItemsWithString(lines, one_tid));
+  ASSERT_FALSE(AllItemsWithString(lines, two_tids));
+
+  // A single tid.
+  Report(PERF_DATA_WITH_MULTIPLE_PIDS_AND_TIDS,
+         {"--sort", "tid", "--tids", "17441"});
+  ASSERT_TRUE(success);
+  ASSERT_TRUE(AllItemsWithString(lines, one_tid));
+
+  // Two tids.
+  Report(PERF_DATA_WITH_MULTIPLE_PIDS_AND_TIDS,
+         {"--sort", "tid", "--tids", "17441,17445"});
+  ASSERT_TRUE(success);
+  ASSERT_TRUE(AllItemsWithString(lines, two_tids));
+}
+
+// A non-numeric id in --tids must make the report fail (exit code 1 from the
+// death-test child) and produce the matching error message.
+TEST_F(ReportCommandTest, wrong_tid_filter_option) {
+  ASSERT_EXIT(
+      {
+        Report(PERF_DATA_WITH_MULTIPLE_PIDS_AND_TIDS, {"--tids", "2,bogus"});
+        if (success) {
+          exit(0);
+        }
+        exit(1);
+      },
+      testing::ExitedWithCode(1), "invalid id in --tids option: bogus");
+}
+
+// --comms must restrict the report to the listed command names.
+TEST_F(ReportCommandTest, comm_filter_option) {
+  const std::vector<std::string> only_t1 = {"t1"};
+  const std::vector<std::string> t1_and_t2 = {"t1", "t2"};
+
+  // Without a filter, other commands appear as well.
+  Report(PERF_DATA, {"--sort", "comm"});
+  ASSERT_TRUE(success);
+  ASSERT_FALSE(AllItemsWithString(lines, only_t1));
+  ASSERT_FALSE(AllItemsWithString(lines, t1_and_t2));
+
+  // A single command.
+  Report(PERF_DATA, {"--sort", "comm", "--comms", "t1"});
+  ASSERT_TRUE(success);
+  ASSERT_TRUE(AllItemsWithString(lines, only_t1));
+
+  // Two commands.
+  Report(PERF_DATA, {"--sort", "comm", "--comms", "t1,t2"});
+  ASSERT_TRUE(success);
+  ASSERT_TRUE(AllItemsWithString(lines, t1_and_t2));
+}
+
+// --dsos must restrict the report to the listed shared objects.
+TEST_F(ReportCommandTest, dso_filter_option) {
+  const std::vector<std::string> only_t1 = {"/t1"};
+  const std::vector<std::string> t1_and_t2 = {"/t1", "/t2"};
+
+  // Without a filter, other dsos appear as well.
+  Report(PERF_DATA, {"--sort", "dso"});
+  ASSERT_TRUE(success);
+  ASSERT_FALSE(AllItemsWithString(lines, only_t1));
+  ASSERT_FALSE(AllItemsWithString(lines, t1_and_t2));
+
+  // A single dso.
+  Report(PERF_DATA, {"--sort", "dso", "--dsos", "/t1"});
+  ASSERT_TRUE(success);
+  ASSERT_TRUE(AllItemsWithString(lines, only_t1));
+
+  // Two dsos.
+  Report(PERF_DATA, {"--sort", "dso", "--dsos", "/t1,/t2"});
+  ASSERT_TRUE(success);
+  ASSERT_TRUE(AllItemsWithString(lines, t1_and_t2));
+}
+
+// --symbols must restrict the report to the listed symbols. The list is
+// separated by ';' here, and one of the symbol names itself contains a comma.
+TEST_F(ReportCommandTest, symbol_filter_option) {
+  const std::vector<std::string> only_func2 = {"func2(int, int)"};
+  const std::vector<std::string> main_and_func2 = {"main", "func2(int, int)"};
+
+  // Without a filter, other symbols appear as well.
+  Report(PERF_DATA_WITH_SYMBOLS, {"--sort", "symbol"});
+  ASSERT_TRUE(success);
+  ASSERT_FALSE(AllItemsWithString(lines, only_func2));
+  ASSERT_FALSE(AllItemsWithString(lines, main_and_func2));
+
+  // A single symbol.
+  Report(PERF_DATA_WITH_SYMBOLS,
+         {"--sort", "symbol", "--symbols", "func2(int, int)"});
+  ASSERT_TRUE(success);
+  ASSERT_TRUE(AllItemsWithString(lines, only_func2));
+
+  // Two symbols.
+  Report(PERF_DATA_WITH_SYMBOLS,
+         {"--sort", "symbol", "--symbols", "main;func2(int, int)"});
+  ASSERT_TRUE(success);
+  ASSERT_TRUE(AllItemsWithString(lines, main_and_func2));
+}
+
+// With -b, sorted by symbol_from/symbol_to, the report must contain a branch
+// in each direction between GlobalFunc and CalledFunc.
+TEST_F(ReportCommandTest, use_branch_address) {
+  Report(BRANCH_PERF_DATA, {"-b", "--sort", "symbol_from,symbol_to"});
+  // Stop here if the report failed; `lines` would otherwise be stale or empty.
+  ASSERT_TRUE(success);
+  // Set of (from, to) symbol pairs parsed from the lines after the header.
+  std::set<std::pair<std::string, std::string>> hit_set;
+  bool after_overhead = false;
+  for (const auto& line : lines) {
+    if (!after_overhead && line.find("Overhead") != std::string::npos) {
+      after_overhead = true;
+    } else if (after_overhead) {
+      char from[80];
+      char to[80];
+      // Each %s width is capped at sizeof(buffer) - 1 so that a long symbol
+      // name in the report cannot overflow the buffers.
+      if (sscanf(line.c_str(), "%*f%%%79s%79s", from, to) == 2) {
+        hit_set.insert(std::make_pair<std::string, std::string>(from, to));
+      }
+    }
+  }
+  ASSERT_NE(hit_set.find(std::make_pair<std::string, std::string>(
+                "GlobalFunc", "CalledFunc")),
+            hit_set.end());
+  ASSERT_NE(hit_set.find(std::make_pair<std::string, std::string>(
+                "CalledFunc", "GlobalFunc")),
+            hit_set.end());
+}
+
+// The report must name the native library by its in-apk url and show a symbol
+// (Func2) from it.
+TEST_F(ReportCommandTest, report_symbols_of_nativelib_in_apk) {
+  Report(NATIVELIB_IN_APK_PERF_DATA);
+  ASSERT_TRUE(success);
+  const std::string lib_url = GetUrlInApk(APK_FILE, NATIVELIB_IN_APK);
+  ASSERT_NE(content.find(lib_url), std::string::npos);
+  ASSERT_NE(content.find("Func2"), std::string::npos);
+}
+
+// Both event types recorded in the data file must appear in the report.
+TEST_F(ReportCommandTest, report_more_than_one_event_types) {
+  Report(PERF_DATA_WITH_TWO_EVENT_TYPES);
+  ASSERT_TRUE(success);
+  for (const char* event_name : {"cpu-cycles", "cpu-clock"}) {
+    ASSERT_NE(content.find(event_name), std::string::npos);
+  }
+}
+
+// The report must contain the kernel symbol perf_event_aux.
+TEST_F(ReportCommandTest, report_kernel_symbol) {
+  Report(PERF_DATA_WITH_KERNEL_SYMBOL);
+  ASSERT_TRUE(success);
+  const bool has_kernel_symbol =
+      content.find("perf_event_aux") != std::string::npos;
+  ASSERT_TRUE(has_kernel_symbol);
+}
+
+// "main" must be reported for both data files that carry dumped symbols.
+TEST_F(ReportCommandTest, report_dumped_symbols) {
+  Report(PERF_DATA_WITH_SYMBOLS);
+  ASSERT_TRUE(success);
+  const bool main_in_first = content.find("main") != std::string::npos;
+  ASSERT_TRUE(main_in_first);
+
+  Report(PERF_DATA_WITH_SYMBOLS_FOR_NONZERO_MINVADDR_DSO);
+  ASSERT_TRUE(success);
+  const bool main_in_second = content.find("main") != std::string::npos;
+  ASSERT_TRUE(main_in_second);
+}
+
+// Sorting by vaddr_in_file must add a "VaddrInFile" column to the report.
+TEST_F(ReportCommandTest, report_sort_vaddr_in_file) {
+  Report(PERF_DATA, {"--sort", "vaddr_in_file"});
+  ASSERT_TRUE(success);
+  const bool has_column = content.find("VaddrInFile") != std::string::npos;
+  ASSERT_TRUE(has_column);
+}
+
+// With a matching symfs "main" must be reported. With a mismatching symfs the
+// report must still succeed, must not contain "main", and must emit a
+// "Build id mismatch" message.
+TEST_F(ReportCommandTest, check_build_id) {
+  Report(PERF_DATA_FOR_BUILD_ID_CHECK,
+         {"--symfs", GetTestData(CORRECT_SYMFS_FOR_BUILD_ID_CHECK)});
+  ASSERT_TRUE(success);
+  ASSERT_NE(content.find("main"), std::string::npos);
+
+  // Child exit codes: 1 = report failed, 2 = symbol unexpectedly resolved,
+  // 0 = expected outcome.
+  ASSERT_EXIT(
+      {
+        Report(PERF_DATA_FOR_BUILD_ID_CHECK,
+               {"--symfs", GetTestData(WRONG_SYMFS_FOR_BUILD_ID_CHECK)});
+        if (!success) {
+          exit(1);
+        }
+        exit(content.find("main") != std::string::npos ? 2 : 0);
+      },
+      testing::ExitedWithCode(0), "Build id mismatch");
+}
+
+// By default the report contains no "unknown" entry; with --no-show-ip it
+// must contain one.
+TEST_F(ReportCommandTest, no_show_ip_option) {
+  Report(PERF_DATA);
+  ASSERT_TRUE(success);
+  const bool unknown_by_default = content.find("unknown") != std::string::npos;
+  ASSERT_FALSE(unknown_by_default);
+
+  Report(PERF_DATA, {"--no-show-ip"});
+  ASSERT_TRUE(success);
+  const bool unknown_with_option = content.find("unknown") != std::string::npos;
+  ASSERT_TRUE(unknown_with_option);
+}
+
+// With a symfs whose elf file has no symbol table, the report must still
+// succeed, must not contain GlobalFunc, and must emit the matching warning.
+TEST_F(ReportCommandTest, no_symbol_table_warning) {
+  // Child exit codes: 1 = report failed, 2 = symbol unexpectedly resolved,
+  // 0 = expected outcome.
+  ASSERT_EXIT(
+      {
+        Report(PERF_DATA,
+               {"--symfs", GetTestData(SYMFS_FOR_NO_SYMBOL_TABLE_WARNING)});
+        if (!success) {
+          exit(1);
+        }
+        exit(content.find("GlobalFunc") != std::string::npos ? 2 : 0);
+      },
+      testing::ExitedWithCode(0), "elf doesn't contain symbol table");
+}
+
+// With a symfs whose elf file cannot be read, the report must still succeed,
+// must not contain GlobalFunc, and must emit the matching warning.
+TEST_F(ReportCommandTest, read_elf_file_warning) {
+  // Child exit codes: 1 = report failed, 2 = symbol unexpectedly resolved,
+  // 0 = expected outcome.
+  ASSERT_EXIT(
+      {
+        Report(PERF_DATA,
+               {"--symfs", GetTestData(SYMFS_FOR_READ_ELF_FILE_WARNING)});
+        if (!success) {
+          exit(1);
+        }
+        exit(content.find("GlobalFunc") != std::string::npos ? 2 : 0);
+      },
+      testing::ExitedWithCode(0), "elf: Read failed");
+}
+
+#if defined(__linux__)
+
+// Returns a new instance of the "record" command for a test to run.
+static std::unique_ptr<Command> RecordCmd() {
+  std::unique_ptr<Command> cmd = CreateCommandInstance("record");
+  return cmd;
+}
+
+// Records a short `sleep` workload with -g and checks that the result can be
+// reported with -g. Does nothing where dwarf callchain sampling is unsupported.
+TEST_F(ReportCommandTest, dwarf_callgraph) {
+  if (!IsDwarfCallChainSamplingSupported()) {
+    GTEST_LOG_(INFO) << "This test does nothing as dwarf callchain sampling is "
+                        "not supported on this device.";
+    return;
+  }
+  // Holds the recorded perf data; the report itself still goes to the
+  // fixture's own tmp_file inside ReportRaw().
+  TemporaryFile record_file;
+  ASSERT_TRUE(
+      RecordCmd()->Run({"-g", "-o", record_file.path, "sleep", SLEEP_SEC}));
+  ReportRaw(record_file.path, {"-g"});
+  ASSERT_TRUE(success);
+}
+
+// Reports the call graph of a native library embedded in an apk and checks
+// that the library url and the expected symbols appear in the report.
+TEST_F(ReportCommandTest, report_dwarf_callgraph_of_nativelib_in_apk) {
+  // NATIVELIB_IN_APK_PERF_DATA is recorded on arm64, so can only report
+  // callgraph on arm64.
+  if (GetBuildArch() == ARCH_ARM64) {
+    Report(NATIVELIB_IN_APK_PERF_DATA, {"-g"});
+    // Stop here if the report failed, instead of inspecting stale or empty
+    // content below.
+    ASSERT_TRUE(success);
+    ASSERT_NE(content.find(GetUrlInApk(APK_FILE, NATIVELIB_IN_APK)),
+              std::string::npos);
+    ASSERT_NE(content.find("Func2"), std::string::npos);
+    ASSERT_NE(content.find("Func1"), std::string::npos);
+    ASSERT_NE(content.find("GlobalFunc"), std::string::npos);
+  } else {
+    GTEST_LOG_(INFO)
+        << "This test does nothing as it is only run on arm64 devices";
+  }
+}
+
+#endif
diff --git a/simpleperf/cmd_stat.cpp b/simpleperf/cmd_stat.cpp
new file mode 100644
index 0000000..5a0694e
--- /dev/null
+++ b/simpleperf/cmd_stat.cpp
@@ -0,0 +1,604 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <inttypes.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include <algorithm>
+#include <chrono>
+#include <set>
+#include <string>
+#include <vector>
+
+#include <android-base/logging.h>
+#include <android-base/strings.h>
+
+#include "command.h"
+#include "environment.h"
+#include "event_attr.h"
+#include "event_fd.h"
+#include "event_selection_set.h"
+#include "event_type.h"
+#include "IOEventLoop.h"
+#include "utils.h"
+#include "workload.h"
+
+namespace {
+
+// Event types measured when the user does not select any.
+static std::vector<std::string> default_measured_event_types{
+    "cpu-cycles",
+    "stalled-cycles-frontend",
+    "stalled-cycles-backend",
+    "instructions",
+    "branch-instructions",
+    "branch-misses",
+    "task-clock",
+    "context-switches",
+    "page-faults",
+};
+
+// Holds the counter value for one event type (plus optional modifier)
+// together with the display strings derived from it.
+struct CounterSummary {
+  std::string type_name;
+  std::string modifier;
+  uint32_t group_id;
+  uint64_t count;
+  double scale;
+  std::string readable_count;  // Filled in by the constructor.
+  std::string comment;         // Left empty by the constructor.
+  bool auto_generated;
+
+  // Stores the given values and pre-computes readable_count. csv selects the
+  // plain number format without thousands separators.
+  CounterSummary(const std::string& type_name, const std::string& modifier,
+                 uint32_t group_id, uint64_t count, double scale,
+                 bool auto_generated, bool csv)
+      : type_name(type_name),
+        modifier(modifier),
+        group_id(group_id),
+        count(count),
+        scale(scale),
+        auto_generated(auto_generated) {
+    readable_count = ReadableCountValue(csv);
+  }
+
+  // Two summaries count as monitored at the same time when they share a group
+  // or when both were monitored all the time.
+  bool IsMonitoredAtTheSameTime(const CounterSummary& other) const {
+    return group_id == other.group_id ||
+           (IsMonitoredAllTheTime() && other.IsMonitoredAllTheTime());
+  }
+
+  // Returns "type_name" or, when a modifier is set, "type_name:modifier".
+  std::string Name() const {
+    return modifier.empty() ? type_name : type_name + ":" + modifier;
+  }
+
+ private:
+  // Formats count for display: clock events as milliseconds, everything else
+  // as an integer, with thousands separators unless csv is set.
+  std::string ReadableCountValue(bool csv) {
+    const bool is_clock =
+        (type_name == "cpu-clock" || type_name == "task-clock");
+    if (is_clock) {
+      // Clock events count nanoseconds; show them as milliseconds.
+      return android::base::StringPrintf("%lf(ms)", count / 1e6);
+    }
+    std::string digits = android::base::StringPrintf("%" PRIu64, count);
+    if (!csv) {
+      // Working from the right, put a comma in front of every complete group
+      // of three digits, e.g. 1000000 becomes 1,000,000.
+      for (size_t pos = digits.size(); pos > 3; pos -= 3) {
+        digits.insert(pos - 3, 1, ',');
+      }
+    }
+    return digits;
+  }
+
+  // An event that never shares hardware counters runs the whole time it is
+  // enabled, and the scale of its summary is then usually within
+  // [1, 1 + 1e-5]. Using 1e-5 as the tolerance identifies such events in most
+  // cases while keeping the report error rate <= 1e-5.
+  bool IsMonitoredAllTheTime() const {
+    constexpr double kScaleErrorLimit = 1e-5;
+    return fabs(scale - 1.0) < kScaleErrorLimit;
+  }
+};
+
+// Collection of CounterSummary entries, with helpers to derive combined
+// summaries, attach a comment to each entry and print everything either as an
+// aligned table or as comma separated values (when csv is true).
+class CounterSummaries {
+ public:
+  explicit CounterSummaries(bool csv) : csv_(csv) {}
+  // Appends a copy of summary to the collection.
+  void AddSummary(const CounterSummary& summary) {
+    summaries_.push_back(summary);
+  }
+
+  // Returns the first summary whose type name and modifier both match, or
+  // nullptr when there is none. The pointer refers into summaries_.
+  const CounterSummary* FindSummary(const std::string& type_name,
+                                    const std::string& modifier) {
+    for (const auto& s : summaries_) {
+      if (s.type_name == type_name && s.modifier == modifier) {
+        return &s;
+      }
+    }
+    return nullptr;
+  }
+
+  // If we have two summaries monitoring the same event type at the same time,
+  // that one is for user space only, and the other is for kernel space only;
+  // then we can automatically generate a summary combining the two results.
+  // For example, a summary of branch-misses:u and a summary for branch-misses:k
+  // can generate a summary of branch-misses.
+  void AutoGenerateSummaries() {
+    // Iterate by index and re-read size() each time: AddSummary() below
+    // appends to summaries_ while this loop is running.
+    for (size_t i = 0; i < summaries_.size(); ++i) {
+      const CounterSummary& s = summaries_[i];
+      if (s.modifier == "u") {
+        const CounterSummary* other = FindSummary(s.type_name, "k");
+        if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
+          // Only generate a combined entry if none exists yet.
+          if (FindSummary(s.type_name, "") == nullptr) {
+            // The new summary is fully built from s and other before the
+            // push_back; neither is used again afterwards in this iteration.
+            AddSummary(CounterSummary(s.type_name, "", s.group_id,
+                                      s.count + other->count, s.scale, true,
+                                      csv_));
+          }
+        }
+      }
+    }
+  }
+
+  // Fills in the comment of every summary, given the measured duration.
+  void GenerateComments(double duration_in_sec) {
+    for (auto& s : summaries_) {
+      s.comment = GetCommentForSummary(s, duration_in_sec);
+    }
+  }
+
+  // Prints one line per summary to fp: count, name, comment, the percentage
+  // 1 / scale * 100, and a "(generated)" marker for auto generated entries.
+  void Show(FILE* fp) {
+    // Column widths are the longest value of each column; they are only used
+    // by the non-csv format below.
+    size_t count_column_width = 0;
+    size_t name_column_width = 0;
+    size_t comment_column_width = 0;
+    for (auto& s : summaries_) {
+      count_column_width =
+          std::max(count_column_width, s.readable_count.size());
+      name_column_width = std::max(name_column_width, s.Name().size());
+      comment_column_width = std::max(comment_column_width, s.comment.size());
+    }
+
+    for (auto& s : summaries_) {
+      if (csv_) {
+        fprintf(fp, "%s,%s,%s,(%.0lf%%)%s\n", s.readable_count.c_str(),
+                s.Name().c_str(), s.comment.c_str(), 1.0 / s.scale * 100,
+                (s.auto_generated ? " (generated)," : ","));
+      } else {
+        fprintf(fp, "  %*s  %-*s   # %-*s  (%.0lf%%)%s\n",
+                static_cast<int>(count_column_width), s.readable_count.c_str(),
+                static_cast<int>(name_column_width), s.Name().c_str(),
+                static_cast<int>(comment_column_width), s.comment.c_str(),
+                1.0 / s.scale * 100, (s.auto_generated ? " (generated)" : ""));
+      }
+    }
+  }
+
+ private:
+  // Builds the comment shown next to summary s. Depending on the event type
+  // this is cpus used, GHz, cycles per instruction, a miss rate, or otherwise
+  // an events-per-second rate. The value and its unit are separated by ',' in
+  // csv mode and by a space otherwise.
+  std::string GetCommentForSummary(const CounterSummary& s,
+                                   double duration_in_sec) {
+    char sap_mid;
+    if (csv_) {
+      sap_mid = ',';
+    } else {
+      sap_mid = ' ';
+    }
+    if (s.type_name == "task-clock") {
+      // count is divided by 1e9 to get seconds of run time.
+      double run_sec = s.count / 1e9;
+      double used_cpus = run_sec / (duration_in_sec / s.scale);
+      return android::base::StringPrintf("%lf%ccpus used", used_cpus, sap_mid);
+    }
+    if (s.type_name == "cpu-clock") {
+      return "";
+    }
+    if (s.type_name == "cpu-cycles") {
+      double hz = s.count / (duration_in_sec / s.scale);
+      return android::base::StringPrintf("%lf%cGHz", hz / 1e9, sap_mid);
+    }
+    if (s.type_name == "instructions" && s.count != 0) {
+      // Needs a cpu-cycles summary with the same modifier that was monitored
+      // at the same time; otherwise falls through to the generic rate below.
+      const CounterSummary* other = FindSummary("cpu-cycles", s.modifier);
+      if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
+        double cpi = static_cast<double>(other->count) / s.count;
+        return android::base::StringPrintf("%lf%ccycles per instruction", cpi,
+                                           sap_mid);
+      }
+    }
+    if (android::base::EndsWith(s.type_name, "-misses")) {
+      // Pick the event the misses are measured against: two special cases,
+      // otherwise the "-misses" suffix is replaced by "s".
+      std::string other_name;
+      if (s.type_name == "cache-misses") {
+        other_name = "cache-references";
+      } else if (s.type_name == "branch-misses") {
+        other_name = "branch-instructions";
+      } else {
+        other_name =
+            s.type_name.substr(0, s.type_name.size() - strlen("-misses")) + "s";
+      }
+      const CounterSummary* other = FindSummary(other_name, s.modifier);
+      if (other != nullptr && other->IsMonitoredAtTheSameTime(s) &&
+          other->count != 0) {
+        double miss_rate = static_cast<double>(s.count) / other->count;
+        return android::base::StringPrintf("%lf%%%cmiss rate", miss_rate * 100,
+                                           sap_mid);
+      }
+    }
+    // Generic case: events per second, scaled to G/M/K for readability.
+    double rate = s.count / (duration_in_sec / s.scale);
+    if (rate > 1e9) {
+      return android::base::StringPrintf("%.3lf%cG/sec", rate / 1e9, sap_mid);
+    }
+    if (rate > 1e6) {
+      return android::base::StringPrintf("%.3lf%cM/sec", rate / 1e6, sap_mid);
+    }
+    if (rate > 1e3) {
+      return android::base::StringPrintf("%.3lf%cK/sec", rate / 1e3, sap_mid);
+    }
+    return android::base::StringPrintf("%.3lf%c/sec", rate, sap_mid);
+  }
+
+ private:
+  std::vector<CounterSummary> summaries_;
+  // True when output should be comma separated.
+  bool csv_;
+};
+
+// Implements `simpleperf stat`: counts the selected events while a workload
+// runs, or for a fixed duration, and then prints the counters.
+class StatCommand : public Command {
+ public:
+  // Sets the command name, short description and help text, and initializes
+  // every option to its default value.
+  StatCommand()
+      : Command("stat", "gather performance counter information",
+                // clang-format off
+"Usage: simpleperf stat [options] [command [command-args]]\n"
+"       Gather performance counter information of running [command].\n"
+"       And -a/-p/-t option can be used to change target of counter information.\n"
+"-a           Collect system-wide information.\n"
+"--cpu cpu_item1,cpu_item2,...\n"
+"                 Collect information only on the selected cpus. cpu_item can\n"
+"                 be a cpu number like 1, or a cpu range like 0-3.\n"
+"--csv            Write report in comma separate form.\n"
+"--duration time_in_sec  Monitor for time_in_sec seconds instead of running\n"
+"                        [command]. Here time_in_sec may be any positive\n"
+"                        floating point number.\n"
+"-e event1[:modifier1],event2[:modifier2],...\n"
+"                 Select the event list to count. Use `simpleperf list` to find\n"
+"                 all possible event names. Modifiers can be added to define\n"
+"                 how the event should be monitored. Possible modifiers are:\n"
+"                   u - monitor user space events only\n"
+"                   k - monitor kernel space events only\n"
+"--group event1[:modifier],event2[:modifier2],...\n"
+"             Similar to -e option. But events specified in the same --group\n"
+"             option are monitored as a group, and scheduled in and out at the\n"
+"             same time.\n"
+"--no-inherit     Don't stat created child threads/processes.\n"
+"-o output_filename  Write report to output_filename instead of standard output.\n"
+"-p pid1,pid2,... Stat events on existing processes. Mutually exclusive with -a.\n"
+"-t tid1,tid2,... Stat events on existing threads. Mutually exclusive with -a.\n"
+"--verbose        Show result in verbose mode.\n"
+                // clang-format on
+                ),
+        verbose_mode_(false),
+        system_wide_collection_(false),
+        child_inherit_(true),
+        duration_in_sec_(0),
+        csv_(false) {
+    // Die if parent exits.
+    prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
+  }
+
+  // Runs the command with the given arguments; returns false on any failure.
+  bool Run(const std::vector<std::string>& args);
+
+ private:
+  // Parses args into the option members below. Arguments that are not options
+  // are returned through non_option_args; Run() uses them as the workload
+  // command line.
+  bool ParseOptions(const std::vector<std::string>& args,
+                    std::vector<std::string>* non_option_args);
+  bool AddDefaultMeasuredEventTypes();
+  void SetEventSelectionFlags();
+  bool ShowCounters(const std::vector<CountersInfo>& counters,
+                    double duration_in_sec);
+
+  bool verbose_mode_;            // --verbose
+  bool system_wide_collection_;  // -a
+  bool child_inherit_;           // Cleared by --no-inherit.
+  double duration_in_sec_;       // --duration; 0 means no time limit was set.
+  // Threads to monitor. When empty and -a is not given, Run() adds the
+  // workload's pid.
+  std::vector<pid_t> monitored_threads_;
+  std::vector<int> cpus_;        // --cpu
+  EventSelectionSet event_selection_set_;
+  std::string output_filename_;  // -o
+  bool csv_;                     // --csv
+};
+
+// Runs `simpleperf stat`: parses the options, sets up the workload and the
+// perf event files, counts events until the workload ends, a signal arrives or
+// the requested duration elapses, then reads and shows the counters.
+// Returns false as soon as any step fails.
+bool StatCommand::Run(const std::vector<std::string>& args) {
+  if (!CheckPerfEventLimit()) {
+    return false;
+  }
+
+  // Step 1: parse options; fall back to the default measured event types when
+  // no event was selected.
+  std::vector<std::string> workload_args;
+  if (!ParseOptions(args, &workload_args)) {
+    return false;
+  }
+  if (event_selection_set_.empty() && !AddDefaultMeasuredEventTypes()) {
+    return false;
+  }
+  SetEventSelectionFlags();
+
+  // Step 2: create the workload if a command line was given.
+  std::unique_ptr<Workload> workload;
+  if (!workload_args.empty()) {
+    workload = Workload::CreateWorkload(workload_args);
+    if (workload == nullptr) {
+      return false;
+    }
+  }
+  const bool has_explicit_target =
+      system_wide_collection_ || !monitored_threads_.empty();
+  if (!has_explicit_target) {
+    // Without -a/-p/-t the workload itself is the only possible target.
+    if (workload == nullptr) {
+      LOG(ERROR)
+          << "No threads to monitor. Try `simpleperf help stat` for help\n";
+      return false;
+    }
+    monitored_threads_.push_back(workload->GetPid());
+    event_selection_set_.SetEnableOnExec(true);
+  }
+
+  // Step 3: open the perf event files.
+  if (system_wide_collection_) {
+    if (!event_selection_set_.OpenEventFilesForCpus(cpus_)) {
+      return false;
+    }
+  } else {
+    if (cpus_.empty()) {
+      cpus_ = {-1};
+    }
+    if (!event_selection_set_.OpenEventFilesForThreadsOnCpus(monitored_threads_,
+                                                             cpus_)) {
+      return false;
+    }
+  }
+
+  // Step 4: create the IOEventLoop; leave the loop on SIGCHLD/SIGINT/SIGTERM
+  // and, when a duration was requested, once that time has passed.
+  IOEventLoop loop;
+  auto exit_loop = [&loop]() { return loop.ExitLoop(); };
+  if (!loop.AddSignalEvents({SIGCHLD, SIGINT, SIGTERM}, exit_loop)) {
+    return false;
+  }
+  if (duration_in_sec_ != 0) {
+    if (!loop.AddPeriodicEvent(SecondToTimeval(duration_in_sec_), exit_loop)) {
+      return false;
+    }
+  }
+
+  // Step 5: count events while the loop is running.
+  const auto start_time = std::chrono::steady_clock::now();
+  if (workload != nullptr && !workload->Start()) {
+    return false;
+  }
+  if (!loop.RunLoop()) {
+    return false;
+  }
+  const auto end_time = std::chrono::steady_clock::now();
+
+  // Step 6: read the counters and print them with the measured duration.
+  std::vector<CountersInfo> counters;
+  if (!event_selection_set_.ReadCounters(&counters)) {
+    return false;
+  }
+  const auto elapsed = end_time - start_time;
+  double duration_in_sec =
+      std::chrono::duration_cast<std::chrono::duration<double>>(elapsed)
+          .count();
+  return ShowCounters(counters, duration_in_sec);
+}
+
+// Parses leading "-..." options into member state; the remaining arguments go
+// to non_option_args. Returns false on an invalid option, value or combination.
+bool StatCommand::ParseOptions(const std::vector<std::string>& args,
+                               std::vector<std::string>* non_option_args) {
+  std::set<pid_t> tid_set;
+  size_t i;
+  for (i = 0; i < args.size() && args[i].size() > 0 && args[i][0] == '-'; ++i) {
+    if (args[i] == "-a") {
+      system_wide_collection_ = true;
+    } else if (args[i] == "--cpu") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      cpus_ = GetCpusFromString(args[i]);
+    } else if (args[i] == "--csv") {
+      csv_ = true;
+    } else if (args[i] == "--duration") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      errno = 0;
+      char* endptr;
+      duration_in_sec_ = strtod(args[i].c_str(), &endptr);
+      // !(x > 0) rather than x <= 0, so that a "nan" duration is rejected too.
+      if (!(duration_in_sec_ > 0) || *endptr != '\0' || errno == ERANGE) {
+        LOG(ERROR) << "Invalid duration: " << args[i].c_str();
+        return false;
+      }
+    } else if (args[i] == "-e") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      std::vector<std::string> event_types = android::base::Split(args[i], ",");
+      for (auto& event_type : event_types) {
+        if (!event_selection_set_.AddEventType(event_type)) {
+          return false;
+        }
+      }
+    } else if (args[i] == "--group") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      std::vector<std::string> event_types = android::base::Split(args[i], ",");
+      if (!event_selection_set_.AddEventGroup(event_types)) {
+        return false;
+      }
+    } else if (args[i] == "--no-inherit") {
+      child_inherit_ = false;
+    } else if (args[i] == "-o") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      output_filename_ = args[i];
+    } else if (args[i] == "-p") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      if (!GetValidThreadsFromProcessString(args[i], &tid_set)) {
+        return false;
+      }
+    } else if (args[i] == "-t") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      if (!GetValidThreadsFromThreadString(args[i], &tid_set)) {
+        return false;
+      }
+    } else if (args[i] == "--verbose") {
+      verbose_mode_ = true;
+    } else {
+      ReportUnknownOption(args, i);
+      return false;
+    }
+  }
+
+  monitored_threads_.insert(monitored_threads_.end(), tid_set.begin(),
+                            tid_set.end());
+  if (system_wide_collection_ && !monitored_threads_.empty()) {
+    LOG(ERROR) << "Stat system wide and existing processes/threads can't be "
+                  "used at the same time.";
+    return false;
+  }
+  if (system_wide_collection_ && !IsRoot()) {
+    LOG(ERROR) << "System wide profiling needs root privilege.";
+    return false;
+  }
+
+  non_option_args->assign(args.begin() + i, args.end());  // i: first non-option
+  return true;
+}
+
+// Selects each default event type the kernel supports, skipping the others.
+bool StatCommand::AddDefaultMeasuredEventTypes() {
+  for (auto& type_name : default_measured_event_types) {
+    const EventType* type = FindEventTypeByName(type_name);
+    if (type == nullptr ||
+        !IsEventAttrSupportedByKernel(CreateDefaultPerfEventAttr(*type))) {
+      continue;
+    }
+    if (!event_selection_set_.AddEventType(type_name)) {
+      return false;
+    }
+  }
+  if (!event_selection_set_.empty()) {
+    return true;
+  }
+  LOG(ERROR) << "Failed to add any supported default measured types";
+  return false;
+}
+
+void StatCommand::SetEventSelectionFlags() {
+  event_selection_set_.SetInherit(child_inherit_);  // false after --no-inherit
+}
+// Prints raw counters (verbose mode only) and per-event summaries to -o or stdout.
+bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters,
+                               double duration_in_sec) {
+  std::unique_ptr<FILE, decltype(&fclose)> fp_holder(nullptr, fclose);
+  FILE* fp = stdout;
+  if (!output_filename_.empty()) {
+    fp_holder.reset(fopen(output_filename_.c_str(), "w"));
+    if (fp_holder == nullptr) {
+      PLOG(ERROR) << "failed to open " << output_filename_;
+      return false;
+    }
+    fp = fp_holder.get();
+  }
+  if (csv_) {
+    fprintf(fp, "Performance counter statistics,\n");
+  } else {
+    fprintf(fp, "Performance counter statistics:\n\n");
+  }
+  // Verbose mode: list every raw counter before the summaries.
+  if (verbose_mode_) {
+    for (auto& counters_info : counters) {
+      const EventTypeAndModifier& event_type =
+          counters_info.selection->event_type_modifier;
+      for (auto& counter_info : counters_info.counters) {
+        if (csv_) {
+          fprintf(fp, "%s,tid,%d,cpu,%d,count,%" PRIu64 ",time_enabled,%" PRIu64
+                      ",time running,%" PRIu64 ",id,%" PRIu64 ",\n",
+                  event_type.name.c_str(), counter_info.tid, counter_info.cpu,
+                  counter_info.counter.value, counter_info.counter.time_enabled,
+                  counter_info.counter.time_running, counter_info.counter.id);
+        } else {
+          fprintf(fp,
+                  "%s(tid %d, cpu %d): count %" PRIu64 ", time_enabled %" PRIu64
+                  ", time running %" PRIu64 ", id %" PRIu64 "\n",
+                  event_type.name.c_str(), counter_info.tid, counter_info.cpu,
+                  counter_info.counter.value, counter_info.counter.time_enabled,
+                  counter_info.counter.time_running, counter_info.counter.id);
+        }
+      }
+    }
+  }
+  // Fold each selection's counters into one value plus a scale factor.
+  CounterSummaries summaries(csv_);
+  for (auto& counters_info : counters) {
+    uint64_t value_sum = 0;
+    uint64_t time_enabled_sum = 0;
+    uint64_t time_running_sum = 0;
+    for (auto& counter_info : counters_info.counters) {
+      // If time_running is 0, the program has never run on this event and we
+      // shouldn't summarize it.
+      if (counter_info.counter.time_running != 0) {
+        value_sum += counter_info.counter.value;
+        time_enabled_sum += counter_info.counter.time_enabled;
+        time_running_sum += counter_info.counter.time_running;
+      }
+    }
+    double scale = 1.0;
+    if (time_running_sum < time_enabled_sum && time_running_sum != 0) {
+      scale = static_cast<double>(time_enabled_sum) / time_running_sum;
+    }
+    summaries.AddSummary(CounterSummary(
+        counters_info.selection->event_type_modifier.event_type.name,
+        counters_info.selection->event_type_modifier.modifier,
+        counters_info.selection->group_id, value_sum, scale, false, csv_));
+  }
+  summaries.AutoGenerateSummaries();
+  summaries.GenerateComments(duration_in_sec);
+  summaries.Show(fp);
+
+  if (csv_)
+    fprintf(fp, "Total test time,%lf,seconds,\n", duration_in_sec);
+  else
+    fprintf(fp, "\nTotal test time: %lf seconds.\n", duration_in_sec);
+  return true;
+}
+
+}  // namespace
+
+void RegisterStatCommand() {
+  auto make_stat = [] { return std::unique_ptr<Command>(new StatCommand); };
+  RegisterCommand("stat", make_stat);  // the factory builds a new StatCommand
+}
diff --git a/simpleperf/cmd_stat_test.cpp b/simpleperf/cmd_stat_test.cpp
new file mode 100644
index 0000000..810cfd7
--- /dev/null
+++ b/simpleperf/cmd_stat_test.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <android-base/file.h>
+#include <android-base/stringprintf.h>
+#include <android-base/test_utils.h>
+
+#include "command.h"
+#include "get_test_data.h"
+#include "test_util.h"
+
+static std::unique_ptr<Command> StatCmd() {
+  return CreateCommandInstance("stat");  // null if "stat" is not registered
+}
+
+TEST(stat_cmd, no_options) { ASSERT_TRUE(StatCmd()->Run({"sleep", "1"})); }
+
+TEST(stat_cmd, event_option) {  // -e takes a comma-separated list of events
+  ASSERT_TRUE(StatCmd()->Run({"-e", "cpu-clock,task-clock", "sleep", "1"}));
+}
+
+TEST(stat_cmd, system_wide_option) {  // -a is rejected unless running as root
+  TEST_IN_ROOT(ASSERT_TRUE(StatCmd()->Run({"-a", "sleep", "1"})));
+}
+
+TEST(stat_cmd, verbose_option) {  // --verbose also prints each raw counter
+  ASSERT_TRUE(StatCmd()->Run({"--verbose", "sleep", "1"}));
+}
+
+TEST(stat_cmd, tracepoint_event) {  // uses -a, so it needs root as well
+  TEST_IN_ROOT(ASSERT_TRUE(
+      StatCmd()->Run({"-a", "-e", "sched:sched_switch", "sleep", "1"})));
+}
+
+TEST(stat_cmd, event_modifier) {  // event names may carry a :u or :k suffix
+  ASSERT_TRUE(
+      StatCmd()->Run({"-e", "cpu-cycles:u,cpu-cycles:k", "sleep", "1"}));
+}
+
+void CreateProcesses(size_t count,
+                     std::vector<std::unique_ptr<Workload>>* workloads) {
+  workloads->clear();
+  while (workloads->size() < count) {  // one started "sleep 1" child per slot
+    auto child = Workload::CreateWorkload({"sleep", "1"});
+    ASSERT_TRUE(child != nullptr);
+    ASSERT_TRUE(child->Start());
+    workloads->push_back(std::move(child));
+  }
+}
+
+TEST(stat_cmd, existing_processes) {
+  std::vector<std::unique_ptr<Workload>> workloads;
+  ASSERT_NO_FATAL_FAILURE(CreateProcesses(2, &workloads));  // guards [0], [1]
+  std::string pid_list = android::base::StringPrintf(
+      "%d,%d", workloads[0]->GetPid(), workloads[1]->GetPid());
+  ASSERT_TRUE(StatCmd()->Run({"-p", pid_list}));
+}
+
+TEST(stat_cmd, existing_threads) {
+  std::vector<std::unique_ptr<Workload>> workloads;
+  ASSERT_NO_FATAL_FAILURE(CreateProcesses(2, &workloads));  // guards [0], [1]
+  // Process id can be used as thread id in linux.
+  std::string tid_list = android::base::StringPrintf(
+      "%d,%d", workloads[0]->GetPid(), workloads[1]->GetPid());
+  ASSERT_TRUE(StatCmd()->Run({"-t", tid_list}));
+}
+
+TEST(stat_cmd, no_monitored_threads) { ASSERT_FALSE(StatCmd()->Run({""})); }
+
+TEST(stat_cmd, group_option) {  // one --group, then three groups in one run
+  ASSERT_TRUE(
+      StatCmd()->Run({"--group", "cpu-cycles,cpu-clock", "sleep", "1"}));
+  ASSERT_TRUE(StatCmd()->Run({"--group", "cpu-cycles,cpu-clock", "--group",
+                              "cpu-cycles:u,cpu-clock:u", "--group",
+                              "cpu-cycles:k,cpu-clock:k", "sleep", "1"}));
+}
+
+TEST(stat_cmd, auto_generated_summary) {
+  TemporaryFile out_file;
+  ASSERT_TRUE(StatCmd()->Run({"--group", "cpu-clock:u,cpu-clock:k", "-o",
+                              out_file.path, "sleep", "1"}));
+  std::string report;
+  ASSERT_TRUE(android::base::ReadFileToString(out_file.path, &report));
+  const size_t user_pos = report.find("cpu-clock:u");
+  ASSERT_NE(std::string::npos, user_pos);
+  const size_t kernel_pos = report.find("cpu-clock:k", user_pos);
+  ASSERT_NE(std::string::npos, kernel_pos);
+  // A "cpu-clock" after both group members is the auto-generated summary.
+  ASSERT_NE(std::string::npos,
+            report.find("cpu-clock", kernel_pos + strlen("cpu-clock:k")));
+}
+
+TEST(stat_cmd, duration_option) {  // attach to self, then cap a longer sleep
+  ASSERT_TRUE(
+      StatCmd()->Run({"--duration", "1.2", "-p", std::to_string(getpid())}));
+  ASSERT_TRUE(StatCmd()->Run({"--duration", "1", "sleep", "2"}));
+}
diff --git a/simpleperf/command.cpp b/simpleperf/command.cpp
new file mode 100644
index 0000000..f5d4e8b
--- /dev/null
+++ b/simpleperf/command.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command.h"
+
+#include <algorithm>
+#include <map>
+#include <string>
+#include <vector>
+
+#include <android-base/logging.h>
+
+bool Command::NextArgumentOrError(const std::vector<std::string>& args, size_t* pi) {
+  if (*pi + 1 != args.size()) {  // the option is followed by another argument
+    ++*pi;
+    return true;
+  }
+  LOG(ERROR) << "No argument following " << args[*pi] << " option. Try `simpleperf help " << name_
+             << "`";
+  return false;
+}
+
+void Command::ReportUnknownOption(const std::vector<std::string>& args, size_t i) {
+  LOG(ERROR) << "Unknown option for " << name_ << " command: '" << args[i]
+             << "'. Try `simpleperf help " << name_ << "`";  // only logs, returns nothing
+}
+
+typedef std::function<std::unique_ptr<Command>(void)> callback_t;
+
+static std::map<std::string, callback_t>& CommandMap() {
+  // The map is filled during static initialization (see CommandRegister). A function-local
+  // static is constructed on first use, so it always exists before the first insertion.
+  static std::map<std::string, callback_t> command_map;
+  return command_map;
+}
+
+// Stores a factory under cmd_name; a name that is already registered is left untouched.
+void RegisterCommand(const std::string& cmd_name, const callback_t& callback) {
+  CommandMap().emplace(cmd_name, callback);
+}
+
+void UnRegisterCommand(const std::string& cmd_name) {
+  CommandMap().erase(cmd_name);  // no-op when cmd_name is not registered
+}
+
+std::unique_ptr<Command> CreateCommandInstance(const std::string& cmd_name) {
+  auto it = CommandMap().find(cmd_name);
+  return (it == CommandMap().end()) ? nullptr : (it->second)();  // nullptr if unregistered
+}
+
+const std::vector<std::string> GetAllCommandNames() {
+  std::vector<std::string> names;  // filled in the map's (sorted) key order
+  for (const auto& entry : CommandMap()) {  // by reference: no per-entry copies
+    names.push_back(entry.first);
+  }
+  return names;
+}
+
+extern void RegisterDumpRecordCommand();
+extern void RegisterHelpCommand();
+extern void RegisterListCommand();
+extern void RegisterKmemCommand();
+extern void RegisterRecordCommand();
+extern void RegisterReportCommand();
+extern void RegisterReportSampleCommand();
+extern void RegisterStatCommand();
+
+class CommandRegister {  // its constructor registers the built-in commands
+ public:
+  CommandRegister() {
+    RegisterDumpRecordCommand();
+    RegisterHelpCommand();
+    RegisterKmemCommand();
+    RegisterReportCommand();
+    RegisterReportSampleCommand();
+#if defined(__linux__)  // list, record and stat are only registered on Linux
+    RegisterListCommand();
+    RegisterRecordCommand();
+    RegisterStatCommand();
+#endif
+  }
+};
+
+CommandRegister command_register;  // constructed during static initialization
diff --git a/simpleperf/command.h b/simpleperf/command.h
new file mode 100644
index 0000000..2311f33
--- /dev/null
+++ b/simpleperf/command.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_COMMAND_H_
+#define SIMPLE_PERF_COMMAND_H_
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <android-base/macros.h>
+
+// Base class of every simpleperf sub-command: stores the command's name and
+// help texts and declares the Run() entry point that subclasses implement.
+class Command {
+ public:
+  Command(const std::string& name, const std::string& short_help_string,
+          const std::string& long_help_string)
+      : name_(name),
+        short_help_string_(short_help_string),
+        long_help_string_(long_help_string) {}
+
+  virtual ~Command() {}
+
+  // Accessors for the strings passed to the constructor.
+  const std::string& Name() const { return name_; }
+  const std::string& ShortHelpString() const { return short_help_string_; }
+  // Unlike the two accessors above, this one returns a copy.
+  const std::string LongHelpString() const { return long_help_string_; }
+
+  // Runs the command with the given arguments; returns true on success.
+  virtual bool Run(const std::vector<std::string>& args) = 0;
+
+ protected:
+  // Moves *pi to the argument following option args[*pi]; logs an error and
+  // returns false when there is none.
+  bool NextArgumentOrError(const std::vector<std::string>& args, size_t* pi);
+  // Logs that args[i] is not an option this command understands.
+  void ReportUnknownOption(const std::vector<std::string>& args, size_t i);
+
+ private:
+  const std::string name_;
+  const std::string short_help_string_;
+  const std::string long_help_string_;
+
+  DISALLOW_COPY_AND_ASSIGN(Command);
+};
+
+void RegisterCommand(const std::string& cmd_name,  // callback creates the command object
+                     const std::function<std::unique_ptr<Command>(void)>& callback);
+void UnRegisterCommand(const std::string& cmd_name);  // no-op for unknown names
+std::unique_ptr<Command> CreateCommandInstance(const std::string& cmd_name);  // nullptr if unknown
+const std::vector<std::string> GetAllCommandNames();  // every registered name
+
+#endif  // SIMPLE_PERF_COMMAND_H_
diff --git a/simpleperf/command_test.cpp b/simpleperf/command_test.cpp
new file mode 100644
index 0000000..18cb569
--- /dev/null
+++ b/simpleperf/command_test.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "command.h"
+
+// Trivial Command used to exercise the command registry in the tests below.
+// It carries fixed name/help strings and does no work.
+class MockCommand : public Command {
+ public:
+  MockCommand() : Command("mock", "mock_short_help", "mock_long_help") {}
+
+  // Ignores its arguments and always reports success.
+  bool Run(const std::vector<std::string>&) override { return true; }
+};
+
+TEST(command, CreateCommandInstance) {
+  ASSERT_TRUE(CreateCommandInstance("mock1") == nullptr);  // not registered yet
+  RegisterCommand("mock1", [] { return std::unique_ptr<Command>(new MockCommand); });
+  ASSERT_TRUE(CreateCommandInstance("mock1") != nullptr);
+  UnRegisterCommand("mock1");
+  ASSERT_TRUE(CreateCommandInstance("mock1") == nullptr);  // gone after unregistering
+}
+
+TEST(command, GetAllCommands) {
+  size_t command_count = GetAllCommandNames().size();  // names registered before this test
+  RegisterCommand("mock1", [] { return std::unique_ptr<Command>(new MockCommand); });
+  ASSERT_EQ(command_count + 1, GetAllCommandNames().size());
+  UnRegisterCommand("mock1");
+  ASSERT_EQ(command_count, GetAllCommandNames().size());  // back to the starting count
+}
diff --git a/simpleperf/cpu_hotplug_test.cpp b/simpleperf/cpu_hotplug_test.cpp
new file mode 100644
index 0000000..56962b1
--- /dev/null
+++ b/simpleperf/cpu_hotplug_test.cpp
@@ -0,0 +1,326 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <sys/stat.h>
+#include <unistd.h>
+#if defined(__BIONIC__)
+#include <sys/system_properties.h>
+#endif
+
+#include <atomic>
+#include <chrono>
+#include <thread>
+#include <unordered_map>
+
+#include <android-base/file.h>
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
+
+#include "event_attr.h"
+#include "event_fd.h"
+#include "event_type.h"
+#include "utils.h"
+
+#if defined(__BIONIC__)
+class ScopedMpdecisionKiller {  // stops the "mpdecision" service while in scope
+ public:
+  ScopedMpdecisionKiller() {
+    have_mpdecision_ = IsMpdecisionRunning();
+    if (have_mpdecision_) {
+      DisableMpdecision();
+    }
+  }
+
+  ~ScopedMpdecisionKiller() {  // restarts the service only if the constructor stopped it
+    if (have_mpdecision_) {
+      EnableMpdecision();
+    }
+  }
+
+ private:
+  bool IsMpdecisionRunning() {
+    char value[PROP_VALUE_MAX];
+    int len = __system_property_get("init.svc.mpdecision", value);
+    if (len == 0 || (len > 0 && strstr(value, "stopped") != nullptr)) {  // unset or stopped
+      return false;
+    }
+    return true;
+  }
+
+  void DisableMpdecision() {
+    int ret = __system_property_set("ctl.stop", "mpdecision");
+    CHECK_EQ(0, ret);
+    // Need to wait until mpdecision is actually stopped.
+    usleep(500000);
+    CHECK(!IsMpdecisionRunning());
+  }
+
+  void EnableMpdecision() {
+    int ret = __system_property_set("ctl.start", "mpdecision");
+    CHECK_EQ(0, ret);
+    usleep(500000);  // same fixed wait as in DisableMpdecision()
+    CHECK(IsMpdecisionRunning());
+  }
+
+  bool have_mpdecision_;  // whether mpdecision was running at construction
+};
+#else
+class ScopedMpdecisionKiller {  // no-op stand-in when not built against bionic
+ public:
+  ScopedMpdecisionKiller() {
+  }
+};
+#endif
+
+// Reads cpu<N>'s sysfs "online" file; *has_error tells whether the read failed.
+static bool IsCpuOnline(int cpu, bool* has_error) {
+  const std::string online_path =
+      android::base::StringPrintf("/sys/devices/system/cpu/cpu%d/online", cpu);
+  std::string state;
+  *has_error = !android::base::ReadFileToString(online_path, &state);
+  if (*has_error) {
+    PLOG(ERROR) << "failed to read file " << online_path;
+    return false;
+  }
+  return state.find('1') != std::string::npos;
+}
+
+// Writes cpu<N>'s sysfs "online" file to bring the cpu online or offline, then polls
+// (up to 10000 times, 1 ms apart) until the state read back matches. Returns true
+// once the cpu is in the requested state.
+static bool SetCpuOnline(int cpu, bool online) {
+  bool has_error;
+  bool ret = IsCpuOnline(cpu, &has_error);
+  if (has_error || ret == online) {
+    return !has_error;  // Already in the requested state, or the state is unreadable.
+  }
+  std::string filename = android::base::StringPrintf("/sys/devices/system/cpu/cpu%d/online", cpu);
+  std::string content = online ? "1" : "0";
+  ret = android::base::WriteStringToFile(content, filename);
+  if (!ret) {
+    // IsCpuOnline() below may clobber errno; keep the write's value for PLOG.
+    const int write_errno = errno;
+    ret = IsCpuOnline(cpu, &has_error);
+    if (has_error || ret == online) {
+      return !has_error;  // A failed write is fine if the state is right anyway.
+    }
+    errno = write_errno;
+    PLOG(ERROR) << "failed to write " << content << " to " << filename;
+    return false;
+  }
+  // Kernel needs time to offline/online cpus, so use a loop to wait here.
+  size_t retry_count = 0;
+  while (true) {
+    ret = IsCpuOnline(cpu, &has_error);
+    if (has_error) {
+      return false;
+    }
+    if (ret == online) {
+      break;
+    }
+    LOG(ERROR) << "reading cpu retry count = " << retry_count << ", requested = " << online
+        << ", real = " << ret;
+    if (++retry_count == 10000) {
+      LOG(ERROR) << "setting cpu " << cpu << (online ? " online" : " offline") << " seems not to take effect";
+      return false;
+    }
+    usleep(1000);
+  }
+  return true;
+}
+
+static int GetCpuCount() {
+  return static_cast<int>(sysconf(_SC_NPROCESSORS_CONF));  // configured cpus, online or not
+}
+
+// Remembers the online state of cpus 1..N-1 at construction and restores it at destruction.
+class CpuOnlineRestorer {
+ public:
+  CpuOnlineRestorer() {
+    for (int cpu = 1; cpu < GetCpuCount(); ++cpu) {
+      bool read_failed;
+      const bool was_online = IsCpuOnline(cpu, &read_failed);
+      if (!read_failed) {
+        online_map_[cpu] = was_online;
+      }
+    }
+  }
+
+  ~CpuOnlineRestorer() {
+    for (const auto& saved : online_map_) {
+      SetCpuOnline(saved.first, saved.second);
+    }
+  }
+
+ private:
+  std::unordered_map<int, bool> online_map_;
+};
+
+// Finds the first cpu (cpu 0 is never tried) whose online state can be flipped; needs root.
+bool FindAHotpluggableCpu(int* hotpluggable_cpu) {
+  if (!IsRoot()) {
+    GTEST_LOG_(INFO) << "This test needs root privilege to hotplug cpu.";
+    return false;
+  }
+  for (int candidate = 1; candidate < GetCpuCount(); ++candidate) {
+    bool read_failed;
+    const bool was_online = IsCpuOnline(candidate, &read_failed);
+    // Cpus whose state cannot be read are skipped. The probe really toggles the cpu
+    // and, on success, leaves it in the flipped state.
+    if (!read_failed && SetCpuOnline(candidate, !was_online)) {
+      *hotpluggable_cpu = candidate;
+      return true;
+    }
+  }
+  GTEST_LOG_(INFO) << "There is no hotpluggable cpu.";
+  return false;
+}
+
+struct CpuToggleThreadArg {
+  int toggle_cpu;              // cpu that CpuToggleThread keeps switching on and off
+  std::atomic<bool> end_flag;  // set to true by the test to make the thread exit
+};
+
+static void CpuToggleThread(CpuToggleThreadArg* arg) {
+  while (!arg->end_flag) {  // checked once per online/offline round trip
+    CHECK(SetCpuOnline(arg->toggle_cpu, true));
+    CHECK(SetCpuOnline(arg->toggle_cpu, false));  // so the thread exits with the cpu offline
+  }
+}
+
+// http://b/25193162.
+TEST(cpu_offline, offline_while_recording) {
+  ScopedMpdecisionKiller scoped_mpdecision_killer;
+  CpuOnlineRestorer cpuonline_restorer;
+  if (GetCpuCount() == 1) {
+    GTEST_LOG_(INFO) << "This test does nothing, because there is only one cpu in the system.";
+    return;
+  }
+  int test_cpu;
+  if (!FindAHotpluggableCpu(&test_cpu)) {
+    return;
+  }
+  // Assert before starting the thread: returning while it is joinable calls std::terminate().
+  std::unique_ptr<EventTypeAndModifier> event_type_modifier = ParseEventType("cpu-cycles");
+  ASSERT_TRUE(event_type_modifier != nullptr);
+  perf_event_attr attr = CreateDefaultPerfEventAttr(event_type_modifier->event_type);
+  attr.disabled = 0;
+  attr.enable_on_exec = 0;
+  // Start cpu hotplugger.
+  CpuToggleThreadArg cpu_toggle_arg;
+  cpu_toggle_arg.toggle_cpu = test_cpu;
+  cpu_toggle_arg.end_flag = false;
+  std::thread cpu_toggle_thread(CpuToggleThread, &cpu_toggle_arg);
+
+  const std::chrono::minutes test_duration(2);  // Test for 2 minutes.
+  auto end_time = std::chrono::steady_clock::now() + test_duration;
+  size_t iterations = 0;
+
+  while (std::chrono::steady_clock::now() < end_time) {
+    std::unique_ptr<EventFd> event_fd = EventFd::OpenEventFile(attr, -1, test_cpu, nullptr, false);
+    if (event_fd == nullptr) {
+      // Failed to open because the test_cpu is offline.
+      continue;
+    }
+    iterations++;
+    GTEST_LOG_(INFO) << "Test offline while recording for " << iterations << " times.";
+  }
+  cpu_toggle_arg.end_flag = true;
+  cpu_toggle_thread.join();
+}
+
+// http://b/25193162.
+TEST(cpu_offline, offline_while_ioctl_enable) {
+  ScopedMpdecisionKiller scoped_mpdecision_killer;
+  CpuOnlineRestorer cpuonline_restorer;
+  if (GetCpuCount() == 1) {
+    GTEST_LOG_(INFO) << "This test does nothing, because there is only one cpu in the system.";
+    return;
+  }
+  int test_cpu;
+  if (!FindAHotpluggableCpu(&test_cpu)) {
+    return;
+  }
+  // Assert before starting the thread: returning while it is joinable calls std::terminate().
+  std::unique_ptr<EventTypeAndModifier> event_type_modifier = ParseEventType("cpu-cycles");
+  ASSERT_TRUE(event_type_modifier != nullptr);
+  perf_event_attr attr = CreateDefaultPerfEventAttr(event_type_modifier->event_type);
+  attr.disabled = 1;
+  attr.enable_on_exec = 0;
+  CpuToggleThreadArg cpu_toggle_arg;
+  cpu_toggle_arg.toggle_cpu = test_cpu;
+  cpu_toggle_arg.end_flag = false;
+  std::thread cpu_toggle_thread(CpuToggleThread, &cpu_toggle_arg);  // Start cpu hotplugger.
+
+  const std::chrono::minutes test_duration(2);  // Test for 2 minutes.
+  auto end_time = std::chrono::steady_clock::now() + test_duration;
+  size_t iterations = 0;
+  while (std::chrono::steady_clock::now() < end_time) {
+    std::unique_ptr<EventFd> event_fd = EventFd::OpenEventFile(attr, -1, test_cpu, nullptr, false);
+    if (event_fd == nullptr) {
+      continue;  // Failed to open because the test_cpu is offline.
+    }
+    // Wait a little for the event to be installed on test_cpu's perf context.
+    usleep(1000);
+    if (!event_fd->EnableEvent()) {
+      ADD_FAILURE() << "EnableEvent() failed";
+      break;  // Fall through to the join below instead of returning early.
+    }
+    iterations++;
+    GTEST_LOG_(INFO) << "Test offline while ioctl(PERF_EVENT_IOC_ENABLE) for " << iterations << " times.";
+  }
+  cpu_toggle_arg.end_flag = true;
+  cpu_toggle_thread.join();
+}
+
+// http://b/19863147.
+TEST(cpu_offline, offline_while_recording_on_another_cpu) {
+  ScopedMpdecisionKiller scoped_mpdecision_killer;
+  CpuOnlineRestorer cpuonline_restorer;
+  if (GetCpuCount() == 1) {
+    GTEST_LOG_(INFO) << "This test does nothing, because there is only one cpu in the system.";
+    return;
+  }
+  int test_cpu;
+  if (!FindAHotpluggableCpu(&test_cpu)) {
+    return;
+  }
+  std::unique_ptr<EventTypeAndModifier> event_type_modifier = ParseEventType("cpu-cycles");
+  ASSERT_TRUE(event_type_modifier != nullptr);  // Dereferenced on the next line.
+  perf_event_attr attr = CreateDefaultPerfEventAttr(event_type_modifier->event_type);
+  attr.disabled = 0;
+  attr.enable_on_exec = 0;
+
+  const size_t TEST_ITERATION_COUNT = 10u;
+  for (size_t i = 0; i < TEST_ITERATION_COUNT; ++i) {
+    int record_cpu = 0;
+    ASSERT_TRUE(SetCpuOnline(test_cpu, true));
+    std::unique_ptr<EventFd> event_fd = EventFd::OpenEventFile(attr, getpid(), record_cpu, nullptr);
+    ASSERT_TRUE(event_fd != nullptr);
+    ASSERT_TRUE(SetCpuOnline(test_cpu, false));
+    event_fd = nullptr;
+    event_fd = EventFd::OpenEventFile(attr, getpid(), record_cpu, nullptr);
+    ASSERT_TRUE(event_fd != nullptr);
+  }
+}
+
+int main(int argc, char** argv) {
+  InitLogging(argv, android::base::StderrLogger);  // LOG()/PLOG() output goes to stderr
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/simpleperf/dso.cpp b/simpleperf/dso.cpp
new file mode 100644
index 0000000..69049e2
--- /dev/null
+++ b/simpleperf/dso.cpp
@@ -0,0 +1,424 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dso.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <limits>
+#include <vector>
+
+#include <android-base/file.h>
+#include <android-base/logging.h>
+
+#include "environment.h"
+#include "read_apk.h"
+#include "read_elf.h"
+#include "utils.h"
+
+static OneTimeFreeAllocator symbol_name_allocator;
+
+// Create a symbol covering [addr, addr + len). The name is copied into
+// symbol_name_allocator; the demangled name is left unset until
+// DemangledName() is first called.
+Symbol::Symbol(const std::string& name, uint64_t addr, uint64_t len)
+    : addr(addr),
+      len(len),
+      name_(symbol_name_allocator.AllocateString(name)),
+      demangled_name_(nullptr),
+      has_dumped_(false) {}
+
+// Return the demangled name, computing and caching it on first use. When
+// demangling leaves the name unchanged, the cached pointer aliases name_
+// instead of allocating a second copy.
+const char* Symbol::DemangledName() const {
+  if (demangled_name_ != nullptr) {
+    return demangled_name_;
+  }
+  const std::string demangled = Dso::Demangle(name_);
+  if (demangled == name_) {
+    demangled_name_ = name_;
+    return demangled_name_;
+  }
+  demangled_name_ = symbol_name_allocator.AllocateString(demangled);
+  return demangled_name_;
+}
+
+// Settings shared by all Dso instances. They are reset to these initial
+// values by ~Dso() when the last Dso is destroyed.
+bool Dso::demangle_ = true;
+std::string Dso::symfs_dir_;
+std::string Dso::vmlinux_;
+std::string Dso::kallsyms_;
+std::unordered_map<std::string, BuildId> Dso::build_id_map_;
+// Number of live Dso objects: incremented in the constructor, decremented in
+// the destructor.
+size_t Dso::dso_count_;
+
+// Turn demangling on or off for later Dso::Demangle() calls.
+void Dso::SetDemangle(bool demangle) { demangle_ = demangle; }
+
+// Declared by hand rather than via a header. Dso::Demangle() below calls it
+// with null buf/n, checks *status == 0 for success, and free()s the result.
+extern "C" char* __cxa_demangle(const char* mangled_name, char* buf, size_t* n,
+                                int* status);
+
+// Demangle a symbol name.
+// Returns `name` unchanged when demangling is disabled, or when demangling
+// fails for a name without the linker prefix. Names starting with
+// linker_prefix have that prefix replaced by "[linker]", followed by the
+// demangled remainder (or the raw remainder if it can't be demangled).
+std::string Dso::Demangle(const std::string& name) {
+  if (!demangle_) {
+    return name;
+  }
+  const bool has_linker_prefix = (name.find(linker_prefix) == 0);
+  const char* mangled = name.c_str();
+  if (has_linker_prefix) {
+    mangled += linker_prefix.size();
+  }
+
+  int status;
+  char* demangled = __cxa_demangle(mangled, nullptr, nullptr, &status);
+  if (status != 0) {
+    // Not demanglable: still rewrite the linker prefix if there is one.
+    if (has_linker_prefix) {
+      return std::string("[linker]") + mangled;
+    }
+    return name;
+  }
+
+  std::string result;
+  if (has_linker_prefix) {
+    result = std::string("[linker]") + demangled;
+  } else {
+    result = demangled;
+  }
+  // The buffer returned on success is owned by us.
+  free(demangled);
+  return result;
+}
+
+// Set the directory prepended to dso paths when looking for debug files.
+// An empty string clears the setting. A non-empty directory must contain at
+// least one entry, otherwise an error is logged, false is returned and the
+// current setting is kept. The stored value always ends with '/'.
+bool Dso::SetSymFsDir(const std::string& symfs_dir) {
+  if (symfs_dir.empty()) {
+    symfs_dir_.clear();
+    return true;
+  }
+  if (GetEntriesInDir(symfs_dir).empty()) {
+    LOG(ERROR) << "Invalid symfs_dir '" << symfs_dir << "'";
+    return false;
+  }
+  std::string normalized = symfs_dir;
+  if (normalized.back() != '/') {
+    normalized.push_back('/');
+  }
+  symfs_dir_ = normalized;
+  return true;
+}
+
+// Set the vmlinux file that LoadKernel() reads kernel symbols from; when it is
+// non-empty it takes priority over kallsyms.
+void Dso::SetVmlinux(const std::string& vmlinux) { vmlinux_ = vmlinux; }
+
+// Replace the path -> expected build id table with the given pairs. If a path
+// appears more than once, the first occurrence is kept.
+void Dso::SetBuildIds(
+    const std::vector<std::pair<std::string, BuildId>>& build_ids) {
+  std::unordered_map<std::string, BuildId> new_map;
+  for (size_t i = 0; i < build_ids.size(); ++i) {
+    const std::pair<std::string, BuildId>& entry = build_ids[i];
+    LOG(DEBUG) << "build_id_map: " << entry.first << ", "
+               << entry.second.ToString();
+    new_map.insert(entry);
+  }
+  build_id_map_ = std::move(new_map);
+}
+
+// Look up the build id registered for path_ via SetBuildIds(); returns a
+// default-constructed BuildId when none is registered.
+BuildId Dso::GetExpectedBuildId() {
+  const auto found = build_id_map_.find(path_);
+  if (found == build_id_map_.end()) {
+    return BuildId();
+  }
+  return found->second;
+}
+
+// Factory for Dso objects. Each created Dso gets the next value of a
+// function-local counter as its id, starting from 1.
+std::unique_ptr<Dso> Dso::CreateDso(DsoType dso_type,
+                                    const std::string& dso_path) {
+  static uint64_t last_id = 0;
+  last_id += 1;
+  std::unique_ptr<Dso> dso(new Dso(dso_type, last_id, dso_path));
+  return dso;
+}
+
+// Construct a Dso for `path`.
+// debug_file_path_ starts as `path` and is switched to the copy under
+// symfs_dir_ only if that copy exists as a regular file (for a path pointing
+// inside an apk, the apk file itself is checked). file_name_ is the part of
+// `path` after the last '/' or '\\'.
+Dso::Dso(DsoType type, uint64_t id, const std::string& path)
+    : type_(type),
+      id_(id),
+      path_(path),
+      debug_file_path_(path),
+      min_vaddr_(std::numeric_limits<uint64_t>::max()),
+      is_loaded_(false),
+      has_dumped_(false) {
+  if (!symfs_dir_.empty()) {
+    const std::string path_in_symfs = symfs_dir_ + path_;
+    const auto apk_split = SplitUrlInApk(path_in_symfs);
+    std::string file_to_check = path_in_symfs;
+    if (std::get<0>(apk_split)) {
+      file_to_check = std::get<1>(apk_split);
+    }
+    if (IsRegularFile(file_to_check)) {
+      debug_file_path_ = path_in_symfs;
+    }
+  }
+
+  const size_t last_sep = path.find_last_of("/\\");
+  if (last_sep == std::string::npos) {
+    file_name_ = path;
+  } else {
+    file_name_ = path.substr(last_sep + 1);
+  }
+  dso_count_++;
+}
+
+// When the last live Dso goes away, release all symbol name storage and put
+// the shared static settings back to their initial values.
+Dso::~Dso() {
+  --dso_count_;
+  if (dso_count_ != 0) {
+    return;
+  }
+  symbol_name_allocator.Clear();
+  demangle_ = true;
+  symfs_dir_.clear();
+  vmlinux_.clear();
+  kallsyms_.clear();
+  build_id_map_.clear();
+}
+
+// Find the symbol whose [addr, addr + len) range contains vaddr_in_dso.
+// Symbols are loaded lazily on the first call, unless some were already
+// inserted through InsertSymbol(). Loading is attempted only once.
+// Returns nullptr if no symbol contains the address.
+const Symbol* Dso::FindSymbol(uint64_t vaddr_in_dso) {
+  if (!is_loaded_) {
+    is_loaded_ = true;
+    if (symbols_.empty() && !Load()) {
+      LOG(DEBUG) << "failed to load dso: " << path_;
+      return nullptr;
+    }
+  }
+  if (symbols_.empty()) {
+    return nullptr;
+  }
+
+  // The candidate is the last symbol starting at or before the address, i.e.
+  // the one just before the first symbol starting after it.
+  auto candidate = symbols_.upper_bound(Symbol("", vaddr_in_dso, 0));
+  if (candidate == symbols_.begin()) {
+    return nullptr;
+  }
+  --candidate;
+  const bool contains = candidate->addr <= vaddr_in_dso &&
+                        candidate->addr + candidate->len > vaddr_in_dso;
+  return contains ? &*candidate : nullptr;
+}
+
+// Return the cached minimum executable virtual address, computing it on first
+// use (uint64_t max marks "not computed yet"). Only ELF-file dsos are read;
+// every other type, and any read failure, yields 0.
+uint64_t Dso::MinVirtualAddress() {
+  if (min_vaddr_ != std::numeric_limits<uint64_t>::max()) {
+    return min_vaddr_;
+  }
+  min_vaddr_ = 0;
+  if (type_ != DSO_ELF_FILE) {
+    return min_vaddr_;
+  }
+
+  BuildId expected_build_id = GetExpectedBuildId();
+  uint64_t vaddr;
+  ElfStatus status = ReadMinExecutableVirtualAddressFromElfFile(
+      GetDebugFilePath(), expected_build_id, &vaddr);
+  if (status == ElfStatus::NO_ERROR) {
+    min_vaddr_ = vaddr;
+  } else {
+    LOG(WARNING) << "failed to read min virtual address of "
+                 << GetDebugFilePath() << ": " << status;
+  }
+  return min_vaddr_;
+}
+
+// Load symbols with the loader matching type_. On success, zero-length
+// symbols get a length assigned; on failure, any partially loaded symbols are
+// dropped. Returns whether loading succeeded.
+bool Dso::Load() {
+  bool loaded = false;
+  if (type_ == DSO_KERNEL) {
+    loaded = LoadKernel();
+  } else if (type_ == DSO_KERNEL_MODULE) {
+    loaded = LoadKernelModule();
+  } else if (type_ == DSO_ELF_FILE) {
+    // A path pointing inside an apk needs the embedded-ELF loader.
+    const bool is_embedded_in_apk = std::get<0>(SplitUrlInApk(path_));
+    loaded = is_embedded_in_apk ? LoadEmbeddedElfFile() : LoadElfFile();
+  }
+
+  if (!loaded) {
+    symbols_.clear();
+    return false;
+  }
+  FixupSymbolLength();
+  return true;
+}
+
+// A kernel symbol counts as a function if its type is one of 'T', 't', 'W'
+// or 'w'.
+static bool IsKernelFunctionSymbol(const KernelSymbol& symbol) {
+  switch (symbol.type) {
+    case 'T':
+    case 't':
+    case 'W':
+    case 'w':
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Callback bound in LoadKernel() and passed to ProcessKernelSymbols(): adds
+// function symbols to `dso` with length 0 (lengths are filled in later by
+// FixupSymbolLength()). Always returns false.
+// NOTE(review): presumably false tells ProcessKernelSymbols to keep iterating —
+// confirm against its definition.
+static bool KernelSymbolCallback(const KernelSymbol& kernel_symbol, Dso* dso) {
+  if (IsKernelFunctionSymbol(kernel_symbol)) {
+    dso->InsertSymbol(Symbol(kernel_symbol.name, kernel_symbol.addr, 0));
+  }
+  return false;
+}
+
+// Callback used when reading vmlinux: add function symbols to `dso` and ignore
+// everything else.
+static void VmlinuxSymbolCallback(const ElfFileSymbol& elf_symbol, Dso* dso) {
+  if (!elf_symbol.is_func) {
+    return;
+  }
+  const Symbol symbol(elf_symbol.name, elf_symbol.vaddr, elf_symbol.len);
+  dso->InsertSymbol(symbol);
+}
+
+// Turn the status of a symbol read into a success flag, logging a warning for
+// anything other than NO_ERROR. A missing symbol table is reported but still
+// treated as success.
+bool CheckReadSymbolResult(ElfStatus result, const std::string& filename) {
+  if (result == ElfStatus::NO_ERROR) {
+    return true;
+  }
+  if (result == ElfStatus::NO_SYMBOL_TABLE) {
+    LOG(WARNING) << filename << " doesn't contain symbol table";
+    return true;
+  }
+  LOG(WARNING) << "failed to read symbols from " << filename
+               << ": " << result;
+  return false;
+}
+
+// Load kernel symbols into symbols_ from the first available source:
+//   1. the vmlinux file set by SetVmlinux(),
+//   2. the kallsyms text set by SetKallsyms(),
+//   3. /proc/kallsyms of the running kernel, used only if the expected build
+//      id (when one is known) matches the running kernel's build id.
+// Returns false if the chosen source can't be read, or if every symbol it
+// yields has address 0 (in which case symbols_ is cleared).
+bool Dso::LoadKernel() {
+  BuildId build_id = GetExpectedBuildId();
+  if (!vmlinux_.empty()) {
+    ElfStatus result = ParseSymbolsFromElfFile(vmlinux_, build_id,
+        std::bind(VmlinuxSymbolCallback, std::placeholders::_1, this));
+    return CheckReadSymbolResult(result, vmlinux_);
+  }
+
+  // Shared by both kallsyms sources: parse the text into symbols_, then reject
+  // the result if no symbol has a non-zero address. An empty symbol set is
+  // rejected as well.
+  auto load_from_kallsyms = [this](std::string& kallsyms,
+                                   const char* all_zero_warning) {
+    ProcessKernelSymbols(kallsyms, std::bind(&KernelSymbolCallback,
+                                             std::placeholders::_1, this));
+    const bool all_zero =
+        std::all_of(symbols_.begin(), symbols_.end(),
+                    [](const Symbol& symbol) { return symbol.addr == 0; });
+    if (all_zero) {
+      LOG(WARNING) << all_zero_warning;
+      symbols_.clear();
+      return false;
+    }
+    return true;
+  };
+
+  if (!kallsyms_.empty()) {
+    return load_from_kallsyms(
+        kallsyms_,
+        "Symbol addresses in /proc/kallsyms on device are all zero. "
+        "`echo 0 >/proc/sys/kernel/kptr_restrict` if possible.");
+  }
+
+  // Fall back to the running kernel, but only if it is the kernel that was
+  // profiled.
+  if (!build_id.IsEmpty()) {
+    BuildId real_build_id;
+    if (!GetKernelBuildId(&real_build_id)) {
+      return false;
+    }
+    bool match = (build_id == real_build_id);
+    if (!match) {
+      LOG(WARNING) << "failed to read symbols from /proc/kallsyms: Build id "
+                   << "mismatch";
+      return false;
+    }
+  }
+
+  std::string kallsyms;
+  if (!android::base::ReadFileToString("/proc/kallsyms", &kallsyms)) {
+    LOG(DEBUG) << "failed to read /proc/kallsyms";
+    return false;
+  }
+  return load_from_kallsyms(
+      kallsyms,
+      "Symbol addresses in /proc/kallsyms are all zero. "
+      "`echo 0 >/proc/sys/kernel/kptr_restrict` if possible.");
+}
+
+// Generic ELF symbol callback: add `elf_symbol` to `dso` only if `filter`
+// accepts it.
+static void ElfFileSymbolCallback(const ElfFileSymbol& elf_symbol, Dso* dso,
+                                  bool (*filter)(const ElfFileSymbol&)) {
+  if (!filter(elf_symbol)) {
+    return;
+  }
+  const Symbol symbol(elf_symbol.name, elf_symbol.vaddr, elf_symbol.len);
+  dso->InsertSymbol(symbol);
+}
+
+// Filter used when loading kernel modules: keep only function symbols located
+// in the .text section.
+static bool SymbolFilterForKernelModule(const ElfFileSymbol& elf_symbol) {
+  // TODO: Parse symbol outside of .text section.
+  return (elf_symbol.is_func && elf_symbol.is_in_text_section);
+}
+
+// Load symbols of a kernel module from its debug file, keeping only the
+// symbols accepted by SymbolFilterForKernelModule.
+bool Dso::LoadKernelModule() {
+  BuildId expected_build_id = GetExpectedBuildId();
+  auto on_symbol = std::bind(ElfFileSymbolCallback, std::placeholders::_1, this,
+                             SymbolFilterForKernelModule);
+  ElfStatus status =
+      ParseSymbolsFromElfFile(GetDebugFilePath(), expected_build_id, on_symbol);
+  return CheckReadSymbolResult(status, GetDebugFilePath());
+}
+
+// Filter used for user space ELF files: keep every function symbol, plus
+// label symbols that live in the .text section.
+static bool SymbolFilterForDso(const ElfFileSymbol& elf_symbol) {
+  if (elf_symbol.is_func) {
+    return true;
+  }
+  return elf_symbol.is_label && elf_symbol.is_in_text_section;
+}
+
+// Load symbols of a standalone ELF file. When no symfs dir is configured, the
+// copy under /usr/lib/debug is tried first and used if it parses without
+// error; otherwise the symbols come from the debug file path.
+bool Dso::LoadElfFile() {
+  BuildId expected_build_id = GetExpectedBuildId();
+
+  if (symfs_dir_.empty()) {
+    // Linux host can store debug shared libraries in /usr/lib/debug.
+    const std::string host_debug_path = "/usr/lib/debug" + path_;
+    ElfStatus host_status = ParseSymbolsFromElfFile(
+        host_debug_path, expected_build_id,
+        std::bind(ElfFileSymbolCallback, std::placeholders::_1, this,
+                  SymbolFilterForDso));
+    if (host_status == ElfStatus::NO_ERROR) {
+      return true;
+    }
+  }
+
+  const std::string& debug_path = GetDebugFilePath();
+  ElfStatus status = ParseSymbolsFromElfFile(
+      debug_path, expected_build_id,
+      std::bind(ElfFileSymbolCallback, std::placeholders::_1, this,
+                SymbolFilterForDso));
+  return CheckReadSymbolResult(status, debug_path);
+}
+
+// Load symbols of an ELF file embedded in an apk. The debug file path must
+// split into an apk path and an entry name (checked with CHECK).
+bool Dso::LoadEmbeddedElfFile() {
+  BuildId expected_build_id = GetExpectedBuildId();
+  const auto apk_url = SplitUrlInApk(GetDebugFilePath());
+  CHECK(std::get<0>(apk_url));
+  const std::string& apk_path = std::get<1>(apk_url);
+  const std::string& entry_name = std::get<2>(apk_url);
+  ElfStatus status = ParseSymbolsFromApkFile(
+      apk_path, entry_name, expected_build_id,
+      std::bind(ElfFileSymbolCallback, std::placeholders::_1, this,
+                SymbolFilterForDso));
+  return CheckReadSymbolResult(status, GetDebugFilePath());
+}
+
+// Add a symbol to symbols_. The set is ordered by address only (see
+// SymbolComparator), so a symbol whose addr is already present is not
+// inserted.
+void Dso::InsertSymbol(const Symbol& symbol) { symbols_.insert(symbol); }
+
+// Give every zero-length symbol a length: it extends up to the start of the
+// next symbol in address order, or up to uint64_t max for the last symbol.
+void Dso::FixupSymbolLength() {
+  Symbol* previous = nullptr;
+  for (auto it = symbols_.begin(); it != symbols_.end(); ++it) {
+    // Set elements are const; casting is needed to update len, which is not
+    // part of the ordering key.
+    Symbol* current = const_cast<Symbol*>(&*it);
+    if (previous != nullptr && previous->len == 0) {
+      previous->len = current->addr - previous->addr;
+    }
+    previous = current;
+  }
+  if (previous == nullptr) {
+    return;
+  }
+  if (previous->len == 0) {
+    previous->len = std::numeric_limits<uint64_t>::max() - previous->addr;
+  }
+}
+
+// Return a fixed string naming the given dso type, or "unknown" for a value
+// that is not one of the DsoType enumerators.
+const char* DsoTypeToString(DsoType dso_type) {
+  if (dso_type == DSO_KERNEL) {
+    return "dso_kernel";
+  }
+  if (dso_type == DSO_KERNEL_MODULE) {
+    return "dso_kernel_module";
+  }
+  if (dso_type == DSO_ELF_FILE) {
+    return "dso_elf_file";
+  }
+  return "unknown";
+}
diff --git a/simpleperf/dso.h b/simpleperf/dso.h
new file mode 100644
index 0000000..c381e6d
--- /dev/null
+++ b/simpleperf/dso.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_DSO_H_
+#define SIMPLE_PERF_DSO_H_
+
+#include <memory>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "build_id.h"
+
+// A symbol covering the address range [addr, addr + len) inside a dso.
+struct Symbol {
+  uint64_t addr;
+  uint64_t len;
+
+  Symbol(const std::string& name, uint64_t addr, uint64_t len);
+  // Name as passed to the constructor.
+  const char* Name() const { return name_; }
+
+  // Demangled name; computed on first call and cached afterwards.
+  const char* DemangledName() const;
+
+  bool HasDumped() const { return has_dumped_; }
+
+  // Callable on a const Symbol because has_dumped_ is mutable.
+  void SetDumped() const { has_dumped_ = true; }
+
+ private:
+  const char* name_;
+  // Lazily filled cache, hence mutable.
+  mutable const char* demangled_name_;
+  mutable bool has_dumped_;
+};
+
+// Orders symbols by start address only, so two symbols with the same addr
+// compare as equivalent regardless of name or length.
+struct SymbolComparator {
+  // const-qualified so the comparator can be invoked on a const object, as
+  // ordered containers such as std::set require.
+  bool operator()(const Symbol& symbol1, const Symbol& symbol2) const {
+    return symbol1.addr < symbol2.addr;
+  }
+};
+
+// Kind of binary a Dso represents; selects the loader used by Dso::Load().
+enum DsoType {
+  DSO_KERNEL,
+  DSO_KERNEL_MODULE,
+  DSO_ELF_FILE,
+};
+
+// Forward declarations only; this header does not need the full definitions.
+struct KernelSymbol;
+struct ElfFileSymbol;
+
+// A binary (kernel, kernel module or ELF file) together with its lazily
+// loaded symbol table. Instances are created through CreateDso(); the static
+// setters configure settings shared by all instances.
+struct Dso {
+ public:
+  static void SetDemangle(bool demangle);
+  static std::string Demangle(const std::string& name);
+  // Returns false (and keeps the old setting) if symfs_dir is non-empty but
+  // has no entries.
+  static bool SetSymFsDir(const std::string& symfs_dir);
+  static void SetVmlinux(const std::string& vmlinux);
+  // An empty argument is ignored, leaving any previously set kallsyms intact.
+  static void SetKallsyms(std::string kallsyms) {
+    if (!kallsyms.empty()) {
+      kallsyms_ = std::move(kallsyms);
+    }
+  }
+  static void SetBuildIds(
+      const std::vector<std::pair<std::string, BuildId>>& build_ids);
+
+  static std::unique_ptr<Dso> CreateDso(DsoType dso_type,
+                                        const std::string& dso_path);
+
+  ~Dso();
+
+  DsoType type() const { return type_; }
+
+  uint64_t id() const { return id_; }
+
+  // Return the path recorded in perf.data.
+  const std::string& Path() const { return path_; }
+  // Return the path containing symbol table and debug information.
+  const std::string& GetDebugFilePath() const { return debug_file_path_; }
+  // Return the file name without directory info.
+  const std::string& FileName() const { return file_name_; }
+
+  bool HasDumped() const { return has_dumped_; }
+
+  void SetDumped() { has_dumped_ = true; }
+
+  // Return the minimum virtual address in program header.
+  uint64_t MinVirtualAddress();
+  void SetMinVirtualAddress(uint64_t min_vaddr) { min_vaddr_ = min_vaddr; }
+
+  // Return the symbol containing vaddr_in_dso, or nullptr if there is none.
+  // Loads symbols on first use.
+  const Symbol* FindSymbol(uint64_t vaddr_in_dso);
+  void InsertSymbol(const Symbol& symbol);
+
+ private:
+  // Settings shared by all Dso instances.
+  static bool demangle_;
+  static std::string symfs_dir_;
+  static std::string vmlinux_;
+  static std::string kallsyms_;
+  static std::unordered_map<std::string, BuildId> build_id_map_;
+  // Number of live Dso instances.
+  static size_t dso_count_;
+
+  Dso(DsoType type, uint64_t id, const std::string& path);
+  bool Load();
+  bool LoadKernel();
+  bool LoadKernelModule();
+  bool LoadElfFile();
+  bool LoadEmbeddedElfFile();
+  void FixupSymbolLength();
+  BuildId GetExpectedBuildId();
+
+  const DsoType type_;
+  const uint64_t id_;
+  // path of the shared library used by the profiled program
+  const std::string path_;
+  // path of the shared library having symbol table and debug information
+  // It is the same as path_, or has the same build id as path_.
+  std::string debug_file_path_;
+  // File name of the shared library, got by removing directories in path_.
+  std::string file_name_;
+  // uint64_t max means "not computed yet" (see MinVirtualAddress()).
+  uint64_t min_vaddr_;
+  // Ordered by start address.
+  std::set<Symbol, SymbolComparator> symbols_;
+  // Set once FindSymbol() has attempted to load symbols.
+  bool is_loaded_;
+  bool has_dumped_;
+};
+
+// Return a string naming dso_type, or "unknown" for an unrecognized value.
+const char* DsoTypeToString(DsoType dso_type);
+
+#endif  // SIMPLE_PERF_DSO_H_
diff --git a/simpleperf/dwarf_unwind.cpp b/simpleperf/dwarf_unwind.cpp
new file mode 100644
index 0000000..5444ef9
--- /dev/null
+++ b/simpleperf/dwarf_unwind.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dwarf_unwind.h"
+
+#include <ucontext.h>
+
+#include <backtrace/Backtrace.h>
+#include <android-base/logging.h>
+
+#include "thread_tree.h"
+
+// Copy perf register `perf_regno` into the lvalue `dst` if the register is
+// available; otherwise leave `dst` untouched. Expects a variable named `regs`
+// to be in scope at the expansion site.
+#define SetUContextReg(dst, perf_regno)          \
+  do {                                           \
+    uint64_t value;                              \
+    if (GetRegValue(regs, perf_regno, &value)) { \
+      (dst) = value;                             \
+    }                                            \
+  } while (0)
+
+// Build a ucontext_t for the architecture simpleperf was compiled for, filled
+// from the sampled registers in `regs`. The context starts zeroed, so any
+// register missing from `regs` stays 0. On architectures not listed below, the
+// zeroed context is returned as is (hence the unused attribute on `regs`).
+static ucontext_t BuildUContextFromRegs(const RegSet& regs __attribute__((unused))) {
+  ucontext_t ucontext;
+  memset(&ucontext, 0, sizeof(ucontext));
+#if defined(__i386__)
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_GS], PERF_REG_X86_GS);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_FS], PERF_REG_X86_FS);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_ES], PERF_REG_X86_ES);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_DS], PERF_REG_X86_DS);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_EAX], PERF_REG_X86_AX);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_EBX], PERF_REG_X86_BX);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_ECX], PERF_REG_X86_CX);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_EDX], PERF_REG_X86_DX);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_ESI], PERF_REG_X86_SI);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_EDI], PERF_REG_X86_DI);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_EBP], PERF_REG_X86_BP);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_EIP], PERF_REG_X86_IP);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_ESP], PERF_REG_X86_SP);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_CS], PERF_REG_X86_CS);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_EFL], PERF_REG_X86_FLAGS);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_SS], PERF_REG_X86_SS);
+#elif defined(__x86_64__)
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_R8], PERF_REG_X86_R8);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_R9], PERF_REG_X86_R9);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_R10], PERF_REG_X86_R10);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_R11], PERF_REG_X86_R11);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_R12], PERF_REG_X86_R12);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_R13], PERF_REG_X86_R13);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_R14], PERF_REG_X86_R14);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_R15], PERF_REG_X86_R15);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_RDI], PERF_REG_X86_DI);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_RSI], PERF_REG_X86_SI);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_RBP], PERF_REG_X86_BP);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_RBX], PERF_REG_X86_BX);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_RDX], PERF_REG_X86_DX);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_RAX], PERF_REG_X86_AX);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_RCX], PERF_REG_X86_CX);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_RSP], PERF_REG_X86_SP);
+  SetUContextReg(ucontext.uc_mcontext.gregs[REG_RIP], PERF_REG_X86_IP);
+#elif defined(__aarch64__)
+  // The perf register number is used directly as the index into regs[].
+  // NOTE(review): the index runs up to PERF_REG_ARM64_MAX - 1; confirm that
+  // uc_mcontext.regs has that many elements, or that writing past it into the
+  // following fields is intended.
+  for (size_t i = PERF_REG_ARM64_X0; i < PERF_REG_ARM64_MAX; ++i) {
+    SetUContextReg(ucontext.uc_mcontext.regs[i], i);
+  }
+#elif defined(__arm__)
+  SetUContextReg(ucontext.uc_mcontext.arm_r0, PERF_REG_ARM_R0);
+  SetUContextReg(ucontext.uc_mcontext.arm_r1, PERF_REG_ARM_R1);
+  SetUContextReg(ucontext.uc_mcontext.arm_r2, PERF_REG_ARM_R2);
+  SetUContextReg(ucontext.uc_mcontext.arm_r3, PERF_REG_ARM_R3);
+  SetUContextReg(ucontext.uc_mcontext.arm_r4, PERF_REG_ARM_R4);
+  SetUContextReg(ucontext.uc_mcontext.arm_r5, PERF_REG_ARM_R5);
+  SetUContextReg(ucontext.uc_mcontext.arm_r6, PERF_REG_ARM_R6);
+  SetUContextReg(ucontext.uc_mcontext.arm_r7, PERF_REG_ARM_R7);
+  SetUContextReg(ucontext.uc_mcontext.arm_r8, PERF_REG_ARM_R8);
+  SetUContextReg(ucontext.uc_mcontext.arm_r9, PERF_REG_ARM_R9);
+  SetUContextReg(ucontext.uc_mcontext.arm_r10, PERF_REG_ARM_R10);
+  SetUContextReg(ucontext.uc_mcontext.arm_fp, PERF_REG_ARM_FP);
+  SetUContextReg(ucontext.uc_mcontext.arm_ip, PERF_REG_ARM_IP);
+  SetUContextReg(ucontext.uc_mcontext.arm_sp, PERF_REG_ARM_SP);
+  SetUContextReg(ucontext.uc_mcontext.arm_lr, PERF_REG_ARM_LR);
+  SetUContextReg(ucontext.uc_mcontext.arm_pc, PERF_REG_ARM_PC);
+#endif
+  return ucontext;
+}
+
+// Unwind a sampled user stack offline and return the pc of every frame found.
+//   arch / strict_arch_check: sample architecture and how strictly it has to
+//       match the architecture simpleperf was built for.
+//   thread: supplies the pid/tid and the memory maps used for unwinding.
+//   regs: sampled register values, including the stack pointer.
+//   stack, stack_size: copy of the stack memory starting at the sampled sp.
+// Returns an empty vector if sp can't be read or unwinding fails. Frames are
+// collected up to, but not including, the first frame whose pc is 0.
+std::vector<uint64_t> UnwindCallChain(ArchType arch, const ThreadEntry& thread,
+                                      const RegSet& regs, const char* stack,
+                                      size_t stack_size, bool strict_arch_check) {
+  std::vector<uint64_t> ips;
+  if (!IsArchTheSame(arch, GetBuildArch(), strict_arch_check)) {
+    LOG(FATAL) << "simpleperf is built in arch " << GetArchString(GetBuildArch())
+            << ", and can't do stack unwinding for arch " << GetArchString(arch);
+    return ips;
+  }
+  uint64_t sp_value;
+  if (!GetSpRegValue(regs, arch, &sp_value)) {
+    LOG(ERROR) << "can't get sp reg value";
+    return ips;
+  }
+
+  // Convert the thread's maps into the form expected by libbacktrace.
+  std::vector<backtrace_map_t> bt_maps(thread.maps.size());
+  auto bt_map_it = bt_maps.begin();
+  for (auto& map : thread.maps) {
+    bt_map_it->start = map->start_addr;
+    bt_map_it->end = map->start_addr + map->len;
+    bt_map_it->offset = map->pgoff;
+    bt_map_it->name = map->dso->GetDebugFilePath();
+    ++bt_map_it;
+  }
+  std::unique_ptr<BacktraceMap> backtrace_map(BacktraceMap::Create(thread.pid, bt_maps));
+
+  // The copied stack data covers [sp, sp + stack_size).
+  backtrace_stackinfo_t stack_info;
+  stack_info.start = sp_value;
+  stack_info.end = sp_value + stack_size;
+  stack_info.data = reinterpret_cast<const uint8_t*>(stack);
+
+  std::unique_ptr<Backtrace> backtrace(
+      Backtrace::CreateOffline(thread.pid, thread.tid, backtrace_map.get(), stack_info, true));
+  ucontext_t ucontext = BuildUContextFromRegs(regs);
+  if (!backtrace->Unwind(0, &ucontext)) {
+    return ips;
+  }
+  for (auto frame = backtrace->begin(); frame != backtrace->end(); ++frame) {
+    // Unwinding in arm architecture can return 0 pc address.
+    if (frame->pc == 0) {
+      break;
+    }
+    ips.push_back(frame->pc);
+  }
+  return ips;
+}
diff --git a/simpleperf/dwarf_unwind.h b/simpleperf/dwarf_unwind.h
new file mode 100644
index 0000000..2a28a9e
--- /dev/null
+++ b/simpleperf/dwarf_unwind.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_DWARF_UNWIND_H_
+#define SIMPLE_PERF_DWARF_UNWIND_H_
+
+#include <vector>
+
+#include "perf_regs.h"
+
+// ThreadEntry is only passed by reference here, so a forward declaration is
+// enough.
+namespace simpleperf {
+struct ThreadEntry;
+}
+
+using ThreadEntry = simpleperf::ThreadEntry;
+
+// Unwind the sampled stack [stack, stack + stack_size) of `thread`, starting
+// from the register values in `regs`, and return the pc of each frame found.
+// Returns an empty vector if the stack pointer can't be read from `regs` or
+// unwinding fails.
+std::vector<uint64_t> UnwindCallChain(ArchType arch, const ThreadEntry& thread, const RegSet& regs,
+                                      const char* stack, size_t stack_size, bool strict_arch_check);
+
+#endif  // SIMPLE_PERF_DWARF_UNWIND_H_
diff --git a/simpleperf/environment.cpp b/simpleperf/environment.cpp
new file mode 100644
index 0000000..621bc9f
--- /dev/null
+++ b/simpleperf/environment.cpp
@@ -0,0 +1,501 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "environment.h"
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <limits>
+#include <set>
+#include <unordered_map>
+#include <vector>
+
+#include <android-base/file.h>
+#include <android-base/logging.h>
+#include <android-base/parseint.h>
+#include <android-base/strings.h>
+#include <android-base/stringprintf.h>
+
+#if defined(__ANDROID__)
+#include <sys/system_properties.h>
+#endif
+
+#include "read_elf.h"
+#include "utils.h"
+
+// Reads a FILE line by line with getline(), reusing one growable buffer.
+// Takes ownership of `fp`: the destructor closes it and frees the buffer.
+class LineReader {
+ public:
+  explicit LineReader(FILE* fp) : fp_(fp), buf_(nullptr), bufsize_(0) {
+  }
+
+  // Copying would make two objects fclose() the same FILE and free() the same
+  // buffer, so copy operations are disabled.
+  LineReader(const LineReader&) = delete;
+  LineReader& operator=(const LineReader&) = delete;
+
+  ~LineReader() {
+    free(buf_);
+    fclose(fp_);
+  }
+
+  // Return the next line (including its trailing newline, if any), or nullptr
+  // at end of file or on error. The returned pointer refers to the internal
+  // buffer and is only valid until the next ReadLine() call.
+  char* ReadLine() {
+    if (getline(&buf_, &bufsize_, fp_) != -1) {
+      return buf_;
+    }
+    return nullptr;
+  }
+
+  // Current capacity of the internal buffer. After a successful ReadLine()
+  // this is large enough for the returned line plus its terminating NUL.
+  size_t MaxLineSize() {
+    return bufsize_;
+  }
+
+ private:
+  FILE* fp_;
+  char* buf_;
+  size_t bufsize_;
+};
+
+// Return the cpus listed in the first line of /sys/devices/system/cpu/online.
+// If the file can't be opened, an error is logged and an empty vector is
+// returned; if it opens but yields no cpus, the CHECK below fails.
+std::vector<int> GetOnlineCpus() {
+  FILE* fp = fopen("/sys/devices/system/cpu/online", "re");
+  if (fp == nullptr) {
+    PLOG(ERROR) << "can't open online cpu information";
+    return std::vector<int>();
+  }
+
+  std::vector<int> online_cpus;
+  LineReader reader(fp);
+  char* first_line = reader.ReadLine();
+  if (first_line != nullptr) {
+    online_cpus = GetCpusFromString(first_line);
+  }
+  CHECK(!online_cpus.empty()) << "can't get online cpu information";
+  return online_cpus;
+}
+
+// Parse a cpu list such as "0,1-3, 5, 7-8" into a sorted vector of unique cpu
+// ids. Numbers are read with strtol(); a '-' anywhere between two numbers
+// makes them an inclusive range, and any other non-digit characters act as
+// separators. Returns an empty vector if the string doesn't start with a
+// number.
+std::vector<int> GetCpusFromString(const std::string& s) {
+  std::set<int> cpu_set;
+  bool have_dash = false;
+  const char* p = s.c_str();
+  char* endp;
+  // Only read after a first number has been stored; initialized so it never
+  // holds an indeterminate value.
+  int last_cpu = 0;
+  int cpu;
+  // Parse line like: 0,1-3, 5, 7-8
+  // The loop ends when strtol() consumes nothing (endp == p); a parsed value of
+  // 0 is still accepted as long as characters were consumed.
+  while ((cpu = static_cast<int>(strtol(p, &endp, 10))) != 0 || endp != p) {
+    if (have_dash && !cpu_set.empty()) {
+      // Fill in the cpus strictly between the two ends of the range.
+      for (int t = last_cpu + 1; t < cpu; ++t) {
+        cpu_set.insert(t);
+      }
+    }
+    have_dash = false;
+    cpu_set.insert(cpu);
+    last_cpu = cpu;
+    p = endp;
+    // Skip to the next digit, remembering whether a dash was seen on the way.
+    // isdigit() requires a value representable as unsigned char, so cast to
+    // avoid undefined behavior on bytes >= 0x80 where char is signed.
+    while (*p != '\0' && !isdigit(static_cast<unsigned char>(*p))) {
+      if (*p == '-') {
+        have_dash = true;
+      }
+      ++p;
+    }
+  }
+  return std::vector<int>(cpu_set.begin(), cpu_set.end());
+}
+
+// Read /proc/modules and return one KernelMmap (name and start address) per
+// line that parses. Returns an empty vector if the file can't be opened, or
+// if no parsed module has a non-zero address.
+static std::vector<KernelMmap> GetLoadedModules() {
+  FILE* fp = fopen("/proc/modules", "re");
+  if (fp == nullptr) {
+    // There is no /proc/modules on Android devices, so we don't print error if failed to open it.
+    PLOG(DEBUG) << "failed to open file /proc/modules";
+    return std::vector<KernelMmap>();
+  }
+
+  std::vector<KernelMmap> modules;
+  LineReader reader(fp);
+  for (char* line = reader.ReadLine(); line != nullptr; line = reader.ReadLine()) {
+    // Parse line like: nf_defrag_ipv6 34768 1 nf_conntrack_ipv6, Live 0xffffffffa0fe5000
+    // The name buffer is as large as the line buffer, so %s can't overflow it.
+    char module_name[reader.MaxLineSize()];
+    uint64_t start_addr;
+    if (sscanf(line, "%s%*lu%*u%*s%*s 0x%" PRIx64, module_name, &start_addr) != 2) {
+      continue;
+    }
+    KernelMmap map;
+    map.name = module_name;
+    map.start_addr = start_addr;
+    modules.push_back(map);
+  }
+
+  bool found_nonzero_addr = false;
+  for (size_t i = 0; i < modules.size() && !found_nonzero_addr; ++i) {
+    found_nonzero_addr = (modules[i].start_addr != 0);
+  }
+  if (!found_nonzero_addr) {
+    LOG(DEBUG) << "addresses in /proc/modules are all zero, so ignore kernel modules";
+    return std::vector<KernelMmap>();
+  }
+  return modules;
+}
+
+// Return the word following "Linux version " in /proc/version. If the file
+// can't be read or doesn't match that format, a FATAL message is logged.
+static std::string GetLinuxVersion() {
+  std::string content;
+  const bool have_content = android::base::ReadFileToString("/proc/version", &content);
+  if (have_content) {
+    // Sized to hold the whole file, so %s can't overflow it.
+    char version[content.size() + 1];
+    const int matched = sscanf(content.c_str(), "Linux version %s", version);
+    if (matched == 1) {
+      return version;
+    }
+  }
+  PLOG(FATAL) << "can't read linux version";
+  return "";
+}
+
+// Recursively scan `path` for regular files ending in ".ko" and record them in
+// module_file_map, keyed by file name without ".ko" and with every '-'
+// replaced by '_'. The first file found for a given key is kept.
+static void GetAllModuleFiles(const std::string& path,
+                              std::unordered_map<std::string, std::string>* module_file_map) {
+  const std::vector<std::string> entries = GetEntriesInDir(path);
+  for (const std::string& name : entries) {
+    const std::string entry_path = path + "/" + name;
+    if (IsRegularFile(entry_path) && android::base::EndsWith(name, ".ko")) {
+      // Strip the 3-character ".ko" suffix.
+      std::string module_name(name, 0, name.size() - 3);
+      std::replace(module_name.begin(), module_name.end(), '-', '_');
+      module_file_map->insert(std::make_pair(module_name, entry_path));
+      continue;
+    }
+    if (IsDir(entry_path)) {
+      GetAllModuleFiles(entry_path, module_file_map);
+    }
+  }
+}
+
+// Return the loaded kernel modules, with `filepath` filled in for every module
+// that has a matching .ko file under /lib/modules/<linux version>/kernel.
+static std::vector<KernelMmap> GetModulesInUse() {
+  // TODO: There is no /proc/modules or /lib/modules on Android, find methods work on it.
+  std::vector<KernelMmap> loaded_modules = GetLoadedModules();
+  const std::string linux_version = GetLinuxVersion();
+  const std::string module_dirpath = "/lib/modules/" + linux_version + "/kernel";
+
+  std::unordered_map<std::string, std::string> ko_files_by_name;
+  GetAllModuleFiles(module_dirpath, &ko_files_by_name);
+
+  for (size_t i = 0; i < loaded_modules.size(); ++i) {
+    KernelMmap& module = loaded_modules[i];
+    const auto found = ko_files_by_name.find(module.name);
+    if (found == ko_files_by_name.end()) {
+      continue;
+    }
+    module.filepath = found->second;
+  }
+  return loaded_modules;
+}
+
+// Fill in the kernel mmap and the mmaps of the kernel modules in use.
+// The kernel starts at address 0. Modules are sorted by start address, and
+// each region (kernel included) extends to one byte before the start of the
+// next module; the last region extends to uint64_t max. A region whose start
+// equals the next start gets length 0. Modules without a known file get
+// "[name]" as their filepath.
+void GetKernelAndModuleMmaps(KernelMmap* kernel_mmap, std::vector<KernelMmap>* module_mmaps) {
+  const uint64_t max_addr = std::numeric_limits<uint64_t>::max();
+  std::vector<KernelMmap>& modules = *module_mmaps;
+
+  kernel_mmap->name = DEFAULT_KERNEL_MMAP_NAME;
+  kernel_mmap->start_addr = 0;
+  kernel_mmap->filepath = kernel_mmap->name;
+
+  modules = GetModulesInUse();
+  for (KernelMmap& module : modules) {
+    if (module.filepath.empty()) {
+      module.filepath = "[" + module.name + "]";
+    }
+  }
+
+  if (modules.size() == 0) {
+    kernel_mmap->len = max_addr - kernel_mmap->start_addr;
+    return;
+  }
+
+  std::sort(modules.begin(), modules.end(),
+            [](const KernelMmap& lhs, const KernelMmap& rhs) {
+              return lhs.start_addr < rhs.start_addr;
+            });
+
+  // When not having enough privilege, all addresses are read as 0.
+  if (kernel_mmap->start_addr == modules[0].start_addr) {
+    kernel_mmap->len = 0;
+  } else {
+    kernel_mmap->len = modules[0].start_addr - kernel_mmap->start_addr - 1;
+  }
+
+  for (size_t i = 0; i + 1 < modules.size(); ++i) {
+    KernelMmap& current = modules[i];
+    const KernelMmap& next = modules[i + 1];
+    if (current.start_addr == next.start_addr) {
+      current.len = 0;
+    } else {
+      current.len = next.start_addr - current.start_addr - 1;
+    }
+  }
+  modules.back().len = max_addr - modules.back().start_addr;
+}
+
+// Reads the thread name ("Name:" line) and thread group id ("Tgid:" line) from a
+// /proc/<pid>/task/<tid>/status file. Returns true only if both fields were found.
+static bool ReadThreadNameAndTgid(const std::string& status_file, std::string* comm, pid_t* tgid) {
+  FILE* fp = fopen(status_file.c_str(), "re");
+  if (fp == nullptr) {
+    return false;
+  }
+  bool read_comm = false;
+  bool read_tgid = false;
+  LineReader reader(fp);
+  char* line;
+  while (!(read_comm && read_tgid) && (line = reader.ReadLine()) != nullptr) {
+    if (strncmp(line, "Name:", 5) == 0) {
+      // Take the rest of the line: names may contain spaces ("Signal Catcher"), and a
+      // "%s" scan would stop at the first one.
+      *comm = android::base::Trim(line + 5);
+      read_comm = !comm->empty();
+    } else if (sscanf(line, "Tgid:%d", tgid) == 1) {
+      read_tgid = true;
+    }
+  }
+  return read_comm && read_tgid;
+}
+
+// Lists the thread ids of pid: the entries of /proc/<pid>/task that parse as an int >= 0.
+static std::vector<pid_t> GetThreadsInProcess(pid_t pid) {
+  std::vector<pid_t> tids;
+  const std::string task_dir = android::base::StringPrintf("/proc/%d/task", pid);
+  for (const auto& entry : GetSubDirs(task_dir)) {
+    int parsed_tid;
+    if (android::base::ParseInt(entry.c_str(), &parsed_tid, 0)) {
+      tids.push_back(parsed_tid);
+    }
+  }
+  return tids;
+}
+
+// Appends a ThreadComm for every thread of pid whose status file could be read.
+static bool GetThreadComm(pid_t pid, std::vector<ThreadComm>* thread_comms) {
+  for (const pid_t tid : GetThreadsInProcess(pid)) {
+    const std::string status_path =
+        android::base::StringPrintf("/proc/%d/task/%d/status", pid, tid);
+    ThreadComm entry;
+    pid_t tgid;
+    // It is possible that the process or thread exited before we can read its status.
+    if (ReadThreadNameAndTgid(status_path, &entry.comm, &tgid)) {
+      // The status file of a thread listed under pid is expected to report pid as its Tgid.
+      CHECK_EQ(pid, tgid);
+      entry.tid = tid;
+      entry.pid = pid;
+      thread_comms->push_back(entry);
+    }
+  }
+  // Threads that could not be read are skipped, not treated as an error.
+  return true;
+}
+
+// Rebuilds thread_comms with the threads of every process directory found in /proc.
+// Entries of /proc that don't parse as a pid are ignored.
+bool GetThreadComms(std::vector<ThreadComm>* thread_comms) {
+  thread_comms->clear();
+  for (const auto& entry : GetSubDirs("/proc")) {
+    int pid;
+    const bool is_pid_dir = android::base::ParseInt(entry.c_str(), &pid, 0);
+    if (is_pid_dir && !GetThreadComm(pid, thread_comms)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Parses /proc/<pid>/maps into thread_mmaps. Returns false if the file can't be opened.
+bool GetThreadMmapsInProcess(pid_t pid, std::vector<ThreadMmap>* thread_mmaps) {
+  std::string map_file = android::base::StringPrintf("/proc/%d/maps", pid);
+  FILE* fp = fopen(map_file.c_str(), "re");
+  if (fp == nullptr) {
+    PLOG(DEBUG) << "can't open file " << map_file;
+    return false;
+  }
+  thread_mmaps->clear();
+  LineReader reader(fp);
+  for (char* line = reader.ReadLine(); line != nullptr; line = reader.ReadLine()) {
+    // Parse line like: 00400000-00409000 r-xp 00000000 fc:00 426998  /usr/lib/gvfs/gvfsd-http
+    uint64_t start_addr, end_addr, pgoff;
+    char type[reader.MaxLineSize()];
+    char execname[reader.MaxLineSize()];
+    execname[0] = '\0';
+    if (sscanf(line, "%" PRIx64 "-%" PRIx64 " %s %" PRIx64 " %*x:%*x %*u %s\n", &start_addr,
+               &end_addr, type, &pgoff, execname) < 4) {
+      continue;
+    }
+    ThreadMmap thread;
+    thread.start_addr = start_addr;
+    thread.len = end_addr - start_addr;
+    thread.pgoff = pgoff;
+    thread.name = execname;
+    if (thread.name.empty()) {
+      thread.name = DEFAULT_EXECNAME_FOR_THREAD_MMAP;  // The line carries no path.
+    }
+    thread.executable = (type[2] == 'x');
+    thread_mmaps->push_back(thread);
+  }
+  return true;
+}
+
+bool GetKernelBuildId(BuildId* build_id) {
+  ElfStatus result = GetBuildIdFromNoteFile("/sys/kernel/notes", build_id);
+  if (result != ElfStatus::NO_ERROR) {
+    LOG(WARNING) << "failed to read /sys/kernel/notes: " << result;
+  }
+  return result == ElfStatus::NO_ERROR;
+}
+
+bool GetModuleBuildId(const std::string& module_name, BuildId* build_id) {
+  std::string notefile = "/sys/module/" + module_name + "/notes/.note.gnu.build-id";
+  return GetBuildIdFromNoteFile(notefile, build_id) == ElfStatus::NO_ERROR;  // status -> bool
+}
+
+// Adds the threads of every process in the comma separated pid_str to tid_set.
+bool GetValidThreadsFromProcessString(const std::string& pid_str, std::set<pid_t>* tid_set) {
+  for (const auto& pid_token : android::base::Split(pid_str, ",")) {
+    int pid;
+    if (!android::base::ParseInt(pid_token.c_str(), &pid, 0)) {
+      LOG(ERROR) << "Invalid pid '" << pid_token << "'";
+      return false;
+    }
+    const std::vector<pid_t> threads = GetThreadsInProcess(pid);
+    if (threads.empty()) {
+      LOG(ERROR) << "Non existing process '" << pid << "'";
+      return false;
+    }
+    tid_set->insert(threads.begin(), threads.end());
+  }
+  return true;
+}
+
+// Adds each thread id in the comma separated tid_str to tid_set; /proc/<tid> must exist.
+bool GetValidThreadsFromThreadString(const std::string& tid_str, std::set<pid_t>* tid_set) {
+  for (const auto& tid_token : android::base::Split(tid_str, ",")) {
+    int tid;
+    if (!android::base::ParseInt(tid_token.c_str(), &tid, 0)) {
+      LOG(ERROR) << "Invalid tid '" << tid_token << "'";
+      return false;
+    }
+    if (!IsDir(android::base::StringPrintf("/proc/%d", tid))) {
+      LOG(ERROR) << "Non existing thread '" << tid << "'";
+      return false;
+    }
+    tid_set->insert(tid);
+  }
+  return true;
+}
+
+// Stores the target of the /proc/self/exe link in exec_path. Fails on an empty or overlong link.
+bool GetExecPath(std::string* exec_path) {
+  char buf[PATH_MAX];
+  const ssize_t len = readlink("/proc/self/exe", buf, sizeof(buf));
+  if (len <= 0 || len >= static_cast<ssize_t>(sizeof(buf))) {
+    PLOG(ERROR) << "readlink failed";
+    return false;
+  }
+  exec_path->assign(buf, len);  // readlink() doesn't NUL-terminate; copy exactly len bytes.
+  return true;
+}
+
+/*
+ * Reads the perf event paranoia level into *value, returning false if that fails. Levels:
+ *  -1 - not paranoid at all
+ *   0 - disallow raw tracepoint access for unpriv
+ *   1 - disallow cpu events for unpriv
+ *   2 - disallow kernel profiling for unpriv
+ *   3 - disallow user profiling for unpriv
+ */
+static bool ReadPerfEventParanoid(int* value) {
+  std::string s;
+  if (!android::base::ReadFileToString("/proc/sys/kernel/perf_event_paranoid", &s)) {
+    PLOG(ERROR) << "failed to read /proc/sys/kernel/perf_event_paranoid";
+    return false;
+  }
+  s = android::base::Trim(s);  // Strip surrounding whitespace before parsing.
+  if (!android::base::ParseInt(s.c_str(), value)) {
+    PLOG(ERROR) << "failed to parse /proc/sys/kernel/perf_event_paranoid: " << s;
+    return false;
+  }
+  return true;
+}
+
+static const char* GetLimitLevelDescription(int limit_level) {
+  static const char* const kLevelText[] = {  // Entry i describes level i - 1.
+      "unlimited",
+      "disallowing raw tracepoint access for unpriv",
+      "disallowing cpu events for unpriv",
+      "disallowing kernel profiling for unpriv",
+      "disallowing user profiling for unpriv",
+  };
+  return (limit_level >= -1 && limit_level <= 3) ? kLevelText[limit_level + 1] : "unknown level";
+}
+
+// Warns when perf_event_paranoid is above 1; on Android it first tries security.perf_harden=0.
+bool CheckPerfEventLimit() {
+  // root is not limited by /proc/sys/kernel/perf_event_paranoid.
+  if (IsRoot()) {
+    return true;
+  }
+  int limit_level;
+  if (!ReadPerfEventParanoid(&limit_level)) {
+    return false;
+  }
+  if (limit_level <= 1) {
+    return true;
+  }
+#if defined(__ANDROID__)
+  // Try to enable perf_event_paranoid by setprop security.perf_harden=0.
+  if (__system_property_set("security.perf_harden", "0") == 0) {
+    sleep(1);
+    if (ReadPerfEventParanoid(&limit_level) && limit_level <= 1) {
+      return true;
+    }
+  }
+#endif
+  LOG(WARNING) << "/proc/sys/kernel/perf_event_paranoid is " << limit_level
+      << ", " << GetLimitLevelDescription(limit_level) << ".";
+#if defined(__ANDROID__)
+  LOG(WARNING) << "Try using `adb shell setprop security.perf_harden 0` to allow profiling.";
+#endif
+  return true;
+}
+
+// Checks sample_freq against [1, perf_event_max_sample_rate]; passes if that file is unreadable.
+bool CheckSampleFrequency(uint64_t sample_freq) {
+  if (sample_freq == 0) {
+    LOG(ERROR) << "Sample frequency can't be zero.";
+    return false;
+  }
+  std::string content;
+  if (!android::base::ReadFileToString("/proc/sys/kernel/perf_event_max_sample_rate", &content)) {
+    PLOG(WARNING) << "failed to read /proc/sys/kernel/perf_event_max_sample_rate";
+    // Omit the check if perf_event_max_sample_rate doesn't exist.
+    return true;
+  }
+  content = android::base::Trim(content);
+  uint64_t max_freq;
+  if (!android::base::ParseUint(content.c_str(), &max_freq)) {
+    LOG(ERROR) << "failed to parse /proc/sys/kernel/perf_event_max_sample_rate: " << content;
+    return false;
+  }
+  if (sample_freq <= max_freq) {
+    return true;
+  }
+  LOG(ERROR) << "Sample frequency " << sample_freq << " is out of range [1, " << max_freq << "]";
+  return false;
+}
+
+// Returns true if kptr_restrict is 0, or is 1 while running as root. Otherwise (or when the
+// setting can't be read or parsed) logs and returns false.
+bool CheckKernelSymbolAddresses() {
+  const std::string kptr_restrict_file = "/proc/sys/kernel/kptr_restrict";
+  std::string content;
+  if (!android::base::ReadFileToString(kptr_restrict_file, &content)) {
+    PLOG(WARNING) << "failed to read " << kptr_restrict_file;
+    return false;
+  }
+  content = android::base::Trim(content);
+  int value;
+  if (!android::base::ParseInt(content.c_str(), &value)) {
+    LOG(ERROR) << "failed to parse " << kptr_restrict_file << ": " << content;
+    return false;
+  }
+  // 0 is accepted for any user; 1 is accepted only for root.
+  if (value == 0 || (value == 1 && IsRoot())) {
+    return true;
+  }
+  LOG(WARNING) << "Access to kernel symbol addresses is restricted. If "
+      << "possible, please do `echo 0 >/proc/sys/kernel/kptr_restrict` "
+      << "to fix this.";
+  return false;
+}
diff --git a/simpleperf/environment.h b/simpleperf/environment.h
new file mode 100644
index 0000000..2f0f59c
--- /dev/null
+++ b/simpleperf/environment.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_ENVIRONMENT_H_
+#define SIMPLE_PERF_ENVIRONMENT_H_
+
+#include <sys/types.h>
+
+#include <functional>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "build_id.h"
+
+std::vector<int> GetOnlineCpus();
+std::vector<int> GetCpusFromString(const std::string& s);
+
+constexpr char DEFAULT_KERNEL_MMAP_NAME[] = "[kernel.kallsyms]";
+
+struct KernelMmap {
+  std::string name;
+  uint64_t start_addr;
+  uint64_t len;          // GetKernelAndModuleMmaps() derives it from the next map's start_addr.
+  std::string filepath;  // GetKernelAndModuleMmaps() uses "[name]" when no module file is known.
+};
+
+void GetKernelAndModuleMmaps(KernelMmap* kernel_mmap, std::vector<KernelMmap>* module_mmaps);
+
+struct ThreadComm {
+  pid_t pid, tid;
+  std::string comm;  // Thread name, read from the "Name:" line of the thread's status file.
+};
+
+bool GetThreadComms(std::vector<ThreadComm>* thread_comms);
+
+constexpr char DEFAULT_EXECNAME_FOR_THREAD_MMAP[] = "//anon";
+
+struct ThreadMmap {
+  uint64_t start_addr;
+  uint64_t len;      // End address minus start address of the mapping.
+  uint64_t pgoff;
+  std::string name;  // Path on the maps line, or DEFAULT_EXECNAME_FOR_THREAD_MMAP if absent.
+  bool executable;   // True if the third permission character on the maps line is 'x'.
+};
+
+bool GetThreadMmapsInProcess(pid_t pid, std::vector<ThreadMmap>* thread_mmaps);
+
+constexpr char DEFAULT_KERNEL_FILENAME_FOR_BUILD_ID[] = "[kernel.kallsyms]";
+
+bool GetKernelBuildId(BuildId* build_id);
+bool GetModuleBuildId(const std::string& module_name, BuildId* build_id);
+
+bool GetValidThreadsFromProcessString(const std::string& pid_str, std::set<pid_t>* tid_set);
+bool GetValidThreadsFromThreadString(const std::string& tid_str, std::set<pid_t>* tid_set);
+
+bool GetExecPath(std::string* exec_path);
+
+bool CheckPerfEventLimit();
+bool CheckSampleFrequency(uint64_t sample_freq);
+bool CheckKernelSymbolAddresses();
+
+#endif  // SIMPLE_PERF_ENVIRONMENT_H_
diff --git a/simpleperf/environment_test.cpp b/simpleperf/environment_test.cpp
new file mode 100644
index 0000000..9b4cbab
--- /dev/null
+++ b/simpleperf/environment_test.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "environment.h"
+
+TEST(environment, GetCpusFromString) {
+  ASSERT_EQ(GetCpusFromString(""), std::vector<int>());
+  ASSERT_EQ(GetCpusFromString("0-2"), std::vector<int>({0, 1, 2}));
+  ASSERT_EQ(GetCpusFromString("0,2-3"), std::vector<int>({0, 2, 3}));
+  ASSERT_EQ(GetCpusFromString("1,0-3,3,4"), std::vector<int>({0, 1, 2, 3, 4}));
+}
diff --git a/simpleperf/event_attr.cpp b/simpleperf/event_attr.cpp
new file mode 100644
index 0000000..1936448
--- /dev/null
+++ b/simpleperf/event_attr.cpp
@@ -0,0 +1,242 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "event_attr.h"
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <string>
+#include <unordered_map>
+
+#include <android-base/logging.h>
+
+#include "event_type.h"
+#include "utils.h"
+
+// Returns the names (from bit_names) of the flags set in bits, joined by ", ". Bits that no
+// entry covers are reported in a DEBUG log tagged with name.
+static std::string BitsToString(const std::string& name, uint64_t bits,
+                                const std::vector<std::pair<int, std::string>>& bit_names) {
+  std::string joined;
+  for (const auto& entry : bit_names) {
+    if ((bits & entry.first) == 0) {
+      continue;
+    }
+    bits &= ~entry.first;
+    joined += joined.empty() ? entry.second : ", " + entry.second;
+  }
+  if (bits != 0) {
+    LOG(DEBUG) << "unknown " << name << " bits: " << std::hex << bits;
+  }
+  return joined;
+}
+
+static std::string SampleTypeToString(uint64_t sample_type) {
+  static std::vector<std::pair<int, std::string>> sample_type_names = {
+      {PERF_SAMPLE_ADDR, "addr"},
+      {PERF_SAMPLE_BRANCH_STACK, "branch_stack"},
+      {PERF_SAMPLE_CALLCHAIN, "callchain"},
+      {PERF_SAMPLE_CPU, "cpu"},
+      {PERF_SAMPLE_ID, "id"},
+      {PERF_SAMPLE_IP, "ip"},
+      {PERF_SAMPLE_PERIOD, "period"},
+      {PERF_SAMPLE_RAW, "raw"},
+      {PERF_SAMPLE_READ, "read"},
+      {PERF_SAMPLE_REGS_USER, "regs_user"},
+      {PERF_SAMPLE_STACK_USER, "stack_user"},
+      {PERF_SAMPLE_STREAM_ID, "stream_id"},
+      {PERF_SAMPLE_TID, "tid"},
+      {PERF_SAMPLE_TIME, "time"},
+  };
+  return BitsToString("sample_type", sample_type, sample_type_names);
+}
+
+static std::string ReadFormatToString(uint64_t read_format) {
+  static std::vector<std::pair<int, std::string>> read_format_names = {
+      {PERF_FORMAT_TOTAL_TIME_ENABLED, "total_time_enabled"},
+      {PERF_FORMAT_TOTAL_TIME_RUNNING, "total_time_running"},
+      {PERF_FORMAT_ID, "id"},
+      {PERF_FORMAT_GROUP, "group"},
+  };
+  return BitsToString("read_format", read_format, read_format_names);
+}
+
+// Returns a zero-initialized perf_event_attr for event_type with the default flags set below.
+perf_event_attr CreateDefaultPerfEventAttr(const EventType& event_type) {
+  perf_event_attr attr;
+  memset(&attr, 0, sizeof(attr));
+  attr.size = sizeof(perf_event_attr);
+  attr.type = event_type.type;
+  attr.config = event_type.config;
+  attr.mmap = 1;
+  attr.comm = 1;
+  attr.disabled = 0;
+  // Changing read_format affects the layout of the data read from perf_event_file, namely
+  // PerfCounter in event_fd.h.
+  attr.read_format =
+      PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID;
+  attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_PERIOD |
+      PERF_SAMPLE_CPU | PERF_SAMPLE_ID;
+  if (attr.type == PERF_TYPE_TRACEPOINT) {
+    // Tracepoint information are stored in raw data in sample records.
+    attr.sample_type |= PERF_SAMPLE_RAW;
+  }
+  return attr;
+}
+
+// Prints the fields of attr with PrintIndented(), nested one level below a header line.
+void DumpPerfEventAttr(const perf_event_attr& attr, size_t indent) {
+  std::string event_name = GetEventNameByAttr(attr);
+  PrintIndented(indent, "event_attr: for event type %s\n", event_name.c_str());
+  // 64-bit perf_event_attr fields are __u64 (unsigned long long), so they take %llu / %llx.
+  PrintIndented(indent + 1, "type %u, size %u, config %llu\n", attr.type, attr.size, attr.config);
+
+  if (attr.freq != 0) {
+    PrintIndented(indent + 1, "sample_freq %llu\n", attr.sample_freq);
+  } else {
+    PrintIndented(indent + 1, "sample_period %llu\n", attr.sample_period);
+  }
+
+  PrintIndented(indent + 1, "sample_type (0x%llx) %s\n", attr.sample_type,
+                SampleTypeToString(attr.sample_type).c_str());
+
+  PrintIndented(indent + 1, "read_format (0x%llx) %s\n", attr.read_format,
+                ReadFormatToString(attr.read_format).c_str());
+
+  PrintIndented(indent + 1, "disabled %u, inherit %u, pinned %u, exclusive %u\n", attr.disabled,
+                attr.inherit, attr.pinned, attr.exclusive);
+
+  PrintIndented(indent + 1, "exclude_user %u, exclude_kernel %u, exclude_hv %u\n",
+                attr.exclude_user, attr.exclude_kernel, attr.exclude_hv);
+
+  PrintIndented(indent + 1, "exclude_idle %u, mmap %u, comm %u, freq %u\n", attr.exclude_idle,
+                attr.mmap, attr.comm, attr.freq);
+
+  PrintIndented(indent + 1, "inherit_stat %u, enable_on_exec %u, task %u\n", attr.inherit_stat,
+                attr.enable_on_exec, attr.task);
+
+  PrintIndented(indent + 1, "watermark %u, precise_ip %u, mmap_data %u\n", attr.watermark,
+                attr.precise_ip, attr.mmap_data);
+  PrintIndented(indent + 1, "sample_id_all %u, exclude_host %u, exclude_guest %u\n",
+                attr.sample_id_all, attr.exclude_host, attr.exclude_guest);
+  PrintIndented(indent + 1, "branch_sample_type 0x%llx\n", attr.branch_sample_type);
+  PrintIndented(indent + 1, "exclude_callchain_kernel %u, exclude_callchain_user %u\n",
+                attr.exclude_callchain_kernel, attr.exclude_callchain_user);
+  PrintIndented(indent + 1, "sample_regs_user 0x%llx\n", attr.sample_regs_user);
+  PrintIndented(indent + 1, "sample_stack_user 0x%llx\n", attr.sample_stack_user);
+}
+
+// Computes where the event id sits in sample records and (counted from the end) in non sample
+// records written for attrs. Logs and returns false if attrs is empty or shares no position.
+bool GetCommonEventIdPositionsForAttrs(std::vector<perf_event_attr>& attrs,
+                                       size_t* event_id_pos_in_sample_records,
+                                       size_t* event_id_reverse_pos_in_non_sample_records) {
+  // When there are more than one perf_event_attrs, we need to read event id
+  // in each record to decide current record should use which attr. So
+  // we need to determine the event id position in a record here.
+  if (attrs.empty()) {
+    LOG(ERROR) << "no perf_event_attr given to determine event id positions";
+    return false;
+  }
+  // First determine event_id_pos_in_sample_records.
+  // If PERF_SAMPLE_IDENTIFIER is enabled, it is just after perf_event_header.
+  // If PERF_SAMPLE_ID is enabled, then PERF_SAMPLE_IDENTIFIER | IP | TID | TIME | ADDR
+  // should also be the same.
+  bool identifier_enabled = true;
+  bool id_enabled = true;
+  uint64_t flags_before_id_mask = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+      PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR;
+  uint64_t flags_before_id = attrs[0].sample_type & flags_before_id_mask;
+  bool flags_before_id_are_the_same = true;
+  for (const auto& attr : attrs) {
+    identifier_enabled &= (attr.sample_type & PERF_SAMPLE_IDENTIFIER) != 0;
+    id_enabled &= (attr.sample_type & PERF_SAMPLE_ID) != 0;
+    flags_before_id_are_the_same &= (attr.sample_type & flags_before_id_mask) == flags_before_id;
+  }
+  if (identifier_enabled) {
+    *event_id_pos_in_sample_records = sizeof(perf_event_header);
+  } else if (id_enabled && flags_before_id_are_the_same) {
+    uint64_t pos = sizeof(perf_event_header);
+    while (flags_before_id != 0) {
+      // Each flag takes 8 bytes in sample records.
+      flags_before_id &= flags_before_id - 1;
+      pos += 8;
+    }
+    *event_id_pos_in_sample_records = pos;
+  } else {
+    LOG(ERROR) << "perf_event_attrs don't have a common event id position in sample records";
+    return false;
+  }
+
+  // Secondly determine event_id_reverse_pos_in_non_sample_record.
+  // If sample_id_all is not enabled, there is no event id in non sample records.
+  // If PERF_SAMPLE_IDENTIFIER is enabled, it is at the last 8 bytes of the record.
+  // If PERF_SAMPLE_ID is enabled, then PERF_SAMPLE_IDENTIFIER | CPU | STREAM_ID should
+  // also be the same.
+  bool sample_id_all_enabled = true;
+  for (const auto& attr : attrs) {
+    sample_id_all_enabled &= attr.sample_id_all != 0;
+  }
+  if (!sample_id_all_enabled) {
+    LOG(ERROR) << "there are perf_event_attrs not enabling sample_id_all, so can't determine "
+               << "perf_event_attr for non sample records";
+    return false;
+  }
+  uint64_t flags_after_id_mask = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID;
+  uint64_t flags_after_id = attrs[0].sample_type & flags_after_id_mask;
+  bool flags_after_id_are_the_same = true;
+  for (const auto& attr : attrs) {
+    flags_after_id_are_the_same &= (attr.sample_type & flags_after_id_mask) == flags_after_id;
+  }
+  if (identifier_enabled) {
+    *event_id_reverse_pos_in_non_sample_records = 8;
+  } else if (id_enabled && flags_after_id_are_the_same) {
+    uint64_t pos = 8;
+    while (flags_after_id != 0) {
+      // Each flag takes 8 bytes in sample_id of non sample records.
+      flags_after_id &= flags_after_id - 1;
+      pos += 8;
+    }
+    *event_id_reverse_pos_in_non_sample_records = pos;
+  } else {
+    LOG(ERROR) << "perf_event_attrs don't have a common event id reverse position in non sample records";
+    return false;
+  }
+  return true;
+}
+
+bool IsTimestampSupported(const perf_event_attr& attr) {
+  return attr.sample_id_all && (attr.sample_type & PERF_SAMPLE_TIME);
+}
+
+bool IsCpuSupported(const perf_event_attr& attr) {
+  return attr.sample_id_all && (attr.sample_type & PERF_SAMPLE_CPU);
+}
+
+std::string GetEventNameByAttr(const perf_event_attr& attr) {
+  for (const auto& candidate : GetAllEventTypes()) {
+    if (candidate.type != attr.type || candidate.config != attr.config) {
+      continue;
+    }
+    // When exactly one side is excluded, tag the name with the side that is not excluded.
+    std::string name = candidate.name;
+    if (attr.exclude_user != attr.exclude_kernel) {
+      name += attr.exclude_user ? ":k" : ":u";
+    }
+    return name;
+  }
+  return "unknown";
+}
diff --git a/simpleperf/event_attr.h b/simpleperf/event_attr.h
new file mode 100644
index 0000000..9182bb9
--- /dev/null
+++ b/simpleperf/event_attr.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_EVENT_ATTR_H_
+#define SIMPLE_PERF_EVENT_ATTR_H_
+
+#include <stddef.h>
+
+#include <string>
+#include <vector>
+
+#include "perf_event.h"
+
+struct EventType;
+
+perf_event_attr CreateDefaultPerfEventAttr(const EventType& event_type);
+void DumpPerfEventAttr(const perf_event_attr& attr, size_t indent = 0);
+bool GetCommonEventIdPositionsForAttrs(std::vector<perf_event_attr>& attrs,
+                                       size_t* event_id_pos_in_sample_records,
+                                       size_t* event_id_reverse_pos_in_non_sample_records);
+bool IsTimestampSupported(const perf_event_attr& attr);
+bool IsCpuSupported(const perf_event_attr& attr);
+// Return event name with modifier if the event is found, otherwise return "unknown".
+std::string GetEventNameByAttr(const perf_event_attr& attr);
+
+#endif  // SIMPLE_PERF_EVENT_ATTR_H_
diff --git a/simpleperf/event_fd.cpp b/simpleperf/event_fd.cpp
new file mode 100644
index 0000000..ed4c660
--- /dev/null
+++ b/simpleperf/event_fd.cpp
@@ -0,0 +1,242 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "event_fd.h"
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <atomic>
+#include <memory>
+
+#include <android-base/file.h>
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
+
+#include "event_attr.h"
+#include "event_type.h"
+#include "perf_event.h"
+#include "utils.h"
+
+std::vector<char> EventFd::data_process_buffer_;
+
+static int perf_event_open(const perf_event_attr& attr, pid_t pid, int cpu,
+                           int group_fd, unsigned long flags) {  // NOLINT
+  return syscall(__NR_perf_event_open, &attr, pid, cpu, group_fd, flags);
+}
+
+// Opens a perf event file for attr; returns nullptr (after logging) on failure.
+std::unique_ptr<EventFd> EventFd::OpenEventFile(const perf_event_attr& attr,
+                                                pid_t tid, int cpu,
+                                                EventFd* group_event_fd,
+                                                bool report_error) {
+  std::string event_name = GetEventNameByAttr(attr);
+  int group_fd =
+      (group_event_fd != nullptr) ? group_event_fd->perf_event_fd_ : -1;
+  int perf_event_fd = perf_event_open(attr, tid, cpu, group_fd, 0);
+  if (perf_event_fd == -1) {
+    if (report_error) {
+      PLOG(ERROR) << "open perf_event_file (event " << event_name << ", tid "
+                  << tid << ", cpu " << cpu << ", group_fd " << group_fd
+                  << ") failed";
+    } else {
+      PLOG(DEBUG) << "open perf_event_file (event " << event_name << ", tid "
+                  << tid << ", cpu " << cpu << ", group_fd " << group_fd
+                  << ") failed";
+    }
+    return nullptr;
+  }
+  if (fcntl(perf_event_fd, F_SETFD, FD_CLOEXEC) == -1) {
+    if (report_error) {
+      PLOG(ERROR) << "fcntl(FD_CLOEXEC) for perf_event_file (event "
+                  << event_name << ", tid " << tid << ", cpu " << cpu
+                  << ", group_fd " << group_fd << ") failed";
+    } else {
+      PLOG(DEBUG) << "fcntl(FD_CLOEXEC) for perf_event_file (event "
+                  << event_name << ", tid " << tid << ", cpu " << cpu
+                  << ", group_fd " << group_fd << ") failed";
+    }
+    close(perf_event_fd);  // Nobody will own this fd, so don't leak it.
+    return nullptr;
+  }
+  return std::unique_ptr<EventFd>(
+      new EventFd(attr, perf_event_fd, event_name, tid, cpu));
+}
+
+EventFd::~EventFd() {
+  DestroyMappedBuffer();
+  close(perf_event_fd_);
+}
+
+std::string EventFd::Name() const {
+  return android::base::StringPrintf(
+      "perf_event_file(event %s, tid %d, cpu %d)", event_name_.c_str(), tid_,
+      cpu_);
+}
+
+// Returns the perf event id. It is fetched with ReadCounter() while id_ is
+// still 0 and then cached in id_.
+uint64_t EventFd::Id() const {
+  PerfCounter counter;
+  if (id_ == 0 && ReadCounter(&counter)) {
+    id_ = counter.id;
+  }
+  return id_;
+}
+
+bool EventFd::EnableEvent() {
+  int result = ioctl(perf_event_fd_, PERF_EVENT_IOC_ENABLE, 0);
+  if (result < 0) {
+    PLOG(ERROR) << "ioctl(enable) " << Name() << " failed";
+    return false;
+  }
+  return true;
+}
+
+bool EventFd::ReadCounter(PerfCounter* counter) const {
+  CHECK(counter != nullptr);
+  if (!android::base::ReadFully(perf_event_fd_, counter, sizeof(*counter))) {
+    PLOG(ERROR) << "ReadCounter from " << Name() << " failed";
+    return false;
+  }
+  return true;
+}
+
+bool EventFd::CreateMappedBuffer(size_t mmap_pages, bool report_error) {
+  CHECK(IsPowerOfTwo(mmap_pages));
+  size_t page_size = sysconf(_SC_PAGE_SIZE);
+  size_t mmap_len = (mmap_pages + 1) * page_size;  // +1: the metadata page.
+  void* mmap_addr = mmap(nullptr, mmap_len, PROT_READ | PROT_WRITE, MAP_SHARED,
+                         perf_event_fd_, 0);
+  if (mmap_addr == MAP_FAILED) {
+    bool is_perm_error = (errno == EPERM);  // Read errno before logging.
+    if (report_error) {
+      PLOG(ERROR) << "mmap(" << mmap_pages << ") failed for " << Name();
+    } else {
+      PLOG(DEBUG) << "mmap(" << mmap_pages << ") failed for " << Name();
+    }
+    if (report_error && is_perm_error) {
+      LOG(ERROR)
+          << "It seems the kernel doesn't allow allocating enough "
+          << "buffer for dumping samples, consider decreasing mmap pages(-m), "
+          << "or decreasing the number of events(-e).";
+    }
+    return false;
+  }
+  mmap_addr_ = mmap_addr;
+  mmap_len_ = mmap_len;
+  mmap_metadata_page_ = reinterpret_cast<perf_event_mmap_page*>(mmap_addr_);
+  mmap_data_buffer_ = reinterpret_cast<char*>(mmap_addr_) + page_size;
+  mmap_data_buffer_size_ = mmap_len_ - page_size;
+  if (data_process_buffer_.size() < mmap_data_buffer_size_) {  // Grow only.
+    data_process_buffer_.resize(mmap_data_buffer_size_);
+  }
+  return true;
+}
+
+bool EventFd::ShareMappedBuffer(const EventFd& event_fd, bool report_error) {
+  CHECK(!HasMappedBuffer());
+  CHECK(event_fd.HasMappedBuffer());
+  int result =
+      ioctl(perf_event_fd_, PERF_EVENT_IOC_SET_OUTPUT, event_fd.perf_event_fd_);
+  if (result != 0) {
+    if (report_error) {
+      PLOG(ERROR) << "failed to share mapped buffer of "
+                  << event_fd.perf_event_fd_ << " with " << perf_event_fd_;
+    }
+    return false;
+  }
+  return true;
+}
+
+void EventFd::DestroyMappedBuffer() {
+  if (HasMappedBuffer()) {
+    munmap(mmap_addr_, mmap_len_);
+    mmap_addr_ = nullptr;
+    mmap_len_ = 0;
+    mmap_metadata_page_ = nullptr;
+    mmap_data_buffer_ = nullptr;
+    mmap_data_buffer_size_ = 0;
+  }
+}
+
+size_t EventFd::GetAvailableMmapData(const char** pdata) {
+  if (!HasMappedBuffer()) {
+    return 0;
+  }
+  // The mmap_data_buffer is used as a ring buffer between the kernel and
+  // simpleperf. The kernel continuously writes records to the buffer, and
+  // simpleperf continuously read records out.
+  //         _________________________________________
+  // buffer | can write   |   can read   |  can write |
+  //                      ^              ^
+  //                    read_head       write_head
+  //
+  // So simpleperf can read records in [read_head, write_head), and the kernel
+  // can write records in [write_head, read_head). The kernel is responsible
+  // for updating write_head, and simpleperf is responsible for updating
+  // read_head.
+
+  size_t buf_mask = mmap_data_buffer_size_ - 1;
+  size_t write_head =
+      static_cast<size_t>(mmap_metadata_page_->data_head & buf_mask);
+  size_t read_head =
+      static_cast<size_t>(mmap_metadata_page_->data_tail & buf_mask);
+
+  if (read_head == write_head) {
+    // No available data.
+    return 0;
+  }
+
+  // Make sure we can see the data after the fence.
+  std::atomic_thread_fence(std::memory_order_acquire);
+
+  // Copy records from mapped buffer to data_process_buffer. Note that records
+  // can be wrapped at the end of the mapped buffer.
+  char* to = data_process_buffer_.data();
+  if (read_head < write_head) {
+    char* from = mmap_data_buffer_ + read_head;
+    size_t n = write_head - read_head;
+    memcpy(to, from, n);
+    to += n;
+  } else {
+    char* from = mmap_data_buffer_ + read_head;
+    size_t n = mmap_data_buffer_size_ - read_head;
+    memcpy(to, from, n);
+    to += n;
+    from = mmap_data_buffer_;
+    n = write_head;
+    memcpy(to, from, n);
+    to += n;
+  }
+  size_t read_bytes = to - data_process_buffer_.data();
+  *pdata = data_process_buffer_.data();
+  DiscardMmapData(read_bytes);
+  return read_bytes;
+}
+
+void EventFd::DiscardMmapData(size_t discard_size) {
+  mmap_metadata_page_->data_tail += discard_size;
+}
+
+bool IsEventAttrSupportedByKernel(perf_event_attr attr) {
+  // Probe: try to open attr for this process (cpu -1) without reporting errors.
+  return EventFd::OpenEventFile(attr, getpid(), -1, nullptr, false) != nullptr;
+}
diff --git a/simpleperf/event_fd.h b/simpleperf/event_fd.h
new file mode 100644
index 0000000..ad56cfc
--- /dev/null
+++ b/simpleperf/event_fd.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_EVENT_FD_H_
+#define SIMPLE_PERF_EVENT_FD_H_
+
+#include <sys/types.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <android-base/macros.h>
+
+#include "IOEventLoop.h"
+#include "perf_event.h"
+
+struct PerfCounter {
+  uint64_t value;  // The value of the event specified by the perf_event_file.
+  uint64_t time_enabled;  // How long the event has been enabled.
+  uint64_t time_running;  // How long the event has been running.
+  uint64_t id;            // The id of the perf_event_file.
+};
+
+// EventFd represents an opened perf_event_file.
+class EventFd {
+ public:
+  static std::unique_ptr<EventFd> OpenEventFile(const perf_event_attr& attr,
+                                                pid_t tid, int cpu,
+                                                EventFd* group_event_fd,
+                                                bool report_error = true);
+
+  ~EventFd();
+
+  // Give information about this perf_event_file, like (event_name, tid, cpu).
+  std::string Name() const;
+
+  uint64_t Id() const;
+
+  pid_t ThreadId() const { return tid_; }
+
+  int Cpu() const { return cpu_; }
+
+  int fd() const { return perf_event_fd_; }
+
+  const perf_event_attr& attr() const { return attr_; }
+
+  // Tell the kernel to start counting and recording the events specified by
+  // this file.
+  bool EnableEvent();
+
+  bool ReadCounter(PerfCounter* counter) const;
+
+  // Create mapped buffer used to receive records sent by the kernel.
+  // mmap_pages should be power of 2.
+  bool CreateMappedBuffer(size_t mmap_pages, bool report_error);
+
+  // Share the mapped buffer used by event_fd. The two EventFds should monitor
+  // the same event on the same cpu, but have different thread ids.
+  bool ShareMappedBuffer(const EventFd& event_fd, bool report_error);
+
+  bool HasMappedBuffer() const { return mmap_data_buffer_size_ != 0; }
+
+  void DestroyMappedBuffer();
+
+  // Copy the records the kernel has written so far into data_process_buffer_;
+  // returns their size and stores the start address in *pdata (0 if no data).
+  size_t GetAvailableMmapData(const char** pdata);
+
+ private:
+  EventFd(const perf_event_attr& attr, int perf_event_fd,
+          const std::string& event_name, pid_t tid, int cpu)
+      : attr_(attr),
+        perf_event_fd_(perf_event_fd),
+        id_(0),
+        event_name_(event_name),
+        tid_(tid),
+        cpu_(cpu),
+        mmap_addr_(nullptr),
+        mmap_len_(0),
+        mmap_metadata_page_(nullptr),
+        mmap_data_buffer_(nullptr),
+        mmap_data_buffer_size_(0) {}
+
+  // Advance data_tail by the amount of data we have read, so the kernel can
+  // reuse this part of the mapped area to store new data.
+  void DiscardMmapData(size_t discard_size);
+
+  const perf_event_attr attr_;
+  int perf_event_fd_;
+  mutable uint64_t id_;
+  const std::string event_name_;
+  pid_t tid_;
+  int cpu_;
+
+  void* mmap_addr_;
+  size_t mmap_len_;
+  perf_event_mmap_page* mmap_metadata_page_;  // The first page of mmap_area.
+  char* mmap_data_buffer_;  // Starting from the second page of mmap_area,
+                            // containing records written by the kernel.
+  size_t mmap_data_buffer_size_;
+
+  // As mmap_data_buffer is a ring buffer, a record can wrap around its end, so
+  // records are copied to data_process_buffer before being processed. The
+  // buffer is static, i.e. one buffer is shared by every EventFd.
+  static std::vector<char> data_process_buffer_;
+
+  DISALLOW_COPY_AND_ASSIGN(EventFd);
+};
+
+bool IsEventAttrSupportedByKernel(perf_event_attr attr);
+
+#endif  // SIMPLE_PERF_EVENT_FD_H_
diff --git a/simpleperf/event_selection_set.cpp b/simpleperf/event_selection_set.cpp
new file mode 100644
index 0000000..6235e32
--- /dev/null
+++ b/simpleperf/event_selection_set.cpp
@@ -0,0 +1,441 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "event_selection_set.h"
+
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
+
+#include "environment.h"
+#include "event_attr.h"
+#include "event_type.h"
+#include "IOEventLoop.h"
+#include "perf_regs.h"
+
+// Asks the kernel whether it accepts a cpu-cycles event that also samples
+// the branch stack with PERF_SAMPLE_BRANCH_ANY.
+bool IsBranchSamplingSupported() {
+  const EventType* cycles = FindEventTypeByName("cpu-cycles");
+  if (cycles == nullptr) return false;
+  perf_event_attr probe_attr = CreateDefaultPerfEventAttr(*cycles);
+  probe_attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+  probe_attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY;
+  return IsEventAttrSupportedByKernel(probe_attr);
+}
+
+// Asks the kernel whether it accepts a cpu-cycles event that samples the
+// callchain, the user registers and a user stack dump of size 8192.
+bool IsDwarfCallChainSamplingSupported() {
+  const EventType* cycles = FindEventTypeByName("cpu-cycles");
+  if (cycles == nullptr) return false;
+  perf_event_attr probe_attr = CreateDefaultPerfEventAttr(*cycles);
+  probe_attr.sample_type |=
+      PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER;
+  probe_attr.exclude_callchain_user = 1;
+  probe_attr.sample_regs_user = GetSupportedRegMask(GetBuildArch());
+  probe_attr.sample_stack_user = 8192;
+  return IsEventAttrSupportedByKernel(probe_attr);
+}
+
+// Fills `selection` from `event_name`. Fails if the name can't be parsed, the
+// kernel rejects the resulting attr, or the name is already in groups_.
+bool EventSelectionSet::BuildAndCheckEventSelection(
+    const std::string& event_name, EventSelection* selection) {
+  std::unique_ptr<EventTypeAndModifier> parsed = ParseEventType(event_name);
+  if (parsed == nullptr) return false;
+  selection->event_type_modifier = *parsed;
+  perf_event_attr& attr = selection->event_attr;
+  attr = CreateDefaultPerfEventAttr(parsed->event_type);
+  attr.exclude_user = parsed->exclude_user;
+  attr.exclude_kernel = parsed->exclude_kernel;
+  attr.exclude_hv = parsed->exclude_hv;
+  attr.exclude_host = parsed->exclude_host;
+  attr.exclude_guest = parsed->exclude_guest;
+  attr.precise_ip = parsed->precise_ip;
+  if (!IsEventAttrSupportedByKernel(attr)) {
+    LOG(ERROR) << "Event type '" << parsed->name
+               << "' is not supported by the kernel";
+    return false;
+  }
+  selection->event_fds.clear();
+
+  const std::string& new_name = selection->event_type_modifier.name;
+  for (const EventSelectionGroup& existing_group : groups_) {
+    for (const EventSelection& existing : existing_group) {
+      if (existing.event_type_modifier.name != new_name) continue;
+      LOG(ERROR) << "Event type '" << new_name << "' appears more than once";
+      return false;
+    }
+  }
+  return true;
+}
+
+bool EventSelectionSet::AddEventType(const std::string& event_name) {
+  return AddEventGroup({event_name});  // A group holding just this event.
+}
+
+// Adds all of `event_names` as one group; nothing is added if any name fails.
+bool EventSelectionSet::AddEventGroup(
+    const std::vector<std::string>& event_names) {
+  EventSelectionGroup new_group;
+  const size_t new_group_id = groups_.size();
+  for (size_t i = 0; i < event_names.size(); ++i) {
+    EventSelection sel;
+    if (!BuildAndCheckEventSelection(event_names[i], &sel)) return false;
+    sel.selection_id = i;
+    sel.group_id = new_group_id;
+    new_group.push_back(std::move(sel));
+  }
+  groups_.push_back(std::move(new_group));
+  UnionSampleType();
+  return true;
+}
+
+// Give every event attr the union of all sample types, which makes reading
+// sample records in perf.data easier.
+void EventSelectionSet::UnionSampleType() {
+  uint64_t merged_type = 0;
+  for (const EventSelectionGroup& event_group : groups_) {
+    for (const EventSelection& sel : event_group) {
+      merged_type |= sel.event_attr.sample_type;
+    }
+  }
+  for (EventSelectionGroup& event_group : groups_) {
+    for (EventSelection& sel : event_group) {
+      sel.event_attr.sample_type = merged_type;
+    }
+  }
+}
+
+// Chooses, for every selection, between starting disabled and being enabled
+// on exec (`enable` true) and being enabled right from the start (`enable`
+// false).
+void EventSelectionSet::SetEnableOnExec(bool enable) {
+  // If sampling is enabled on exec, then it is disabled at startup,
+  // otherwise it should be enabled at startup. Don't use
+  // ioctl(PERF_EVENT_IOC_ENABLE) to enable it after perf_event_open().
+  // Because some android kernels can't handle ioctl() well when cpu-hotplug
+  // happens. See http://b/25193162.
+  // Hence enable_on_exec and disabled always carry the same value.
+  const unsigned on_exec = enable ? 1 : 0;
+  for (EventSelectionGroup& event_group : groups_) {
+    for (EventSelection& sel : event_group) {
+      sel.event_attr.enable_on_exec = on_exec;
+      sel.event_attr.disabled = on_exec;
+    }
+  }
+}
+
+// True only if every selection has enable_on_exec set (also true when there
+// are no selections at all).
+bool EventSelectionSet::GetEnableOnExec() {
+  for (const EventSelectionGroup& event_group : groups_) {
+    for (const EventSelection& sel : event_group) {
+      if (sel.event_attr.enable_on_exec == 0) return false;
+    }
+  }
+  return true;
+}
+
+// Sets sample_id_all on the event attr of every selection in every
+// group.
+void EventSelectionSet::SampleIdAll() {
+  for (EventSelectionGroup& event_group : groups_) {
+    for (EventSelection& sel : event_group) sel.event_attr.sample_id_all = 1;
+  }
+}
+
+void EventSelectionSet::SetSampleFreq(const EventSelection& selection,
+                                      uint64_t sample_freq) {
+  auto& attr = groups_[selection.group_id][selection.selection_id].event_attr;
+  attr.freq = 1;  // Sample by frequency rather than by period.
+  attr.sample_freq = sample_freq;
+}
+
+void EventSelectionSet::SetSamplePeriod(const EventSelection& selection,
+                                        uint64_t sample_period) {
+  auto& attr = groups_[selection.group_id][selection.selection_id].event_attr;
+  attr.freq = 0;  // Sample by period rather than by frequency.
+  attr.sample_period = sample_period;
+}
+
+// Applies `branch_sample_type` to every selection; 0 disables branch stack
+// sampling. Fails on a type without any ANY*/IND_CALL bit or without support.
+bool EventSelectionSet::SetBranchSampling(uint64_t branch_sample_type) {
+  const uint64_t kBranchKinds =
+      PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_ANY_CALL |
+      PERF_SAMPLE_BRANCH_ANY_RETURN | PERF_SAMPLE_BRANCH_IND_CALL;
+  if (branch_sample_type != 0 && (branch_sample_type & kBranchKinds) == 0) {
+    LOG(ERROR) << "Invalid branch_sample_type: 0x" << std::hex
+               << branch_sample_type;
+    return false;
+  }
+  if (branch_sample_type != 0 && !IsBranchSamplingSupported()) {
+    LOG(ERROR) << "branch stack sampling is not supported on this device.";
+    return false;
+  }
+  for (EventSelectionGroup& event_group : groups_) {
+    for (EventSelection& sel : event_group) {
+      perf_event_attr& attr = sel.event_attr;
+      attr.sample_type = (branch_sample_type != 0)
+                             ? (attr.sample_type | PERF_SAMPLE_BRANCH_STACK)
+                             : (attr.sample_type & ~PERF_SAMPLE_BRANCH_STACK);
+      attr.branch_sample_type = branch_sample_type;
+    }
+  }
+  return true;
+}
+
+void EventSelectionSet::EnableFpCallChainSampling() {
+  for (EventSelectionGroup& event_group : groups_) {
+    for (EventSelection& sel : event_group) {
+      sel.event_attr.sample_type |= PERF_SAMPLE_CALLCHAIN;  // Keep callchains.
+    }
+  }
+}
+
+// Enables callchain, user register and user stack sampling on all selections.
+bool EventSelectionSet::EnableDwarfCallChainSampling(uint32_t dump_stack_size) {
+  if (!IsDwarfCallChainSamplingSupported()) {
+    LOG(ERROR) << "dwarf callchain sampling is not supported on this device.";
+    return false;
+  }
+  for (EventSelectionGroup& event_group : groups_) {
+    for (EventSelection& sel : event_group) {
+      perf_event_attr& attr = sel.event_attr;
+      attr.sample_type |= PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER |
+                          PERF_SAMPLE_STACK_USER;
+      attr.exclude_callchain_user = 1;
+      attr.sample_regs_user = GetSupportedRegMask(GetBuildArch());
+      attr.sample_stack_user = dump_stack_size;
+    }
+  }
+  return true;
+}
+
+// Sets the inherit bit of every selection to `enable`.
+void EventSelectionSet::SetInherit(bool enable) {
+  const unsigned inherit = enable ? 1 : 0;
+  for (EventSelectionGroup& event_group : groups_) {
+    for (EventSelection& sel : event_group) sel.event_attr.inherit = inherit;
+  }
+}
+
+// Sets wakeup_events to 1 on every selection.
+// NOTE(review): presumably wakes the reader per sample -- confirm.
+void EventSelectionSet::SetLowWatermark() {
+  for (EventSelectionGroup& event_group : groups_) {
+    for (EventSelection& sel : event_group) sel.event_attr.wakeup_events = 1;
+  }
+}
+
+// Returns true if every cpu in `cpus` is currently online. Logs the first
+// offline cpu found and returns false otherwise.
+static bool CheckIfCpusOnline(const std::vector<int>& cpus) {
+  const std::vector<int> online = GetOnlineCpus();
+  for (int cpu : cpus) {
+    if (std::find(online.begin(), online.end(), cpu) != online.end()) continue;
+    LOG(ERROR) << "cpu " << cpu << " is not online.";
+    return false;
+  }
+  return true;
+}
+
+bool EventSelectionSet::OpenEventFilesForCpus(const std::vector<int>& cpus) {
+  return OpenEventFilesForThreadsOnCpus({-1}, cpus);  // tid -1: all threads
+}
+
+// Empty `cpus` selects all online cpus; an explicit list must be online.
+bool EventSelectionSet::OpenEventFilesForThreadsOnCpus(
+    const std::vector<pid_t>& threads, std::vector<int> cpus) {
+  if (cpus.empty()) {
+    cpus = GetOnlineCpus();
+  } else {
+    // cpus = {-1} means open an event file for all cpus.
+    const bool all_cpus = (cpus.size() == 1 && cpus[0] == -1);
+    if (!all_cpus && !CheckIfCpusOnline(cpus)) return false;
+  }
+  return OpenEventFiles(threads, cpus);
+}
+
+// Opens an event file for each (group, thread, cpu) combination. On a given
+// thread and cpu a group opens completely or not at all. Returns false if
+// some group can't be opened on any cpu for some thread.
+bool EventSelectionSet::OpenEventFiles(const std::vector<pid_t>& threads,
+                                       const std::vector<int>& cpus) {
+  for (auto& group : groups_) {
+    for (const auto& tid : threads) {
+      size_t open_per_thread = 0;
+      std::string failed_event_type;
+      for (const auto& cpu : cpus) {
+        std::vector<std::unique_ptr<EventFd>> event_fds;
+        for (auto& selection : group) {
+          // Every event after the first joins the group of the first one.
+          EventFd* group_fd = event_fds.empty() ? nullptr : event_fds[0].get();
+          std::unique_ptr<EventFd> event_fd =
+              EventFd::OpenEventFile(selection.event_attr, tid, cpu, group_fd);
+          if (event_fd == nullptr) {
+            failed_event_type = selection.event_type_modifier.name;
+            break;
+          }
+          LOG(VERBOSE) << "OpenEventFile for " << event_fd->Name();
+          event_fds.push_back(std::move(event_fd));
+        }
+        // Keep the files only if the whole group opened; a partial group is
+        // released when event_fds is destroyed.
+        if (event_fds.size() == group.size()) {
+          for (size_t i = 0; i < group.size(); ++i) {
+            group[i].event_fds.push_back(std::move(event_fds[i]));
+          }
+          ++open_per_thread;
+        }
+      }
+      // As the online cpus can be enabled or disabled at runtime, we may not
+      // open event file for all cpus successfully. But we should open at least
+      // one cpu successfully.
+      if (open_per_thread == 0) {
+        PLOG(ERROR) << "failed to open perf event file for event_type "
+                    << failed_event_type << " for "
+                    << (tid == -1 ? "all threads" : android::base::StringPrintf(
+                                                        "thread %d", tid));
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+// Reads the counter of every opened event file into `counters`, producing one
+// CountersInfo per selection. Returns false as soon as one read fails.
+bool EventSelectionSet::ReadCounters(std::vector<CountersInfo>* counters) {
+  counters->clear();
+  for (EventSelectionGroup& event_group : groups_) {
+    for (EventSelection& sel : event_group) {
+      CountersInfo info;
+      info.selection = &sel;
+      for (const std::unique_ptr<EventFd>& fd : sel.event_fds) {
+        CountersInfo::CounterInfo entry;
+        if (!fd->ReadCounter(&entry.counter)) return false;
+        entry.tid = fd->ThreadId();
+        entry.cpu = fd->Cpu();
+        info.counters.push_back(entry);
+      }
+      counters->push_back(info);
+    }
+  }
+  return true;
+}
+
+// Tries max_mmap_pages, then halves it until mapping works or min is passed.
+bool EventSelectionSet::MmapEventFiles(size_t min_mmap_pages,
+                                       size_t max_mmap_pages) {
+  for (size_t i = max_mmap_pages; i != 0 && i >= min_mmap_pages; i >>= 1) {
+    const bool last_attempt = (i >> 1) == 0 || (i >> 1) < min_mmap_pages;
+    if (MmapEventFiles(i, last_attempt)) {
+      LOG(VERBOSE) << "Mapped buffer size is " << i << " pages.";
+      return true;
+    }
+    for (auto& group : groups_) {
+      for (auto& selection : group) {
+        for (auto& fd : selection.event_fds) fd->DestroyMappedBuffer();
+      }
+    }
+  }
+  return false;
+}
+
+// Maps buffers of `mmap_pages` pages: within a selection, the first event
+// file seen for a cpu creates the buffer and later ones on that cpu share it.
+bool EventSelectionSet::MmapEventFiles(size_t mmap_pages, bool report_error) {
+  for (EventSelectionGroup& event_group : groups_) {
+    for (EventSelection& sel : event_group) {
+      // For each event, allocate a mapped buffer for each cpu.
+      std::map<int, EventFd*> buffer_owner_of_cpu;
+      for (std::unique_ptr<EventFd>& fd : sel.event_fds) {
+        auto owner = buffer_owner_of_cpu.find(fd->Cpu());
+        if (owner == buffer_owner_of_cpu.end()) {
+          if (!fd->CreateMappedBuffer(mmap_pages, report_error)) {
+            return false;
+          }
+          buffer_owner_of_cpu[fd->Cpu()] = fd.get();
+        } else if (!fd->ShareMappedBuffer(*owner->second, report_error)) {
+          return false;
+        }
+      }
+    }
+  }
+  return true;
+}
+
+// Registers a read event on `loop` for each event file that has a mapped
+// buffer, then stores `callback` as the receiver of the records read.
+bool EventSelectionSet::PrepareToReadMmapEventData(
+    IOEventLoop& loop, const std::function<bool(Record*)>& callback) {
+  for (EventSelectionGroup& event_group : groups_) {
+    for (EventSelection& sel : event_group) {
+      for (std::unique_ptr<EventFd>& event_fd : sel.event_fds) {
+        if (!event_fd->HasMappedBuffer()) continue;
+        // The handler keeps a reference to the element stored in groups_, so
+        // groups_ must stay unchanged while the handler is registered.
+        auto on_readable = [this, &event_fd]() {
+          return ReadMmapEventDataForFd(event_fd);
+        };
+        if (!loop.AddReadEvent(event_fd->fd(), on_readable)) return false;
+      }
+    }
+  }
+  // Prepare record callback function.
+  record_callback_ = callback;
+  return true;
+}
+
+// Reads what is currently available in the mapped buffer of `event_fd` and
+// passes each record to record_callback_. Returns false as soon as the
+// callback returns false, true otherwise (also when there was no data).
+bool EventSelectionSet::ReadMmapEventDataForFd(
+    std::unique_ptr<EventFd>& event_fd) {
+  // Call GetAvailableMmapData() only once instead of calling in a loop, because
+  // 1) A mapped buffer caches data before needing to be read again. By default
+  //    it raises read Event when half full.
+  // 2) Spinning on one mapped buffer can make other mapped buffers overflow.
+  const char* data;
+  const size_t size = event_fd->GetAvailableMmapData(&data);
+  if (size == 0) return true;
+
+  std::vector<std::unique_ptr<Record>> records =
+      ReadRecordsFromBuffer(event_fd->attr(), data, size);
+  for (const std::unique_ptr<Record>& record : records) {
+    if (!record_callback_(record.get())) return false;
+  }
+  return true;
+}
+
+// Reads every mapped buffer one more time. Returns false if processing the
+// data of any buffer fails.
+bool EventSelectionSet::FinishReadMmapEventData() {
+  // Read each mapped buffer once, because some data may exist in the buffers
+  // without being enough to raise read Events.
+  for (EventSelectionGroup& event_group : groups_) {
+    for (EventSelection& sel : event_group) {
+      for (std::unique_ptr<EventFd>& fd : sel.event_fds) {
+        if (fd->HasMappedBuffer() && !ReadMmapEventDataForFd(fd)) {
+          return false;
+        }
+      }
+    }
+  }
+  return true;
+}
diff --git a/simpleperf/event_selection_set.h b/simpleperf/event_selection_set.h
new file mode 100644
index 0000000..a947320
--- /dev/null
+++ b/simpleperf/event_selection_set.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_EVENT_SELECTION_SET_H_
+#define SIMPLE_PERF_EVENT_SELECTION_SET_H_
+
+#include <functional>
+#include <map>
+#include <unordered_map>
+#include <vector>
+
+#include <android-base/macros.h>
+
+#include "event_fd.h"
+#include "event_type.h"
+#include "perf_event.h"
+#include "record.h"
+
+struct EventSelection {
+  uint32_t group_id;  // Index of the owning group in the selection set.
+  uint32_t selection_id;  // Index of this selection inside its group.
+  EventTypeAndModifier event_type_modifier;
+  perf_event_attr event_attr;
+  std::vector<std::unique_ptr<EventFd>> event_fds;  // Opened event files.
+};
+typedef std::vector<EventSelection> EventSelectionGroup;
+
+struct CountersInfo {
+  const EventSelection* selection;  // The selection these counters belong to.
+  struct CounterInfo {
+    pid_t tid;
+    int cpu;
+    PerfCounter counter;
+  };
+  std::vector<CounterInfo> counters;  // One entry per event file read.
+};
+
+class IOEventLoop;
+
+// EventSelectionSet helps to monitor events. It is used in following steps:
+// 1. Create an EventSelectionSet, and add event types to monitor by calling
+//    AddEventType() or AddEventGroup().
+// 2. Define how to monitor events by calling SetEnableOnExec(), SampleIdAll(),
+//    SetSampleFreq(), etc.
+// 3. Start monitoring by calling OpenEventFilesForCpus() or
+//    OpenEventFilesForThreadsOnCpus(). If SetEnableOnExec() has been called
+//    in step 2, monitor will be delayed until the monitored thread calls
+//    exec().
+// 4. Read counters by calling ReadCounters(), or read mapped event records
+//    by calling MmapEventFiles(), PrepareToReadMmapEventData() and
+//    FinishReadMmapEventData().
+// 5. Stop monitoring automatically in the destructor of EventSelectionSet by
+//    closing perf event files.
+
+class EventSelectionSet {
+ public:
+  EventSelectionSet() {}
+
+  bool empty() const { return groups_.empty(); }
+
+  const std::vector<EventSelectionGroup>& groups() { return groups_; }
+  // Step 1: choose the events to monitor.
+  bool AddEventType(const std::string& event_name);
+  bool AddEventGroup(const std::vector<std::string>& event_names);
+  // Step 2: define how the chosen events are monitored.
+  void SetEnableOnExec(bool enable);
+  bool GetEnableOnExec();
+  void SampleIdAll();
+  void SetSampleFreq(const EventSelection& selection, uint64_t sample_freq);
+  void SetSamplePeriod(const EventSelection& selection, uint64_t sample_period);
+  bool SetBranchSampling(uint64_t branch_sample_type);
+  void EnableFpCallChainSampling();
+  bool EnableDwarfCallChainSampling(uint32_t dump_stack_size);
+  void SetInherit(bool enable);
+  void SetLowWatermark();
+  // Steps 3 and 4: open the event files, then read counters or mapped records.
+  bool OpenEventFilesForCpus(const std::vector<int>& cpus);
+  bool OpenEventFilesForThreadsOnCpus(const std::vector<pid_t>& threads,
+                                      std::vector<int> cpus);
+  bool ReadCounters(std::vector<CountersInfo>* counters);
+  bool MmapEventFiles(size_t min_mmap_pages, size_t max_mmap_pages);
+  bool PrepareToReadMmapEventData(IOEventLoop& loop,
+                                  const std::function<bool(Record*)>& callback);
+  bool FinishReadMmapEventData();
+
+ private:
+  bool BuildAndCheckEventSelection(const std::string& event_name,
+                                   EventSelection* selection);
+  void UnionSampleType();
+  bool OpenEventFiles(const std::vector<pid_t>& threads,
+                      const std::vector<int>& cpus);
+  bool MmapEventFiles(size_t mmap_pages, bool report_error);
+  bool ReadMmapEventDataForFd(std::unique_ptr<EventFd>& event_fd);
+
+  std::vector<EventSelectionGroup> groups_;
+  // Receives each record read from the mapped buffers.
+  std::function<bool(Record*)> record_callback_;
+
+  DISALLOW_COPY_AND_ASSIGN(EventSelectionSet);
+};
+
+bool IsBranchSamplingSupported();
+bool IsDwarfCallChainSamplingSupported();
+
+#endif  // SIMPLE_PERF_EVENT_SELECTION_SET_H_
diff --git a/simpleperf/event_type.cpp b/simpleperf/event_type.cpp
new file mode 100644
index 0000000..bfa6aac
--- /dev/null
+++ b/simpleperf/event_type.cpp
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "event_type.h"
+
+#include <unistd.h>
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include <android-base/file.h>
+#include <android-base/logging.h>
+
+#include "event_attr.h"
+#include "utils.h"
+
+#define EVENT_TYPE_TABLE_ENTRY(name, type, config) {name, type, config},
+
+static const std::vector<EventType> static_event_type_array = {
+#include "event_type_table.h"
+};
+
+// Builds an EventType, sorted by name, for every tracepoint that has a readable and parsable
+// id file under /sys/kernel/debug/tracing/events. Returns an empty list when not root.
+static const std::vector<EventType> GetTracepointEventTypes() {
+  std::vector<EventType> tracepoints;
+  if (!IsRoot()) {
+    // Not having permission to profile tracing events.
+    return tracepoints;
+  }
+  const std::string events_dir = "/sys/kernel/debug/tracing/events";
+  for (const auto& system : GetSubDirs(events_dir)) {
+    const std::string system_dir = events_dir + "/" + system;
+    for (const auto& event : GetSubDirs(system_dir)) {
+      const std::string id_path = system_dir + "/" + event + "/id";
+      std::string id_text;
+      if (!android::base::ReadFileToString(id_path, &id_text)) continue;
+      char* parse_end;
+      const uint64_t id = strtoull(id_text.c_str(), &parse_end, 10);
+      if (parse_end == id_text.c_str()) {
+        LOG(DEBUG) << "unexpected id '" << id_text << "' in " << id_path;
+        continue;
+      }
+      tracepoints.emplace_back(system + ":" + event, PERF_TYPE_TRACEPOINT, id);
+    }
+  }
+  std::sort(tracepoints.begin(), tracepoints.end(),
+            [](const EventType& a, const EventType& b) { return a.name < b.name; });
+  return tracepoints;
+}
+
+// Returns the static event table followed by the tracepoint events, built while still empty.
+const std::vector<EventType>& GetAllEventTypes() {
+  static std::vector<EventType> all_types;
+  if (!all_types.empty()) {
+    return all_types;
+  }
+  all_types = static_event_type_array;
+  const std::vector<EventType> tracepoints = GetTracepointEventTypes();
+  all_types.insert(all_types.end(), tracepoints.begin(), tracepoints.end());
+  return all_types;
+}
+
+// Looks up `name` among GetAllEventTypes(). Returns a pointer to the matching entry, or nullptr
+// after logging an error when no event type has exactly this name.
+const EventType* FindEventTypeByName(const std::string& name) {
+  for (const EventType& candidate : GetAllEventTypes()) {
+    if (candidate.name == name) {
+      // The entry lives in the list returned by GetAllEventTypes().
+      return &candidate;
+    }
+  }
+
+  // No match: point the user at the command that lists the valid names.
+  LOG(ERROR) << "Unknown event_type '" << name
+             << "', try `simpleperf list` to list all possible event type names";
+  return nullptr;
+}
+
+// Parses "<event type name>[:<modifier>]". The part after the last ':' counts as a modifier only
+// if it consists of the characters "ukhGHp" and spaces. A non-empty modifier whose shortened
+// name is unknown is retried as part of the name. Returns nullptr for an unknown event type.
+std::unique_ptr<EventTypeAndModifier> ParseEventType(const std::string& event_type_str) {
+  static std::string modifier_characters = "ukhGHp";
+  std::unique_ptr<EventTypeAndModifier> parsed(new EventTypeAndModifier);
+  parsed->name = event_type_str;
+  std::string type_name = event_type_str;
+  std::string modifier;
+  const size_t colon_pos = event_type_str.rfind(':');
+  if (colon_pos != std::string::npos) {
+    const std::string suffix = event_type_str.substr(colon_pos + 1);
+    const bool is_modifier = std::all_of(suffix.begin(), suffix.end(), [](char c) {
+      return c == ' ' || modifier_characters.find(c) != std::string::npos;
+    });
+    if (is_modifier) {
+      type_name = event_type_str.substr(0, colon_pos);
+      modifier = suffix;
+    }
+  }
+  const EventType* event_type = FindEventTypeByName(type_name);
+  if (event_type == nullptr && !modifier.empty()) {
+    // Try if the modifier belongs to the event type name, like some tracepoint events.
+    type_name = event_type_str;
+    modifier.clear();
+    event_type = FindEventTypeByName(type_name);
+  }
+  if (event_type == nullptr) {
+    return nullptr;
+  }
+  parsed->event_type = *event_type;
+
+  // Naming any of u/k/h selects only the named levels, so exclude all three first and let the
+  // loop below re-include the named ones. G/H work the same way for guest and host.
+  if (modifier.find_first_of("ukh") != std::string::npos) {
+    parsed->exclude_user = true;
+    parsed->exclude_kernel = true;
+    parsed->exclude_hv = true;
+  }
+  if (modifier.find_first_of("GH") != std::string::npos) {
+    parsed->exclude_guest = true;
+    parsed->exclude_host = true;
+  }
+
+  for (const char c : modifier) {
+    switch (c) {
+      case 'u':
+        parsed->exclude_user = false;
+        break;
+      case 'k':
+        parsed->exclude_kernel = false;
+        break;
+      case 'h':
+        parsed->exclude_hv = false;
+        break;
+      case 'G':
+        parsed->exclude_guest = false;
+        break;
+      case 'H':
+        parsed->exclude_host = false;
+        break;
+      case 'p':
+        parsed->precise_ip++;
+        break;
+      case ' ':
+        break;
+      default:
+        LOG(ERROR) << "Unknown event type modifier '" << c << "'";
+    }
+  }
+  parsed->modifier = modifier;
+  return parsed;
+}
diff --git a/simpleperf/event_type.h b/simpleperf/event_type.h
new file mode 100644
index 0000000..12d83b3
--- /dev/null
+++ b/simpleperf/event_type.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_EVENT_H_
+#define SIMPLE_PERF_EVENT_H_
+
+#include <stdint.h>
+#include <memory>
+#include <string>
+#include <vector>
+
+// EventType represents one type of event, like cpu_cycle_event, cache_misses_event.
+// The user knows one event type by its name, and the kernel knows one event type by its
+// (type, config) pair. EventType connects the two representations, and tells the user if
+// the event type is supported by the kernel.
+
+struct EventType {
+  EventType(const std::string& name, uint32_t type, uint64_t config)
+      : name(name), type(type), config(config) {
+  }
+
+  EventType() : type(0), config(0) {
+  }
+
+  std::string name;  // The name the user knows the event type by, e.g. "cpu-cycles".
+  uint32_t type;     // The kernel's event type, e.g. PERF_TYPE_HARDWARE.
+  uint64_t config;   // The kernel's config value within that type.
+};
+
+const std::vector<EventType>& GetAllEventTypes();
+const EventType* FindEventTypeByName(const std::string& name);
+
+struct EventTypeAndModifier {
+  std::string name;  // The full string that was parsed, modifier included.
+  EventType event_type;
+  std::string modifier;
+  bool exclude_user;
+  bool exclude_kernel;
+  bool exclude_hv;
+  bool exclude_host;
+  bool exclude_guest;
+  // Number of 'p' modifiers; unsigned, as a signed 2-bit field holds only -2..1, not 2 or 3.
+  unsigned int precise_ip : 2;
+
+  EventTypeAndModifier()
+      : exclude_user(false),
+        exclude_kernel(false),
+        exclude_hv(false),
+        exclude_host(false),
+        exclude_guest(false),
+        precise_ip(0) {}
+};
+
+std::unique_ptr<EventTypeAndModifier> ParseEventType(const std::string& event_type_str);
+
+#endif  // SIMPLE_PERF_EVENT_H_
diff --git a/simpleperf/event_type_table.h b/simpleperf/event_type_table.h
new file mode 100644
index 0000000..a77be0a
--- /dev/null
+++ b/simpleperf/event_type_table.h
@@ -0,0 +1,65 @@
+// This file is auto-generated by generate_event_type_table.py.
+
+{"cpu-cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES},
+{"instructions", PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS},
+{"cache-references", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES},
+{"cache-misses", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES},
+{"branch-instructions", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
+{"branch-misses", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES},
+{"bus-cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES},
+{"stalled-cycles-frontend", PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND},
+{"stalled-cycles-backend", PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND},
+
+{"cpu-clock", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK},
+{"task-clock", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK},
+{"page-faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS},
+{"context-switches", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES},
+{"cpu-migrations", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS},
+{"minor-faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN},
+{"major-faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ},
+{"alignment-faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS},
+{"emulation-faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS},
+
+{"L1-dcache-loads", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"L1-dcache-load-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"L1-dcache-stores", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"L1-dcache-store-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"L1-dcache-prefetches", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"L1-dcache-prefetch-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"L1-icache-loads", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1I) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"L1-icache-load-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1I) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"L1-icache-stores", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1I) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"L1-icache-store-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1I) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"L1-icache-prefetches", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1I) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"L1-icache-prefetch-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1I) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"LLC-loads", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"LLC-load-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"LLC-stores", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"LLC-store-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"LLC-prefetches", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"LLC-prefetch-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"dTLB-loads", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_DTLB) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"dTLB-load-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_DTLB) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"dTLB-stores", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_DTLB) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"dTLB-store-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_DTLB) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"dTLB-prefetches", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_DTLB) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"dTLB-prefetch-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_DTLB) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"iTLB-loads", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_ITLB) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"iTLB-load-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_ITLB) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"iTLB-stores", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_ITLB) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"iTLB-store-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_ITLB) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"iTLB-prefetches", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_ITLB) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"iTLB-prefetch-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_ITLB) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"branch-loads", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_BPU) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"branch-load-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_BPU) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"branch-stores", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_BPU) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"branch-store-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_BPU) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"branch-prefetches", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_BPU) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"branch-prefetch-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_BPU) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"node-loads", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_NODE) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"node-load-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_NODE) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"node-stores", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_NODE) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"node-store-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_NODE) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"node-prefetches", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_NODE) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"node-prefetch-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_NODE) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+
diff --git a/simpleperf/generate_event_type_table.py b/simpleperf/generate_event_type_table.py
new file mode 100755
index 0000000..ff60c23
--- /dev/null
+++ b/simpleperf/generate_event_type_table.py
@@ -0,0 +1,119 @@
+#!/usr/bin/python
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+def gen_event_type_entry_str(event_type_name, event_type, event_config):
+  """
+  return string like:
+  {"cpu-cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES},
+  """
+  return '{"%s", %s, %s},\n' % (event_type_name, event_type, event_config)
+
+
+def gen_hardware_events():
+  hardware_configs = ["cpu-cycles",
+                      "instructions",
+                      "cache-references",
+                      "cache-misses",
+                      "branch-instructions",
+                      "branch-misses",
+                      "bus-cycles",
+                      "stalled-cycles-frontend",
+                      "stalled-cycles-backend",
+                      ]
+  generated_str = ""
+  for config in hardware_configs:
+    event_type_name = config
+    event_config = "PERF_COUNT_HW_" + config.replace('-', '_').upper()
+
+    generated_str += gen_event_type_entry_str(
+        event_type_name, "PERF_TYPE_HARDWARE", event_config)
+
+  return generated_str
+
+
+def gen_software_events():
+  software_configs = ["cpu-clock",
+                      "task-clock",
+                      "page-faults",
+                      "context-switches",
+                      "cpu-migrations",
+                      ["minor-faults", "PERF_COUNT_SW_PAGE_FAULTS_MIN"],
+                      ["major-faults", "PERF_COUNT_SW_PAGE_FAULTS_MAJ"],
+                      "alignment-faults",
+                      "emulation-faults",
+                      ]
+  generated_str = ""
+  for config in software_configs:
+    if isinstance(config, list):
+      event_type_name = config[0]
+      event_config = config[1]
+    else:
+      event_type_name = config
+      event_config = "PERF_COUNT_SW_" + config.replace('-', '_').upper()
+
+    generated_str += gen_event_type_entry_str(
+        event_type_name, "PERF_TYPE_SOFTWARE", event_config)
+
+  return generated_str
+
+
+def gen_hw_cache_events():
+  hw_cache_types = [["L1-dcache", "PERF_COUNT_HW_CACHE_L1D"],
+                    ["L1-icache", "PERF_COUNT_HW_CACHE_L1I"],
+                    ["LLC", "PERF_COUNT_HW_CACHE_LL"],
+                    ["dTLB", "PERF_COUNT_HW_CACHE_DTLB"],
+                    ["iTLB", "PERF_COUNT_HW_CACHE_ITLB"],
+                    ["branch", "PERF_COUNT_HW_CACHE_BPU"],
+                    ["node", "PERF_COUNT_HW_CACHE_NODE"],
+                    ]
+  hw_cache_ops = [["loads", "load", "PERF_COUNT_HW_CACHE_OP_READ"],
+                  ["stores", "store", "PERF_COUNT_HW_CACHE_OP_WRITE"],
+                  ["prefetches", "prefetch",
+                   "PERF_COUNT_HW_CACHE_OP_PREFETCH"],
+                  ]
+  hw_cache_op_results = [["accesses", "PERF_COUNT_HW_CACHE_RESULT_ACCESS"],
+                         ["misses", "PERF_COUNT_HW_CACHE_RESULT_MISS"],
+                         ]
+  generated_str = ""
+  for (type_name, type_config) in hw_cache_types:
+    for (op_name_access, op_name_miss, op_config) in hw_cache_ops:
+      for (result_name, result_config) in hw_cache_op_results:
+        if result_name == "accesses":
+          event_type_name = type_name + '-' + op_name_access
+        else:
+          event_type_name = type_name + '-' + \
+              op_name_miss + '-' + result_name
+        event_config = "((%s) | (%s << 8) | (%s << 16))" % (
+            type_config, op_config, result_config)
+        generated_str += gen_event_type_entry_str(
+            event_type_name, "PERF_TYPE_HW_CACHE", event_config)
+
+  return generated_str
+
+
+def gen_events():
+  generated_str = "// This file is auto-generated by generate-event_table.py.\n\n"
+  generated_str += gen_hardware_events() + '\n'
+  generated_str += gen_software_events() + '\n'
+  generated_str += gen_hw_cache_events() + '\n'
+  return generated_str
+
+generated_str = gen_events()
+fh = open('event_type_table.h', 'w')
+fh.write(generated_str)
+fh.close()
diff --git a/simpleperf/get_test_data.h b/simpleperf/get_test_data.h
new file mode 100644
index 0000000..88e7e91
--- /dev/null
+++ b/simpleperf/get_test_data.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_GET_TEST_DATA_H_
+#define SIMPLE_PERF_GET_TEST_DATA_H_
+
+#include <string>
+
+#include "build_id.h"
+
+std::string GetTestData(const std::string& filename);
+const std::string& GetTestDataDir();
+
+// The source code of elf and elf_with_mini_debug_info is testdata/elf_file_source.cpp.
+static const std::string ELF_FILE = "elf";
+static const std::string ELF_FILE_WITH_MINI_DEBUG_INFO = "elf_with_mini_debug_info";
+// perf.data is generated by sampling on three processes running different
+// executables: elf, t1, t2 (all generated by elf_file_source.cpp, but with different
+// executable name).
+static const std::string PERF_DATA = "perf.data";
+
+// perf_with_multiple_pids_and_tids.data is generated by sampling on two processes, each
+// process running two threads.
+static const std::string PERF_DATA_WITH_MULTIPLE_PIDS_AND_TIDS = "perf_with_multiple_pids_and_tids.data";
+
+// perf_g_fp.data is generated by sampling on one process running elf using --call-graph fp option.
+static const std::string CALLGRAPH_FP_PERF_DATA = "perf_g_fp.data";
+// perf_b.data is generated by sampling on one process running elf using -b option.
+static const std::string BRANCH_PERF_DATA = "perf_b.data";
+// perf_with_mini_debug_info.data is generated by sampling on one process running
+// elf_with_mini_debug_info.
+static const std::string PERF_DATA_WITH_MINI_DEBUG_INFO = "perf_with_mini_debug_info.data";
+
+static BuildId elf_file_build_id("0b12a384a9f4a3f3659b7171ca615dbec3a81f71");
+
+
+// To generate apk supporting execution on shared libraries in apk:
+// 1. Add android:extractNativeLibs=false in AndroidManifest.xml.
+// 2. Use `zip -0` to store native libraries in apk without compression.
+// 3. Use `zipalign -p 4096` to make native libraries in apk start at page boundaries.
+//
+// The logic in libhello-jni.so is as below:
+//  volatile int GlobalVar;
+//
+//  while (true) {
+//    GlobalFunc() -> Func1() -> Func2()
+//  }
+// And most time is spent in Func2().
+static const std::string APK_FILE = "data/app/com.example.hellojni-1/base.apk";
+static const std::string NATIVELIB_IN_APK = "lib/arm64-v8a/libhello-jni.so";
+// has_embedded_native_libs_apk_perf.data is generated by sampling on one process running
+// APK_FILE using -g --no-unwind option.
+static const std::string NATIVELIB_IN_APK_PERF_DATA = "has_embedded_native_libs_apk_perf.data";
+// The offset and size info are extracted from the generated apk file to run read_apk tests.
+constexpr size_t NATIVELIB_OFFSET_IN_APK = 0x639000;
+constexpr size_t NATIVELIB_SIZE_IN_APK = 0x1678;
+
+static BuildId native_lib_build_id("8ed5755a7fdc07586ca228b8ee21621bce2c7a97");
+
+// perf_with_two_event_types.data is generated by sampling using -e cpu-cycles,cpu-clock option.
+static const std::string PERF_DATA_WITH_TWO_EVENT_TYPES = "perf_with_two_event_types.data";
+
+// perf_with_kernel_symbol.data is generated by `sudo simpleperf record ls -l`.
+static const std::string PERF_DATA_WITH_KERNEL_SYMBOL = "perf_with_kernel_symbol.data";
+
+// perf_with_symbols.data is generated by `sudo simpleperf record --dump-symbols` on a process calling func2(int,int).
+static const std::string PERF_DATA_WITH_SYMBOLS = "perf_with_symbols.data";
+// perf_with_symbols_for_nonzero_minvaddr_dso.data is generated by
+// `sudo simpleperf record --dump-symbols` on a process using a binary with a nonzero min virtual address.
+static const std::string PERF_DATA_WITH_SYMBOLS_FOR_NONZERO_MINVADDR_DSO =
+    "perf_with_symbols_for_nonzero_minvaddr_dso.data";
+
+// perf_with_kmem_slab_callgraph.data is generated by `simpleperf kmem record --slab --call-graph fp -f 100 sleep 0.0001`.
+static const std::string PERF_DATA_WITH_KMEM_SLAB_CALLGRAPH_RECORD = "perf_with_kmem_slab_callgraph.data";
+
+
+// perf_for_build_id_check.data is generated by recording a process running
+// testdata/data/correct_symfs_for_build_id_check/elf_for_build_id_check.
+static const std::string PERF_DATA_FOR_BUILD_ID_CHECK = "perf_for_build_id_check.data";
+static const std::string CORRECT_SYMFS_FOR_BUILD_ID_CHECK = "data/correct_symfs_for_build_id_check";
+static const std::string WRONG_SYMFS_FOR_BUILD_ID_CHECK = "data/wrong_symfs_for_build_id_check";
+
+static const std::string SYMFS_FOR_NO_SYMBOL_TABLE_WARNING = "data/symfs_for_no_symbol_table_warning";
+static const std::string SYMFS_FOR_READ_ELF_FILE_WARNING = "data/symfs_for_read_elf_file_warning";
+#endif  // SIMPLE_PERF_GET_TEST_DATA_H_
diff --git a/simpleperf/gtest_main.cpp b/simpleperf/gtest_main.cpp
new file mode 100644
index 0000000..395bba1
--- /dev/null
+++ b/simpleperf/gtest_main.cpp
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include <android-base/file.h>
+#include <android-base/logging.h>
+#include <android-base/test_utils.h>
+#include <ziparchive/zip_archive.h>
+
+#if defined(__ANDROID__)
+#include <sys/system_properties.h>
+#endif
+
+#include "get_test_data.h"
+#include "read_elf.h"
+#include "utils.h"
+
+static std::string testdata_dir;
+
+#if defined(__ANDROID__)
+static const std::string testdata_section = ".testzipdata";
+
+// Unpacks the zip archive embedded in the ".testzipdata" section of
+// /proc/self/exe into testdata_dir. Logs and returns false on the first failure.
+static bool ExtractTestDataFromElfSection() {
+  if (!MkdirWithParents(testdata_dir)) {
+    PLOG(ERROR) << "failed to create testdata_dir " << testdata_dir;
+    return false;
+  }
+  std::string zip_bytes;
+  ElfStatus read_status = ReadSectionFromElfFile("/proc/self/exe", testdata_section, &zip_bytes);
+  if (read_status != ElfStatus::NO_ERROR) {
+    LOG(ERROR) << "failed to read section " << testdata_section << ": " << read_status;
+    return false;
+  }
+  // ArchiveHelper is built from an fd and a path, so spill the section into a temporary file.
+  TemporaryFile zip_file;
+  if (!android::base::WriteStringToFile(zip_bytes, zip_file.path)) {
+    PLOG(ERROR) << "failed to write file " << zip_file.path;
+    return false;
+  }
+  ArchiveHelper archive(zip_file.fd, zip_file.path);
+  if (!archive) {
+    LOG(ERROR) << "failed to open archive " << zip_file.path;
+    return false;
+  }
+  ZipArchiveHandle& zip_handle = archive.archive_handle();
+  void* iter_cookie;
+  if (StartIteration(zip_handle, &iter_cookie, nullptr, nullptr) != 0) {
+    LOG(ERROR) << "failed to start iterating zip entries";
+    return false;
+  }
+  std::unique_ptr<void, decltype(&EndIteration)> end_iteration(iter_cookie, EndIteration);
+  ZipEntry zip_entry;
+  ZipString zip_name;
+  while (Next(iter_cookie, &zip_entry, &zip_name) == 0) {
+    std::string rel_path(zip_name.name, zip_name.name + zip_name.name_length);
+    std::string out_path = testdata_dir + rel_path;
+    if (out_path.back() == '/') {  // Directory entry: there is no file content to write.
+      continue;
+    }
+    if (!MkdirWithParents(out_path)) {
+      LOG(ERROR) << "failed to create dir for " << out_path;
+      return false;
+    }
+    FileHelper out_file = FileHelper::OpenWriteOnly(out_path);
+    if (!out_file) {
+      PLOG(ERROR) << "failed to create file " << out_path;
+      return false;
+    }
+    std::vector<uint8_t> bytes(zip_entry.uncompressed_length);
+    if (ExtractToMemory(zip_handle, &zip_entry, bytes.data(), bytes.size()) != 0) {
+      LOG(ERROR) << "failed to extract entry " << rel_path;
+      return false;
+    }
+    if (!android::base::WriteFully(out_file.fd(), bytes.data(), bytes.size())) {
+      LOG(ERROR) << "failed to write file " << out_path;
+      return false;
+    }
+  }
+  return true;
+}
+
+// Captures security.perf_harden and /proc/sys/kernel/perf_event_paranoid on
+// construction and writes the property back on destruction.
+class SavedPerfHardenProperty {
+ public:
+  SavedPerfHardenProperty() {
+    __system_property_get("security.perf_harden", prop_value_);
+    if (!android::base::ReadFileToString("/proc/sys/kernel/perf_event_paranoid",
+                                         &paranoid_value_)) {
+      PLOG(ERROR) << "failed to read /proc/sys/kernel/perf_event_paranoid";
+    }
+  }
+
+  ~SavedPerfHardenProperty() {
+    if (prop_value_[0] == '\0') {
+      return;  // Empty property value: nothing to write back.
+    }
+    __system_property_set("security.perf_harden", prop_value_);
+    // Wait a second for the property change to reach perf_event_paranoid.
+    sleep(1);
+    std::string current_paranoid;
+    if (!android::base::ReadFileToString("/proc/sys/kernel/perf_event_paranoid",
+                                         &current_paranoid)) {
+      PLOG(ERROR) << "failed to read /proc/sys/kernel/perf_event_paranoid";
+    } else if (paranoid_value_ != current_paranoid) {
+      LOG(ERROR) << "failed to restore /proc/sys/kernel/perf_event_paranoid";
+    }
+  }
+
+ private:
+  char prop_value_[PROP_VALUE_MAX];
+  std::string paranoid_value_;
+};
+
+#endif  // defined(__ANDROID__)
+
+// Test entry point: handles -t <testdata_dir> and --log <severity>, then runs all tests.
+int main(int argc, char** argv) {
+  InitLogging(argv, android::base::StderrLogger);
+  testing::InitGoogleTest(&argc, argv);
+  android::base::LogSeverity log_severity = android::base::WARNING;
+
+  for (int i = 1; i < argc; ++i) {
+    if (strcmp(argv[i], "-t") == 0 && i + 1 < argc) {
+      testdata_dir = argv[++i];
+    } else if (strcmp(argv[i], "--log") == 0) {
+      if (i + 1 >= argc) {
+        LOG(ERROR) << "Missing argument for --log option.\n";
+        return 1;
+      }
+      ++i;
+      if (!GetLogSeverity(argv[i], &log_severity)) {
+        LOG(ERROR) << "Unknown log severity: " << argv[i];
+        return 1;
+      }
+    }
+  }
+  android::base::ScopedLogSeverity severity(log_severity);
+
+#if defined(__ANDROID__)
+  std::unique_ptr<TemporaryDir> tmp_dir;
+  if (!::testing::GTEST_FLAG(list_tests) && testdata_dir.empty()) {
+    tmp_dir.reset(new TemporaryDir);
+    testdata_dir = std::string(tmp_dir->path) + "/";
+    if (!ExtractTestDataFromElfSection()) {
+      LOG(ERROR) << "failed to extract test data from elf section";
+      return 1;
+    }
+  }
+
+  // A cts test PerfEventParanoidTest.java is testing if
+  // /proc/sys/kernel/perf_event_paranoid is 3, so restore perf_harden
+  // value after current test to not break that test.
+  SavedPerfHardenProperty saved_perf_harden;
+#endif
+
+  if (!::testing::GTEST_FLAG(list_tests) && testdata_dir.empty()) {
+    printf("Usage: %s -t <testdata_dir>\n", argv[0]);
+    return 1;
+  }
+  // When only listing tests testdata_dir may be empty; back() on an empty string is undefined.
+  if (!testdata_dir.empty() && testdata_dir.back() != '/') {
+    testdata_dir.push_back('/');
+  }
+  LOG(INFO) << "testdata is in " << testdata_dir;
+  return RUN_ALL_TESTS();
+}
+
+// Returns testdata_dir with filename appended (no separator is inserted here).
+std::string GetTestData(const std::string& filename) {
+  return std::string(testdata_dir).append(filename);
+}
+
+// Returns a reference to the file-level testdata_dir string.
+const std::string& GetTestDataDir() { return testdata_dir; }
diff --git a/simpleperf/main.cpp b/simpleperf/main.cpp
new file mode 100644
index 0000000..a8a8935
--- /dev/null
+++ b/simpleperf/main.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+
+#include <string>
+#include <vector>
+
+#include <android-base/logging.h>
+
+#include "command.h"
+#include "utils.h"
+
+constexpr int SIMPLEPERF_VERSION = 1;
+
+// Handles --help/-h, --log and --version, then dispatches the remaining args to a Command.
+int main(int argc, char** argv) {
+  InitLogging(argv, android::base::StderrLogger);
+  android::base::LogSeverity severity_level = android::base::INFO;
+  std::vector<std::string> cmd_args;
+
+  for (int i = 1; i < argc; ++i) {
+    if (strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) {
+      cmd_args.insert(cmd_args.begin(), "help");
+    } else if (strcmp(argv[i], "--log") == 0) {
+      if (i + 1 >= argc) {
+        LOG(ERROR) << "Missing argument for --log option.\n";
+        return 1;
+      }
+      ++i;
+      if (!GetLogSeverity(argv[i], &severity_level)) {
+        LOG(ERROR) << "Unknown log severity: " << argv[i];
+        return 1;
+      }
+    } else if (strcmp(argv[i], "--version") == 0) {
+      LOG(INFO) << "Simpleperf version " << SIMPLEPERF_VERSION << ", revision "
+                << SIMPLEPERF_REVISION;
+      return 0;
+    } else {
+      cmd_args.push_back(argv[i]);
+    }
+  }
+  android::base::ScopedLogSeverity scoped_severity(severity_level);
+
+  if (cmd_args.empty()) {
+    cmd_args.push_back("help");
+  }
+  std::string command_name = cmd_args.front();
+  std::unique_ptr<Command> command = CreateCommandInstance(command_name);
+  if (command == nullptr) {
+    LOG(ERROR) << "malformed command line: unknown command " << command_name;
+    return 1;
+  }
+  cmd_args.erase(cmd_args.begin());
+
+  LOG(DEBUG) << "command '" << command_name << "' starts running";
+  const bool succeeded = command->Run(cmd_args);
+  LOG(DEBUG) << "command '" << command_name << "' "
+             << (succeeded ? "finished successfully" : "failed");
+  return succeeded ? 0 : 1;
+}
diff --git a/simpleperf/nonlinux_support/include/asm/byteorder.h b/simpleperf/nonlinux_support/include/asm/byteorder.h
new file mode 100644
index 0000000..d118abc
--- /dev/null
+++ b/simpleperf/nonlinux_support/include/asm/byteorder.h
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
diff --git a/simpleperf/nonlinux_support/include/linux/ioctl.h b/simpleperf/nonlinux_support/include/linux/ioctl.h
new file mode 100644
index 0000000..f580736
--- /dev/null
+++ b/simpleperf/nonlinux_support/include/linux/ioctl.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// Stubs: each ioctl request macro below expands to nothing.
+#define __IO(type, nr)
+#define __IOR(type, nr, size)
+#define __IOW(type, nr, size)
diff --git a/simpleperf/nonlinux_support/include/linux/types.h b/simpleperf/nonlinux_support/include/linux/types.h
new file mode 100644
index 0000000..6af5b8c
--- /dev/null
+++ b/simpleperf/nonlinux_support/include/linux/types.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdint.h>
+// Maps the __uN/__sN integer names onto the <stdint.h> fixed-width types.
+typedef uint8_t __u8;
+typedef uint16_t __u16;
+typedef uint32_t __u32;
+typedef int32_t __s32;
+typedef uint64_t __u64;
+typedef int64_t __s64;
diff --git a/simpleperf/nonlinux_support/nonlinux_support.cpp b/simpleperf/nonlinux_support/nonlinux_support.cpp
new file mode 100644
index 0000000..58c1ba0
--- /dev/null
+++ b/simpleperf/nonlinux_support/nonlinux_support.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Add fake functions to build successfully on darwin.
+#include <android-base/logging.h>
+
+#include "dwarf_unwind.h"
+#include "environment.h"
+
+// Stub: produces no frames, whatever the arguments.
+std::vector<uint64_t> UnwindCallChain(ArchType, const ThreadEntry&, const RegSet&, const char*,
+                                      size_t, bool) {
+  return {};
+}
+
+// Stub: always reports failure and never touches the BuildId argument.
+bool GetKernelBuildId(BuildId*) { return false; }
diff --git a/simpleperf/perf_event.h b/simpleperf/perf_event.h
new file mode 100644
index 0000000..7e7e48d
--- /dev/null
+++ b/simpleperf/perf_event.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_PERF_EVENT_H_
+#define SIMPLE_PERF_PERF_EVENT_H_
+
+#if defined(USE_BIONIC_UAPI_HEADERS)
+#include <uapi/linux/perf_event.h>
+#else
+#include <linux/perf_event.h>
+#endif
+
+#endif  // SIMPLE_PERF_PERF_EVENT_H_
diff --git a/simpleperf/perf_regs.cpp b/simpleperf/perf_regs.cpp
new file mode 100644
index 0000000..0b15398
--- /dev/null
+++ b/simpleperf/perf_regs.cpp
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "perf_regs.h"
+
+#include <unordered_map>
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
+#include <android-base/strings.h>
+
+#include "perf_event.h"
+
+ArchType ScopedCurrentArch::current_arch = GetBuildArch();  // Starts as the compile-time arch.
+
+// Maps an architecture name to an ArchType. An unrecognized name is logged and reported
+// as ARCH_UNSUPPORTED.
+ArchType GetArchType(const std::string& arch) {
+  if (arch == "x86" || arch == "i686") return ARCH_X86_32;
+  if (arch == "x86_64") return ARCH_X86_64;
+  // "arm64" is the spelling GetArchString() produces for ARCH_ARM64. It has to be matched
+  // here, ahead of the generic "arm" prefix test, or it would come back as 32-bit ARM and
+  // GetArchType(GetArchString(ARCH_ARM64)) would not round-trip.
+  if (arch == "aarch64" || arch == "arm64") return ARCH_ARM64;
+  if (android::base::StartsWith(arch, "arm")) return ARCH_ARM;
+  LOG(ERROR) << "unsupported arch: " << arch;
+  return ARCH_UNSUPPORTED;
+}
+
+// With the 32-bit sample register ABI, maps a 64-bit machine arch to its 32-bit sibling.
+ArchType GetArchForAbi(ArchType machine_arch, int abi) {
+  if (abi != PERF_SAMPLE_REGS_ABI_32) {
+    return machine_arch;
+  }
+  switch (machine_arch) {
+    case ARCH_X86_64: return ARCH_X86_32;
+    case ARCH_ARM64: return ARCH_ARM;
+    default: return machine_arch;
+  }
+}
+
+// Returns the printable name of `arch`.
+// Any value other than the four handled below yields "unknown".
+std::string GetArchString(ArchType arch) {
+  const char* name = "unknown";
+  if (arch == ARCH_X86_32) {
+    name = "x86";
+  } else if (arch == ARCH_X86_64) {
+    name = "x86_64";
+  } else if (arch == ARCH_ARM64) {
+    name = "arm64";
+  } else if (arch == ARCH_ARM) {
+    name = "arm";
+  }
+  return name;
+}
+
+// Compares two architectures.
+// With strict_check they must be identical; otherwise the 32-bit and 64-bit flavors
+// of one family (X86_32 with X86_64, ARM with ARM64) are also treated as the same.
+bool IsArchTheSame(ArchType arch1, ArchType arch2, bool strict_check) {
+  if (strict_check || arch1 == arch2) {
+    return arch1 == arch2;
+  }
+  auto is_x86 = [](ArchType arch) {
+    return arch == ARCH_X86_32 || arch == ARCH_X86_64;
+  };
+  auto is_arm = [](ArchType arch) {
+    return arch == ARCH_ARM || arch == ARCH_ARM64;
+  };
+  if (is_x86(arch1)) {
+    return is_x86(arch2);
+  }
+  return is_arm(arch1) && is_arm(arch2);
+}
+
+// Returns the register bit mask for `arch` (0 if unhandled); x86_64 omits DS/ES/FS/GS.
+uint64_t GetSupportedRegMask(ArchType arch) {
+  uint64_t mask = 0;
+  if (arch == ARCH_X86_32) {
+    mask = (1ULL << PERF_REG_X86_32_MAX) - 1;
+  } else if (arch == ARCH_X86_64) {
+    const uint64_t segment_bits = (1ULL << PERF_REG_X86_DS) | (1ULL << PERF_REG_X86_ES) |
+                                  (1ULL << PERF_REG_X86_FS) | (1ULL << PERF_REG_X86_GS);
+    mask = ((1ULL << PERF_REG_X86_64_MAX) - 1) & ~segment_bits;
+  } else if (arch == ARCH_ARM) {
+    mask = (1ULL << PERF_REG_ARM_MAX) - 1;
+  } else if (arch == ARCH_ARM64) {
+    mask = (1ULL << PERF_REG_ARM64_MAX) - 1;
+  }
+  return mask;
+}
+
+static std::unordered_map<size_t, std::string> x86_reg_map = {  // x86 register index -> name.
+    {PERF_REG_X86_AX, "ax"},       {PERF_REG_X86_BX, "bx"}, {PERF_REG_X86_CX, "cx"},
+    {PERF_REG_X86_DX, "dx"},       {PERF_REG_X86_SI, "si"}, {PERF_REG_X86_DI, "di"},
+    {PERF_REG_X86_BP, "bp"},       {PERF_REG_X86_SP, "sp"}, {PERF_REG_X86_IP, "ip"},
+    {PERF_REG_X86_FLAGS, "flags"}, {PERF_REG_X86_CS, "cs"}, {PERF_REG_X86_SS, "ss"},
+    {PERF_REG_X86_DS, "ds"},       {PERF_REG_X86_ES, "es"}, {PERF_REG_X86_FS, "fs"},
+    {PERF_REG_X86_GS, "gs"},
+};  // R8..R15 are not listed; GetRegName() formats them itself.
+
+static std::unordered_map<size_t, std::string> arm_reg_map = {  // R0..R10 are formatted in GetRegName().
+    {PERF_REG_ARM_FP, "fp"}, {PERF_REG_ARM_IP, "ip"}, {PERF_REG_ARM_SP, "sp"},
+    {PERF_REG_ARM_LR, "lr"}, {PERF_REG_ARM_PC, "pc"},
+};
+
+static std::unordered_map<size_t, std::string> arm64_reg_map = {  // X0..X29 are formatted in GetRegName().
+    {PERF_REG_ARM64_LR, "lr"}, {PERF_REG_ARM64_SP, "sp"}, {PERF_REG_ARM64_PC, "pc"},
+};
+
+std::string GetRegName(size_t regno, ArchType arch) {  // Name of register index regno on arch.
+  // Cast regno to int type to avoid -Werror=type-limits.
+  int reg = static_cast<int>(regno);
+  switch (arch) {
+    case ARCH_X86_64: {
+      if (reg >= PERF_REG_X86_R8 && reg <= PERF_REG_X86_R15) {
+        return android::base::StringPrintf("r%d", reg - PERF_REG_X86_R8 + 8);
+      }
+    }  // Intentional fall through: all other x86_64 indices use the x86 table below.
+    case ARCH_X86_32: {
+      auto it = x86_reg_map.find(reg);
+      CHECK(it != x86_reg_map.end()) << "unknown reg " << reg;  // An index without a name is fatal.
+      return it->second;
+    }
+    case ARCH_ARM: {
+      if (reg >= PERF_REG_ARM_R0 && reg <= PERF_REG_ARM_R10) {
+        return android::base::StringPrintf("r%d", reg - PERF_REG_ARM_R0);
+      }
+      auto it = arm_reg_map.find(reg);
+      CHECK(it != arm_reg_map.end()) << "unknown reg " << reg;
+      return it->second;
+    }
+    case ARCH_ARM64: {
+      if (reg >= PERF_REG_ARM64_X0 && reg <= PERF_REG_ARM64_X29) {
+        // NOTE(review): X0..X29 are printed with an "r" prefix, not "x" - confirm this is intended.
+        return android::base::StringPrintf("r%d", reg - PERF_REG_ARM64_X0);
+      }
+      auto it = arm64_reg_map.find(reg);
+      CHECK(it != arm64_reg_map.end()) << "unknown reg " << reg;
+      return it->second;
+    }
+    default:
+      return "unknown";  // Unhandled arch: no lookup and no CHECK.
+  }
+}
+
+// Unpacks valid_regs, which holds one value per set bit of valid_mask in ascending bit
+// order, into a RegSet. data[] slots whose bit is clear are left unwritten.
+RegSet CreateRegSet(uint64_t valid_mask, const uint64_t* valid_regs) {
+  RegSet result;
+  result.valid_mask = valid_mask;
+  for (size_t bit = 0, packed = 0; bit < 64; ++bit) {
+    if (valid_mask & (1ULL << bit)) result.data[bit] = valid_regs[packed++];
+  }
+  return result;
+}
+
+// Stores register regno in *value and returns true if its bit is set in regs.valid_mask;
+// otherwise returns false and leaves *value alone. regno must be below 64 (CHECKed).
+bool GetRegValue(const RegSet& regs, size_t regno, uint64_t* value) {
+  CHECK_LT(regno, 64U);
+  const bool present = ((regs.valid_mask >> regno) & 1) != 0;
+  if (present) *value = regs.data[regno];
+  return present;
+}
+
+// Returns the register index of the stack pointer for `arch`, or -1 for an unhandled arch.
+static int SpRegIndex(ArchType arch) {
+  switch (arch) {
+    case ARCH_X86_32:
+    case ARCH_X86_64:
+      return PERF_REG_X86_SP;
+    case ARCH_ARM:
+      return PERF_REG_ARM_SP;
+    case ARCH_ARM64:
+      return PERF_REG_ARM64_SP;
+    default:
+      return -1;
+  }
+}
+
+// Reads the stack pointer for `arch` from regs; false if arch is unhandled or the bit is unset.
+bool GetSpRegValue(const RegSet& regs, ArchType arch, uint64_t* value) {
+  const int sp_regno = SpRegIndex(arch);
+  return sp_regno >= 0 && GetRegValue(regs, static_cast<size_t>(sp_regno), value);
+}
diff --git a/simpleperf/perf_regs.h b/simpleperf/perf_regs.h
new file mode 100644
index 0000000..ff13d4f
--- /dev/null
+++ b/simpleperf/perf_regs.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_PERF_REGS_H_
+#define SIMPLE_PERF_PERF_REGS_H_
+
+#if defined(USE_BIONIC_UAPI_HEADERS)
+#include <uapi/asm-x86/asm/perf_regs.h>
+#include <uapi/asm-arm/asm/perf_regs.h>
+#define perf_event_arm_regs perf_event_arm64_regs
+#include <uapi/asm-arm64/asm/perf_regs.h>
+#else
+#include <asm-x86/asm/perf_regs.h>
+#include <asm-arm/asm/perf_regs.h>
+#define perf_event_arm_regs perf_event_arm64_regs
+#include <asm-arm64/asm/perf_regs.h>
+#endif
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+enum ArchType {  // Architectures distinguished by the functions declared below.
+  ARCH_X86_32,
+  ARCH_X86_64,
+  ARCH_ARM,
+  ARCH_ARM64,
+  ARCH_UNSUPPORTED,  // Result for unrecognized arch names and unknown build targets.
+};
+
+constexpr ArchType GetBuildArch() {  // Arch of the compilation target, from predefined macros.
+#if defined(__i386__)
+  return ARCH_X86_32;
+#elif defined(__x86_64__)
+  return ARCH_X86_64;
+#elif defined(__aarch64__)
+  return ARCH_ARM64;
+#elif defined(__arm__)
+  return ARCH_ARM;
+#else
+  return ARCH_UNSUPPORTED;  // None of the four macros tested above is defined.
+#endif
+}
+
+ArchType GetArchType(const std::string& arch);
+ArchType GetArchForAbi(ArchType machine_arch, int abi);
+std::string GetArchString(ArchType arch);
+bool IsArchTheSame(ArchType arch1, ArchType arch2, bool strict_check);
+uint64_t GetSupportedRegMask(ArchType arch);
+std::string GetRegName(size_t regno, ArchType arch);
+
+// Scope guard for the class-wide "current arch": the constructor installs `arch` and the
+// destructor puts back the value that was current when the guard was created.
+class ScopedCurrentArch {
+ public:
+  explicit ScopedCurrentArch(ArchType arch) : saved_arch(current_arch) { current_arch = arch; }
+  ~ScopedCurrentArch() { current_arch = saved_arch; }
+
+  // Returns the value currently installed.
+  static ArchType GetCurrentArch() {
+    return current_arch;
+  }
+
+ private:
+  ArchType saved_arch;           // Restored by the destructor.
+  static ArchType current_arch;  // One value shared by every instance.
+};
+
+struct RegSet {  // Register values tagged with a mask that says which slots are filled.
+  uint64_t valid_mask;  // Bit i set: data[i] holds a value (see CreateRegSet/GetRegValue).
+  uint64_t data[64];    // Indexed by register number; slots with a clear bit are not written.
+};
+
+RegSet CreateRegSet(uint64_t valid_mask, const uint64_t* valid_regs);
+
+bool GetRegValue(const RegSet& regs, size_t regno, uint64_t* value);
+bool GetSpRegValue(const RegSet& regs, ArchType arch, uint64_t* value);
+
+#endif  // SIMPLE_PERF_PERF_REGS_H_
diff --git a/simpleperf/read_apk.cpp b/simpleperf/read_apk.cpp
new file mode 100644
index 0000000..6a6b55f
--- /dev/null
+++ b/simpleperf/read_apk.cpp
@@ -0,0 +1,192 @@
+/*
+**
+** Copyright 2016, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+**     http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#include "read_apk.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <memory>
+
+#include <android-base/file.h>
+#include <android-base/logging.h>
+#include <ziparchive/zip_archive.h>
+#include "read_elf.h"
+#include "utils.h"
+
+std::map<ApkInspector::ApkOffset, std::unique_ptr<EmbeddedElf>> ApkInspector::embedded_elf_cache_;
+
+// Cached lookup of the ELF entry covering file_offset in apk_path. The returned pointer is
+// owned by the cache; a failed lookup is cached as well and yields nullptr.
+EmbeddedElf* ApkInspector::FindElfInApkByOffset(const std::string& apk_path, uint64_t file_offset) {
+  const ApkOffset key(apk_path, file_offset);
+  auto cached = embedded_elf_cache_.find(key);
+  if (cached == embedded_elf_cache_.end()) {
+    // First request for this (path, offset): scan the archive and remember the outcome.
+    cached = embedded_elf_cache_.emplace(
+        key, FindElfInApkByOffsetWithoutCache(apk_path, file_offset)).first;
+  }
+  return cached->second.get();
+}
+
+std::unique_ptr<EmbeddedElf> ApkInspector::FindElfInApkByOffsetWithoutCache(const std::string& apk_path,
+                                                                            uint64_t file_offset) {
+  // Uncached lookup; every failure path returns nullptr. Start by opening the apk (zip).
+  if (!IsValidApkPath(apk_path)) {
+    return nullptr;
+  }
+
+  FileHelper fhelper = FileHelper::OpenReadOnly(apk_path);
+  if (!fhelper) {
+    return nullptr;
+  }
+
+  ArchiveHelper ahelper(fhelper.fd(), apk_path);
+  if (!ahelper) {
+    return nullptr;
+  }
+  ZipArchiveHandle &handle = ahelper.archive_handle();
+
+  // Iterate through the zip file. Look for a stored (uncompressed) entry whose
+  // half-open byte range [offset, offset + uncompressed_length) contains the
+  // mmap offset we're interested in.
+  void* iteration_cookie;
+  if (StartIteration(handle, &iteration_cookie, nullptr, nullptr) < 0) {
+    return nullptr;
+  }
+  ZipEntry zentry;
+  ZipString zname;
+  bool found = false;
+  int zrc;
+  while ((zrc = Next(iteration_cookie, &zentry, &zname)) == 0) {
+    if (zentry.method == kCompressStored &&
+        file_offset >= static_cast<uint64_t>(zentry.offset) &&
+        file_offset < static_cast<uint64_t>(zentry.offset + zentry.uncompressed_length)) {
+      // Found: zentry and zname keep describing this entry after the loop.
+      found = true;
+      break;
+    }
+  }
+  EndIteration(iteration_cookie);
+  if (!found) {
+    return nullptr;
+  }
+
+  // We found something in the zip file at the right spot. Is it an ELF?
+  if (lseek(fhelper.fd(), zentry.offset, SEEK_SET) != zentry.offset) {
+    PLOG(ERROR) << "lseek() failed in " << apk_path << " offset " << zentry.offset;
+    return nullptr;
+  }
+  std::string entry_name;  // zname is copied using its explicit length field.
+  entry_name.resize(zname.name_length,'\0');
+  memcpy(&entry_name[0], zname.name, zname.name_length);
+  ElfStatus result = IsValidElfFile(fhelper.fd());  // Reads from the position set by lseek() above.
+  if (result != ElfStatus::NO_ERROR) {
+    LOG(ERROR) << "problems reading ELF from " << apk_path << " entry '"
+               << entry_name << "': " << result;
+    return nullptr;
+  }
+
+  // Elf found: hand back the entry's location; FindElfInApkByOffset() does the caching.
+  return std::unique_ptr<EmbeddedElf>(new EmbeddedElf(apk_path, entry_name, zentry.offset,
+                                                      zentry.uncompressed_length));
+}
+
+// Looks up the zip entry named elf_filename in apk_path. Returns nullptr if the apk cannot
+// be opened, the entry is missing, or the entry is not stored uncompressed.
+std::unique_ptr<EmbeddedElf> ApkInspector::FindElfInApkByName(const std::string& apk_path,
+                                                              const std::string& elf_filename) {
+  if (!IsValidApkPath(apk_path)) {
+    return nullptr;
+  }
+  FileHelper apk_file = FileHelper::OpenReadOnly(apk_path);
+  if (!apk_file) {
+    return nullptr;
+  }
+  ArchiveHelper archive(apk_file.fd(), apk_path);
+  if (!archive) {
+    return nullptr;
+  }
+  ZipEntry entry;
+  const int32_t status =
+      FindEntry(archive.archive_handle(), ZipString(elf_filename.c_str()), &entry);
+  if (status != 0) {
+    LOG(ERROR) << "failed to find " << elf_filename << " in " << apk_path
+        << ": " << ErrorCodeString(status);
+    return nullptr;
+  }
+  if (entry.method != kCompressStored || entry.compressed_length != entry.uncompressed_length) {
+    LOG(ERROR) << "shared library " << elf_filename << " in " << apk_path << " is compressed";
+    return nullptr;
+  }
+  return std::unique_ptr<EmbeddedElf>(
+      new EmbeddedElf(apk_path, elf_filename, entry.offset, entry.uncompressed_length));
+}
+
+// Returns true if apk_path is a regular file whose first four bytes are the zip
+// signature 0x50 0x4b 0x03 0x04 ("PK\x03\x04").
+bool IsValidApkPath(const std::string& apk_path) {
+  static const char zip_preamble[] = {0x50, 0x4b, 0x03, 0x04};
+  if (!IsRegularFile(apk_path)) {
+    return false;
+  }
+  const std::string open_mode = std::string("rb") + CLOSE_ON_EXEC_MODE;
+  FILE* apk = fopen(apk_path.c_str(), open_mode.c_str());
+  if (apk == nullptr) {
+    return false;
+  }
+  char header[sizeof(zip_preamble)];
+  // Close before deciding, so the short-read and the full-read paths share one fclose().
+  const bool got_header = fread(header, sizeof(header), 1, apk) == 1;
+  fclose(apk);
+  return got_header && memcmp(header, zip_preamble, sizeof(zip_preamble)) == 0;
+}
+
+// Names a file in an apk as "<apk_path>!/<elf_filename>", per http://developer.android.com/reference/java/net/JarURLConnection.html.
+std::string GetUrlInApk(const std::string& apk_path, const std::string& elf_filename) {
+  return std::string(apk_path).append("!/").append(elf_filename);
+}
+
+// Splits path at its first "!/" into (true, prefix, suffix); (false, "", "") if there is none.
+std::tuple<bool, std::string, std::string> SplitUrlInApk(const std::string& path) {
+  const std::string::size_type sep = path.find("!/");
+  return sep == std::string::npos
+             ? std::make_tuple(false, std::string(), std::string())
+             : std::make_tuple(true, path.substr(0, sep), path.substr(sep + 2));
+}
+
+// Locates entry elf_filename inside apk_path and forwards its offset and size to
+// GetBuildIdFromEmbeddedElfFile(). FILE_NOT_FOUND when the entry lookup returns nothing.
+ElfStatus GetBuildIdFromApkFile(const std::string& apk_path, const std::string& elf_filename,
+                                BuildId* build_id) {
+  const auto elf = ApkInspector::FindElfInApkByName(apk_path, elf_filename);
+  if (!elf) return ElfStatus::FILE_NOT_FOUND;
+  return GetBuildIdFromEmbeddedElfFile(apk_path, elf->entry_offset(), elf->entry_size(), build_id);
+}
+
+// Locates entry elf_filename inside apk_path and forwards its offset and size to
+// ParseSymbolsFromEmbeddedElfFile(). FILE_NOT_FOUND when the entry lookup returns nothing.
+ElfStatus ParseSymbolsFromApkFile(const std::string& apk_path, const std::string& elf_filename,
+                                  const BuildId& expected_build_id,
+                                  const std::function<void(const ElfFileSymbol&)>& callback) {
+  const auto elf = ApkInspector::FindElfInApkByName(apk_path, elf_filename);
+  if (!elf) return ElfStatus::FILE_NOT_FOUND;
+  return ParseSymbolsFromEmbeddedElfFile(apk_path, elf->entry_offset(), elf->entry_size(),
+                                         expected_build_id, callback);
+}
diff --git a/simpleperf/read_apk.h b/simpleperf/read_apk.h
new file mode 100644
index 0000000..65b5df8
--- /dev/null
+++ b/simpleperf/read_apk.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_READ_APK_H_
+#define SIMPLE_PERF_READ_APK_H_
+
+#include <stdint.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <tuple>
+
+#include "read_elf.h"
+
+// Container for info on an ELF file embedded into an APK file.
+class EmbeddedElf {
+ public:
+  // Creates an empty record: empty strings, offset 0 and size 0.
+  EmbeddedElf() : entry_offset_(0), entry_size_(0) {}
+
+  // Records where entry_name lives inside the APK at filepath. The integer parameters
+  // have the same widths as the stored fields, so an offset beyond 4GiB is not cut
+  // down to size_t on builds where size_t is only 32 bits wide.
+  EmbeddedElf(const std::string& filepath,
+              const std::string& entry_name,
+              uint64_t entry_offset,
+              uint32_t entry_size)
+      : filepath_(filepath)
+      , entry_name_(entry_name)
+      , entry_offset_(entry_offset)
+      , entry_size_(entry_size)
+  {
+  }
+
+  // Path to APK file
+  const std::string &filepath() const { return filepath_; }
+
+  // Entry name within zip archive
+  const std::string &entry_name() const { return entry_name_; }
+
+  // Offset of zip entry from start of containing APK file
+  uint64_t entry_offset() const { return entry_offset_; }
+
+  // Size of zip entry (length of embedded ELF)
+  uint32_t entry_size() const { return entry_size_; }
+
+ private:
+  std::string filepath_; // containing APK path
+  std::string entry_name_; // name of entry in zip index of embedded elf file
+  uint64_t entry_offset_; // offset of ELF from start of containing APK file
+  uint32_t entry_size_;  // size of ELF file in zip
+};
+
+// APK inspector helper class: static lookups of ELF files stored inside an APK.
+class ApkInspector {
+ public:
+  // FindElfInApkByOffset: given an APK/ZIP/JAR file and an offset into it, if the offset
+  // lies in an uncompressed entry that is an ELF file, returns info on that entry; the
+  // result is owned by an internal cache. FindElfInApkByName looks an entry up by name.
+  static EmbeddedElf* FindElfInApkByOffset(const std::string& apk_path, uint64_t file_offset);
+  static std::unique_ptr<EmbeddedElf> FindElfInApkByName(const std::string& apk_path,
+                                                         const std::string& elf_filename);
+
+ private:
+  static std::unique_ptr<EmbeddedElf> FindElfInApkByOffsetWithoutCache(const std::string& apk_path,
+                                                                       uint64_t file_offset);
+
+  // First component of pair is APK file path, second is offset into APK.
+  typedef std::pair<std::string, uint64_t> ApkOffset;
+  // Filled by FindElfInApkByOffset(); a failed lookup is stored as a null pointer.
+  static std::map<ApkOffset, std::unique_ptr<EmbeddedElf>> embedded_elf_cache_;
+};
+
+// Export for test only.
+bool IsValidApkPath(const std::string& apk_path);
+
+std::string GetUrlInApk(const std::string& apk_path, const std::string& elf_filename);
+std::tuple<bool, std::string, std::string> SplitUrlInApk(const std::string& path);
+
+ElfStatus GetBuildIdFromApkFile(const std::string& apk_path, const std::string& elf_filename,
+                                BuildId* build_id);
+
+ElfStatus ParseSymbolsFromApkFile(const std::string& apk_path, const std::string& elf_filename,
+                                  const BuildId& expected_build_id,
+                                  const std::function<void(const ElfFileSymbol&)>& callback);
+
+
+#endif  // SIMPLE_PERF_READ_APK_H_
diff --git a/simpleperf/read_apk_test.cpp b/simpleperf/read_apk_test.cpp
new file mode 100644
index 0000000..651e17e
--- /dev/null
+++ b/simpleperf/read_apk_test.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "read_apk.h"
+
+#include <gtest/gtest.h>
+#include "get_test_data.h"
+#include "test_util.h"
+
+
+TEST(read_apk, IsValidApkPath) {
+  ASSERT_FALSE(IsValidApkPath("/dev/zero"));  // A device node, not a regular file.
+  ASSERT_FALSE(IsValidApkPath(GetTestData(ELF_FILE)));  // Regular file without the zip signature.
+  ASSERT_TRUE(IsValidApkPath(GetTestData(APK_FILE)));
+}
+
+TEST(read_apk, FindElfInApkByOffset) {
+  ApkInspector inspector;  // FindElfInApkByOffset is static; the instance is only a call prefix.
+  ASSERT_TRUE(inspector.FindElfInApkByOffset("/dev/null", 0) == nullptr);
+  ASSERT_TRUE(inspector.FindElfInApkByOffset(GetTestData(APK_FILE), 0) == nullptr);
+  // Test if we can read the EmbeddedElf using an offset inside its [offset, offset+size) range
+  // in the apk file; the midpoint of the entry is used here.
+  EmbeddedElf* ee = inspector.FindElfInApkByOffset(GetTestData(APK_FILE),
+                                                   NATIVELIB_OFFSET_IN_APK + NATIVELIB_SIZE_IN_APK / 2);
+  ASSERT_TRUE(ee != nullptr);
+  ASSERT_EQ(NATIVELIB_IN_APK, ee->entry_name());
+  ASSERT_EQ(NATIVELIB_OFFSET_IN_APK, ee->entry_offset());
+  ASSERT_EQ(NATIVELIB_SIZE_IN_APK, ee->entry_size());
+}
+
+TEST(read_apk, FindElfInApkByName) {
+  ASSERT_TRUE(ApkInspector::FindElfInApkByName("/dev/null", "") == nullptr);  // Not an apk.
+  ASSERT_TRUE(ApkInspector::FindElfInApkByName(GetTestData(APK_FILE), "") == nullptr);  // Empty name.
+  auto ee = ApkInspector::FindElfInApkByName(GetTestData(APK_FILE), NATIVELIB_IN_APK);
+  ASSERT_TRUE(ee != nullptr);
+  ASSERT_EQ(NATIVELIB_OFFSET_IN_APK, ee->entry_offset());
+  ASSERT_EQ(NATIVELIB_SIZE_IN_APK, ee->entry_size());
+}
+
+TEST(read_apk, GetBuildIdFromApkFile) {
+  BuildId build_id;  // Filled by the call below, then compared with the expected id.
+  ASSERT_EQ(ElfStatus::NO_ERROR, GetBuildIdFromApkFile(GetTestData(APK_FILE), NATIVELIB_IN_APK, &build_id));
+  ASSERT_EQ(build_id, native_lib_build_id);
+}
+
+TEST(read_apk, ParseSymbolsFromApkFile) {
+  std::map<std::string, ElfFileSymbol> symbols;  // Passed to ParseSymbol via the bound callback.
+  ASSERT_EQ(ElfStatus::NO_SYMBOL_TABLE,  // The expected status here is NO_SYMBOL_TABLE, not NO_ERROR.
+            ParseSymbolsFromApkFile(GetTestData(APK_FILE), NATIVELIB_IN_APK, native_lib_build_id,
+                                    std::bind(ParseSymbol, std::placeholders::_1, &symbols)));
+  CheckElfFileSymbols(symbols);  // Still run on the collected symbols despite that status.
+}
diff --git a/simpleperf/read_elf.cpp b/simpleperf/read_elf.cpp
new file mode 100644
index 0000000..71d055e
--- /dev/null
+++ b/simpleperf/read_elf.cpp
@@ -0,0 +1,487 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "read_elf.h"
+#include "read_apk.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <limits>
+
+#include <android-base/file.h>
+#include <android-base/logging.h>
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-parameter"
+
+#include <llvm/ADT/StringRef.h>
+#include <llvm/Object/Binary.h>
+#include <llvm/Object/ELFObjectFile.h>
+#include <llvm/Object/ObjectFile.h>
+
+#pragma clang diagnostic pop
+
+#include "utils.h"
+
+#define ELF_NOTE_GNU "GNU"
+#define NT_GNU_BUILD_ID 3
+
+// Returns the message for `status`, or nullptr for a value matching no case below.
+static const char* ElfStatusMessage(ElfStatus status) {
+  switch (status) {
+    case ElfStatus::NO_ERROR:
+      return "No error";
+    case ElfStatus::FILE_NOT_FOUND:
+      return "File not found";
+    case ElfStatus::READ_FAILED:
+      return "Read failed";
+    case ElfStatus::FILE_MALFORMED:
+      return "Malformed file";
+    case ElfStatus::NO_SYMBOL_TABLE:
+      return "No symbol table";
+    case ElfStatus::NO_BUILD_ID:
+      return "No build id";
+    case ElfStatus::BUILD_ID_MISMATCH:
+      return "Build id mismatch";
+    case ElfStatus::SECTION_NOT_FOUND:
+      return "Section not found";
+  }
+  return nullptr;
+}
+
+// Streams a short human-readable message for `status`. A value without a message
+// leaves the stream untouched.
+std::ostream& operator<<(std::ostream& os, const ElfStatus& status) {
+  const char* message = ElfStatusMessage(status);
+  return message == nullptr ? os : os << message;
+}
+
+// Reads four bytes from fd and compares them with the ELF magic. READ_FAILED if they
+// cannot be read, FILE_MALFORMED if they differ, NO_ERROR if they match.
+ElfStatus IsValidElfFile(int fd) {
+  static const char elf_magic[] = {0x7f, 'E', 'L', 'F'};
+  char header[sizeof(elf_magic)];
+  if (!android::base::ReadFully(fd, header, sizeof(header))) {
+    return ElfStatus::READ_FAILED;
+  }
+  const bool matches = memcmp(header, elf_magic, sizeof(elf_magic)) == 0;
+  return matches ? ElfStatus::NO_ERROR : ElfStatus::FILE_MALFORMED;
+}
+
+// FILE_NOT_FOUND if not a regular file, READ_FAILED if unopenable, else IsValidElfFile()'s result.
+ElfStatus IsValidElfPath(const std::string& filename) {
+  if (!IsRegularFile(filename)) {
+    return ElfStatus::FILE_NOT_FOUND;
+  }
+  FILE* file = fopen(filename.c_str(), (std::string("rb") + CLOSE_ON_EXEC_MODE).c_str());
+  if (file == nullptr) {
+    return ElfStatus::READ_FAILED;
+  }
+  const ElfStatus status = IsValidElfFile(fileno(file));
+  fclose(file);
+  return status;
+}
+
+// Finds the GNU build-id note in a buffer of ELF notes and stores it in *build_id. Returns
+// false (rather than aborting) when no such note exists or a note overruns the buffer.
+static bool GetBuildIdFromNoteSection(const char* section, size_t section_size, BuildId* build_id) {
+  const char* p = section;
+  const char* end = p + section_size;
+  uint32_t header[3];  // namesz, descsz, type - in the order they appear in the note.
+  while (static_cast<size_t>(end - p) >= sizeof(header)) {
+    memcpy(header, p, sizeof(header));  // memcpy: the buffer carries no alignment guarantee.
+    p += sizeof(header);
+    const uint64_t left = static_cast<uint64_t>(end - p);  // Sizes are compared, never pointers.
+    const uint64_t namesz = Align(header[0], 4);
+    const uint64_t descsz = Align(header[1], 4);
+    if (namesz > left || descsz > left - namesz) return false;
+    if (header[2] == NT_GNU_BUILD_ID && namesz >= sizeof(ELF_NOTE_GNU) &&
+        memcmp(p, ELF_NOTE_GNU, sizeof(ELF_NOTE_GNU)) == 0) {
+      *build_id = BuildId(p + namesz, descsz);
+      return true;
+    }
+    p += namesz + descsz;
+  }
+  return false;
+}
+
+// Loads filename whole and searches it as a raw note buffer. READ_FAILED if it cannot be
+// read, NO_BUILD_ID if the search finds nothing, NO_ERROR with *build_id set otherwise.
+ElfStatus GetBuildIdFromNoteFile(const std::string& filename, BuildId* build_id) {
+  std::string notes;
+  if (!android::base::ReadFileToString(filename, &notes)) {
+    return ElfStatus::READ_FAILED;
+  }
+  const bool found = GetBuildIdFromNoteSection(notes.c_str(), notes.size(), build_id);
+  return found ? ElfStatus::NO_ERROR : ElfStatus::NO_BUILD_ID;
+}
+
+// Searches each SHT_NOTE section of elf for a build id. READ_FAILED if a note section's
+// contents cannot be fetched, NO_BUILD_ID if none of them yields one.
+template <class ELFT>
+ElfStatus GetBuildIdFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf, BuildId* build_id) {
+  for (auto section = elf->section_begin(); section != elf->section_end(); ++section) {
+    const llvm::object::ELFSectionRef& as_elf = *section;
+    if (as_elf.getType() != llvm::ELF::SHT_NOTE) continue;
+    llvm::StringRef notes;
+    if (section->getContents(notes)) {
+      return ElfStatus::READ_FAILED;
+    }
+    if (GetBuildIdFromNoteSection(notes.data(), notes.size(), build_id)) {
+      return ElfStatus::NO_ERROR;
+    }
+  }
+  return ElfStatus::NO_BUILD_ID;
+}
+
+// Reads the build id from a 32- or 64-bit little-endian ELF obj; anything else is FILE_MALFORMED.
+static ElfStatus GetBuildIdFromObjectFile(llvm::object::ObjectFile* obj, BuildId* build_id) {
+  if (auto elf32 = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(obj)) {
+    return GetBuildIdFromELFFile(elf32, build_id);
+  }
+  auto elf64 = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(obj);
+  return elf64 ? GetBuildIdFromELFFile(elf64, build_id) : ElfStatus::FILE_MALFORMED;
+}
+
+// Pairs an LLVM OwningBinary with a view of that same binary as an ObjectFile.
+// `obj` starts out null; the OpenObjectFile* helpers set it from `binary`, so it must
+// not be used after `binary` is gone.
+struct BinaryWrapper {
+  llvm::object::OwningBinary<llvm::object::Binary> binary;
+  llvm::object::ObjectFile* obj = nullptr;
+};
+
+static ElfStatus OpenObjectFile(const std::string& filename, uint64_t file_offset,
+                                uint64_t file_size, BinaryWrapper* wrapper) {  // Fills *wrapper from a file slice.
+  FileHelper fhelper = FileHelper::OpenReadOnly(filename);
+  if (!fhelper) {
+    return ElfStatus::READ_FAILED;
+  }
+  if (file_size == 0) {  // 0 means "use the size of the whole file".
+    file_size = GetFileSize(filename);  // NOTE(review): file_offset is not subtracted here - confirm callers pass 0.
+    if (file_size == 0) {
+      return ElfStatus::READ_FAILED;
+    }
+  }
+  auto buffer_or_err = llvm::MemoryBuffer::getOpenFileSlice(fhelper.fd(), filename, file_size, file_offset);
+  if (!buffer_or_err) {
+    return ElfStatus::READ_FAILED;
+  }
+  auto binary_or_err = llvm::object::createBinary(buffer_or_err.get()->getMemBufferRef());
+  if (!binary_or_err) {
+    return ElfStatus::READ_FAILED;
+  }
+  wrapper->binary = llvm::object::OwningBinary<llvm::object::Binary>(std::move(binary_or_err.get()),
+                                                                        std::move(buffer_or_err.get()));
+  wrapper->obj = llvm::dyn_cast<llvm::object::ObjectFile>(wrapper->binary.getBinary());  // View into binary.
+  if (wrapper->obj == nullptr) {  // Parsed as some binary, but not as an object file.
+    return ElfStatus::FILE_MALFORMED;
+  }
+  return ElfStatus::NO_ERROR;
+}
+
+static ElfStatus OpenObjectFileFromString(const std::string& s, BinaryWrapper* wrapper) {  // Fills *wrapper from s.
+  // NOTE(review): getMemBuffer() presumably wraps s without copying, so s must outlive *wrapper - confirm.
+  auto buffer = llvm::MemoryBuffer::getMemBuffer(s);
+  auto binary_or_err = llvm::object::createBinary(buffer->getMemBufferRef());
+  if (!binary_or_err) {
+    return ElfStatus::FILE_MALFORMED;
+  }
+  wrapper->binary = llvm::object::OwningBinary<llvm::object::Binary>(std::move(binary_or_err.get()),
+                                                                std::move(buffer));
+  wrapper->obj = llvm::dyn_cast<llvm::object::ObjectFile>(wrapper->binary.getBinary());  // View into binary.
+  if (wrapper->obj == nullptr) {  // Parsed as some binary, but not as an object file.
+    return ElfStatus::FILE_MALFORMED;
+  }
+  return ElfStatus::NO_ERROR;
+}
+
+// Checks filename with IsValidElfPath(), then reads the build id with offset 0 and size 0
+// (a size of 0 makes OpenObjectFile() use the size of the whole file).
+ElfStatus GetBuildIdFromElfFile(const std::string& filename, BuildId* build_id) {
+  const ElfStatus valid = IsValidElfPath(filename);
+  return valid == ElfStatus::NO_ERROR ? GetBuildIdFromEmbeddedElfFile(filename, 0, 0, build_id)
+                                      : valid;
+}
+
+// Opens the file_size bytes at file_offset in filename as an object file and reads its
+// build id. A failure to open is returned as is.
+ElfStatus GetBuildIdFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset,
+                                        uint32_t file_size, BuildId* build_id) {
+  BinaryWrapper elf;
+  const ElfStatus opened = OpenObjectFile(filename, file_offset, file_size, &elf);
+  return opened == ElfStatus::NO_ERROR ? GetBuildIdFromObjectFile(elf.obj, build_id)
+                                       : opened;
+}
+
+// Copies the bytes of the first section named section_name into *content. READ_FAILED if
+// its contents cannot be fetched, SECTION_NOT_FOUND if no section has that name.
+template <class ELFT>
+ElfStatus ReadSectionFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf,
+                                 const std::string& section_name, std::string* content) {
+  for (auto section = elf->section_begin(); section != elf->section_end(); ++section) {
+    llvm::StringRef current_name;
+    if (!section->getName(current_name) && current_name == section_name) {
+      llvm::StringRef bytes;
+      if (section->getContents(bytes)) {
+        return ElfStatus::READ_FAILED;
+      }
+      *content = bytes;
+      return ElfStatus::NO_ERROR;
+    }
+  }
+  return ElfStatus::SECTION_NOT_FOUND;
+}
+
+bool IsArmMappingSymbol(const char* name) {
+  // Mapping symbols (see "ELF for ARM Architecture", "ELF for ARM 64-bit Architecture") match
+  // ^\$(a|d|t|x)(\..*)?$. NUL is ruled out first: strchr() also matches the terminator of "adtx".
+  return name[0] == '$' && name[1] != '\0' && strchr("adtx", name[1]) != nullptr &&
+         (name[2] == '\0' || name[2] == '.');
+}
+
+void ReadSymbolTable(llvm::object::symbol_iterator sym_begin,
+                     llvm::object::symbol_iterator sym_end,
+                     const std::function<void(const ElfFileSymbol&)>& callback,
+                     bool is_arm) {  // Calls callback once per symbol in [sym_begin, sym_end) that is kept.
+  for (; sym_begin != sym_end; ++sym_begin) {
+    ElfFileSymbol symbol;
+    auto symbol_ref = static_cast<const llvm::object::ELFSymbolRef*>(&*sym_begin);
+    llvm::ErrorOr<llvm::object::section_iterator> section_it_or_err = symbol_ref->getSection();
+    if (!section_it_or_err) {
+      continue;  // Skipped: the symbol's section could not be obtained.
+    }
+
+    llvm::StringRef section_name;
+    if (section_it_or_err.get()->getName(section_name) || section_name.empty()) {
+      continue;  // Skipped: the section has no readable, non-empty name.
+    }
+    if (section_name == ".text") {
+      symbol.is_in_text_section = true;
+    }
+    llvm::ErrorOr<llvm::StringRef> symbol_name_or_err = symbol_ref->getName();
+    if (!symbol_name_or_err || symbol_name_or_err.get().empty()) {
+      continue;  // Skipped: the symbol has no readable, non-empty name.
+    }
+
+    symbol.name = symbol_name_or_err.get();
+    symbol.vaddr = symbol_ref->getValue();
+    if ((symbol.vaddr & 1) != 0 && is_arm) {
+      // Arm sets bit 0 to mark it as thumb code, remove the flag.
+      symbol.vaddr &= ~1;
+    }
+    symbol.len = symbol_ref->getSize();
+    llvm::object::SymbolRef::Type symbol_type = symbol_ref->getType();
+    if (symbol_type == llvm::object::SymbolRef::ST_Function) {
+      symbol.is_func = true;
+    } else if (symbol_type == llvm::object::SymbolRef::ST_Unknown) {
+      if (symbol.is_in_text_section) {  // An untyped symbol in .text is treated as a label.
+        symbol.is_label = true;
+        if (is_arm) {
+          // Arm mapping symbols are not labels; test the name with any linker_prefix stripped.
+          const char* p = (symbol.name.compare(0, linker_prefix.size(), linker_prefix) == 0)
+                              ? symbol.name.c_str() + linker_prefix.size()
+                              : symbol.name.c_str();
+          if (IsArmMappingSymbol(p)) {
+            symbol.is_label = false;
+          }
+        }
+      }
+    }
+
+    callback(symbol);  // Mapping symbols still reach the callback, just without is_label set.
+  }
+}
+
+// Reports one synthetic symbol named "@plt" that spans the first .plt section
+// of |elf|. Nothing is reported when there is no .plt section or its section
+// header can't be obtained.
+template <class ELFT>
+void AddSymbolForPltSection(const llvm::object::ELFObjectFile<ELFT>* elf,
+                            const std::function<void(const ElfFileSymbol&)>& callback) {
+  // Samples can land in .plt when the program calls into shared libraries.
+  // Every architecture lays out .plt differently, so mapping its instructions
+  // back to real symbols would take a lot of work. Such samples are rare and
+  // .plt is hardly ever a bottleneck, hence a single @plt symbol stands in for
+  // the whole section.
+  for (auto section = elf->section_begin(); section != elf->section_end(); ++section) {
+    const llvm::object::ELFSectionRef& candidate = *section;
+    llvm::StringRef candidate_name;
+    const bool name_unreadable = static_cast<bool>(candidate.getName(candidate_name));
+    if (name_unreadable || candidate_name != ".plt") {
+      continue;
+    }
+    // The search stops at the first .plt section, usable or not.
+    if (const auto* header = elf->getSection(candidate.getRawDataRefImpl())) {
+      ElfFileSymbol plt_symbol;
+      plt_symbol.name = "@plt";
+      plt_symbol.vaddr = header->sh_addr;
+      plt_symbol.len = header->sh_size;
+      plt_symbol.is_in_text_section = true;
+      plt_symbol.is_label = true;
+      plt_symbol.is_func = true;
+      callback(plt_symbol);
+    }
+    return;
+  }
+}
+
+// Reports the symbols of |elf| through |callback|.
+//
+// A synthetic "@plt" symbol is reported first (see AddSymbolForPltSection). If
+// the regular symbol table is non-empty, its symbols are reported and NO_ERROR
+// is returned. Otherwise the dynamic symbols are reported (when present) and the
+// .gnu_debugdata section is tried: it is xz-decompressed, opened as an ELF
+// object and parsed by a recursive call.
+//
+// Returns NO_SYMBOL_TABLE when there is neither a regular symbol table nor a
+// .gnu_debugdata section, even if dynamic symbols were reported.
+template <class ELFT>
+ElfStatus ParseSymbolsFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf,
+                                  const std::function<void(const ElfFileSymbol&)>& callback) {
+  auto machine = elf->getELFFile()->getHeader()->e_machine;
+  bool is_arm = (machine == llvm::ELF::EM_ARM || machine == llvm::ELF::EM_AARCH64);
+  AddSymbolForPltSection(elf, callback);
+  if (elf->symbol_begin() != elf->symbol_end()) {
+    ReadSymbolTable(elf->symbol_begin(), elf->symbol_end(), callback, is_arm);
+    return ElfStatus::NO_ERROR;
+  } else if (elf->dynamic_symbol_begin()->getRawDataRefImpl() != llvm::object::DataRefImpl()) {
+    // A default-constructed DataRefImpl is used as the "no dynamic symbols" marker.
+    ReadSymbolTable(elf->dynamic_symbol_begin(), elf->dynamic_symbol_end(), callback, is_arm);
+  }
+  std::string debugdata;
+  ElfStatus result = ReadSectionFromELFFile(elf, ".gnu_debugdata", &debugdata);
+  if (result == ElfStatus::SECTION_NOT_FOUND) {
+    return ElfStatus::NO_SYMBOL_TABLE;
+  } else if (result == ElfStatus::NO_ERROR) {
+    std::string decompressed_data;
+    // NOTE(review): if XzDecompress() fails, |result| is still NO_ERROR and is
+    // returned as such at the bottom - confirm that this is intended.
+    if (XzDecompress(debugdata, &decompressed_data)) {
+      BinaryWrapper wrapper;
+      result = OpenObjectFileFromString(decompressed_data, &wrapper);
+      if (result == ElfStatus::NO_ERROR) {
+        // |elf| below shadows the parameter: it is the object embedded in
+        // .gnu_debugdata, which may have a different ELF class than the outer file.
+        if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) {
+          return ParseSymbolsFromELFFile(elf, callback);
+        } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) {
+          return ParseSymbolsFromELFFile(elf, callback);
+        } else {
+          return ElfStatus::FILE_MALFORMED;
+        }
+      }
+    }
+  }
+  // Reached with the status of a failed section read or a failed open of the
+  // decompressed data (or NO_ERROR, see the note above).
+  return result;
+}
+
+// Checks that the build id stored in |obj| equals |expected_build_id|.
+// An empty expected id matches everything without |obj| being inspected.
+// Returns the error from reading the build id, BUILD_ID_MISMATCH when the ids
+// differ, and NO_ERROR otherwise.
+ElfStatus MatchBuildId(llvm::object::ObjectFile* obj, const BuildId& expected_build_id) {
+  if (!expected_build_id.IsEmpty()) {
+    BuildId actual_build_id;
+    const ElfStatus read_status = GetBuildIdFromObjectFile(obj, &actual_build_id);
+    if (read_status != ElfStatus::NO_ERROR) {
+      return read_status;
+    }
+    if (expected_build_id != actual_build_id) {
+      return ElfStatus::BUILD_ID_MISMATCH;
+    }
+  }
+  return ElfStatus::NO_ERROR;
+}
+
+// Parses the symbols of the standalone ELF file |filename|. The path is
+// validated first; the actual work is done by the embedded-file variant with
+// offset 0 and size 0.
+ElfStatus ParseSymbolsFromElfFile(const std::string& filename,
+                                  const BuildId& expected_build_id,
+                                  const std::function<void(const ElfFileSymbol&)>& callback) {
+  const ElfStatus path_status = IsValidElfPath(filename);
+  return (path_status == ElfStatus::NO_ERROR)
+             ? ParseSymbolsFromEmbeddedElfFile(filename, 0, 0, expected_build_id, callback)
+             : path_status;
+}
+
+// Parses the symbols of the ELF object stored in |filename| at |file_offset|
+// with length |file_size|, after checking it against |expected_build_id|.
+// Objects that are neither 32-bit nor 64-bit little-endian ELF yield
+// FILE_MALFORMED.
+ElfStatus ParseSymbolsFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset,
+                                          uint32_t file_size, const BuildId& expected_build_id,
+                                          const std::function<void(const ElfFileSymbol&)>& callback) {
+  BinaryWrapper wrapper;
+  ElfStatus status = OpenObjectFile(filename, file_offset, file_size, &wrapper);
+  if (status == ElfStatus::NO_ERROR) {
+    status = MatchBuildId(wrapper.obj, expected_build_id);
+  }
+  if (status != ElfStatus::NO_ERROR) {
+    return status;
+  }
+  if (auto elf32 = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) {
+    return ParseSymbolsFromELFFile(elf32, callback);
+  }
+  if (auto elf64 = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) {
+    return ParseSymbolsFromELFFile(elf64, callback);
+  }
+  return ElfStatus::FILE_MALFORMED;
+}
+
+// Stores in |p_vaddr| the lowest p_vaddr among the PT_LOAD program headers of
+// |elf| that carry the PF_X flag. Returns FILE_MALFORMED (leaving |p_vaddr|
+// untouched) when no such header lowers the initial maximum.
+template <class ELFT>
+ElfStatus ReadMinExecutableVirtualAddress(const llvm::object::ELFFile<ELFT>* elf, uint64_t* p_vaddr) {
+  uint64_t lowest_vaddr = std::numeric_limits<uint64_t>::max();
+  bool found = false;
+  for (auto phdr = elf->program_header_begin(); phdr != elf->program_header_end(); ++phdr) {
+    const bool is_executable_load =
+        (phdr->p_type == llvm::ELF::PT_LOAD) && (phdr->p_flags & llvm::ELF::PF_X);
+    if (!is_executable_load || !(phdr->p_vaddr < lowest_vaddr)) {
+      continue;
+    }
+    lowest_vaddr = phdr->p_vaddr;
+    found = true;
+  }
+  if (found) {
+    *p_vaddr = lowest_vaddr;
+    return ElfStatus::NO_ERROR;
+  }
+  return ElfStatus::FILE_MALFORMED;
+}
+
+// Opens the ELF file |filename|, checks it against |expected_build_id| and
+// stores the lowest executable load address in |min_vaddr|. Any failing step
+// returns its own status; objects that are neither 32-bit nor 64-bit
+// little-endian ELF yield FILE_MALFORMED.
+ElfStatus ReadMinExecutableVirtualAddressFromElfFile(const std::string& filename,
+                                                     const BuildId& expected_build_id,
+                                                     uint64_t* min_vaddr) {
+  ElfStatus status = IsValidElfPath(filename);
+  if (status != ElfStatus::NO_ERROR) {
+    return status;
+  }
+  BinaryWrapper wrapper;
+  status = OpenObjectFile(filename, 0, 0, &wrapper);
+  if (status == ElfStatus::NO_ERROR) {
+    status = MatchBuildId(wrapper.obj, expected_build_id);
+  }
+  if (status != ElfStatus::NO_ERROR) {
+    return status;
+  }
+
+  if (auto elf32 = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) {
+    return ReadMinExecutableVirtualAddress(elf32->getELFFile(), min_vaddr);
+  }
+  if (auto elf64 = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) {
+    return ReadMinExecutableVirtualAddress(elf64->getELFFile(), min_vaddr);
+  }
+  return ElfStatus::FILE_MALFORMED;
+}
+
+// Reads the section |section_name| of the ELF file |filename| into |content|.
+// The path is validated and the file opened first; objects that are neither
+// 32-bit nor 64-bit little-endian ELF yield FILE_MALFORMED.
+ElfStatus ReadSectionFromElfFile(const std::string& filename, const std::string& section_name,
+                                 std::string* content) {
+  const ElfStatus path_status = IsValidElfPath(filename);
+  if (path_status != ElfStatus::NO_ERROR) {
+    return path_status;
+  }
+  BinaryWrapper wrapper;
+  const ElfStatus open_status = OpenObjectFile(filename, 0, 0, &wrapper);
+  if (open_status != ElfStatus::NO_ERROR) {
+    return open_status;
+  }
+  if (auto elf32 = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) {
+    return ReadSectionFromELFFile(elf32, section_name, content);
+  }
+  if (auto elf64 = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) {
+    return ReadSectionFromELFFile(elf64, section_name, content);
+  }
+  return ElfStatus::FILE_MALFORMED;
+}
diff --git a/simpleperf/read_elf.h b/simpleperf/read_elf.h
new file mode 100644
index 0000000..5a916ad
--- /dev/null
+++ b/simpleperf/read_elf.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_READ_ELF_H_
+#define SIMPLE_PERF_READ_ELF_H_
+
+#include <functional>
+#include <ostream>
+#include <string>
+#include "build_id.h"
+
+// The ELF reading functions are called in many different situations, so they
+// cannot decide on their own whether a failure is worth reporting. Instead of
+// logging an error when something goes wrong, they return an ElfStatus that
+// identifies the kind of failure met while reading the ELF file.
+// Result codes of the ELF reading functions declared in this header.
+enum ElfStatus {
+  // The operation succeeded.
+  NO_ERROR,
+  // NOTE(review): no producer is visible in this part of the change; presumably
+  // the file could not be found - confirm against read_elf.cpp.
+  FILE_NOT_FOUND,
+  // Data could not be read, e.g. the contents of a located section.
+  READ_FAILED,
+  // The input is not usable as a 32-bit/64-bit little-endian ELF object, or it
+  // has no executable PT_LOAD segment when one is required.
+  FILE_MALFORMED,
+  // Neither a regular symbol table nor a .gnu_debugdata section was found.
+  NO_SYMBOL_TABLE,
+  // NOTE(review): no producer is visible in this part of the change; presumably
+  // the file carries no build id - confirm against read_elf.cpp.
+  NO_BUILD_ID,
+  // The build id in the file differs from the (non-empty) expected one.
+  BUILD_ID_MISMATCH,
+  // No section with the requested name exists.
+  SECTION_NOT_FOUND,
+};
+
+std::ostream& operator<<(std::ostream& os, const ElfStatus& status);
+
+ElfStatus GetBuildIdFromNoteFile(const std::string& filename, BuildId* build_id);
+ElfStatus GetBuildIdFromElfFile(const std::string& filename, BuildId* build_id);
+ElfStatus GetBuildIdFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset,
+                                        uint32_t file_size, BuildId* build_id);
+
+// The symbol prefix used to indicate that the symbol belongs to android linker.
+static const std::string linker_prefix = "__dl_";
+
+// One symbol as handed to the callbacks of the ParseSymbolsFrom*() functions.
+// A default-constructed symbol has zero address and length, all flags cleared
+// and an empty name.
+struct ElfFileSymbol {
+  uint64_t vaddr = 0;               // Symbol value (address).
+  uint64_t len = 0;                 // Symbol size.
+  bool is_func = false;             // Set for function symbols.
+  bool is_label = false;            // Set for labels.
+  bool is_in_text_section = false;  // Set for symbols in the .text section.
+  std::string name;
+
+  ElfFileSymbol() {}
+};
+
+ElfStatus ParseSymbolsFromElfFile(const std::string& filename,
+                                  const BuildId& expected_build_id,
+                                  const std::function<void(const ElfFileSymbol&)>& callback);
+ElfStatus ParseSymbolsFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset,
+                                          uint32_t file_size, const BuildId& expected_build_id,
+                                          const std::function<void(const ElfFileSymbol&)>& callback);
+
+ElfStatus ReadMinExecutableVirtualAddressFromElfFile(const std::string& filename,
+                                                     const BuildId& expected_build_id,
+                                                     uint64_t* min_addr);
+
+ElfStatus ReadSectionFromElfFile(const std::string& filename, const std::string& section_name,
+                                 std::string* content);
+
+// Expose the following functions for unit tests.
+bool IsArmMappingSymbol(const char* name);
+ElfStatus IsValidElfFile(int fd);
+ElfStatus IsValidElfPath(const std::string& filename);
+
+#endif  // SIMPLE_PERF_READ_ELF_H_
diff --git a/simpleperf/read_elf_test.cpp b/simpleperf/read_elf_test.cpp
new file mode 100644
index 0000000..f60b552
--- /dev/null
+++ b/simpleperf/read_elf_test.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "read_elf.h"
+
+#include <gtest/gtest.h>
+
+#include <map>
+
+#include <android-base/file.h>
+#include <android-base/test_utils.h>
+
+#include "get_test_data.h"
+#include "test_util.h"
+
+// Reading the build id of the ELF test file must succeed and yield the build
+// id recorded for it in the test data.
+TEST(read_elf, GetBuildIdFromElfFile) {
+  BuildId build_id;
+  ASSERT_EQ(ElfStatus::NO_ERROR, GetBuildIdFromElfFile(GetTestData(ELF_FILE), &build_id));
+  ASSERT_EQ(build_id, BuildId(elf_file_build_id));
+}
+
+// Same as above, but for a native library addressed by offset and size inside
+// the APK test file.
+TEST(read_elf, GetBuildIdFromEmbeddedElfFile) {
+  BuildId build_id;
+  ASSERT_EQ(ElfStatus::NO_ERROR, GetBuildIdFromEmbeddedElfFile(GetTestData(APK_FILE), NATIVELIB_OFFSET_IN_APK,
+                                            NATIVELIB_SIZE_IN_APK, &build_id));
+  ASSERT_EQ(build_id, native_lib_build_id);
+}
+
+// Symbol callback used by the tests: stores |symbol| in |symbols| keyed by its
+// name, replacing any symbol stored earlier under the same name.
+void ParseSymbol(const ElfFileSymbol& symbol, std::map<std::string, ElfFileSymbol>* symbols) {
+  ElfFileSymbol& slot = (*symbols)[symbol.name];
+  slot = symbol;
+}
+
+// Asserts that |symbols| contains "GlobalVar" and that it is not flagged as a
+// function. A failed ASSERT only returns from this helper.
+static void CheckGlobalVariableSymbols(const std::map<std::string, ElfFileSymbol>& symbols) {
+  auto pos = symbols.find("GlobalVar");
+  ASSERT_NE(pos, symbols.end());
+  ASSERT_FALSE(pos->second.is_func);
+}
+
+// Asserts that |symbols| contains "GlobalFunc", flagged as a function located
+// in the text section. A failed ASSERT only returns from this helper.
+static void CheckFunctionSymbols(const std::map<std::string, ElfFileSymbol>& symbols) {
+  auto pos = symbols.find("GlobalFunc");
+  ASSERT_NE(pos, symbols.end());
+  ASSERT_TRUE(pos->second.is_func);
+  ASSERT_TRUE(pos->second.is_in_text_section);
+}
+
+// Runs both the global variable and the function symbol checks on |symbols|.
+void CheckElfFileSymbols(const std::map<std::string, ElfFileSymbol>& symbols) {
+  CheckGlobalVariableSymbols(symbols);
+  CheckFunctionSymbols(symbols);
+}
+
+// Parsing succeeds when the expected build id is the one of the test file, and
+// the well-known symbols are reported.
+TEST(read_elf, parse_symbols_from_elf_file_with_correct_build_id) {
+  std::map<std::string, ElfFileSymbol> symbols;
+  ASSERT_EQ(ElfStatus::NO_ERROR, ParseSymbolsFromElfFile(GetTestData(ELF_FILE), elf_file_build_id,
+                                      std::bind(ParseSymbol, std::placeholders::_1, &symbols)));
+  CheckElfFileSymbols(symbols);
+}
+
+// A default-constructed BuildId as the expected id must not prevent parsing.
+TEST(read_elf, parse_symbols_from_elf_file_without_build_id) {
+  std::map<std::string, ElfFileSymbol> symbols;
+  ASSERT_EQ(ElfStatus::NO_ERROR, ParseSymbolsFromElfFile(GetTestData(ELF_FILE), BuildId(),
+                                      std::bind(ParseSymbol, std::placeholders::_1, &symbols)));
+  CheckElfFileSymbols(symbols);
+}
+
+// A build id that differs from the one of the test file must be reported as
+// BUILD_ID_MISMATCH.
+TEST(read_elf, parse_symbols_from_elf_file_with_wrong_build_id) {
+  BuildId build_id("01010101010101010101");
+  std::map<std::string, ElfFileSymbol> symbols;
+  ASSERT_EQ(ElfStatus::BUILD_ID_MISMATCH, ParseSymbolsFromElfFile(GetTestData(ELF_FILE), build_id,
+                                       std::bind(ParseSymbol, std::placeholders::_1, &symbols)));
+}
+
+// The native library inside the APK is expected to report NO_SYMBOL_TABLE, yet
+// the well-known symbols must still have been delivered to the callback
+// (presumably from its dynamic symbol table - confirm against the test data).
+TEST(read_elf, ParseSymbolsFromEmbeddedElfFile) {
+  std::map<std::string, ElfFileSymbol> symbols;
+  ASSERT_EQ(ElfStatus::NO_SYMBOL_TABLE, ParseSymbolsFromEmbeddedElfFile(GetTestData(APK_FILE), NATIVELIB_OFFSET_IN_APK,
+                                              NATIVELIB_SIZE_IN_APK, native_lib_build_id,
+                                              std::bind(ParseSymbol, std::placeholders::_1, &symbols)));
+  CheckElfFileSymbols(symbols);
+}
+
+// Parsing the mini-debug-info test file must succeed and report the function
+// symbol; the global variable symbol is not checked here.
+TEST(read_elf, ParseSymbolFromMiniDebugInfoElfFile) {
+  std::map<std::string, ElfFileSymbol> symbols;
+  ASSERT_EQ(ElfStatus::NO_ERROR, ParseSymbolsFromElfFile(GetTestData(ELF_FILE_WITH_MINI_DEBUG_INFO), BuildId(),
+                                      std::bind(ParseSymbol, std::placeholders::_1, &symbols)));
+  CheckFunctionSymbols(symbols);
+}
+
+// Covers the accepted forms ("$a", "$a.<suffix>") and two rejected ones (an
+// unknown letter, and a suffix that does not start with a dot).
+TEST(read_elf, arm_mapping_symbol) {
+  ASSERT_TRUE(IsArmMappingSymbol("$a"));
+  ASSERT_FALSE(IsArmMappingSymbol("$b"));
+  ASSERT_TRUE(IsArmMappingSymbol("$a.anything"));
+  ASSERT_FALSE(IsArmMappingSymbol("$a_no_dot"));
+}
+
+// Exercises IsValidElfPath() on /dev/zero, on a temporary file before and
+// after non-ELF text is written to it, and on a real ELF file.
+TEST(read_elf, IsValidElfPath) {
+  ASSERT_NE(ElfStatus::NO_ERROR, IsValidElfPath("/dev/zero"));
+  TemporaryFile tmp_file;
+  // Nothing has been written to the temporary file at this point.
+  ASSERT_EQ(ElfStatus::READ_FAILED, IsValidElfPath(tmp_file.path));
+  ASSERT_TRUE(android::base::WriteStringToFile("wrong format for elf", tmp_file.path));
+  ASSERT_EQ(ElfStatus::FILE_MALFORMED, IsValidElfPath(tmp_file.path));
+  ASSERT_EQ(ElfStatus::NO_ERROR, IsValidElfPath(GetTestData(ELF_FILE)));
+}
+
+// The synthetic "@plt" symbol must be among the symbols reported for the ELF
+// test file.
+TEST(read_elf, check_symbol_for_plt_section) {
+  std::map<std::string, ElfFileSymbol> symbols;
+  ASSERT_EQ(ElfStatus::NO_ERROR, ParseSymbolsFromElfFile(GetTestData(ELF_FILE), BuildId(),
+                                      std::bind(ParseSymbol, std::placeholders::_1, &symbols)));
+  ASSERT_NE(symbols.find("@plt"), symbols.end());
+}
diff --git a/simpleperf/record.cpp b/simpleperf/record.cpp
new file mode 100644
index 0000000..587d23d
--- /dev/null
+++ b/simpleperf/record.cpp
@@ -0,0 +1,907 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "record.h"
+
+#include <inttypes.h>
+#include <algorithm>
+#include <unordered_map>
+
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
+
+#include "dso.h"
+#include "environment.h"
+#include "perf_regs.h"
+#include "tracing.h"
+#include "utils.h"
+
+// Maps a record type to a short human readable name. Types missing from the
+// table are rendered as "unknown(<type>)".
+static std::string RecordTypeToString(int record_type) {
+  static std::unordered_map<int, std::string> names_by_type = {
+      {PERF_RECORD_MMAP, "mmap"},
+      {PERF_RECORD_LOST, "lost"},
+      {PERF_RECORD_COMM, "comm"},
+      {PERF_RECORD_EXIT, "exit"},
+      {PERF_RECORD_THROTTLE, "throttle"},
+      {PERF_RECORD_UNTHROTTLE, "unthrottle"},
+      {PERF_RECORD_FORK, "fork"},
+      {PERF_RECORD_READ, "read"},
+      {PERF_RECORD_SAMPLE, "sample"},
+      {PERF_RECORD_BUILD_ID, "build_id"},
+      {PERF_RECORD_MMAP2, "mmap2"},
+      {PERF_RECORD_TRACING_DATA, "tracing_data"},
+      {SIMPLE_PERF_RECORD_KERNEL_SYMBOL, "kernel_symbol"},
+      {SIMPLE_PERF_RECORD_DSO, "dso"},
+      {SIMPLE_PERF_RECORD_SYMBOL, "symbol"},
+  };
+
+  const auto entry = names_by_type.find(record_type);
+  const bool known = (entry != names_by_type.end());
+  return known ? entry->second : android::base::StringPrintf("unknown(%d)", record_type);
+}
+
+// Copies |n| objects of type T from the binary stream at |p| into |data_p| and
+// advances |p| past the bytes that were consumed.
+template <class T>
+void MoveFromBinaryFormat(T* data_p, size_t n, const char*& p) {
+  const char* const source = p;
+  const size_t byte_count = sizeof(T) * n;
+  p = source + byte_count;
+  memcpy(data_p, source, byte_count);
+}
+
+// Stores |data| at |p| by assigning through a T* and advances |p| by
+// sizeof(T). The caller has to make sure |p| is suitable for a T.
+template <class T>
+void MoveToBinaryFormat(const T& data, char*& p) {
+  T* const destination = reinterpret_cast<T*>(p);
+  *destination = data;
+  p = reinterpret_cast<char*>(destination + 1);
+}
+
+// Specialization for RecordHeader: instead of assigning the object into the
+// buffer, the header writes itself through its own MoveToBinaryFormat() member,
+// which also receives |p| by reference.
+template <>
+void MoveToBinaryFormat(const RecordHeader& data, char*& p) {
+  data.MoveToBinaryFormat(p);
+}
+
+// Copies |n| objects of type T from |data_p| into the binary stream at |p| and
+// advances |p| past the bytes that were written.
+template <class T>
+void MoveToBinaryFormat(const T* data_p, size_t n, char*& p) {
+  char* const destination = p;
+  const size_t byte_count = sizeof(T) * n;
+  p = destination + byte_count;
+  memcpy(destination, data_p, byte_count);
+}
+
+// Starts with every byte of the object (flags and all sample fields) zeroed.
+SampleId::SampleId() {
+  memset(this, 0, sizeof(*this));
+}
+
+// Fills this SampleId for a record created by simpleperf itself: the layout
+// flags are taken from |attr| and the id is set to |event_id|.
+// Returns the size the sample_id occupies in binary format.
+size_t SampleId::CreateContent(const perf_event_attr& attr, uint64_t event_id) {
+  sample_type = attr.sample_type;
+  sample_id_all = attr.sample_id_all;
+  id_data.id = event_id;
+  // Other data are not necessary. TODO: Set missing SampleId data.
+  const size_t binary_size = Size();
+  return binary_size;
+}
+
+// Parses the sample_id part of a record from [p, end). Which fields are present
+// is decided by |attr|: nothing is read unless sample_id_all is set, and each
+// field is gated by its sample_type bit. Aborts if parsing runs past |end|;
+// leftover bytes are only logged.
+void SampleId::ReadFromBinaryFormat(const perf_event_attr& attr, const char* p,
+                                    const char* end) {
+  sample_type = attr.sample_type;
+  sample_id_all = attr.sample_id_all;
+  const auto present = [this](uint64_t flag) {
+    return sample_id_all && (sample_type & flag) != 0;
+  };
+  // The order below is the order of the fields in the binary format.
+  if (present(PERF_SAMPLE_TID)) {
+    MoveFromBinaryFormat(tid_data, p);
+  }
+  if (present(PERF_SAMPLE_TIME)) {
+    MoveFromBinaryFormat(time_data, p);
+  }
+  if (present(PERF_SAMPLE_ID)) {
+    MoveFromBinaryFormat(id_data, p);
+  }
+  if (present(PERF_SAMPLE_STREAM_ID)) {
+    MoveFromBinaryFormat(stream_id_data, p);
+  }
+  if (present(PERF_SAMPLE_CPU)) {
+    MoveFromBinaryFormat(cpu_data, p);
+  }
+  if (present(PERF_SAMPLE_IDENTIFIER)) {
+    // Shares id_data with PERF_SAMPLE_ID.
+    MoveFromBinaryFormat(id_data, p);
+  }
+  CHECK_LE(p, end);
+  if (p < end) {
+    LOG(DEBUG) << "Record SampleId part has " << end - p << " bytes left\n";
+  }
+}
+
+// Serializes the sample_id part of a record at |p| and advances |p| past it.
+// Nothing is written unless sample_id_all is set; each field is gated by its
+// sample_type bit. The fields and their order mirror ReadFromBinaryFormat(),
+// and the number of bytes written equals Size().
+void SampleId::WriteToBinaryFormat(char*& p) const {
+  if (sample_id_all) {
+    if (sample_type & PERF_SAMPLE_TID) {
+      MoveToBinaryFormat(tid_data, p);
+    }
+    if (sample_type & PERF_SAMPLE_TIME) {
+      MoveToBinaryFormat(time_data, p);
+    }
+    if (sample_type & PERF_SAMPLE_ID) {
+      MoveToBinaryFormat(id_data, p);
+    }
+    if (sample_type & PERF_SAMPLE_STREAM_ID) {
+      MoveToBinaryFormat(stream_id_data, p);
+    }
+    if (sample_type & PERF_SAMPLE_CPU) {
+      MoveToBinaryFormat(cpu_data, p);
+    }
+    if (sample_type & PERF_SAMPLE_IDENTIFIER) {
+      // Size() reserves room for the identifier and ReadFromBinaryFormat()
+      // expects it after the cpu field, so it has to be written as well;
+      // otherwise the tail of the record buffer stays uninitialized.
+      MoveToBinaryFormat(id_data, p);
+    }
+  }
+}
+
+// Prints the sample_id fields selected by sample_type, one line per field, at
+// indentation level |indent|. Prints nothing unless sample_id_all is set.
+void SampleId::Dump(size_t indent) const {
+  if (!sample_id_all) {
+    return;
+  }
+  if (sample_type & PERF_SAMPLE_TID) {
+    PrintIndented(indent, "sample_id: pid %u, tid %u\n", tid_data.pid,
+                  tid_data.tid);
+  }
+  if (sample_type & PERF_SAMPLE_TIME) {
+    PrintIndented(indent, "sample_id: time %" PRId64 "\n", time_data.time);
+  }
+  // PERF_SAMPLE_ID and PERF_SAMPLE_IDENTIFIER share id_data.
+  const bool has_id = (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) != 0;
+  if (has_id) {
+    PrintIndented(indent, "sample_id: id %" PRId64 "\n", id_data.id);
+  }
+  if (sample_type & PERF_SAMPLE_STREAM_ID) {
+    PrintIndented(indent, "sample_id: stream_id %" PRId64 "\n",
+                  stream_id_data.stream_id);
+  }
+  if (sample_type & PERF_SAMPLE_CPU) {
+    PrintIndented(indent, "sample_id: cpu %u, res %u\n", cpu_data.cpu,
+                  cpu_data.res);
+  }
+}
+
+// Returns the number of bytes the sample_id part occupies in binary format:
+// zero unless sample_id_all is set, otherwise the sum of the sizes of the
+// fields selected by sample_type.
+size_t SampleId::Size() const {
+  if (!sample_id_all) {
+    return 0;
+  }
+  size_t total = 0;
+  total += (sample_type & PERF_SAMPLE_TID) ? sizeof(PerfSampleTidType) : 0;
+  total += (sample_type & PERF_SAMPLE_TIME) ? sizeof(PerfSampleTimeType) : 0;
+  total += (sample_type & PERF_SAMPLE_ID) ? sizeof(PerfSampleIdType) : 0;
+  total += (sample_type & PERF_SAMPLE_STREAM_ID) ? sizeof(PerfSampleStreamIdType) : 0;
+  total += (sample_type & PERF_SAMPLE_CPU) ? sizeof(PerfSampleCpuType) : 0;
+  // The identifier is stored as a second id field.
+  total += (sample_type & PERF_SAMPLE_IDENTIFIER) ? sizeof(PerfSampleIdType) : 0;
+  return total;
+}
+
+// Move constructor: copies the header and sample_id, takes over |other|'s
+// binary buffer together with its ownership flag, and leaves |other| without a
+// buffer.
+Record::Record(Record&& other) {
+  binary_ = other.binary_;
+  own_binary_ = other.own_binary_;
+  // Detach the buffer from |other| now that this object refers to it.
+  other.binary_ = nullptr;
+  other.own_binary_ = false;
+  sample_id = other.sample_id;
+  header = other.header;
+}
+
+// Prints a one-line summary of the record at indentation level |indent|,
+// followed by the record specific data and the sample_id one level deeper.
+void Record::Dump(size_t indent) const {
+  const std::string type_name = RecordTypeToString(type());
+  PrintIndented(indent, "record %s: type %u, misc %u, size %u\n",
+                type_name.c_str(), type(), misc(), size());
+  const size_t nested_indent = indent + 1;
+  DumpData(nested_indent);
+  sample_id.Dump(nested_indent);
+}
+
+// Returns the time stored in this record's sample_id.
+uint64_t Record::Timestamp() const {
+  return sample_id.time_data.time;
+}
+// Returns the cpu stored in this record's sample_id.
+uint32_t Record::Cpu() const {
+  return sample_id.cpu_data.cpu;
+}
+
+// Makes |new_binary| the buffer backing this record and takes ownership of it.
+// A previously owned buffer is released with delete[].
+void Record::UpdateBinary(const char* new_binary) {
+  const char* const previous_binary = binary_;
+  const bool owned_previous = own_binary_;
+  binary_ = new_binary;
+  own_binary_ = true;
+  if (owned_previous) {
+    delete[] previous_binary;
+  }
+}
+
+// Parses an mmap record from the binary data at |p|. |data| and |filename|
+// point into that buffer (nothing is copied); the sample_id that follows the
+// 8-byte padded filename is decoded according to |attr|.
+MmapRecord::MmapRecord(const perf_event_attr& attr, const char* p) : Record(p) {
+  const char* const end = p + size();
+  p += header_size();
+  const auto* mmap_data = reinterpret_cast<const MmapRecordDataType*>(p);
+  data = mmap_data;
+  p += sizeof(MmapRecordDataType);
+  filename = p;
+  const auto padded_name_size = Align(strlen(filename) + 1, 8);
+  p += padded_name_size;
+  CHECK_LE(p, end);
+  sample_id.ReadFromBinaryFormat(attr, p, end);
+}
+
+// Builds an mmap record from scratch. |in_kernel| selects the kernel or user
+// misc flag, |time| becomes the sample_id timestamp, and the remaining
+// arguments fill the record data and filename.
+MmapRecord::MmapRecord(const perf_event_attr& attr, bool in_kernel,
+                       uint32_t pid, uint32_t tid, uint64_t addr, uint64_t len,
+                       uint64_t pgoff, const std::string& filename,
+                       uint64_t event_id, uint64_t time) {
+  if (in_kernel) {
+    SetTypeAndMisc(PERF_RECORD_MMAP, PERF_RECORD_MISC_KERNEL);
+  } else {
+    SetTypeAndMisc(PERF_RECORD_MMAP, PERF_RECORD_MISC_USER);
+  }
+  // The sample_id has to be complete before the binary form is built below.
+  sample_id.CreateContent(attr, event_id);
+  sample_id.time_data.time = time;
+  MmapRecordDataType mmap_data;
+  mmap_data.pgoff = pgoff;
+  mmap_data.len = len;
+  mmap_data.addr = addr;
+  mmap_data.tid = tid;
+  mmap_data.pid = pid;
+  SetDataAndFilename(mmap_data, filename);
+}
+
+// Rebuilds the binary form of this record from the current header, |data|,
+// |filename| (NUL terminated and padded to a multiple of 8 bytes) and the
+// sample_id. Afterwards this->data and this->filename point into the new
+// buffer, which the record owns.
+void MmapRecord::SetDataAndFilename(const MmapRecordDataType& data,
+                                    const std::string& filename) {
+  const auto padded_name_size = Align(filename.size() + 1, 8);
+  SetSize(header_size() + sizeof(data) + padded_name_size + sample_id.Size());
+  char* const buffer = new char[size()];
+  char* cursor = buffer;
+  MoveToBinaryFormat(header, cursor);
+  this->data = reinterpret_cast<MmapRecordDataType*>(cursor);
+  MoveToBinaryFormat(data, cursor);
+  this->filename = cursor;
+  strcpy(cursor, filename.c_str());
+  cursor += padded_name_size;
+  sample_id.WriteToBinaryFormat(cursor);
+  UpdateBinary(buffer);
+}
+
+// Prints the mmap specific fields of the record at indentation level |indent|.
+void MmapRecord::DumpData(size_t indent) const {
+  const MmapRecordDataType& mmap_data = *data;
+  PrintIndented(indent,
+                "pid %u, tid %u, addr 0x%" PRIx64 ", len 0x%" PRIx64 "\n",
+                mmap_data.pid, mmap_data.tid, mmap_data.addr, mmap_data.len);
+  PrintIndented(indent, "pgoff 0x%" PRIx64 ", filename %s\n", mmap_data.pgoff,
+                filename);
+}
+
+// Parses an mmap2 record from the binary data at |p|. |data| and |filename|
+// point into that buffer (nothing is copied); the sample_id that follows the
+// 8-byte padded filename is decoded according to |attr|.
+Mmap2Record::Mmap2Record(const perf_event_attr& attr, const char* p)
+    : Record(p) {
+  const char* end = p + size();
+  p += header_size();
+  // |data| is a pointer into the record, exactly as in MmapRecord. It must be
+  // pointed at the payload; copying payload bytes into the pointer itself would
+  // corrupt it and advance |p| by sizeof(pointer) instead of the payload size.
+  data = reinterpret_cast<const Mmap2RecordDataType*>(p);
+  p += sizeof(*data);
+  filename = p;
+  p += Align(strlen(filename) + 1, 8);
+  CHECK_LE(p, end);
+  sample_id.ReadFromBinaryFormat(attr, p, end);
+}
+
+// Rebuilds the binary form of this record from the current header, |data|,
+// |filename| (NUL terminated and padded to a multiple of 8 bytes) and the
+// sample_id. Afterwards this->data and this->filename point into the new
+// buffer, which the record owns.
+void Mmap2Record::SetDataAndFilename(const Mmap2RecordDataType& data,
+                                     const std::string& filename) {
+  const auto padded_name_size = Align(filename.size() + 1, 8);
+  SetSize(header_size() + sizeof(data) + padded_name_size + sample_id.Size());
+  char* const buffer = new char[size()];
+  char* cursor = buffer;
+  MoveToBinaryFormat(header, cursor);
+  this->data = reinterpret_cast<Mmap2RecordDataType*>(cursor);
+  MoveToBinaryFormat(data, cursor);
+  this->filename = cursor;
+  strcpy(cursor, filename.c_str());
+  cursor += padded_name_size;
+  sample_id.WriteToBinaryFormat(cursor);
+  UpdateBinary(buffer);
+}
+
+// Prints the mmap2 specific fields of the record at indentation level |indent|.
+void Mmap2Record::DumpData(size_t indent) const {
+  PrintIndented(indent,
+                "pid %u, tid %u, addr 0x%" PRIx64 ", len 0x%" PRIx64 "\n",
+                data->pid, data->tid, data->addr, data->len);
+  // The pgoff conversion needs its own '%': without it PRIx64 is printed as
+  // literal text and every following conversion consumes the wrong argument.
+  PrintIndented(indent, "pgoff 0x%" PRIx64 ", maj %u, min %u, ino %" PRId64
+                        ", ino_generation %" PRIu64 "\n",
+                data->pgoff, data->maj, data->min, data->ino,
+                data->ino_generation);
+  PrintIndented(indent, "prot %u, flags %u, filenames %s\n", data->prot,
+                data->flags, filename);
+}
+
+// Parses a comm record from the binary data at |p|. |data| and |comm| point
+// into that buffer (nothing is copied); the sample_id that follows the 8-byte
+// padded comm string is decoded according to |attr|.
+CommRecord::CommRecord(const perf_event_attr& attr, const char* p) : Record(p) {
+  const char* const end = p + size();
+  p += header_size();
+  const auto* comm_data = reinterpret_cast<const CommRecordDataType*>(p);
+  data = comm_data;
+  p += sizeof(CommRecordDataType);
+  comm = p;
+  const auto padded_comm_size = Align(strlen(comm) + 1, 8);
+  p += padded_comm_size;
+  CHECK_LE(p, end);
+  sample_id.ReadFromBinaryFormat(attr, p, end);
+}
+
+// Builds a comm record from scratch and serializes it into a newly allocated
+// buffer owned by the record: header, pid/tid, the NUL terminated |comm| padded
+// to a multiple of 8 bytes, then the sample_id.
+CommRecord::CommRecord(const perf_event_attr& attr, uint32_t pid, uint32_t tid,
+                       const std::string& comm, uint64_t event_id) {
+  SetTypeAndMisc(PERF_RECORD_COMM, 0);
+  CommRecordDataType comm_data;
+  comm_data.pid = pid;
+  comm_data.tid = tid;
+  const size_t sample_id_size = sample_id.CreateContent(attr, event_id);
+  const auto padded_comm_size = Align(comm.size() + 1, 8);
+  SetSize(header_size() + sizeof(comm_data) + padded_comm_size + sample_id_size);
+  char* const buffer = new char[size()];
+  char* cursor = buffer;
+  MoveToBinaryFormat(header, cursor);
+  this->data = reinterpret_cast<CommRecordDataType*>(cursor);
+  MoveToBinaryFormat(comm_data, cursor);
+  this->comm = cursor;
+  strcpy(cursor, comm.c_str());
+  cursor += padded_comm_size;
+  sample_id.WriteToBinaryFormat(cursor);
+  UpdateBinary(buffer);
+}
+
+// Prints the comm specific fields of the record at indentation level |indent|.
+void CommRecord::DumpData(size_t indent) const {
+  const CommRecordDataType& comm_data = *data;
+  PrintIndented(indent, "pid %u, tid %u, comm %s\n", comm_data.pid, comm_data.tid,
+                comm);
+}
+
+// Parses an exit or fork record from the binary data at |p|. |data| points
+// into that buffer (nothing is copied); the sample_id that follows is decoded
+// according to |attr|.
+ExitOrForkRecord::ExitOrForkRecord(const perf_event_attr& attr, const char* p)
+    : Record(p) {
+  const char* const end = p + size();
+  p += header_size();
+  const auto* payload = reinterpret_cast<const ExitOrForkRecordDataType*>(p);
+  data = payload;
+  p += sizeof(ExitOrForkRecordDataType);
+  CHECK_LE(p, end);
+  sample_id.ReadFromBinaryFormat(attr, p, end);
+}
+
+// Prints the process/thread ids of the record at indentation level |indent|.
+void ExitOrForkRecord::DumpData(size_t indent) const {
+  const ExitOrForkRecordDataType& ids = *data;
+  PrintIndented(indent, "pid %u, ppid %u, tid %u, ptid %u\n", ids.pid,
+                ids.ppid, ids.tid, ids.ptid);
+}
+
+// Builds a fork record from scratch and serializes it into a newly allocated
+// buffer owned by the record: header, the process/thread ids (with time set to
+// 0), then the sample_id.
+ForkRecord::ForkRecord(const perf_event_attr& attr, uint32_t pid, uint32_t tid,
+                       uint32_t ppid, uint32_t ptid, uint64_t event_id) {
+  SetTypeAndMisc(PERF_RECORD_FORK, 0);
+  ExitOrForkRecordDataType fork_data;
+  fork_data.time = 0;
+  fork_data.ptid = ptid;
+  fork_data.tid = tid;
+  fork_data.ppid = ppid;
+  fork_data.pid = pid;
+  const size_t sample_id_size = sample_id.CreateContent(attr, event_id);
+  SetSize(header_size() + sizeof(fork_data) + sample_id_size);
+  char* const buffer = new char[size()];
+  char* cursor = buffer;
+  MoveToBinaryFormat(header, cursor);
+  this->data = reinterpret_cast<ExitOrForkRecordDataType*>(cursor);
+  MoveToBinaryFormat(fork_data, cursor);
+  sample_id.WriteToBinaryFormat(cursor);
+  UpdateBinary(buffer);
+}
+
+// Parses a lost record from the binary data at |p|: |id| and |lost| are copied
+// out of the buffer in that order, then the sample_id that follows is decoded
+// according to |attr|.
+LostRecord::LostRecord(const perf_event_attr& attr, const char* p) : Record(p) {
+  const char* end = p + size();
+  p += header_size();
+  MoveFromBinaryFormat(id, p);
+  MoveFromBinaryFormat(lost, p);
+  // Abort if the fixed part already ran past the size stated in the header.
+  CHECK_LE(p, end);
+  sample_id.ReadFromBinaryFormat(attr, p, end);
+}
+
+// Prints the id and the lost count of the record at indentation level |indent|.
+void LostRecord::DumpData(size_t indent) const {
+  PrintIndented(indent, "id %" PRIu64 ", lost %" PRIu64 "\n", id, lost);
+}
+
+// Parses a sample record from the binary data at |p|.
+//
+// Which fields are present is decided by attr.sample_type; they are consumed in
+// the order of the checks below. Fixed-size fields are copied out of the
+// buffer, while the variable-size ones (callchain, raw data, branch stack,
+// user registers, user stack) are left in place and only pointed to.
+//
+// NOTE(review): the variable-size parts advance |p| by counts read from the
+// record itself, and the only bounds check is the CHECK_LE at the end - confirm
+// that callers only pass trusted records.
+SampleRecord::SampleRecord(const perf_event_attr& attr, const char* p)
+    : Record(p) {
+  const char* end = p + size();
+  p += header_size();
+  sample_type = attr.sample_type;
+
+  // PERF_SAMPLE_IDENTIFIER comes first here and shares id_data with
+  // PERF_SAMPLE_ID further down.
+  if (sample_type & PERF_SAMPLE_IDENTIFIER) {
+    MoveFromBinaryFormat(id_data, p);
+  }
+  if (sample_type & PERF_SAMPLE_IP) {
+    MoveFromBinaryFormat(ip_data, p);
+  }
+  if (sample_type & PERF_SAMPLE_TID) {
+    MoveFromBinaryFormat(tid_data, p);
+  }
+  if (sample_type & PERF_SAMPLE_TIME) {
+    MoveFromBinaryFormat(time_data, p);
+  }
+  if (sample_type & PERF_SAMPLE_ADDR) {
+    MoveFromBinaryFormat(addr_data, p);
+  }
+  if (sample_type & PERF_SAMPLE_ID) {
+    MoveFromBinaryFormat(id_data, p);
+  }
+  if (sample_type & PERF_SAMPLE_STREAM_ID) {
+    MoveFromBinaryFormat(stream_id_data, p);
+  }
+  if (sample_type & PERF_SAMPLE_CPU) {
+    MoveFromBinaryFormat(cpu_data, p);
+  }
+  if (sample_type & PERF_SAMPLE_PERIOD) {
+    MoveFromBinaryFormat(period_data, p);
+  }
+  if (sample_type & PERF_SAMPLE_CALLCHAIN) {
+    // A count followed by that many 64-bit entries, referenced in place.
+    MoveFromBinaryFormat(callchain_data.ip_nr, p);
+    callchain_data.ips = reinterpret_cast<const uint64_t*>(p);
+    p += callchain_data.ip_nr * sizeof(uint64_t);
+  }
+  if (sample_type & PERF_SAMPLE_RAW) {
+    // A byte count followed by that many bytes, referenced in place.
+    MoveFromBinaryFormat(raw_data.size, p);
+    raw_data.data = p;
+    p += raw_data.size;
+  }
+  if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+    // A count followed by that many BranchStackItemType entries, in place.
+    MoveFromBinaryFormat(branch_stack_data.stack_nr, p);
+    branch_stack_data.stack = reinterpret_cast<const BranchStackItemType*>(p);
+    p += branch_stack_data.stack_nr * sizeof(BranchStackItemType);
+  }
+  if (sample_type & PERF_SAMPLE_REGS_USER) {
+    MoveFromBinaryFormat(regs_user_data.abi, p);
+    if (regs_user_data.abi == 0) {
+      // No register values follow an abi of 0.
+      // NOTE(review): reg_nr and regs are not assigned on this path.
+      regs_user_data.reg_mask = 0;
+    } else {
+      // One 64-bit value is stored for every bit set in attr.sample_regs_user.
+      regs_user_data.reg_mask = attr.sample_regs_user;
+      size_t bit_nr = 0;
+      for (size_t i = 0; i < 64; ++i) {
+        if ((regs_user_data.reg_mask >> i) & 1) {
+          bit_nr++;
+        }
+      }
+      regs_user_data.reg_nr = bit_nr;
+      regs_user_data.regs = reinterpret_cast<const uint64_t*>(p);
+      p += bit_nr * sizeof(uint64_t);
+    }
+  }
+  if (sample_type & PERF_SAMPLE_STACK_USER) {
+    MoveFromBinaryFormat(stack_user_data.size, p);
+    if (stack_user_data.size == 0) {
+      // With a size of 0 neither stack bytes nor a dyn_size field are consumed.
+      // NOTE(review): stack_user_data.data is not assigned on this path.
+      stack_user_data.dyn_size = 0;
+    } else {
+      stack_user_data.data = p;
+      p += stack_user_data.size;
+      MoveFromBinaryFormat(stack_user_data.dyn_size, p);
+    }
+  }
+  // TODO: Add parsing of other PERF_SAMPLE_*.
+  // Abort if parsing ran past the size stated in the header; bytes that were
+  // not consumed are only logged.
+  CHECK_LE(p, end);
+  if (p < end) {
+    LOG(DEBUG) << "Record has " << end - p << " bytes left\n";
+  }
+}
+
+// Rewrites this sample in place: the user register values and the user stack
+// data are dropped, and [ips], preceded by a PERF_CONTEXT_USER marker, is
+// appended to the existing callchain. The record can only shrink (checked
+// below), so the new content is written backwards from the new end of the
+// data. All members are updated to describe the rewritten buffer.
+// NOTE(review): assumes the sample carries callchain, user-regs and user-stack
+// data and that binary_ refers to writable memory -- confirm against callers.
+void SampleRecord::ReplaceRegAndStackWithCallChain(
+    const std::vector<uint64_t>& ips) {
+  uint32_t size_added_in_callchain = sizeof(uint64_t) * (ips.size() + 1);
+  uint32_t size_reduced_in_reg_stack =
+      regs_user_data.reg_nr * sizeof(uint64_t) + stack_user_data.size +
+      sizeof(uint64_t);
+  CHECK_LE(size_added_in_callchain, size_reduced_in_reg_stack);
+  uint32_t size_reduced = size_reduced_in_reg_stack - size_added_in_callchain;
+  SetSize(size() - size_reduced);
+  char* p = const_cast<char*>(binary_);
+  MoveToBinaryFormat(header, p);
+  // Start from the end of the old user stack section (stack bytes plus the
+  // dyn_size word) and pull back by the number of bytes saved.
+  p = const_cast<char*>(stack_user_data.data + stack_user_data.size +
+                        sizeof(uint64_t)) -
+      (size_reduced_in_reg_stack - size_added_in_callchain);
+  // The registers and the stack are gone. Reset every field describing them,
+  // not only size/abi, so GetValidStackSize() and DumpData() do not use stale
+  // values or pointers into overwritten data.
+  stack_user_data.size = 0;
+  stack_user_data.data = nullptr;
+  stack_user_data.dyn_size = 0;
+  regs_user_data.abi = 0;
+  regs_user_data.reg_mask = 0;
+  regs_user_data.reg_nr = 0;
+  regs_user_data.regs = nullptr;
+  p -= sizeof(uint64_t);
+  *reinterpret_cast<uint64_t*>(p) = stack_user_data.size;
+  p -= sizeof(uint64_t);
+  *reinterpret_cast<uint64_t*>(p) = regs_user_data.abi;
+  if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+    p -= branch_stack_data.stack_nr * sizeof(BranchStackItemType);
+    memmove(p, branch_stack_data.stack,
+            branch_stack_data.stack_nr * sizeof(BranchStackItemType));
+    // Follow the data to its new location.
+    branch_stack_data.stack = reinterpret_cast<const BranchStackItemType*>(p);
+    p -= sizeof(uint64_t);
+    *reinterpret_cast<uint64_t*>(p) = branch_stack_data.stack_nr;
+  }
+  if (sample_type & PERF_SAMPLE_RAW) {
+    p -= raw_data.size;
+    memmove(p, raw_data.data, raw_data.size);
+    // Follow the data to its new location.
+    raw_data.data = p;
+    p -= sizeof(uint32_t);
+    *reinterpret_cast<uint32_t*>(p) = raw_data.size;
+  }
+  p -= ips.size() * sizeof(uint64_t);
+  memcpy(p, ips.data(), ips.size() * sizeof(uint64_t));
+  p -= sizeof(uint64_t);
+  *reinterpret_cast<uint64_t*>(p) = PERF_CONTEXT_USER;
+  // Step back over the existing callchain entries, which stay where they are.
+  p -= sizeof(uint64_t) * (callchain_data.ip_nr);
+  callchain_data.ips = reinterpret_cast<uint64_t*>(p);
+  callchain_data.ip_nr += ips.size() + 1;
+  p -= sizeof(uint64_t);
+  *reinterpret_cast<uint64_t*>(p) = callchain_data.ip_nr;
+}
+
+// Prints every field selected by sample_type, one per line, at the given
+// indentation level. Conversion specifiers match the declared field widths
+// (raw_data.size and the raw words are uint32_t, stack_user_data.size is
+// uint64_t), so the output does not depend on the width of size_t.
+void SampleRecord::DumpData(size_t indent) const {
+  PrintIndented(indent, "sample_type: 0x%" PRIx64 "\n", sample_type);
+  if (sample_type & PERF_SAMPLE_IP) {
+    PrintIndented(indent, "ip %p\n", reinterpret_cast<void*>(ip_data.ip));
+  }
+  if (sample_type & PERF_SAMPLE_TID) {
+    PrintIndented(indent, "pid %u, tid %u\n", tid_data.pid, tid_data.tid);
+  }
+  if (sample_type & PERF_SAMPLE_TIME) {
+    PrintIndented(indent, "time %" PRId64 "\n", time_data.time);
+  }
+  if (sample_type & PERF_SAMPLE_ADDR) {
+    PrintIndented(indent, "addr %p\n", reinterpret_cast<void*>(addr_data.addr));
+  }
+  if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) {
+    PrintIndented(indent, "id %" PRId64 "\n", id_data.id);
+  }
+  if (sample_type & PERF_SAMPLE_STREAM_ID) {
+    PrintIndented(indent, "stream_id %" PRId64 "\n", stream_id_data.stream_id);
+  }
+  if (sample_type & PERF_SAMPLE_CPU) {
+    PrintIndented(indent, "cpu %u, res %u\n", cpu_data.cpu, cpu_data.res);
+  }
+  if (sample_type & PERF_SAMPLE_PERIOD) {
+    PrintIndented(indent, "period %" PRId64 "\n", period_data.period);
+  }
+  if (sample_type & PERF_SAMPLE_CALLCHAIN) {
+    PrintIndented(indent, "callchain nr=%" PRIu64 "\n", callchain_data.ip_nr);
+    for (uint64_t i = 0; i < callchain_data.ip_nr; ++i) {
+      PrintIndented(indent + 1, "0x%" PRIx64 "\n", callchain_data.ips[i]);
+    }
+  }
+  if (sample_type & PERF_SAMPLE_RAW) {
+    PrintIndented(indent, "raw size=%" PRIu32 "\n", raw_data.size);
+    // Raw data is dumped as 32-bit words; trailing bytes that do not fill a
+    // whole word are not printed.
+    const uint32_t* data = reinterpret_cast<const uint32_t*>(raw_data.data);
+    size_t size = raw_data.size / sizeof(uint32_t);
+    for (size_t i = 0; i < size; ++i) {
+      PrintIndented(indent + 1, "0x%08" PRIx32 " (%" PRIu32 ")\n", data[i],
+                    data[i]);
+    }
+  }
+  if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+    PrintIndented(indent, "branch_stack nr=%" PRIu64 "\n",
+                  branch_stack_data.stack_nr);
+    for (uint64_t i = 0; i < branch_stack_data.stack_nr; ++i) {
+      auto& item = branch_stack_data.stack[i];
+      PrintIndented(indent + 1, "from 0x%" PRIx64 ", to 0x%" PRIx64
+                                ", flags 0x%" PRIx64 "\n",
+                    item.from, item.to, item.flags);
+    }
+  }
+  if (sample_type & PERF_SAMPLE_REGS_USER) {
+    PrintIndented(indent, "user regs: abi=%" PRId64 "\n", regs_user_data.abi);
+    // regs holds one value per set bit of reg_mask, in ascending bit order.
+    for (size_t i = 0, pos = 0; i < 64; ++i) {
+      if ((regs_user_data.reg_mask >> i) & 1) {
+        PrintIndented(
+            indent + 1, "reg (%s) 0x%016" PRIx64 "\n",
+            GetRegName(i, ScopedCurrentArch::GetCurrentArch()).c_str(),
+            regs_user_data.regs[pos++]);
+      }
+    }
+  }
+  if (sample_type & PERF_SAMPLE_STACK_USER) {
+    PrintIndented(indent, "user stack: size %" PRIu64 " dyn_size %" PRIu64 "\n",
+                  stack_user_data.size, stack_user_data.dyn_size);
+    // The stack is dumped as 64-bit words, four per line.
+    const uint64_t* p = reinterpret_cast<const uint64_t*>(stack_user_data.data);
+    const uint64_t* end = p + (stack_user_data.size / sizeof(uint64_t));
+    while (p < end) {
+      PrintIndented(indent + 1, "");
+      for (size_t i = 0; i < 4 && p < end; ++i, ++p) {
+        printf(" %016" PRIx64, *p);
+      }
+      printf("\n");
+    }
+    printf("\n");
+  }
+}
+
+// Returns time_data.time, which the constructor only fills in when
+// sample_type has PERF_SAMPLE_TIME.
+uint64_t SampleRecord::Timestamp() const { return time_data.time; }
+// Returns cpu_data.cpu, which the constructor only fills in when sample_type
+// has PERF_SAMPLE_CPU.
+uint32_t SampleRecord::Cpu() const { return cpu_data.cpu; }
+
+// Parses a build id record from the binary data at [p]. After the header come
+// the pid, BUILD_ID_SIZE bytes of build id (advanced with 8-byte alignment)
+// and the NUL-terminated filename (advanced with 64-byte alignment).
+// filename points into the buffer at [p].
+BuildIdRecord::BuildIdRecord(const char* p) : Record(p) {
+  const char* end = p + size();
+  p += header_size();
+  MoveFromBinaryFormat(pid, p);
+  build_id = BuildId(p, BUILD_ID_SIZE);
+  p += Align(build_id.Size(), 8);
+  filename = p;
+  p += Align(strlen(filename) + 1, 64);
+  // The parsed fields must account for exactly the size stored in the header.
+  CHECK_EQ(p, end);
+}
+
+// Prints the pid, the build id in its string form, and the filename.
+void BuildIdRecord::DumpData(size_t indent) const {
+  PrintIndented(indent, "pid %u\n", pid);
+  PrintIndented(indent, "build_id %s\n", build_id.ToString().c_str());
+  PrintIndented(indent, "filename %s\n", filename);
+}
+
+// Builds a PERF_RECORD_BUILD_ID record in a newly allocated buffer.
+// in_kernel selects the PERF_RECORD_MISC_KERNEL or PERF_RECORD_MISC_USER misc
+// value. The layout matches what BuildIdRecord(const char*) parses: header,
+// pid, build id aligned to 8 bytes, filename aligned to 64 bytes.
+BuildIdRecord::BuildIdRecord(bool in_kernel, pid_t pid, const BuildId& build_id,
+                             const std::string& filename) {
+  SetTypeAndMisc(PERF_RECORD_BUILD_ID,
+                 in_kernel ? PERF_RECORD_MISC_KERNEL : PERF_RECORD_MISC_USER);
+  this->pid = pid;
+  this->build_id = build_id;
+  SetSize(header_size() + sizeof(pid) + Align(build_id.Size(), 8) +
+          Align(filename.size() + 1, 64));
+  // Value-initialize the buffer so the alignment padding after the build id
+  // and after the filename is zero instead of leftover heap content.
+  char* new_binary = new char[size()]();
+  char* p = new_binary;
+  MoveToBinaryFormat(header, p);
+  MoveToBinaryFormat(pid, p);
+  memcpy(p, build_id.Data(), build_id.Size());
+  p += Align(build_id.Size(), 8);
+  this->filename = p;
+  strcpy(p, filename.c_str());
+  UpdateBinary(new_binary);
+}
+
+// Parses a kernel symbol record from the binary data at [p]: kallsyms_size
+// followed by that many bytes of text (advanced with 8-byte alignment).
+// kallsyms points into the buffer at [p].
+KernelSymbolRecord::KernelSymbolRecord(const char* p) : Record(p) {
+  const char* end = p + size();
+  p += header_size();
+  MoveFromBinaryFormat(kallsyms_size, p);
+  kallsyms = p;
+  p += Align(kallsyms_size, 8);
+  // The parsed fields must account for exactly the size stored in the header.
+  CHECK_EQ(p, end);
+}
+
+// Prints the kallsyms text held by this record. The text is copied into a
+// std::string first because the stored bytes are length-delimited.
+void KernelSymbolRecord::DumpData(size_t indent) const {
+  const std::string text(kallsyms, kallsyms + kallsyms_size);
+  PrintIndented(indent, "kallsyms: %s\n", text.c_str());
+}
+
+// Builds a SIMPLE_PERF_RECORD_KERNEL_SYMBOL record in a newly allocated
+// buffer holding a copy of [kallsyms]. The layout matches what
+// KernelSymbolRecord(const char*) parses: header, kallsyms_size, then the
+// text aligned to 8 bytes.
+KernelSymbolRecord::KernelSymbolRecord(const std::string& kallsyms) {
+  SetTypeAndMisc(SIMPLE_PERF_RECORD_KERNEL_SYMBOL, 0);
+  kallsyms_size = kallsyms.size();
+  SetSize(header_size() + sizeof(kallsyms_size) + Align(kallsyms.size(), 8));
+  // Value-initialize the buffer so the alignment padding after the text is
+  // zero instead of leftover heap content.
+  char* new_binary = new char[size()]();
+  char* p = new_binary;
+  MoveToBinaryFormat(header, p);
+  MoveToBinaryFormat(kallsyms_size, p);
+  this->kallsyms = p;
+  memcpy(p, kallsyms.data(), kallsyms_size);
+  UpdateBinary(new_binary);
+}
+
+// Parses a dso record from the binary data at [p]: dso_type, dso_id and
+// min_vaddr followed by the NUL-terminated name (advanced with 8-byte
+// alignment). dso_name points into the buffer at [p].
+DsoRecord::DsoRecord(const char* p) : Record(p) {
+  const char* end = p + size();
+  p += header_size();
+  MoveFromBinaryFormat(dso_type, p);
+  MoveFromBinaryFormat(dso_id, p);
+  MoveFromBinaryFormat(min_vaddr, p);
+  dso_name = p;
+  p += Align(strlen(dso_name) + 1, 8);
+  // The parsed fields must account for exactly the size stored in the header.
+  CHECK_EQ(p, end);
+}
+
+// Builds a SIMPLE_PERF_RECORD_DSO record in a newly allocated buffer. The
+// layout matches what DsoRecord(const char*) parses: header, dso_type,
+// dso_id, min_vaddr, then the NUL-terminated name aligned to 8 bytes.
+DsoRecord::DsoRecord(uint64_t dso_type, uint64_t dso_id,
+                     const std::string& dso_name, uint64_t min_vaddr) {
+  SetTypeAndMisc(SIMPLE_PERF_RECORD_DSO, 0);
+  this->dso_type = dso_type;
+  this->dso_id = dso_id;
+  this->min_vaddr = min_vaddr;
+  SetSize(header_size() + 3 * sizeof(uint64_t) + Align(dso_name.size() + 1, 8));
+  // Value-initialize the buffer so the alignment padding after the name is
+  // zero instead of leftover heap content.
+  char* new_binary = new char[size()]();
+  char* p = new_binary;
+  MoveToBinaryFormat(header, p);
+  MoveToBinaryFormat(dso_type, p);
+  MoveToBinaryFormat(dso_id, p);
+  MoveToBinaryFormat(min_vaddr, p);
+  this->dso_name = p;
+  strcpy(p, dso_name.c_str());
+  UpdateBinary(new_binary);
+}
+
+// Prints the dso type (as a name and as a number), the dso id, the minimum
+// virtual address and the dso name.
+void DsoRecord::DumpData(size_t indent) const {
+  PrintIndented(indent, "dso_type: %s(%" PRIu64 ")\n",
+                DsoTypeToString(static_cast<DsoType>(dso_type)), dso_type);
+  PrintIndented(indent, "dso_id: %" PRIu64 "\n", dso_id);
+  PrintIndented(indent, "min_vaddr: 0x%" PRIx64 "\n", min_vaddr);
+  PrintIndented(indent, "dso_name: %s\n", dso_name);
+}
+
+// Parses a symbol record from the binary data at [p]: addr, len and dso_id
+// followed by the NUL-terminated name (advanced with 8-byte alignment).
+// name points into the buffer at [p].
+SymbolRecord::SymbolRecord(const char* p) : Record(p) {
+  const char* end = p + size();
+  p += header_size();
+  MoveFromBinaryFormat(addr, p);
+  MoveFromBinaryFormat(len, p);
+  MoveFromBinaryFormat(dso_id, p);
+  name = p;
+  p += Align(strlen(name) + 1, 8);
+  // The parsed fields must account for exactly the size stored in the header.
+  CHECK_EQ(p, end);
+}
+
+// Builds a SIMPLE_PERF_RECORD_SYMBOL record in a newly allocated buffer. The
+// layout matches what SymbolRecord(const char*) parses: header, addr, len,
+// dso_id, then the NUL-terminated name aligned to 8 bytes.
+SymbolRecord::SymbolRecord(uint64_t addr, uint64_t len, const std::string& name,
+                           uint64_t dso_id) {
+  SetTypeAndMisc(SIMPLE_PERF_RECORD_SYMBOL, 0);
+  this->addr = addr;
+  this->len = len;
+  this->dso_id = dso_id;
+  SetSize(header_size() + 3 * sizeof(uint64_t) + Align(name.size() + 1, 8));
+  // Value-initialize the buffer so the alignment padding after the name is
+  // zero instead of leftover heap content.
+  char* new_binary = new char[size()]();
+  char* p = new_binary;
+  MoveToBinaryFormat(header, p);
+  MoveToBinaryFormat(addr, p);
+  MoveToBinaryFormat(len, p);
+  MoveToBinaryFormat(dso_id, p);
+  this->name = p;
+  strcpy(p, name.c_str());
+  UpdateBinary(new_binary);
+}
+
+// Prints the symbol name, its address and length in hex, and the dso id.
+void SymbolRecord::DumpData(size_t indent) const {
+  PrintIndented(indent, "name: %s\n", name);
+  PrintIndented(indent, "addr: 0x%" PRIx64 "\n", addr);
+  PrintIndented(indent, "len: 0x%" PRIx64 "\n", len);
+  PrintIndented(indent, "dso_id: %" PRIu64 "\n", dso_id);
+}
+
+// Parses a tracing data record from the binary data at [p]: data_size
+// followed by that many bytes of data (advanced with 64-byte alignment).
+// data points into the buffer at [p].
+TracingDataRecord::TracingDataRecord(const char* p) : Record(p) {
+  const char* end = p + size();
+  p += header_size();
+  MoveFromBinaryFormat(data_size, p);
+  data = p;
+  p += Align(data_size, 64);
+  // The parsed fields must account for exactly the size stored in the header.
+  CHECK_EQ(p, end);
+}
+
+// Builds a PERF_RECORD_TRACING_DATA record in a newly allocated buffer
+// holding a copy of [tracing_data]. The layout matches what
+// TracingDataRecord(const char*) parses: header, data_size, then the data
+// aligned to 64 bytes.
+TracingDataRecord::TracingDataRecord(const std::vector<char>& tracing_data) {
+  SetTypeAndMisc(PERF_RECORD_TRACING_DATA, 0);
+  data_size = tracing_data.size();
+  SetSize(header_size() + sizeof(uint32_t) + Align(tracing_data.size(), 64));
+  // Value-initialize the buffer so the alignment padding after the data is
+  // zero instead of leftover heap content.
+  char* new_binary = new char[size()]();
+  char* p = new_binary;
+  MoveToBinaryFormat(header, p);
+  MoveToBinaryFormat(data_size, p);
+  data = p;
+  memcpy(p, tracing_data.data(), data_size);
+  UpdateBinary(new_binary);
+}
+
+// Copies the tracing data into a Tracing object and delegates the dump to it.
+void TracingDataRecord::DumpData(size_t indent) const {
+  Tracing tracing(std::vector<char>(data, data + data_size));
+  tracing.Dump(indent);
+}
+
+// Wraps a record of an unrecognized type. Only the header is decoded; data
+// points at the first byte after the header in the buffer at [p].
+UnknownRecord::UnknownRecord(const char* p) : Record(p) {
+  p += header_size();
+  data = p;
+}
+
+// Intentionally prints nothing: the content of an unknown record is opaque.
+void UnknownRecord::DumpData(size_t) const {}
+
+// Creates the Record subclass matching [type] from the binary data at [p].
+// Types without a dedicated class are wrapped in an UnknownRecord. [attr] is
+// passed only to the record classes whose constructors take it.
+std::unique_ptr<Record> ReadRecordFromBuffer(const perf_event_attr& attr,
+                                             uint32_t type, const char* p) {
+  switch (type) {
+    case PERF_RECORD_MMAP:
+      return std::unique_ptr<Record>(new MmapRecord(attr, p));
+    case PERF_RECORD_MMAP2:
+      return std::unique_ptr<Record>(new Mmap2Record(attr, p));
+    case PERF_RECORD_COMM:
+      return std::unique_ptr<Record>(new CommRecord(attr, p));
+    case PERF_RECORD_EXIT:
+      return std::unique_ptr<Record>(new ExitRecord(attr, p));
+    case PERF_RECORD_FORK:
+      return std::unique_ptr<Record>(new ForkRecord(attr, p));
+    case PERF_RECORD_LOST:
+      return std::unique_ptr<Record>(new LostRecord(attr, p));
+    case PERF_RECORD_SAMPLE:
+      return std::unique_ptr<Record>(new SampleRecord(attr, p));
+    case PERF_RECORD_TRACING_DATA:
+      return std::unique_ptr<Record>(new TracingDataRecord(p));
+    case SIMPLE_PERF_RECORD_KERNEL_SYMBOL:
+      return std::unique_ptr<Record>(new KernelSymbolRecord(p));
+    case SIMPLE_PERF_RECORD_DSO:
+      return std::unique_ptr<Record>(new DsoRecord(p));
+    case SIMPLE_PERF_RECORD_SYMBOL:
+      return std::unique_ptr<Record>(new SymbolRecord(p));
+    default:
+      // Keep unrecognized records instead of dropping them.
+      return std::unique_ptr<Record>(new UnknownRecord(p));
+  }
+}
+
+// Same as ReadRecordFromBuffer(), except that the buffer at [p] is handed
+// over: the returned record is marked as owning it, and if no record is
+// produced the buffer is freed here.
+std::unique_ptr<Record> ReadRecordFromOwnedBuffer(const perf_event_attr& attr,
+                                                  uint32_t type,
+                                                  const char* p) {
+  std::unique_ptr<Record> result = ReadRecordFromBuffer(attr, type, p);
+  if (result == nullptr) {
+    delete[] p;
+    return result;
+  }
+  result->OwnBinary();
+  return result;
+}
+
+// Splits [buf, buf + buf_size) into consecutive records and parses each one
+// with ReadRecordFromBuffer(). The returned records refer to [buf] and do not
+// own it. Aborts (CHECK) if a record header does not fit in the remaining
+// bytes, or if a record claims to be smaller than a header or larger than the
+// remaining bytes.
+std::vector<std::unique_ptr<Record>> ReadRecordsFromBuffer(
+    const perf_event_attr& attr, const char* buf, size_t buf_size) {
+  std::vector<std::unique_ptr<Record>> result;
+  const char* p = buf;
+  const char* end = buf + buf_size;
+  while (p < end) {
+    // A whole header must be available before it is decoded.
+    const size_t bytes_left = static_cast<size_t>(end - p);
+    CHECK_GE(bytes_left, static_cast<size_t>(Record::header_size()));
+    RecordHeader header(p);
+    // Every record contains at least its header; this also rejects size 0,
+    // which would make the loop spin forever.
+    CHECK_GE(header.size, Record::header_size());
+    // Compare sizes rather than pointers so a corrupt size cannot move
+    // p + header.size outside the buffer's address range.
+    CHECK_LE(header.size, bytes_left);
+    result.push_back(ReadRecordFromBuffer(attr, header.type, p));
+    p += header.size;
+  }
+  return result;
+}
+
+// Ordering used by the cache. Returns true if this record should be handled
+// before [other]: by timestamp first, then non-sample records before sample
+// records, then by insertion order.
+bool RecordCache::RecordWithSeq::IsHappensBefore(
+    const RecordWithSeq& other) const {
+  // The record with the smaller time happens first.
+  const uint64_t this_time = record->Timestamp();
+  const uint64_t that_time = other.record->Timestamp();
+  if (this_time < that_time) {
+    return true;
+  }
+  if (this_time > that_time) {
+    return false;
+  }
+  // Same time: non-sample records go first, because they may contain
+  // information needed to parse the sample records.
+  const bool this_is_sample = (record->type() == PERF_RECORD_SAMPLE);
+  const bool that_is_sample = (other.record->type() == PERF_RECORD_SAMPLE);
+  if (this_is_sample != that_is_sample) {
+    return !this_is_sample;
+  }
+  // Otherwise keep the order in which the records entered the cache.
+  return seq < other.seq;
+}
+
+// Comparator for the priority queue. The arguments are swapped so that the
+// record that happens first ends up at the top of the queue
+// (std::priority_queue keeps the largest element at the top).
+bool RecordCache::RecordComparator::operator()(const RecordWithSeq& r1,
+                                               const RecordWithSeq& r2) {
+  return r2.IsHappensBefore(r1);
+}
+
+// Creates an empty cache. has_timestamp enables the time-based checks in
+// Push() and Pop(); min_cache_size and min_time_diff_in_ns are the thresholds
+// Pop() uses to decide whether a record may be released.
+RecordCache::RecordCache(bool has_timestamp, size_t min_cache_size,
+                         uint64_t min_time_diff_in_ns)
+    : has_timestamp_(has_timestamp),
+      min_cache_size_(min_cache_size),
+      min_time_diff_in_ns_(min_time_diff_in_ns),
+      last_time_(0),
+      cur_seq_(0),
+      queue_(RecordComparator()) {}
+
+// The queue holds raw pointers; PopAll() wraps them in unique_ptrs, and
+// discarding its result deletes the remaining records.
+RecordCache::~RecordCache() { PopAll(); }
+
+// Takes ownership of [record] and adds it to the queue, tagged with the next
+// sequence number. With timestamps enabled, last_time_ tracks the largest
+// timestamp pushed so far.
+void RecordCache::Push(std::unique_ptr<Record> record) {
+  if (has_timestamp_) {
+    const uint64_t record_time = record->Timestamp();
+    if (last_time_ < record_time) {
+      last_time_ = record_time;
+    }
+  }
+  Record* raw_record = record.release();
+  queue_.push(RecordWithSeq(cur_seq_++, raw_record));
+}
+
+// Pushes every record of [records] into the cache, in vector order.
+void RecordCache::Push(std::vector<std::unique_ptr<Record>> records) {
+  for (size_t i = 0; i < records.size(); ++i) {
+    Push(std::move(records[i]));
+  }
+}
+
+// Returns the earliest cached record, or nullptr when nothing may be released
+// yet: the cache is empty, it holds fewer than min_cache_size_ records, or
+// (with timestamps enabled) the earliest record is not at least
+// min_time_diff_in_ns_ older than the latest timestamp pushed so far.
+std::unique_ptr<Record> RecordCache::Pop() {
+  // The explicit empty() test matters when min_cache_size_ is 0: the size
+  // test alone would let top() run on an empty queue.
+  if (queue_.empty() || queue_.size() < min_cache_size_) {
+    return nullptr;
+  }
+  Record* r = queue_.top().record;
+  if (has_timestamp_) {
+    if (r->Timestamp() + min_time_diff_in_ns_ > last_time_) {
+      return nullptr;
+    }
+  }
+  queue_.pop();
+  return std::unique_ptr<Record>(r);
+}
+
+// Empties the cache and returns all records in queue order (earliest first),
+// regardless of the min_cache_size / min_time_diff thresholds.
+std::vector<std::unique_ptr<Record>> RecordCache::PopAll() {
+  std::vector<std::unique_ptr<Record>> popped;
+  for (; !queue_.empty(); queue_.pop()) {
+    popped.emplace_back(queue_.top().record);
+  }
+  return popped;
+}
+
+// Returns the earliest cached record without applying the min_cache_size /
+// min_time_diff thresholds, or nullptr if the cache is empty.
+std::unique_ptr<Record> RecordCache::ForcedPop() {
+  std::unique_ptr<Record> popped;
+  if (!queue_.empty()) {
+    popped.reset(queue_.top().record);
+    queue_.pop();
+  }
+  return popped;
+}
diff --git a/simpleperf/record.h b/simpleperf/record.h
new file mode 100644
index 0000000..ef26391
--- /dev/null
+++ b/simpleperf/record.h
@@ -0,0 +1,549 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_RECORD_H_
+#define SIMPLE_PERF_RECORD_H_
+
+#include <stdio.h>
+#include <sys/types.h>
+
+#include <memory>
+#include <queue>
+#include <string>
+#include <vector>
+
+#include <android-base/logging.h>
+
+#include "build_id.h"
+#include "perf_event.h"
+
+struct KernelMmap;
+struct ModuleMmap;
+struct ThreadComm;
+struct ThreadMmap;
+
+// Record types defined in user space. Types below
+// SIMPLE_PERF_RECORD_TYPE_START are stored with a perf_event_header; types
+// from SIMPLE_PERF_RECORD_TYPE_START upwards are stored with a
+// simpleperf_record_header (see RecordHeader).
+enum user_record_type {
+  PERF_RECORD_USER_DEFINED_TYPE_START = 64,
+  PERF_RECORD_ATTR = 64,
+  PERF_RECORD_EVENT_TYPE,
+  PERF_RECORD_TRACING_DATA,
+  PERF_RECORD_BUILD_ID,
+  PERF_RECORD_FINISHED_ROUND,
+
+  // Types specific to simpleperf.
+  SIMPLE_PERF_RECORD_TYPE_START = 32768,
+  SIMPLE_PERF_RECORD_KERNEL_SYMBOL,
+  SIMPLE_PERF_RECORD_DSO,
+  SIMPLE_PERF_RECORD_SYMBOL,
+  SIMPLE_PERF_RECORD_SPLIT,
+  SIMPLE_PERF_RECORD_SPLIT_END,
+};
+
+// perf_event_header uses u16 to store record size. However, that is not
+// enough for storing records like KERNEL_SYMBOL or TRACING_DATA. So define
+// a simpleperf_record_header struct to store record header for simpleperf
+// defined records (type > SIMPLE_PERF_RECORD_TYPE_START).
+struct simpleperf_record_header {
+  uint32_t type;
+  uint16_t size1;  // High 16 bits of the 32-bit record size.
+  uint16_t size0;  // Low 16 bits of the 32-bit record size.
+};
+
+static_assert(
+    sizeof(simpleperf_record_header) == sizeof(perf_event_header),
+    "simpleperf_record_header should have the same size as perf_event_header");
+
+// Fixed-size pieces of sample data. Each struct is read from and written to
+// the binary record format as a whole, so its layout must not change.
+
+// Data for PERF_SAMPLE_IP.
+struct PerfSampleIpType {
+  uint64_t ip;
+};
+
+// Data for PERF_SAMPLE_TID.
+struct PerfSampleTidType {
+  uint32_t pid, tid;
+};
+
+// Data for PERF_SAMPLE_TIME.
+struct PerfSampleTimeType {
+  uint64_t time;
+};
+
+// Data for PERF_SAMPLE_ADDR.
+struct PerfSampleAddrType {
+  uint64_t addr;
+};
+
+// Data for PERF_SAMPLE_ID (also used for PERF_SAMPLE_IDENTIFIER).
+struct PerfSampleIdType {
+  uint64_t id;
+};
+
+// Data for PERF_SAMPLE_STREAM_ID.
+struct PerfSampleStreamIdType {
+  uint64_t stream_id;
+};
+
+// Data for PERF_SAMPLE_CPU.
+struct PerfSampleCpuType {
+  uint32_t cpu, res;
+};
+
+// Data for PERF_SAMPLE_PERIOD.
+struct PerfSamplePeriodType {
+  uint64_t period;
+};
+
+// Variable-size pieces of sample data. The pointer members refer to memory
+// inside the record's binary buffer; these structs do not own it.
+
+// Data for PERF_SAMPLE_CALLCHAIN: ip_nr entries starting at ips.
+struct PerfSampleCallChainType {
+  uint64_t ip_nr;
+  const uint64_t* ips;
+};
+
+// Data for PERF_SAMPLE_RAW: size bytes starting at data.
+struct PerfSampleRawType {
+  uint32_t size;
+  const char* data;
+};
+
+// One entry of the branch stack.
+struct BranchStackItemType {
+  uint64_t from;
+  uint64_t to;
+  uint64_t flags;
+};
+
+// Data for PERF_SAMPLE_BRANCH_STACK: stack_nr entries starting at stack.
+struct PerfSampleBranchStackType {
+  uint64_t stack_nr;
+  const BranchStackItemType* stack;
+};
+
+// Data for PERF_SAMPLE_REGS_USER.
+struct PerfSampleRegsUserType {
+  uint64_t abi;       // 0 means no register values were recorded.
+  uint64_t reg_mask;  // One bit per recorded register.
+  uint64_t reg_nr;    // Number of bits set in reg_mask.
+  const uint64_t* regs;  // reg_nr values, in ascending bit order of reg_mask.
+};
+
+// Data for PERF_SAMPLE_STACK_USER: size bytes starting at data.
+struct PerfSampleStackUserType {
+  uint64_t size;
+  const char* data;
+  uint64_t dyn_size;  // See SampleRecord::GetValidStackSize().
+};
+
+// In-memory form of a record header. Records whose type is below
+// SIMPLE_PERF_RECORD_TYPE_START are stored as a perf_event_header (16-bit
+// size). All other records are stored as a simpleperf_record_header, which
+// has no misc field and splits a 32-bit size into size1 (high half) and
+// size0 (low half).
+struct RecordHeader {
+ public:
+  uint32_t type;
+  uint16_t misc;
+  uint32_t size;
+
+  RecordHeader() : type(0), misc(0), size(0) {}
+
+  // Decodes the header stored at [p].
+  explicit RecordHeader(const char* p) {
+    auto pheader = reinterpret_cast<const perf_event_header*>(p);
+    if (pheader->type < SIMPLE_PERF_RECORD_TYPE_START) {
+      type = pheader->type;
+      misc = pheader->misc;
+      size = pheader->size;
+    } else {
+      auto sheader = reinterpret_cast<const simpleperf_record_header*>(p);
+      type = sheader->type;
+      misc = 0;
+      // Widen before shifting: size1 would otherwise be promoted to a signed
+      // int, and shifting a value >= 0x8000 left by 16 overflows it.
+      size = (static_cast<uint32_t>(sheader->size1) << 16) | sheader->size0;
+    }
+  }
+
+  // Encodes this header at [p] and advances p past it. Aborts (CHECK) if the
+  // size does not fit the 16-bit field of a perf_event_header, or if misc is
+  // set on a record stored with a simpleperf_record_header.
+  void MoveToBinaryFormat(char*& p) const {
+    if (type < SIMPLE_PERF_RECORD_TYPE_START) {
+      auto pheader = reinterpret_cast<perf_event_header*>(p);
+      pheader->type = type;
+      pheader->misc = misc;
+      CHECK_LT(size, 1u << 16);
+      pheader->size = static_cast<uint16_t>(size);
+    } else {
+      auto sheader = reinterpret_cast<simpleperf_record_header*>(p);
+      sheader->type = type;
+      CHECK_EQ(misc, 0u);
+      sheader->size1 = size >> 16;
+      sheader->size0 = size & 0xffff;
+    }
+    // Both header layouts have the same size (see the static_assert above).
+    p += sizeof(perf_event_header);
+  }
+};
+
+// SampleId is optional at the end of a record in binary format. Its content is
+// determined by sample_id_all and sample_type in perf_event_attr. To avoid the
+// complexity of referring to perf_event_attr each time, we copy sample_id_all
+// and sample_type inside the SampleId structure.
+struct SampleId {
+  bool sample_id_all;    // Copied from perf_event_attr.
+  uint64_t sample_type;  // Copied from perf_event_attr.
+
+  PerfSampleTidType tid_data;    // Valid if sample_id_all && PERF_SAMPLE_TID.
+  PerfSampleTimeType time_data;  // Valid if sample_id_all && PERF_SAMPLE_TIME.
+  PerfSampleIdType id_data;      // Valid if sample_id_all && PERF_SAMPLE_ID.
+  PerfSampleStreamIdType
+      stream_id_data;  // Valid if sample_id_all && PERF_SAMPLE_STREAM_ID.
+  PerfSampleCpuType cpu_data;  // Valid if sample_id_all && PERF_SAMPLE_CPU.
+
+  SampleId();
+
+  // Create the content of sample_id. It depends on the attr we use.
+  // NOTE(review): the return value is presumably the size of the created
+  // content in binary format -- confirm against the definition.
+  size_t CreateContent(const perf_event_attr& attr, uint64_t event_id);
+
+  // Parse sample_id from binary format in the buffer pointed by p.
+  void ReadFromBinaryFormat(const perf_event_attr& attr, const char* p,
+                            const char* end);
+
+  // Write the binary format of sample_id to the buffer pointed by p.
+  void WriteToBinaryFormat(char*& p) const;
+  void Dump(size_t indent) const;
+  // NOTE(review): presumably the size of sample_id in binary format --
+  // confirm against the definition.
+  size_t Size() const;
+};
+
+// Usually one record contains the following three parts in order in binary
+// format:
+//   RecordHeader (at the head of a record, containing type and size info)
+//   data depends on the record type
+//   SampleId (optional part at the end of a record)
+// We hold the common parts (RecordHeader and SampleId) in the base class
+// Record, and hold the type specific data part in classes derived from Record.
+struct Record {
+  RecordHeader header;
+  SampleId sample_id;
+
+  Record() : binary_(nullptr), own_binary_(false) {}
+  // Decodes the header at [p] and remembers [p] as the record's binary data.
+  // The buffer is not owned unless OwnBinary() is called.
+  explicit Record(const char* p) : header(p), binary_(p), own_binary_(false) {}
+  Record(Record&& other);
+
+  virtual ~Record() {
+    if (own_binary_) {
+      delete[] binary_;
+    }
+  }
+
+  // After this call the destructor delete[]s the binary buffer.
+  void OwnBinary() { own_binary_ = true; }
+
+  uint32_t type() const { return header.type; }
+
+  uint16_t misc() const { return header.misc; }
+
+  // Size of the whole record as stored in the header.
+  uint32_t size() const { return header.size; }
+
+  // Size of the header in binary format.
+  static uint32_t header_size() { return sizeof(perf_event_header); }
+
+  // True if the cpu mode bits of misc are PERF_RECORD_MISC_KERNEL.
+  bool InKernel() const {
+    return (header.misc & PERF_RECORD_MISC_CPUMODE_MASK) ==
+           PERF_RECORD_MISC_KERNEL;
+  }
+
+  // The setters below only change the in-memory header, not the binary data.
+  void SetTypeAndMisc(uint32_t type, uint16_t misc) {
+    header.type = type;
+    header.misc = misc;
+  }
+
+  void SetSize(uint32_t size) { header.size = size; }
+
+  void Dump(size_t indent = 0) const;
+
+  const char* Binary() const { return binary_; }
+
+  virtual uint64_t Timestamp() const;
+  virtual uint32_t Cpu() const;
+
+ protected:
+  void UpdateBinary(const char* new_binary);
+  // Prints the type specific part of the record.
+  virtual void DumpData(size_t) const = 0;
+
+  const char* binary_;  // Binary format of the record.
+  bool own_binary_;     // Whether the destructor frees binary_.
+
+  DISALLOW_COPY_AND_ASSIGN(Record);
+};
+
+// Record class used for PERF_RECORD_MMAP.
+struct MmapRecord : public Record {
+  // Fixed-size part of the record data.
+  struct MmapRecordDataType {
+    uint32_t pid, tid;
+    uint64_t addr;
+    uint64_t len;
+    uint64_t pgoff;
+  };
+  const MmapRecordDataType* data;
+  const char* filename;
+
+  // Used by ReadRecordFromBuffer() to read a record from the buffer at [p].
+  MmapRecord(const perf_event_attr& attr, const char* p);
+
+  MmapRecord(const perf_event_attr& attr, bool in_kernel, uint32_t pid,
+             uint32_t tid, uint64_t addr, uint64_t len, uint64_t pgoff,
+             const std::string& filename, uint64_t event_id, uint64_t time = 0);
+
+  void SetDataAndFilename(const MmapRecordDataType& data,
+                          const std::string& filename);
+
+ protected:
+  void DumpData(size_t indent) const override;
+};
+
+// Record class used for PERF_RECORD_MMAP2.
+struct Mmap2Record : public Record {
+  // Fixed-size part of the record data.
+  struct Mmap2RecordDataType {
+    uint32_t pid, tid;
+    uint64_t addr;
+    uint64_t len;
+    uint64_t pgoff;
+    uint32_t maj;
+    uint32_t min;
+    uint64_t ino;
+    uint64_t ino_generation;
+    uint32_t prot, flags;
+  };
+  const Mmap2RecordDataType* data;
+  const char* filename;
+
+  // Used by ReadRecordFromBuffer() to read a record from the buffer at [p].
+  Mmap2Record(const perf_event_attr& attr, const char* p);
+
+  void SetDataAndFilename(const Mmap2RecordDataType& data,
+                          const std::string& filename);
+
+ protected:
+  void DumpData(size_t indent) const override;
+};
+
+// Record class used for PERF_RECORD_COMM.
+struct CommRecord : public Record {
+  // Fixed-size part of the record data.
+  struct CommRecordDataType {
+    uint32_t pid, tid;
+  };
+  const CommRecordDataType* data;
+  const char* comm;
+
+  // Used by ReadRecordFromBuffer() to read a record from the buffer at [p].
+  CommRecord(const perf_event_attr& attr, const char* p);
+
+  CommRecord(const perf_event_attr& attr, uint32_t pid, uint32_t tid,
+             const std::string& comm, uint64_t event_id);
+
+ protected:
+  void DumpData(size_t indent) const override;
+};
+
+// Common base of ExitRecord and ForkRecord, which share one data layout.
+struct ExitOrForkRecord : public Record {
+  struct ExitOrForkRecordDataType {
+    uint32_t pid, ppid;
+    uint32_t tid, ptid;
+    uint64_t time;
+  };
+  const ExitOrForkRecordDataType* data;
+
+  ExitOrForkRecord(const perf_event_attr& attr, const char* p);
+
+  // Leaves data unset (nullptr).
+  ExitOrForkRecord() : data(nullptr) {}
+
+ protected:
+  void DumpData(size_t indent) const override;
+};
+
+// Record class used for PERF_RECORD_EXIT.
+struct ExitRecord : public ExitOrForkRecord {
+  ExitRecord(const perf_event_attr& attr, const char* p)
+      : ExitOrForkRecord(attr, p) {}
+};
+
+// Record class used for PERF_RECORD_FORK.
+struct ForkRecord : public ExitOrForkRecord {
+  ForkRecord(const perf_event_attr& attr, const char* p)
+      : ExitOrForkRecord(attr, p) {}
+
+  ForkRecord(const perf_event_attr& attr, uint32_t pid, uint32_t tid,
+             uint32_t ppid, uint32_t ptid, uint64_t event_id);
+};
+
+// Record class used for PERF_RECORD_LOST.
+struct LostRecord : public Record {
+  uint64_t id;
+  uint64_t lost;
+
+  // Used by ReadRecordFromBuffer() to read a record from the buffer at [p].
+  LostRecord(const perf_event_attr& attr, const char* p);
+
+ protected:
+  void DumpData(size_t indent) const override;
+};
+
+// Record class used for PERF_RECORD_SAMPLE.
+struct SampleRecord : public Record {
+  uint64_t sample_type;  // sample_type is a bit mask determining which fields
+                         // below are valid.
+
+  PerfSampleIpType ip_data;               // Valid if PERF_SAMPLE_IP.
+  PerfSampleTidType tid_data;             // Valid if PERF_SAMPLE_TID.
+  PerfSampleTimeType time_data;           // Valid if PERF_SAMPLE_TIME.
+  PerfSampleAddrType addr_data;           // Valid if PERF_SAMPLE_ADDR.
+  PerfSampleIdType id_data;               // Valid if PERF_SAMPLE_ID.
+  PerfSampleStreamIdType stream_id_data;  // Valid if PERF_SAMPLE_STREAM_ID.
+  PerfSampleCpuType cpu_data;             // Valid if PERF_SAMPLE_CPU.
+  PerfSamplePeriodType period_data;       // Valid if PERF_SAMPLE_PERIOD.
+
+  PerfSampleCallChainType callchain_data;  // Valid if PERF_SAMPLE_CALLCHAIN.
+  PerfSampleRawType raw_data;              // Valid if PERF_SAMPLE_RAW.
+  PerfSampleBranchStackType
+      branch_stack_data;                  // Valid if PERF_SAMPLE_BRANCH_STACK.
+  PerfSampleRegsUserType regs_user_data;  // Valid if PERF_SAMPLE_REGS_USER.
+  PerfSampleStackUserType stack_user_data;  // Valid if PERF_SAMPLE_STACK_USER.
+
+  // Parses a sample from the buffer at [p]; the pointer members of the
+  // fields above refer into that buffer.
+  SampleRecord(const perf_event_attr& attr, const char* p);
+  // Drops the user register and user stack data and appends [ips] to the
+  // callchain, rewriting the record's binary data in place.
+  void ReplaceRegAndStackWithCallChain(const std::vector<uint64_t>& ips);
+  // Returns time_data.time.
+  uint64_t Timestamp() const override;
+  // Returns cpu_data.cpu.
+  uint32_t Cpu() const override;
+
+  uint64_t GetValidStackSize() const {
+    // If stack_user_data.dyn_size == 0, it may be because the kernel misses
+    // the patch to update dyn_size, like in N9 (See b/22612370). So assume
+    // all stack data is valid if dyn_size == 0.
+    if (stack_user_data.dyn_size == 0) {
+      return stack_user_data.size;
+    }
+    return stack_user_data.dyn_size;
+  }
+
+ protected:
+  void DumpData(size_t indent) const override;
+};
+
+// BuildIdRecord is defined in user-space, stored in BuildId feature section in
+// record file.
+struct BuildIdRecord : public Record {
+  uint32_t pid;
+  BuildId build_id;
+  const char* filename;  // Points into the record's binary data.
+
+  // Parses a record from the binary data at [p].
+  explicit BuildIdRecord(const char* p);
+
+  // Builds a new record, including its binary data, from the given values.
+  BuildIdRecord(bool in_kernel, pid_t pid, const BuildId& build_id,
+                const std::string& filename);
+
+ protected:
+  void DumpData(size_t indent) const override;
+};
+
+// Record class used for SIMPLE_PERF_RECORD_KERNEL_SYMBOL. It carries
+// kallsyms text.
+struct KernelSymbolRecord : public Record {
+  uint32_t kallsyms_size;  // Number of bytes at kallsyms.
+  const char* kallsyms;    // Points into the record's binary data.
+
+  // Parses a record from the binary data at [p].
+  explicit KernelSymbolRecord(const char* p);
+
+  // Builds a new record, including its binary data, holding a copy of
+  // [kallsyms].
+  explicit KernelSymbolRecord(const std::string& kallsyms);
+
+ protected:
+  void DumpData(size_t indent) const override;
+};
+
+// Record class used for SIMPLE_PERF_RECORD_DSO.
+struct DsoRecord : public Record {
+  uint64_t dso_type;  // Dumped as a DsoType value.
+  uint64_t dso_id;
+  uint64_t min_vaddr;
+  const char* dso_name;  // Points into the record's binary data.
+
+  // Parses a record from the binary data at [p].
+  explicit DsoRecord(const char* p);
+
+  // Builds a new record, including its binary data, from the given values.
+  DsoRecord(uint64_t dso_type, uint64_t dso_id, const std::string& dso_name,
+            uint64_t min_vaddr);
+
+ protected:
+  void DumpData(size_t indent) const override;
+};
+
+// Record class used for SIMPLE_PERF_RECORD_SYMBOL.
+struct SymbolRecord : public Record {
+  uint64_t addr;
+  uint64_t len;
+  uint64_t dso_id;
+  const char* name;  // Points into the record's binary data.
+
+  // Parses a record from the binary data at [p].
+  explicit SymbolRecord(const char* p);
+
+  // Builds a new record, including its binary data, from the given values.
+  SymbolRecord(uint64_t addr, uint64_t len, const std::string& name,
+               uint64_t dso_id);
+
+ protected:
+  void DumpData(size_t indent) const override;
+};
+
+// Record class used for PERF_RECORD_TRACING_DATA.
+struct TracingDataRecord : public Record {
+  uint32_t data_size;  // Number of bytes at data.
+  const char* data;    // Points into the record's binary data.
+
+  // Parses a record from the binary data at [p].
+  explicit TracingDataRecord(const char* p);
+
+  // Builds a new record, including its binary data, holding a copy of
+  // [tracing_data].
+  explicit TracingDataRecord(const std::vector<char>& tracing_data);
+
+ protected:
+  void DumpData(size_t indent) const override;
+};
+
+// UnknownRecord is used for unknown record types, it makes sure all unknown
+// records are not changed when modifying perf.data.
+struct UnknownRecord : public Record {
+  const char* data;  // First byte after the header in the binary data.
+
+  explicit UnknownRecord(const char* p);
+
+ protected:
+  // Prints nothing.
+  void DumpData(size_t indent) const override;
+};
+
+// Read record from the buffer pointed by [p]. But the record doesn't own
+// the buffer.
+std::unique_ptr<Record> ReadRecordFromBuffer(const perf_event_attr& attr,
+                                             uint32_t type, const char* p);
+
+// Read record from the buffer pointed by [p]. And the record owns the buffer.
+std::unique_ptr<Record> ReadRecordFromOwnedBuffer(const perf_event_attr& attr,
+                                                  uint32_t type, const char* p);
+
+// Read records from the buffer pointed by [buf]. None of the records own
+// the buffer.
+std::vector<std::unique_ptr<Record>> ReadRecordsFromBuffer(
+    const perf_event_attr& attr, const char* buf, size_t buf_size);
+
+// RecordCache is a cache used when receiving records from the kernel.
+// It sorts received records based on type and timestamp, and pops records
+// in sorted order. Records from the kernel need to be sorted because
+// records may come from different cpus at the same time, and it is affected
+// by the order in which we collect records from different cpus.
+// RecordCache pushes records and pops sorted record online. It uses two checks
+// to help ensure that records are popped in order. Each time we pop a record A,
+// it is the earliest record among all records in the cache. In addition, we
+// have checks for min_cache_size and min_time_diff. For min_cache_size check,
+// we check if the cache size >= min_cache_size, which is based on the
+// assumption that if we have received (min_cache_size - 1) records after
+// record A, we are not likely to receive a record earlier than A. For
+// min_time_diff check, we check if record A is generated min_time_diff ns
+// earlier than the latest record, which is based on the assumption that if we
+// have received a record for time t, we are not likely to receive a record for
+// time (t - min_time_diff) or earlier.
+class RecordCache {
+ public:
+  explicit RecordCache(bool has_timestamp, size_t min_cache_size = 1000u,
+                       uint64_t min_time_diff_in_ns = 1000000u);
+  // Deletes the records still in the cache.
+  ~RecordCache();
+  // Add one record, or a vector of records, to the cache.
+  void Push(std::unique_ptr<Record> record);
+  void Push(std::vector<std::unique_ptr<Record>> records);
+  // Returns the earliest record if the min_cache_size and min_time_diff
+  // checks allow releasing it, otherwise nullptr.
+  std::unique_ptr<Record> Pop();
+  // Returns all cached records, earliest first, and empties the cache.
+  std::vector<std::unique_ptr<Record>> PopAll();
+  // Returns the earliest record without the checks done by Pop(), or nullptr
+  // if the cache is empty.
+  std::unique_ptr<Record> ForcedPop();
+
+ private:
+  // A cached record together with the order in which it entered the cache.
+  struct RecordWithSeq {
+    uint32_t seq;
+    Record* record;
+
+    RecordWithSeq(uint32_t seq, Record* record) : seq(seq), record(record) {}
+    bool IsHappensBefore(const RecordWithSeq& other) const;
+  };
+
+  // Orders the priority queue so that the earliest record is at the top.
+  struct RecordComparator {
+    bool operator()(const RecordWithSeq& r1, const RecordWithSeq& r2);
+  };
+
+  bool has_timestamp_;
+  size_t min_cache_size_;
+  uint64_t min_time_diff_in_ns_;
+  uint64_t last_time_;  // Largest timestamp pushed so far.
+  uint32_t cur_seq_;    // Sequence number given to the next pushed record.
+  std::priority_queue<RecordWithSeq, std::vector<RecordWithSeq>,
+                      RecordComparator> queue_;
+};
+
+#endif  // SIMPLE_PERF_RECORD_H_
diff --git a/simpleperf/record_equal_test.h b/simpleperf/record_equal_test.h
new file mode 100644
index 0000000..bf0568e
--- /dev/null
+++ b/simpleperf/record_equal_test.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Asserts that two MmapRecords agree on the first sizeof(*data) bytes that
+// their `data` members point to, and on their filenames (compared as C
+// strings).
+static void CheckMmapRecordDataEqual(const MmapRecord& r1, const MmapRecord& r2) {
+  ASSERT_EQ(0, memcmp(r1.data, r2.data, sizeof(*r1.data)));
+  ASSERT_STREQ(r1.filename, r2.filename);
+}
+
+// Asserts that two CommRecords agree on the first sizeof(*data) bytes that
+// their `data` members point to, and on their comm strings (compared as C
+// strings).
+static void CheckCommRecordDataEqual(const CommRecord& r1, const CommRecord& r2) {
+  ASSERT_EQ(0, memcmp(r1.data, r2.data, sizeof(*r1.data)));
+  ASSERT_STREQ(r1.comm, r2.comm);
+}
+
+// Asserts that two BuildIdRecords have the same pid, build id and filename
+// (the filename is compared as a C string).
+static void CheckBuildIdRecordDataEqual(const BuildIdRecord& r1, const BuildIdRecord& r2) {
+  ASSERT_EQ(r1.pid, r2.pid);
+  ASSERT_EQ(r1.build_id, r2.build_id);
+  ASSERT_STREQ(r1.filename, r2.filename);
+}
+
+// Asserts that two records match: first the parts common to every record
+// (type, misc, size and the raw bytes of sample_id), then, for MMAP, COMM and
+// BUILD_ID records, the type-specific payload. Records of any other type are
+// only compared on the common parts.
+static void CheckRecordEqual(const Record& r1, const Record& r2) {
+  ASSERT_EQ(r1.type(), r2.type());
+  ASSERT_EQ(r1.misc(), r2.misc());
+  ASSERT_EQ(r1.size(), r2.size());
+  ASSERT_EQ(0, memcmp(&r1.sample_id, &r2.sample_id, sizeof(r1.sample_id)));
+
+  // The types are known to be equal at this point, so dispatch on r1 only.
+  const auto record_type = r1.type();
+  if (record_type == PERF_RECORD_MMAP) {
+    CheckMmapRecordDataEqual(static_cast<const MmapRecord&>(r1),
+                             static_cast<const MmapRecord&>(r2));
+    return;
+  }
+  if (record_type == PERF_RECORD_COMM) {
+    CheckCommRecordDataEqual(static_cast<const CommRecord&>(r1),
+                             static_cast<const CommRecord&>(r2));
+    return;
+  }
+  if (record_type == PERF_RECORD_BUILD_ID) {
+    CheckBuildIdRecordDataEqual(static_cast<const BuildIdRecord&>(r1),
+                                static_cast<const BuildIdRecord&>(r2));
+  }
+}
diff --git a/simpleperf/record_file.h b/simpleperf/record_file.h
new file mode 100644
index 0000000..89e66e5
--- /dev/null
+++ b/simpleperf/record_file.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_RECORD_FILE_H_
+#define SIMPLE_PERF_RECORD_FILE_H_
+
+#include <stdio.h>
+
+#include <functional>
+#include <map>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include <android-base/macros.h>
+
+#include "perf_event.h"
+#include "record.h"
+#include "record_file_format.h"
+
+// Pairs a perf_event_attr with the event ids that belong to it.
+// `attr` is a plain pointer: RecordFileReader::AttrSection() points it at an
+// attr stored inside the reader, so it must not outlive that storage.
+struct AttrWithId {
+  const perf_event_attr* attr;
+  std::vector<uint64_t> ids;
+};
+
+// RecordFileWriter writes to a perf record file, like perf.data.
+class RecordFileWriter {
+ public:
+  // Removes any existing file at `filename`, then creates and opens it.
+  // Returns nullptr (after logging) if either step fails.
+  static std::unique_ptr<RecordFileWriter> CreateInstance(const std::string& filename);
+
+  ~RecordFileWriter();
+
+  bool WriteAttrSection(const std::vector<AttrWithId>& attr_ids);
+  bool WriteRecord(const Record& record);
+  bool SortDataSection();
+
+  // NOTE(review): the tests call WriteFeatureHeader(n) once and then n
+  // Write*Feature() functions; presumably that order is required -- confirm.
+  bool WriteFeatureHeader(size_t feature_count);
+  bool WriteBuildIdFeature(const std::vector<BuildIdRecord>& build_id_records);
+  bool WriteFeatureString(int feature, const std::string& s);
+  bool WriteCmdlineFeature(const std::vector<std::string>& cmdline);
+  bool WriteBranchStackFeature();
+
+  // Normally, Close() should be called after writing. But if something
+  // wrong happens and we need to finish in advance, the destructor
+  // will take care of calling Close().
+  bool Close();
+
+ private:
+  // Construction goes through CreateInstance(), which supplies the open FILE*.
+  RecordFileWriter(const std::string& filename, FILE* fp);
+  void GetHitModulesInBuffer(const char* p, const char* end,
+                             std::vector<std::string>* hit_kernel_modules,
+                             std::vector<std::string>* hit_user_files);
+  bool WriteFileHeader();
+  bool WriteData(const void* buf, size_t len);
+  bool Write(const void* buf, size_t len);
+  std::unique_ptr<Record> ReadRecordFromFile(FILE* fp, std::vector<char>& buf);
+  bool WriteRecordToFile(FILE* fp, std::unique_ptr<Record> r);
+  bool SeekFileEnd(uint64_t* file_end);
+  bool WriteFeatureBegin(uint64_t* start_offset);
+  bool WriteFeatureEnd(int feature, uint64_t start_offset);
+
+  const std::string filename_;
+  FILE* record_fp_;  // nullptr once the file has been closed (checked by the destructor).
+
+  perf_event_attr event_attr_;
+  // Section bookkeeping; all four start at 0 in the constructor.
+  uint64_t attr_section_offset_;
+  uint64_t attr_section_size_;
+  uint64_t data_section_offset_;
+  uint64_t data_section_size_;
+
+  std::vector<int> features_;
+  int feature_count_;
+  int current_feature_index_;
+
+  DISALLOW_COPY_AND_ASSIGN(RecordFileWriter);
+};
+
+// RecordFileReader reads contents from a perf record file, like perf.data.
+class RecordFileReader {
+ public:
+  // Opens `filename` and reads the file header, the attr section and the
+  // feature section descriptors. Returns nullptr if the file can't be opened
+  // or any of those reads fails.
+  static std::unique_ptr<RecordFileReader> CreateInstance(const std::string& filename);
+
+  // Closes the file if Close() hasn't been called yet.
+  ~RecordFileReader();
+
+  const PerfFileFormat::FileHeader& FileHeader() const {
+    return header_;
+  }
+
+  // Returns one entry per attr in the file. The `attr` pointers refer to
+  // storage owned by this reader, so they are only valid while it is alive.
+  std::vector<AttrWithId> AttrSection() const {
+    std::vector<AttrWithId> result(file_attrs_.size());
+    for (size_t i = 0; i < file_attrs_.size(); ++i) {
+      result[i].attr = &file_attrs_[i].attr;
+      result[i].ids = event_ids_for_file_attrs_[i];
+    }
+    return result;
+  }
+
+  // Maps feature id -> location of that feature's section in the file.
+  const std::map<int, PerfFileFormat::SectionDesc>& FeatureSectionDescriptors() const {
+    return feature_section_descriptors_;
+  }
+  bool HasFeature(int feature) const {
+    return feature_section_descriptors_.find(feature) != feature_section_descriptors_.end();
+  }
+  // Copies the raw bytes of a feature section into *data. Returns false if the
+  // file has no such feature or the section can't be read.
+  bool ReadFeatureSection(int feature, std::vector<char>* data);
+  // If sorted is true, sort records before passing them to callback function.
+  // Reading stops and false is returned as soon as the callback returns false.
+  bool ReadDataSection(const std::function<bool(std::unique_ptr<Record>)>& callback,
+                       bool sorted = true);
+  // The three readers below return an empty result when the feature section
+  // can't be read.
+  std::vector<std::string> ReadCmdlineFeature();
+  std::vector<BuildIdRecord> ReadBuildIdFeature();
+  std::string ReadFeatureString(int feature);
+  bool Close();
+
+  // For testing only.
+  std::vector<std::unique_ptr<Record>> DataSection();
+
+ private:
+  // Construction goes through CreateInstance(), which supplies the open FILE*.
+  RecordFileReader(const std::string& filename, FILE* fp);
+  bool ReadHeader();
+  bool ReadAttrSection();
+  bool ReadIdsForAttr(const PerfFileFormat::FileAttr& attr, std::vector<uint64_t>* ids);
+  bool ReadFeatureSectionDescriptors();
+  // Reads the next record at the current file position and adds the number of
+  // bytes consumed to *nbytes_read.
+  std::unique_ptr<Record> ReadRecord(size_t* nbytes_read);
+  bool Read(void* buf, size_t len);
+
+  const std::string filename_;
+  FILE* record_fp_;  // Set to nullptr by Close().
+
+  PerfFileFormat::FileHeader header_;
+  std::vector<PerfFileFormat::FileAttr> file_attrs_;
+  // event_ids_for_file_attrs_[i] holds the ids read for file_attrs_[i].
+  std::vector<std::vector<uint64_t>> event_ids_for_file_attrs_;
+  // Values point into file_attrs_.
+  std::unordered_map<uint64_t, perf_event_attr*> event_id_to_attr_map_;
+  std::map<int, PerfFileFormat::SectionDesc> feature_section_descriptors_;
+
+  // Where to find the event id inside a record; only computed when the file
+  // has more than one attr. The first is a byte offset from the start of a
+  // sample record, the second a byte offset back from the end of any other
+  // record.
+  size_t event_id_pos_in_sample_records_;
+  size_t event_id_reverse_pos_in_non_sample_records_;
+
+  DISALLOW_COPY_AND_ASSIGN(RecordFileReader);
+};
+
+#endif  // SIMPLE_PERF_RECORD_FILE_H_
diff --git a/simpleperf/record_file_format.h b/simpleperf/record_file_format.h
new file mode 100644
index 0000000..da6434b
--- /dev/null
+++ b/simpleperf/record_file_format.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_RECORD_FILE_FORMAT_H_
+#define SIMPLE_PERF_RECORD_FILE_FORMAT_H_
+
+#include "perf_event.h"
+
+// The file structure of perf.data:
+//    file_header
+//    id_section
+//    attr section
+//    data section
+//    feature section
+//
+//  The feature section has the following structure:
+//    a section descriptor array, in which each element gives the offset and size of the
+//    data section of one feature.
+//    data section of feature 1
+//    data section of feature 2
+//    ....
+
+// On-disk layout definitions for perf record files.
+namespace PerfFileFormat {
+
+// Feature ids. A feature's id is also its bit index in FileHeader::features
+// (bit j of byte i stands for feature i * 8 + j).
+enum {
+  FEAT_RESERVED = 0,
+  FEAT_FIRST_FEATURE = 1,
+  FEAT_TRACING_DATA = 1,
+  FEAT_BUILD_ID,
+  FEAT_HOSTNAME,
+  FEAT_OSRELEASE,
+  FEAT_VERSION,
+  FEAT_ARCH,
+  FEAT_NRCPUS,
+  FEAT_CPUDESC,
+  FEAT_CPUID,
+  FEAT_TOTAL_MEM,
+  FEAT_CMDLINE,
+  FEAT_EVENT_DESC,
+  FEAT_CPU_TOPOLOGY,
+  FEAT_NUMA_TOPOLOGY,
+  FEAT_BRANCH_STACK,
+  FEAT_PMU_MAPPINGS,
+  FEAT_GROUP_DESC,
+  FEAT_LAST_FEATURE,
+  FEAT_MAX_NUM = 256,  // Number of bits in FileHeader::features.
+};
+
+// Location of a section in the file: `offset` is an absolute file offset (the
+// reader seeks to it with SEEK_SET) and `size` is the section length in bytes.
+struct SectionDesc {
+  uint64_t offset;
+  uint64_t size;
+};
+
+// NOTE(review): 8 characters plus a terminating NUL, while FileHeader::magic
+// holds 8 bytes -- presumably the NUL is not stored in the file; confirm
+// against the writer.
+constexpr char PERF_MAGIC[] = "PERFILE2";
+
+// The structure at the very start of the file; the reader loads it with a
+// single raw read of sizeof(FileHeader) bytes.
+struct FileHeader {
+  char magic[8];
+  uint64_t header_size;
+  uint64_t attr_size;   // Size in bytes of one entry in the attr section.
+  SectionDesc attrs;    // The attr section (attrs.size / attr_size entries).
+  SectionDesc data;     // The data section; the feature section follows it.
+  SectionDesc event_types;
+  unsigned char features[FEAT_MAX_NUM / 8];  // Bitmap of the features present.
+};
+
+// One entry of the attr section: a perf_event_attr followed by the location
+// of the uint64_t event ids that belong to it.
+struct FileAttr {
+  perf_event_attr attr;
+  SectionDesc ids;
+};
+
+}  // namespace PerfFileFormat
+
+#endif  // SIMPLE_PERF_RECORD_FILE_FORMAT_H_
diff --git a/simpleperf/record_file_reader.cpp b/simpleperf/record_file_reader.cpp
new file mode 100644
index 0000000..13ae6ae
--- /dev/null
+++ b/simpleperf/record_file_reader.cpp
@@ -0,0 +1,361 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "record_file.h"
+
+#include <fcntl.h>
+#include <string.h>
+#include <set>
+#include <vector>
+
+#include <android-base/logging.h>
+
+#include "event_attr.h"
+#include "record.h"
+#include "utils.h"
+
+using namespace PerfFileFormat;
+
+// Opens `filename` for binary reading and loads the header, the attr section
+// and the feature section descriptors. Returns nullptr if the file can't be
+// opened or any of the three loads fails; in the latter case the reader's
+// destructor closes the file.
+std::unique_ptr<RecordFileReader> RecordFileReader::CreateInstance(const std::string& filename) {
+  const std::string open_mode = std::string("rb") + CLOSE_ON_EXEC_MODE;
+  FILE* file = fopen(filename.c_str(), open_mode.c_str());
+  if (file == nullptr) {
+    PLOG(ERROR) << "failed to open record file '" << filename << "'";
+    return nullptr;
+  }
+  std::unique_ptr<RecordFileReader> reader(new RecordFileReader(filename, file));
+  // Evaluated left to right; the first failure skips the remaining steps.
+  const bool loaded = reader->ReadHeader() && reader->ReadAttrSection() &&
+                      reader->ReadFeatureSectionDescriptors();
+  if (!loaded) {
+    return nullptr;
+  }
+  return reader;
+}
+
+// Stores the file name and the already opened FILE*. Both event id positions
+// start at 0; ReadAttrSection() computes them when the file has several attrs.
+RecordFileReader::RecordFileReader(const std::string& filename, FILE* fp)
+    : filename_(filename),
+      record_fp_(fp),
+      event_id_pos_in_sample_records_(0),
+      event_id_reverse_pos_in_non_sample_records_(0) {}
+
+// Closes the file unless Close() has already been called.
+RecordFileReader::~RecordFileReader() {
+  if (record_fp_ == nullptr) {
+    return;
+  }
+  Close();
+}
+
+// Closes the record file. Returns false (after logging) if fclose() reports
+// an error. Calling Close() again on an already closed reader is a no-op that
+// returns true, instead of handing a null FILE* to fclose().
+bool RecordFileReader::Close() {
+  if (record_fp_ == nullptr) {
+    return true;
+  }
+  bool result = true;
+  if (fclose(record_fp_) != 0) {
+    PLOG(ERROR) << "failed to close record file '" << filename_ << "'";
+    result = false;
+  }
+  // The stream is gone whether or not fclose() succeeded.
+  record_fp_ = nullptr;
+  return result;
+}
+
+// Reads the fixed-size file header from the current file position (the start
+// of the file when called from CreateInstance()) and rejects files whose
+// magic doesn't match PERF_MAGIC, so that the offsets and sizes in the header
+// are only trusted for files of the expected format.
+bool RecordFileReader::ReadHeader() {
+  if (!Read(&header_, sizeof(header_))) {
+    return false;
+  }
+  // header_.magic has 8 bytes, so PERF_MAGIC's terminating NUL isn't compared.
+  if (memcmp(header_.magic, PERF_MAGIC, sizeof(header_.magic)) != 0) {
+    LOG(ERROR) << filename_ << " is not a valid perf record file";
+    return false;
+  }
+  return true;
+}
+
+// Loads the attr section described by the file header:
+//   - fills file_attrs_ with one FileAttr per on-disk entry,
+//   - when there are several attrs, computes where the event id is stored in
+//     records,
+//   - reads the event ids of every attr and maps each id to its attr.
+// Returns false if the section is empty or malformed, or if reading fails.
+bool RecordFileReader::ReadAttrSection() {
+  const size_t section_desc_size = sizeof(SectionDesc);
+  // Every on-disk entry ends with a SectionDesc. Rejecting smaller sizes also
+  // rules out attr_size == 0, which would divide by zero below, and keeps
+  // perf_event_attr_size from wrapping around.
+  if (header_.attr_size < section_desc_size) {
+    LOG(ERROR) << "invalid attr size (" << header_.attr_size << ") in " << filename_;
+    return false;
+  }
+  size_t attr_count = header_.attrs.size / header_.attr_size;
+  if (header_.attr_size != sizeof(FileAttr)) {
+    LOG(DEBUG) << "attr size (" << header_.attr_size << ") in " << filename_
+                 << " doesn't match expected size (" << sizeof(FileAttr) << ")";
+  }
+  if (attr_count == 0) {
+    LOG(ERROR) << "no attr in file " << filename_;
+    return false;
+  }
+  if (fseek(record_fp_, header_.attrs.offset, SEEK_SET) != 0) {
+    PLOG(ERROR) << "fseek() failed";
+    return false;
+  }
+  const size_t perf_event_attr_size = header_.attr_size - section_desc_size;
+  std::vector<char> buf(header_.attr_size);
+  for (size_t i = 0; i < attr_count; ++i) {
+    if (!Read(buf.data(), buf.size())) {
+      return false;
+    }
+    // The size of perf_event_attr is changing between different linux kernel versions.
+    // Make sure we copy correct data to memory: take at most sizeof(attr.attr)
+    // bytes of the on-disk attr and leave the rest zeroed.
+    FileAttr attr;
+    memset(&attr, 0, sizeof(attr));
+    memcpy(&attr.attr, &buf[0], std::min(sizeof(attr.attr), perf_event_attr_size));
+    // The ids descriptor is the last section_desc_size bytes of the entry.
+    memcpy(&attr.ids, buf.data() + perf_event_attr_size, section_desc_size);
+    file_attrs_.push_back(attr);
+  }
+  if (file_attrs_.size() > 1) {
+    std::vector<perf_event_attr> attrs;
+    for (const auto& file_attr : file_attrs_) {
+      attrs.push_back(file_attr.attr);
+    }
+    if (!GetCommonEventIdPositionsForAttrs(attrs, &event_id_pos_in_sample_records_,
+                                               &event_id_reverse_pos_in_non_sample_records_)) {
+      return false;
+    }
+  }
+  // file_attrs_ is complete now, so pointers to its elements stay valid.
+  for (size_t i = 0; i < file_attrs_.size(); ++i) {
+    std::vector<uint64_t> ids;
+    if (!ReadIdsForAttr(file_attrs_[i], &ids)) {
+      return false;
+    }
+    event_ids_for_file_attrs_.push_back(ids);
+    for (auto id : ids) {
+      event_id_to_attr_map_[id] = &file_attrs_[i].attr;
+    }
+  }
+  return true;
+}
+
+// Fills feature_section_descriptors_. The descriptor array starts right after
+// the data section and holds one SectionDesc, in ascending feature id order,
+// for every bit set in the header's feature bitmap.
+bool RecordFileReader::ReadFeatureSectionDescriptors() {
+  constexpr size_t kBitsPerByte = 8;
+  std::vector<int> enabled_features;
+  for (size_t bit = 0; bit < sizeof(header_.features) * kBitsPerByte; ++bit) {
+    const unsigned char bitmap_byte = header_.features[bit / kBitsPerByte];
+    if (bitmap_byte & (1 << (bit % kBitsPerByte))) {
+      enabled_features.push_back(bit);
+    }
+  }
+
+  const uint64_t descriptors_offset = header_.data.offset + header_.data.size;
+  if (fseek(record_fp_, descriptors_offset, SEEK_SET) != 0) {
+    PLOG(ERROR) << "fseek() failed";
+    return false;
+  }
+  for (int feature : enabled_features) {
+    SectionDesc desc;
+    if (!Read(&desc, sizeof(desc))) {
+      return false;
+    }
+    feature_section_descriptors_.emplace(feature, desc);
+  }
+  return true;
+}
+
+// Reads the event ids that `attr.ids` points at into *ids.
+// Only whole uint64_t ids are read: the byte count is derived from the number
+// of ids, not taken from attr.ids.size, so a size that isn't a multiple of 8
+// can't write past the end of *ids. An attr without ids yields an empty
+// vector without touching the file.
+bool RecordFileReader::ReadIdsForAttr(const FileAttr& attr, std::vector<uint64_t>* ids) {
+  size_t id_count = attr.ids.size / sizeof(uint64_t);
+  if (id_count == 0) {
+    ids->clear();
+    return true;
+  }
+  if (fseek(record_fp_, attr.ids.offset, SEEK_SET) != 0) {
+    PLOG(ERROR) << "fseek() failed";
+    return false;
+  }
+  ids->resize(id_count);
+  return Read(ids->data(), id_count * sizeof(uint64_t));
+}
+
+// Reads every record of the data section and passes it to `callback`.
+// With `sorted` set, records go through a RecordCache first, so the callback
+// sees them in the order the cache releases them; whatever is still cached at
+// the end of the section is flushed afterwards. Returns false if seeking or
+// reading fails, or as soon as the callback returns false.
+bool RecordFileReader::ReadDataSection(
+    const std::function<bool(std::unique_ptr<Record>)>& callback, bool sorted) {
+  if (fseek(record_fp_, header_.data.offset, SEEK_SET) != 0) {
+    PLOG(ERROR) << "fseek() failed";
+    return false;
+  }
+
+  // Timestamps are only usable if every attr supports them; stop checking at
+  // the first attr that doesn't.
+  bool has_timestamp = true;
+  for (auto it = file_attrs_.begin(); has_timestamp && it != file_attrs_.end(); ++it) {
+    has_timestamp = IsTimestampSupported(it->attr);
+  }
+
+  RecordCache cache(has_timestamp);
+  size_t nbytes_read = 0;
+  while (nbytes_read < header_.data.size) {
+    std::unique_ptr<Record> record = ReadRecord(&nbytes_read);
+    if (record == nullptr) {
+      return false;
+    }
+    if (sorted) {
+      cache.Push(std::move(record));
+      record = cache.Pop();
+      if (record == nullptr) {
+        // The cache isn't ready to release a record yet.
+        continue;
+      }
+    }
+    if (!callback(std::move(record))) {
+      return false;
+    }
+  }
+
+  // Flush what is still cached (nothing when sorted is false).
+  std::vector<std::unique_ptr<Record>> remaining = cache.PopAll();
+  for (auto& cached_record : remaining) {
+    if (!callback(std::move(cached_record))) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Reads one record from the current file position and adds the number of
+// bytes consumed to *nbytes_read.
+//
+// A run of SIMPLE_PERF_RECORD_SPLIT records, which must be followed by a
+// SIMPLE_PERF_RECORD_SPLIT_END record, is reassembled into the single record
+// whose bytes they carry. When the file has several attrs, the event id stored
+// in the record selects the attr used for parsing; otherwise, or when the id
+// is absent or unknown, the first attr is used.
+//
+// Returns nullptr if reading fails or a record size is inconsistent.
+std::unique_ptr<Record> RecordFileReader::ReadRecord(size_t* nbytes_read) {
+  char header_buf[Record::header_size()];
+  if (!Read(header_buf, Record::header_size())) {
+    return nullptr;
+  }
+  RecordHeader header(header_buf);
+  std::unique_ptr<char[]> p;
+  if (header.type == SIMPLE_PERF_RECORD_SPLIT) {
+    // Read until meeting a RECORD_SPLIT_END record.
+    std::vector<char> buf;
+    while (header.type == SIMPLE_PERF_RECORD_SPLIT) {
+      // A size smaller than the header would wrap around in the subtraction.
+      if (header.size < Record::header_size()) {
+        LOG(ERROR) << "invalid SPLIT record size " << header.size << " in " << filename_;
+        return nullptr;
+      }
+      size_t bytes_to_read = header.size - Record::header_size();
+      if (bytes_to_read > 0) {
+        size_t cur_size = buf.size();
+        buf.resize(cur_size + bytes_to_read);
+        if (!Read(&buf[cur_size], bytes_to_read)) {
+          return nullptr;
+        }
+      }
+      *nbytes_read += header.size;
+      if (!Read(header_buf, Record::header_size())) {
+        return nullptr;
+      }
+      header = RecordHeader(header_buf);
+    }
+    if (header.type != SIMPLE_PERF_RECORD_SPLIT_END) {
+      LOG(ERROR) << "SPLIT records are not followed by a SPLIT_END record.";
+      return nullptr;
+    }
+    *nbytes_read += header.size;
+    // The reassembled bytes start with the header of the original record.
+    if (buf.size() < Record::header_size()) {
+      LOG(ERROR) << "SPLIT records in " << filename_ << " don't contain a record header";
+      return nullptr;
+    }
+    header = RecordHeader(buf.data());
+    // The copy below must fit into the header.size bytes allocated for it.
+    if (header.size < buf.size()) {
+      LOG(ERROR) << "SPLIT records in " << filename_ << " carry " << buf.size()
+                 << " bytes for a record of size " << header.size;
+      return nullptr;
+    }
+    // Zero-filled, so a short payload never exposes uninitialized bytes.
+    p.reset(new char[header.size]());
+    memcpy(p.get(), buf.data(), buf.size());
+  } else {
+    // The header is copied into the buffer, so the buffer must be able to hold it.
+    if (header.size < Record::header_size()) {
+      LOG(ERROR) << "invalid record size " << header.size << " in " << filename_;
+      return nullptr;
+    }
+    p.reset(new char[header.size]);
+    memcpy(p.get(), header_buf, Record::header_size());
+    if (header.size > Record::header_size()) {
+      if (!Read(p.get() + Record::header_size(), header.size - Record::header_size())) {
+        return nullptr;
+      }
+    }
+    *nbytes_read += header.size;
+  }
+
+  const perf_event_attr* attr = &file_attrs_[0].attr;
+  if (file_attrs_.size() > 1 && header.type < PERF_RECORD_USER_DEFINED_TYPE_START) {
+    bool has_event_id = false;
+    uint64_t event_id = 0;
+    // The id is copied out with memcpy() because nothing guarantees that it
+    // is suitably aligned inside the record buffer.
+    if (header.type == PERF_RECORD_SAMPLE) {
+      if (header.size > event_id_pos_in_sample_records_ + sizeof(uint64_t)) {
+        has_event_id = true;
+        memcpy(&event_id, p.get() + event_id_pos_in_sample_records_, sizeof(event_id));
+      }
+    } else {
+      if (header.size > event_id_reverse_pos_in_non_sample_records_) {
+        has_event_id = true;
+        memcpy(&event_id, p.get() + header.size - event_id_reverse_pos_in_non_sample_records_,
+               sizeof(event_id));
+      }
+    }
+    if (has_event_id) {
+      auto it = event_id_to_attr_map_.find(event_id);
+      if (it != event_id_to_attr_map_.end()) {
+        attr = it->second;
+      }
+    }
+  }
+  // Ownership of the buffer passes to the record being created.
+  return ReadRecordFromOwnedBuffer(*attr, header.type, p.release());
+}
+
+// Reads exactly `len` bytes from the record file into `buf`.
+// A request for zero bytes succeeds without touching the file: fread() returns
+// 0 when the item size is 0, which would otherwise be mistaken for a failure.
+// NOTE(review): a failed read is logged with FATAL severity, so callers
+// presumably never see the false return -- confirm that aborting on a
+// truncated file is intended.
+bool RecordFileReader::Read(void* buf, size_t len) {
+  if (len != 0 && fread(buf, len, 1, record_fp_) != 1) {
+    PLOG(FATAL) << "failed to read file " << filename_;
+    return false;
+  }
+  return true;
+}
+
+// Copies the raw bytes of the section of `feature` into *data.
+// Returns false if the file has no such feature, or if seeking or reading
+// fails. An empty section succeeds with empty *data and no file access.
+bool RecordFileReader::ReadFeatureSection(int feature, std::vector<char>* data) {
+  const std::map<int, SectionDesc>& descriptors = FeatureSectionDescriptors();
+  const auto found = descriptors.find(feature);
+  if (found == descriptors.end()) {
+    return false;
+  }
+  const SectionDesc section = found->second;
+  data->resize(section.size);
+  if (section.size == 0) {
+    return true;
+  }
+  if (fseek(record_fp_, section.offset, SEEK_SET) != 0) {
+    PLOG(ERROR) << "fseek() failed";
+    return false;
+  }
+  return Read(data->data(), data->size());
+}
+
+// Returns the command line stored in the FEAT_CMDLINE section: a uint32_t
+// argument count followed, for each argument, by a uint32_t length and that
+// many bytes of string data.
+// Returns an empty vector if the section is missing or empty. Every field is
+// bounds-checked before it is read, and an argument never extends past its
+// declared length even if its NUL terminator is missing; a truncated section
+// aborts through CHECK.
+std::vector<std::string> RecordFileReader::ReadCmdlineFeature() {
+  std::vector<char> buf;
+  if (!ReadFeatureSection(FEAT_CMDLINE, &buf) || buf.empty()) {
+    return std::vector<std::string>();
+  }
+  const char* p = buf.data();
+  const char* end = buf.data() + buf.size();
+  std::vector<std::string> cmdline;
+  CHECK_GE(static_cast<size_t>(end - p), sizeof(uint32_t));
+  uint32_t arg_count;
+  MoveFromBinaryFormat(arg_count, p);
+  for (size_t i = 0; i < arg_count; ++i) {
+    CHECK_GE(static_cast<size_t>(end - p), sizeof(uint32_t));
+    uint32_t len;
+    MoveFromBinaryFormat(len, p);
+    // Compare lengths rather than pointers: p + len could point far outside buf.
+    CHECK_LE(static_cast<size_t>(len), static_cast<size_t>(end - p));
+    // The argument ends at its first NUL, or after len bytes if there is none.
+    const void* nul = memchr(p, '\0', len);
+    size_t arg_len = (nul != nullptr) ? static_cast<size_t>(static_cast<const char*>(nul) - p)
+                                      : static_cast<size_t>(len);
+    cmdline.emplace_back(p, arg_len);
+    p += len;
+  }
+  return cmdline;
+}
+
+// Returns the build id records stored back to back in the FEAT_BUILD_ID
+// section, or an empty vector if the section can't be read.
+// NOTE(review): each record is constructed from `p` before its size is checked
+// against `end`, so a truncated section is presumably parsed past the end of
+// `buf` before the CHECK fires -- confirm against BuildIdRecord's constructor.
+std::vector<BuildIdRecord> RecordFileReader::ReadBuildIdFeature() {
+  std::vector<char> buf;
+  if (!ReadFeatureSection(FEAT_BUILD_ID, &buf)) {
+    return std::vector<BuildIdRecord>();
+  }
+  const char* p = buf.data();
+  const char* end = buf.data() + buf.size();
+  std::vector<BuildIdRecord> result;
+  while (p < end) {
+    BuildIdRecord record(p);
+    // Set type explicitly as the perf.data produced by perf doesn't set it.
+    record.SetTypeAndMisc(PERF_RECORD_BUILD_ID, record.misc());
+    CHECK_LE(p + record.size(), end);
+    // Advance before the record is moved from.
+    p += record.size();
+    result.push_back(std::move(record));
+  }
+  return result;
+}
+
+// Returns the string stored in the section of `feature`: a uint32_t length
+// followed by that many bytes of string data.
+// Returns an empty string if the section is missing or empty. The length
+// field is bounds-checked before it is read, and the result never extends past
+// the declared length even if the NUL terminator is missing; a truncated
+// section aborts through CHECK.
+std::string RecordFileReader::ReadFeatureString(int feature) {
+  std::vector<char> buf;
+  if (!ReadFeatureSection(feature, &buf) || buf.empty()) {
+    return std::string();
+  }
+  const char* p = buf.data();
+  const char* end = buf.data() + buf.size();
+  CHECK_GE(static_cast<size_t>(end - p), sizeof(uint32_t));
+  uint32_t len;
+  MoveFromBinaryFormat(len, p);
+  // Compare lengths rather than pointers: p + len could point far outside buf.
+  CHECK_LE(static_cast<size_t>(len), static_cast<size_t>(end - p));
+  // The string ends at its first NUL, or after len bytes if there is none.
+  const void* nul = memchr(p, '\0', len);
+  size_t str_len = (nul != nullptr) ? static_cast<size_t>(static_cast<const char*>(nul) - p)
+                                    : static_cast<size_t>(len);
+  return std::string(p, str_len);
+}
+
+// Collects every record of the data section, in the default (sorted) order.
+// The result of ReadDataSection() is ignored, so after a failure the records
+// gathered up to that point are returned.
+std::vector<std::unique_ptr<Record>> RecordFileReader::DataSection() {
+  std::vector<std::unique_ptr<Record>> records;
+  auto collect = [&records](std::unique_ptr<Record> record) {
+    records.push_back(std::move(record));
+    return true;
+  };
+  ReadDataSection(collect);
+  return records;
+}
diff --git a/simpleperf/record_file_test.cpp b/simpleperf/record_file_test.cpp
new file mode 100644
index 0000000..f67894d
--- /dev/null
+++ b/simpleperf/record_file_test.cpp
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <string.h>
+
+#include <memory>
+
+#include <android-base/test_utils.h>
+
+#include "environment.h"
+#include "event_attr.h"
+#include "event_type.h"
+#include "record.h"
+#include "record_file.h"
+
+#include "record_equal_test.h"
+
+using namespace PerfFileFormat;
+
+// Fixture that owns a temporary record file and the attrs written to it.
+class RecordFileTest : public ::testing::Test {
+ protected:
+  // Appends a default attr (with sample_id_all set) for `event_type_str` to
+  // attrs_, and a matching entry with one fake id to attr_ids_.
+  void AddEventType(const std::string& event_type_str) {
+    std::unique_ptr<EventTypeAndModifier> event_type_modifier = ParseEventType(event_type_str);
+    ASSERT_TRUE(event_type_modifier != nullptr);
+    std::unique_ptr<perf_event_attr> owned_attr(
+        new perf_event_attr(CreateDefaultPerfEventAttr(event_type_modifier->event_type)));
+    owned_attr->sample_id_all = 1;
+
+    AttrWithId entry;
+    entry.attr = owned_attr.get();
+    attrs_.push_back(std::move(owned_attr));
+    entry.ids.push_back(attrs_.size());  // Fake id: the 1-based index of the attr.
+    attr_ids_.push_back(entry);
+  }
+
+  TemporaryFile tmpfile_;
+  // Owns the attrs that the entries of attr_ids_ point to.
+  std::vector<std::unique_ptr<perf_event_attr>> attrs_;
+  std::vector<AttrWithId> attr_ids_;
+};
+
+// Round trip: writes one attr, one mmap record and one build id feature, then
+// reads the file back and checks that all three come out unchanged.
+TEST_F(RecordFileTest, smoke) {
+  // Write to a record file.
+  std::unique_ptr<RecordFileWriter> writer = RecordFileWriter::CreateInstance(tmpfile_.path);
+  ASSERT_TRUE(writer != nullptr);
+
+  // Write attr section.
+  AddEventType("cpu-cycles");
+  ASSERT_TRUE(writer->WriteAttrSection(attr_ids_));
+
+  // Write data section.
+  MmapRecord mmap_record(*(attr_ids_[0].attr), true, 1, 1, 0x1000, 0x2000,
+                         0x3000, "mmap_record_example", attr_ids_[0].ids[0]);
+  ASSERT_TRUE(writer->WriteRecord(mmap_record));
+
+  // Write feature section.
+  ASSERT_TRUE(writer->WriteFeatureHeader(1));
+  // Build id whose bytes are 0, 1, 2, ...
+  char p[BuildId::Size()];
+  for (size_t i = 0; i < BuildId::Size(); ++i) {
+    p[i] = i;
+  }
+  BuildId build_id(p);
+  std::vector<BuildIdRecord> build_id_records;
+  build_id_records.push_back(BuildIdRecord(false, getpid(), build_id, "init"));
+  ASSERT_TRUE(writer->WriteBuildIdFeature(build_id_records));
+  ASSERT_TRUE(writer->Close());
+
+  // Read from a record file.
+  std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmpfile_.path);
+  ASSERT_TRUE(reader != nullptr);
+  std::vector<AttrWithId> attrs = reader->AttrSection();
+  ASSERT_EQ(1u, attrs.size());
+  ASSERT_EQ(0, memcmp(attrs[0].attr, attr_ids_[0].attr, sizeof(perf_event_attr)));
+  ASSERT_EQ(attrs[0].ids, attr_ids_[0].ids);
+
+  // Read and check data section.
+  std::vector<std::unique_ptr<Record>> records = reader->DataSection();
+  ASSERT_EQ(1u, records.size());
+  CheckRecordEqual(mmap_record, *records[0]);
+
+  // Read and check feature section.
+  std::vector<BuildIdRecord> read_build_id_records = reader->ReadBuildIdFeature();
+  ASSERT_EQ(1u, read_build_id_records.size());
+  CheckRecordEqual(read_build_id_records[0], build_id_records[0]);
+
+  ASSERT_TRUE(reader->Close());
+}
+
+// Writes three mmap records that differ only in their last constructor
+// argument (2, 1, 3 -- presumably the timestamp) and checks that DataSection()
+// returns them ordered by it.
+TEST_F(RecordFileTest, records_sorted_by_time) {
+  // Write to a record file.
+  std::unique_ptr<RecordFileWriter> writer = RecordFileWriter::CreateInstance(tmpfile_.path);
+  ASSERT_TRUE(writer != nullptr);
+
+  // Write attr section.
+  AddEventType("cpu-cycles");
+  attrs_[0]->sample_id_all = 1;
+  attrs_[0]->sample_type |= PERF_SAMPLE_TIME;
+  ASSERT_TRUE(writer->WriteAttrSection(attr_ids_));
+
+  // Write data section.
+  MmapRecord r1(*(attr_ids_[0].attr), true, 1, 1, 0x100, 0x2000, 0x3000, "mmap_record1",
+                attr_ids_[0].ids[0], 2);
+  MmapRecord r2(*(attr_ids_[0].attr), true, 1, 1, 0x100, 0x2000, 0x3000, "mmap_record1",
+                attr_ids_[0].ids[0], 1);
+  MmapRecord r3(*(attr_ids_[0].attr), true, 1, 1, 0x100, 0x2000, 0x3000, "mmap_record1",
+                attr_ids_[0].ids[0], 3);
+  ASSERT_TRUE(writer->WriteRecord(r1));
+  ASSERT_TRUE(writer->WriteRecord(r2));
+  ASSERT_TRUE(writer->WriteRecord(r3));
+  ASSERT_TRUE(writer->Close());
+
+  // Read from a record file.
+  std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmpfile_.path);
+  ASSERT_TRUE(reader != nullptr);
+  std::vector<std::unique_ptr<Record>> records = reader->DataSection();
+  ASSERT_EQ(3u, records.size());
+  // Written as r1, r2, r3; expected back as r2, r1, r3.
+  CheckRecordEqual(r2, *records[0]);
+  CheckRecordEqual(r1, *records[1]);
+  CheckRecordEqual(r3, *records[2]);
+
+  ASSERT_TRUE(reader->Close());
+}
+
+// Writes an attr section with three attrs (and no records) and checks that
+// the reader returns the same attrs and ids in the same order.
+TEST_F(RecordFileTest, record_more_than_one_attr) {
+  // Write to a record file.
+  std::unique_ptr<RecordFileWriter> writer = RecordFileWriter::CreateInstance(tmpfile_.path);
+  ASSERT_TRUE(writer != nullptr);
+
+  // Write attr section.
+  AddEventType("cpu-cycles");
+  AddEventType("cpu-clock");
+  AddEventType("task-clock");
+  ASSERT_TRUE(writer->WriteAttrSection(attr_ids_));
+
+  ASSERT_TRUE(writer->Close());
+
+  // Read from a record file.
+  // The reader isn't closed explicitly; its destructor does that.
+  std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmpfile_.path);
+  ASSERT_TRUE(reader != nullptr);
+  std::vector<AttrWithId> attrs = reader->AttrSection();
+  ASSERT_EQ(3u, attrs.size());
+  for (size_t i = 0; i < attrs.size(); ++i) {
+    ASSERT_EQ(0, memcmp(attrs[i].attr, attr_ids_[i].attr, sizeof(perf_event_attr)));
+    ASSERT_EQ(attrs[i].ids, attr_ids_[i].ids);
+  }
+}
diff --git a/simpleperf/record_file_writer.cpp b/simpleperf/record_file_writer.cpp
new file mode 100644
index 0000000..94de4a7
--- /dev/null
+++ b/simpleperf/record_file_writer.cpp
@@ -0,0 +1,462 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "record_file.h"
+
+#include <fcntl.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include <android-base/file.h>
+#include <android-base/logging.h>
+
+#include "event_attr.h"
+#include "perf_event.h"
+#include "record.h"
+#include "utils.h"
+
+using namespace PerfFileFormat;
+
+std::unique_ptr<RecordFileWriter> RecordFileWriter::CreateInstance(const std::string& filename) {
+  // Start from a fresh file: remove any old perf.data to avoid file ownership problems.
+  std::string err;
+  if (!android::base::RemoveFileIfExists(filename, &err)) {
+    LOG(ERROR) << "failed to remove file " << filename << ": " << err;
+    return nullptr;
+  }
+  FILE* fp = fopen(filename.c_str(), "web+");  // w+: create/truncate, read+write; e: close-on-exec.
+  if (fp == nullptr) {
+    PLOG(ERROR) << "failed to open record file '" << filename << "'";
+    return nullptr;
+  }
+
+  return std::unique_ptr<RecordFileWriter>(new RecordFileWriter(filename, fp));  // Writer owns fp.
+}
+
+RecordFileWriter::RecordFileWriter(const std::string& filename, FILE* fp)
+    : filename_(filename),  // Used in error messages and to name the temporary sort files.
+      record_fp_(fp),  // Closed by Close(), which then resets it to nullptr.
+      attr_section_offset_(0),  // Section offsets and sizes are filled in as sections are written.
+      attr_section_size_(0),
+      data_section_offset_(0),
+      data_section_size_(0),
+      feature_count_(0),  // Set by WriteFeatureHeader().
+      current_feature_index_(0) {  // Index of the next feature descriptor slot to fill.
+}
+
+RecordFileWriter::~RecordFileWriter() {
+  if (record_fp_ != nullptr) {  // Still open: Close() has not been called yet.
+    Close();  // The result is ignored here.
+  }
+}
+
+bool RecordFileWriter::WriteAttrSection(const std::vector<AttrWithId>& attr_ids) {
+  if (attr_ids.empty()) {  // At least one attr is needed; attr_ids[0] is saved at the end.
+    return false;
+  }
+
+  // Skip the file header part; WriteFileHeader() fills it in when the file is closed.
+  if (fseek(record_fp_, sizeof(FileHeader), SEEK_SET) == -1) {
+    return false;
+  }
+
+  // Write id section: the ids of every attr, back to back, in attr order.
+  off_t id_section_offset = ftello(record_fp_);
+  if (id_section_offset == -1) {
+    return false;
+  }
+  for (auto& attr_id : attr_ids) {
+    if (!Write(attr_id.ids.data(), attr_id.ids.size() * sizeof(uint64_t))) {
+      return false;
+    }
+  }
+
+  // Write attr section: one FileAttr per attr, each pointing at its slice of the id section.
+  off_t attr_section_offset = ftello(record_fp_);
+  if (attr_section_offset == -1) {
+    return false;
+  }
+  for (auto& attr_id : attr_ids) {
+    FileAttr file_attr;
+    file_attr.attr = *attr_id.attr;
+    file_attr.ids.offset = id_section_offset;
+    file_attr.ids.size = attr_id.ids.size() * sizeof(uint64_t);
+    id_section_offset += file_attr.ids.size;  // Now the offset of the next attr's ids.
+    if (!Write(&file_attr, sizeof(file_attr))) {
+      return false;
+    }
+  }
+
+  off_t data_section_offset = ftello(record_fp_);  // Data section follows the attr section.
+  if (data_section_offset == -1) {
+    return false;
+  }
+
+  attr_section_offset_ = attr_section_offset;
+  attr_section_size_ = data_section_offset - attr_section_offset;
+  data_section_offset_ = data_section_offset;
+
+  // Save the first attr; ReadRecordFromFile() uses it to parse records read back from files.
+  event_attr_ = *attr_ids[0].attr;
+  return true;
+}
+
+bool RecordFileWriter::WriteRecord(const Record& record) {
+  // linux-tools-perf only accepts records with size <= 65535 bytes. To keep
+  // perf.data generated by simpleperf parsable by linux-tools-perf, split
+  // simpleperf custom records larger than that into a run of RECORD_SPLIT
+  // records, followed by a RECORD_SPLIT_END record. Returns false on write failure.
+  constexpr uint32_t RECORD_SIZE_LIMIT = 65535;
+  if (record.size() <= RECORD_SIZE_LIMIT) {
+    // Small enough to be written as is; report a failed write instead of claiming success.
+    return WriteData(record.Binary(), record.size());
+  }
+  CHECK_GT(record.type(), SIMPLE_PERF_RECORD_TYPE_START);  // Only custom records may be split.
+  const char* p = record.Binary();
+  uint32_t left_bytes = static_cast<uint32_t>(record.size());
+  RecordHeader header;
+  header.type = SIMPLE_PERF_RECORD_SPLIT;
+  char header_buf[Record::header_size()];
+  char* header_p;
+  while (left_bytes > 0) {
+    uint32_t bytes_to_write = std::min(RECORD_SIZE_LIMIT - Record::header_size(), left_bytes);
+    header.size = bytes_to_write + Record::header_size();  // Never exceeds RECORD_SIZE_LIMIT.
+    header_p = header_buf;  // Reset each time: MoveToBinaryFormat() presumably advances it.
+    header.MoveToBinaryFormat(header_p);
+    if (!WriteData(header_buf, Record::header_size())) {
+      return false;
+    }
+    if (!WriteData(p, bytes_to_write)) {
+      return false;
+    }
+    p += bytes_to_write;
+    left_bytes -= bytes_to_write;
+  }
+  header.type = SIMPLE_PERF_RECORD_SPLIT_END;  // Header-only record that terminates the run.
+  header.size = Record::header_size();
+  header_p = header_buf;
+  header.MoveToBinaryFormat(header_p);
+  return WriteData(header_buf, Record::header_size());
+}
+
+bool RecordFileWriter::WriteData(const void* buf, size_t len) {
+  const bool written = Write(buf, len);
+  if (written) {
+    data_section_size_ += len;  // Count only bytes that Write() reported as written.
+  }
+  return written;
+}
+
+bool RecordFileWriter::Write(const void* buf, size_t len) {
+  if (len != 0u && fwrite(buf, len, 1, record_fp_) != 1) {  // fwrite() returns 0 when len is 0.
+    PLOG(ERROR) << "failed to write to record file '" << filename_ << "'";
+    return false;
+  }
+  return true;  // Also reached for len == 0, which is a successful no-op.
+}
+
+std::unique_ptr<Record> RecordFileWriter::ReadRecordFromFile(FILE* fp, std::vector<char>& buf) {
+  if (buf.size() < sizeof(perf_event_header)) {  // Make room for at least the record header.
+    buf.resize(sizeof(perf_event_header));
+  }
+  auto pheader = reinterpret_cast<perf_event_header*>(buf.data());
+  if (fread(pheader, sizeof(*pheader), 1, fp) != 1) {  // Header first: it carries the size.
+    PLOG(ERROR) << "read failed";
+    return nullptr;
+  }
+  if (pheader->size > sizeof(*pheader)) {  // The record has a body after the header.
+    if (pheader->size > buf.size()) {
+      buf.resize(pheader->size);  // Grow only; callers reuse buf across calls.
+    }
+    pheader = reinterpret_cast<perf_event_header*>(buf.data());  // resize() may reallocate.
+    if (fread(pheader + 1, pheader->size - sizeof(*pheader), 1, fp) != 1) {
+      PLOG(ERROR) << "read failed";
+      return nullptr;
+    }
+  }
+  return ReadRecordFromBuffer(event_attr_, pheader->type, buf.data());
+}
+
+bool RecordFileWriter::WriteRecordToFile(FILE* fp, std::unique_ptr<Record> r) {
+  const bool ok = (fwrite(r->Binary(), r->size(), 1, fp) == 1);  // One item of r->size() bytes.
+  if (!ok) {
+    PLOG(ERROR) << "write failed";
+  }
+  return ok;
+}
+
+// SortDataSection() sorts records in data section in time order.
+// This method is suitable for the situation that there is only one buffer
+// between kernel and simpleperf for each cpu. The order of records in each
+// cpu buffer is already sorted, so we only need to merge records from different
+// cpu buffers.
+// 1. Create one temporary file for each cpu, and write records to different
+//    temporary files according to their cpu value.
+// 2. Use RecordCache to merge records from different temporary files.
+bool RecordFileWriter::SortDataSection() {
+  if (!IsTimestampSupported(event_attr_) || !IsCpuSupported(event_attr_)) {
+    // Omit the sort if either timestamp or cpu is not recorded.
+    return true;
+  }
+  struct CpuData {
+    std::string path;
+    FILE* fp;
+    std::vector<char> buf;
+    uint64_t data_size;
+
+    explicit CpuData(const std::string& path) : path(path), fp(nullptr), data_size(0) {
+      fp = fopen(path.c_str(), "web+");
+    }
+    ~CpuData() {
+      if (fp != nullptr) fclose(fp);  // fopen() may have failed; never fclose() a null stream.
+      unlink(path.c_str());
+    }
+  };
+  std::unordered_map<uint32_t, std::unique_ptr<CpuData>> cpu_map;  // cpu -> its temporary file.
+  if (fseek(record_fp_, data_section_offset_, SEEK_SET) == -1) {
+    PLOG(ERROR) << "fseek() failed";
+    return false;
+  }
+  uint64_t cur_size = 0;
+  std::vector<char> global_buf;
+  while (cur_size < data_section_size_) {  // Step 1: split the data section by cpu.
+    std::unique_ptr<Record> r = ReadRecordFromFile(record_fp_, global_buf);
+    if (r == nullptr) {
+      return false;
+    }
+    cur_size += r->size();
+    std::unique_ptr<CpuData>& cpu_data = cpu_map[r->Cpu()];
+    if (cpu_data == nullptr) {
+      // Create temporary file in the same directory as filename_, because we
+      // may not have permission to create temporary file in other directories.
+      cpu_data.reset(new CpuData(filename_ + "." + std::to_string(r->Cpu())));
+      if (cpu_data->fp == nullptr) {
+        PLOG(ERROR) << "failed to open tmpfile " << cpu_data->path;
+        return false;
+      }
+    }
+    cpu_data->data_size += r->size();
+    if (!WriteRecordToFile(cpu_data->fp, std::move(r))) {
+      return false;
+    }
+  }
+  if (fseek(record_fp_, data_section_offset_, SEEK_SET) == -1) {  // Rewrite the section in place.
+    PLOG(ERROR) << "fseek() failed";
+    return false;
+  }
+  RecordCache global_cache(true);  // Step 2: merge, seeded with the first record of each cpu.
+  for (auto it = cpu_map.begin(); it != cpu_map.end(); ++it) {
+    if (fseek(it->second->fp, 0, SEEK_SET) == -1) {
+      PLOG(ERROR) << "fseek() failed";
+      return false;
+    }
+    std::unique_ptr<Record> r = ReadRecordFromFile(it->second->fp, it->second->buf);
+    if (r == nullptr) {
+      return false;
+    }
+    it->second->data_size -= r->size();  // From here on data_size counts the unread bytes.
+    global_cache.Push(std::move(r));
+  }
+  while (true) {
+    std::unique_ptr<Record> r = global_cache.ForcedPop();
+    if (r == nullptr) {  // Cache drained: every record has been written back.
+      break;
+    }
+    uint32_t cpu = r->Cpu();  // Read before r is moved away below.
+    if (!WriteRecordToFile(record_fp_, std::move(r))) {
+      return false;
+    }
+    // Each time writing one record of a cpu, push the next record from the
+    // temporary file belong to that cpu into the record cache.
+    std::unique_ptr<CpuData>& cpu_data = cpu_map[cpu];
+    if (cpu_data->data_size > 0) {
+      r = ReadRecordFromFile(cpu_data->fp, cpu_data->buf);
+      if (r == nullptr) {
+        return false;
+      }
+      cpu_data->data_size -= r->size();
+      global_cache.Push(std::move(r));
+    }
+  }
+  return true;
+}
+
+bool RecordFileWriter::SeekFileEnd(uint64_t* file_end) {  // Seeks to EOF and reports that offset.
+  if (fseek(record_fp_, 0, SEEK_END) == -1) {
+    PLOG(ERROR) << "fseek() failed";
+    return false;
+  }
+  const off_t end_pos = ftello(record_fp_);
+  if (end_pos != -1) {
+    *file_end = static_cast<uint64_t>(end_pos);
+    return true;
+  }
+  PLOG(ERROR) << "ftello() failed";
+  return false;
+}
+
+bool RecordFileWriter::WriteFeatureHeader(size_t feature_count) {
+  feature_count_ = feature_count;
+  current_feature_index_ = 0;
+  uint64_t feature_header_size = feature_count * sizeof(SectionDesc);  // One slot per feature.
+
+  // Reserve zero-filled space right after the data section; WriteFeatureEnd() fills the slots.
+  std::vector<unsigned char> zero_data(feature_header_size);
+  if (fseek(record_fp_, data_section_offset_ + data_section_size_, SEEK_SET) == -1) {
+    PLOG(ERROR) << "fseek() failed";
+    return false;
+  }
+  return Write(zero_data.data(), zero_data.size());
+}
+
+bool RecordFileWriter::WriteBuildIdFeature(const std::vector<BuildIdRecord>& build_id_records) {
+  uint64_t start_offset;  // File offset where this feature's payload begins.
+  if (!WriteFeatureBegin(&start_offset)) {
+    return false;
+  }
+  for (auto& record : build_id_records) {  // Payload: the records' binary forms, back to back.
+    if (!Write(record.Binary(), record.size())) {
+      return false;
+    }
+  }
+  return WriteFeatureEnd(FEAT_BUILD_ID, start_offset);  // Fills the feature's descriptor slot.
+}
+
+bool RecordFileWriter::WriteFeatureString(int feature, const std::string& s) {
+  uint64_t start_offset;  // File offset where this feature's payload begins.
+  if (!WriteFeatureBegin(&start_offset)) {
+    return false;
+  }
+  uint32_t len = static_cast<uint32_t>(Align(s.size() + 1, 64));  // +1 leaves room for '\0'.
+  if (!Write(&len, sizeof(len))) {  // Payload starts with the padded length.
+    return false;
+  }
+  std::vector<char> v(len, '\0');  // The string followed by zero padding up to len bytes.
+  std::copy(s.begin(), s.end(), v.begin());
+  if (!Write(v.data(), v.size())) {
+    return false;
+  }
+  return WriteFeatureEnd(feature, start_offset);
+}
+
+bool RecordFileWriter::WriteCmdlineFeature(const std::vector<std::string>& cmdline) {
+  uint64_t start_offset;  // File offset where this feature's payload begins.
+  if (!WriteFeatureBegin(&start_offset)) {
+    return false;
+  }
+  uint32_t arg_count = cmdline.size();  // Payload starts with the number of arguments.
+  if (!Write(&arg_count, sizeof(arg_count))) {
+    return false;
+  }
+  for (auto& arg : cmdline) {  // Each argument: padded length, then the zero-padded string.
+    uint32_t len = static_cast<uint32_t>(Align(arg.size() + 1, 64));  // +1 leaves room for '\0'.
+    if (!Write(&len, sizeof(len))) {
+      return false;
+    }
+    std::vector<char> array(len, '\0');
+    std::copy(arg.begin(), arg.end(), array.begin());
+    if (!Write(array.data(), array.size())) {
+      return false;
+    }
+  }
+  return WriteFeatureEnd(FEAT_CMDLINE, start_offset);
+}
+
+bool RecordFileWriter::WriteBranchStackFeature() {
+  uint64_t start_offset;
+  if (!WriteFeatureBegin(&start_offset)) {
+    return false;
+  }
+  return WriteFeatureEnd(FEAT_BRANCH_STACK, start_offset);  // Payload is empty.
+}
+
+bool RecordFileWriter::WriteFeatureBegin(uint64_t* start_offset) {
+  // Callers must have reserved a descriptor slot for this feature via WriteFeatureHeader().
+  CHECK_LT(current_feature_index_, feature_count_);
+  // Feature payloads are appended, so the payload starts at the current end of the file.
+  const bool at_file_end = SeekFileEnd(start_offset);
+  return at_file_end;
+}
+
+bool RecordFileWriter::WriteFeatureEnd(int feature, uint64_t start_offset) {
+  uint64_t end_offset;
+  if (!SeekFileEnd(&end_offset)) {  // The payload ends at the current end of file.
+    return false;
+  }
+  SectionDesc desc;
+  desc.offset = start_offset;
+  desc.size = end_offset - start_offset;
+  uint64_t feature_offset = data_section_offset_ + data_section_size_;  // Feature header start.
+  if (fseek(record_fp_, feature_offset + current_feature_index_ * sizeof(SectionDesc), SEEK_SET) ==
+      -1) {
+    PLOG(ERROR) << "fseek() failed";
+    return false;
+  }
+  if (!Write(&desc, sizeof(SectionDesc))) {  // Fill this feature's reserved slot.
+    return false;
+  }
+  ++current_feature_index_;
+  features_.push_back(feature);  // Remembered so WriteFileHeader() can set the feature's bit.
+  return true;
+}
+
+bool RecordFileWriter::WriteFileHeader() {
+  FileHeader header;
+  memset(&header, 0, sizeof(header));  // Fields and feature bits not set below stay zero.
+  memcpy(header.magic, PERF_MAGIC, sizeof(header.magic));
+  header.header_size = sizeof(header);
+  header.attr_size = sizeof(FileAttr);
+  header.attrs.offset = attr_section_offset_;
+  header.attrs.size = attr_section_size_;
+  header.data.offset = data_section_offset_;
+  header.data.size = data_section_size_;
+  for (auto& feature : features_) {  // Set bit (feature % 8) of features[feature / 8].
+    int i = feature / 8;
+    int j = feature % 8;
+    header.features[i] |= (1 << j);
+  }
+
+  if (fseek(record_fp_, 0, SEEK_SET) == -1) {  // The header sits at the start of the file.
+    return false;
+  }
+  if (!Write(&header, sizeof(header))) {
+    return false;
+  }
+  return true;
+}
+
+bool RecordFileWriter::Close() {
+  CHECK(record_fp_ != nullptr);  // Close() must not be called twice.
+  bool result = true;
+
+  // Write file header. We gather enough information to write file header only after
+  // writing data section and feature section.
+  if (!WriteFileHeader()) {
+    result = false;  // Keep going: the file still has to be closed.
+  }
+
+  if (fclose(record_fp_) != 0) {
+    PLOG(ERROR) << "failed to close record file '" << filename_ << "'";
+    result = false;
+  }
+  record_fp_ = nullptr;  // Marks the writer as closed, so the destructor skips Close().
+  return result;
+}
diff --git a/simpleperf/record_test.cpp b/simpleperf/record_test.cpp
new file mode 100644
index 0000000..da5a3ce
--- /dev/null
+++ b/simpleperf/record_test.cpp
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "event_attr.h"
+#include "event_type.h"
+#include "record.h"
+#include "record_equal_test.h"
+
+class RecordTest : public ::testing::Test {  // Fixture providing a default cpu-cycles attr.
+ protected:
+  virtual void SetUp() {
+    const EventType* type = FindEventTypeByName("cpu-cycles");
+    ASSERT_TRUE(type != nullptr);
+    event_attr = CreateDefaultPerfEventAttr(*type);  // Individual tests tweak this copy.
+  }
+
+  void CheckRecordMatchBinary(const Record& record) {  // Binary round-trip check.
+    const char* p = record.Binary();
+    std::vector<std::unique_ptr<Record>> records =
+        ReadRecordsFromBuffer(event_attr, p, record.size());
+    ASSERT_EQ(1u, records.size());  // The buffer holds exactly this one record.
+    CheckRecordEqual(record, *records[0]);
+  }
+
+  perf_event_attr event_attr;  // Attr used to build and to parse records in every test.
+};
+
+TEST_F(RecordTest, MmapRecordMatchBinary) {  // An MmapRecord survives a binary round trip.
+  MmapRecord record(event_attr, true, 1, 2, 0x1000, 0x2000, 0x3000,
+                    "MmapRecord", 0);
+  CheckRecordMatchBinary(record);
+}
+
+TEST_F(RecordTest, CommRecordMatchBinary) {  // A CommRecord survives a binary round trip.
+  CommRecord record(event_attr, 1, 2, "CommRecord", 0);
+  CheckRecordMatchBinary(record);
+}
+
+TEST_F(RecordTest, RecordCache_smoke) {
+  event_attr.sample_id_all = 1;
+  event_attr.sample_type |= PERF_SAMPLE_TIME;
+  RecordCache cache(true, 2, 2);
+  MmapRecord* r1 = new MmapRecord(event_attr, true, 1, 1, 0x100, 0x200, 0x300,
+                                  "mmap_record1", 0, 3);  // Last arg: presumably the time.
+  MmapRecord* r2 = new MmapRecord(event_attr, true, 1, 1, 0x100, 0x200, 0x300,
+                                  "mmap_record1", 0, 1);
+  MmapRecord* r3 = new MmapRecord(event_attr, true, 1, 1, 0x100, 0x200, 0x300,
+                                  "mmap_record1", 0, 4);
+  MmapRecord* r4 = new MmapRecord(event_attr, true, 1, 1, 0x100, 0x200, 0x300,
+                                  "mmap_record1", 0, 6);
+  // Push r1; the cache does not release anything yet.
+  cache.Push(std::unique_ptr<Record>(r1));
+  ASSERT_EQ(nullptr, cache.Pop());
+  // Push r2.
+  cache.Push(std::unique_ptr<Record>(r2));
+  // Pop r2: it is released first although it was pushed after r1.
+  std::unique_ptr<Record> popped_r = cache.Pop();
+  ASSERT_TRUE(popped_r != nullptr);
+  ASSERT_EQ(r2, popped_r.get());  // Pointer identity: the cache hands back the same object.
+  ASSERT_EQ(nullptr, cache.Pop());
+  // Push r3; still nothing can be popped.
+  cache.Push(std::unique_ptr<Record>(r3));
+  ASSERT_EQ(nullptr, cache.Pop());
+  // Push r4.
+  cache.Push(std::unique_ptr<Record>(r4));
+  // Pop r1, which only becomes available after r4 was pushed.
+  popped_r = cache.Pop();
+  ASSERT_TRUE(popped_r != nullptr);
+  ASSERT_EQ(r1, popped_r.get());
+  // Pop r3.
+  popped_r = cache.Pop();
+  ASSERT_TRUE(popped_r != nullptr);
+  ASSERT_EQ(r3, popped_r.get());
+  ASSERT_EQ(nullptr, cache.Pop());
+  // Pop r4: PopAll() returns whatever is still cached.
+  std::vector<std::unique_ptr<Record>> last_records = cache.PopAll();
+  ASSERT_EQ(1u, last_records.size());
+  ASSERT_EQ(r4, last_records[0].get());
+}
+
+TEST_F(RecordTest, RecordCache_FIFO) {  // Ten records pushed in a row come out in push order.
+  event_attr.sample_id_all = 1;
+  event_attr.sample_type |= PERF_SAMPLE_TIME;
+  RecordCache cache(true, 2, 2);
+  std::vector<MmapRecord*> records;  // Raw pointers kept only to compare identity later.
+  for (size_t i = 0; i < 10; ++i) {
+    records.push_back(new MmapRecord(event_attr, true, 1, i, 0x100, 0x200,
+                                     0x300, "mmap_record1", 0));
+    cache.Push(std::unique_ptr<Record>(records.back()));  // The cache takes ownership.
+  }
+  std::vector<std::unique_ptr<Record>> out_records = cache.PopAll();
+  ASSERT_EQ(records.size(), out_records.size());
+  for (size_t i = 0; i < records.size(); ++i) {
+    ASSERT_EQ(records[i], out_records[i].get());  // Same objects, same order as pushed.
+  }
+}
+
+TEST_F(RecordTest, RecordCache_PushRecordVector) {  // Push() also accepts a vector of records.
+  event_attr.sample_id_all = 1;
+  event_attr.sample_type |= PERF_SAMPLE_TIME;
+  RecordCache cache(true, 2, 2);
+  MmapRecord* r1 = new MmapRecord(event_attr, true, 1, 1, 0x100, 0x200, 0x300,
+                                  "mmap_record1", 0, 1);
+  MmapRecord* r2 = new MmapRecord(event_attr, true, 1, 1, 0x100, 0x200, 0x300,
+                                  "mmap_record1", 0, 3);
+  std::vector<std::unique_ptr<Record>> records;
+  records.push_back(std::unique_ptr<Record>(r1));
+  records.push_back(std::unique_ptr<Record>(r2));
+  cache.Push(std::move(records));
+  std::unique_ptr<Record> popped_r = cache.Pop();  // After the vector push, r1 can be popped.
+  ASSERT_TRUE(popped_r != nullptr);
+  ASSERT_EQ(r1, popped_r.get());
+  std::vector<std::unique_ptr<Record>> last_records = cache.PopAll();  // r2 is what remains.
+  ASSERT_EQ(1u, last_records.size());
+  ASSERT_EQ(r2, last_records[0].get());
+}
diff --git a/simpleperf/report_sample.proto b/simpleperf/report_sample.proto
new file mode 100644
index 0000000..d6c42e6
--- /dev/null
+++ b/simpleperf/report_sample.proto
@@ -0,0 +1,48 @@
+// The file format generated by report_sample.proto is as below:
+// LittleEndian32(record_size_0)
+// message Record(record_0) (having record_size_0 bytes)
+// LittleEndian32(record_size_1)
+// message Record(record_1) (having record_size_1 bytes)
+// ...
+// LittleEndian32(record_size_N)
+// message Record(record_N) (having record_size_N bytes)
+// LittleEndian32(0)
+
+syntax = "proto2";
+option optimize_for = LITE_RUNTIME;
+package simpleperf_report_proto;
+option java_package = "com.android.tools.profiler.proto";
+option java_outer_classname = "SimpleperfReport";
+
+message Sample {  // One sample: a time, a thread id and a call chain.
+  optional uint64 time = 1;  // NOTE(review): unit and clock are not stated here - confirm.
+
+  message CallChainEntry {  // One entry of a sample's call chain.
+    optional uint64 ip = 1;  // Presumably the instruction pointer of this entry - confirm.
+    optional string symbol = 2;
+    optional string file = 3;
+  }
+
+  repeated CallChainEntry callchain = 2;
+  optional int32 thread_id = 3;
+}
+
+message LostSituation {  // A pair of counters: samples seen and samples lost.
+  optional uint64 sample_count = 1;
+  optional uint64 lost_count = 2;
+}
+
+message Record {  // One size-prefixed entry of the output file.
+  enum Type {
+    UNKOWN = 0;  // (sic) Renaming would change the generated identifiers.
+    SAMPLE = 1;
+    LOST_SITUATION = 2;
+  }
+
+  // Identifies which field is filled in.
+  optional Type type = 1;
+
+  // One of the following will be filled in.
+  optional Sample sample = 2;
+  optional LostSituation lost = 3;
+}
\ No newline at end of file
diff --git a/simpleperf/runtest/Android.build.mk b/simpleperf/runtest/Android.build.mk
new file mode 100644
index 0000000..8520765
--- /dev/null
+++ b/simpleperf/runtest/Android.build.mk
@@ -0,0 +1,38 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# -O0 is needed to prevent optimizations like function inlining in runtest executables.
+simpleperf_runtest_cppflags := -Wall -Wextra -Werror -Wunused \
+                               -O0 \
+
+include $(CLEAR_VARS)
+LOCAL_CLANG := true
+LOCAL_CPPFLAGS := $(simpleperf_runtest_cppflags)
+LOCAL_SRC_FILES := $(module_src_files)
+LOCAL_MODULE := $(module)
+LOCAL_STRIP_MODULE := false
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.build.mk
+include $(BUILD_EXECUTABLE)
+
+ifeq ($(HOST_OS),linux)
+include $(CLEAR_VARS)
+LOCAL_CLANG := true
+LOCAL_CPPFLAGS := $(simpleperf_runtest_cppflags)
+LOCAL_SRC_FILES := $(module_src_files)
+LOCAL_MODULE := $(module)
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.build.mk
+include $(BUILD_HOST_EXECUTABLE)
+endif
\ No newline at end of file
diff --git a/simpleperf/runtest/Android.mk b/simpleperf/runtest/Android.mk
new file mode 100644
index 0000000..55bf3b7
--- /dev/null
+++ b/simpleperf/runtest/Android.mk
@@ -0,0 +1,45 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_PATH := $(call my-dir)
+
+module := simpleperf_runtest_one_function
+module_src_files := one_function.cpp
+include $(LOCAL_PATH)/Android.build.mk
+
+module := simpleperf_runtest_two_functions
+module_src_files := two_functions.cpp
+include $(LOCAL_PATH)/Android.build.mk
+
+module := simpleperf_runtest_function_fork
+module_src_files := function_fork.cpp
+include $(LOCAL_PATH)/Android.build.mk
+
+module := simpleperf_runtest_function_pthread
+module_src_files := function_pthread.cpp
+include $(LOCAL_PATH)/Android.build.mk
+
+module := simpleperf_runtest_comm_change
+module_src_files := comm_change.cpp
+include $(LOCAL_PATH)/Android.build.mk
+
+module := simpleperf_runtest_function_recursive
+module_src_files := function_recursive.cpp
+include $(LOCAL_PATH)/Android.build.mk
+
+module := simpleperf_runtest_function_indirect_recursive
+module_src_files := function_indirect_recursive.cpp
+include $(LOCAL_PATH)/Android.build.mk
\ No newline at end of file
diff --git a/simpleperf/runtest/comm_change.cpp b/simpleperf/runtest/comm_change.cpp
new file mode 100644
index 0000000..12d64fa
--- /dev/null
+++ b/simpleperf/runtest/comm_change.cpp
@@ -0,0 +1,16 @@
+#include <sys/prctl.h>
+
+constexpr int LOOP_COUNT = 100000000;
+
+void Function1() {  // Busy loop: the workload whose samples the runtest looks for.
+  for (volatile int i = 0; i < LOOP_COUNT; ++i) {  // volatile: the counter is really updated.
+  }
+}
+
+int main() {  // Runs the same workload under two thread names, RUN_COMM1 then RUN_COMM2.
+  prctl(PR_SET_NAME, reinterpret_cast<unsigned long>("RUN_COMM1"), 0, 0, 0); // NOLINT
+  Function1();
+  prctl(PR_SET_NAME, reinterpret_cast<unsigned long>("RUN_COMM2"), 0, 0, 0); // NOLINT
+  Function1();
+  return 0;
+}
diff --git a/simpleperf/runtest/function_fork.cpp b/simpleperf/runtest/function_fork.cpp
new file mode 100644
index 0000000..b1477a6
--- /dev/null
+++ b/simpleperf/runtest/function_fork.cpp
@@ -0,0 +1,30 @@
+#include <stdlib.h>
+#include <unistd.h>
+
+constexpr int LOOP_COUNT = 100000000;
+
+volatile int a[2];  // a[0] is written by ParentFunction(), a[1] by ChildFunction().
+void ParentFunction() {  // Workload run by the parent process.
+  volatile int* p = a + atoi("0");  // Index computed at run time; presumably to defeat folding.
+  for (int i = 0; i < LOOP_COUNT; ++i) {
+    *p = i;  // Volatile store, so every iteration performs a real write.
+  }
+}
+
+void ChildFunction() {  // Workload run by the forked child process.
+  volatile int* p = a + atoi("1");  // Index computed at run time; presumably to defeat folding.
+  for (int i = 0; i < LOOP_COUNT; ++i) {
+    *p = i;  // Volatile store, so every iteration performs a real write.
+  }
+}
+
+int main() {  // Forks once; child and parent each run their own workload function.
+  pid_t pid = fork();
+  if (pid == 0) {  // In the child process.
+    ChildFunction();
+    return 0;
+  } else {  // In the parent; also taken when fork() failed (pid == -1).
+    ParentFunction();
+  }
+  return 0;  // The parent does not wait for the child.
+}
diff --git a/simpleperf/runtest/function_indirect_recursive.cpp b/simpleperf/runtest/function_indirect_recursive.cpp
new file mode 100644
index 0000000..5e70fd3
--- /dev/null
+++ b/simpleperf/runtest/function_indirect_recursive.cpp
@@ -0,0 +1,24 @@
+constexpr int LOOP_COUNT = 5000000;
+
+void FunctionRecursiveTwo(int loop);
+
+void FunctionRecursiveOne(int loop) {  // Busy loop, then hands over to FunctionRecursiveTwo().
+  for (volatile int i = 0; i < LOOP_COUNT; ++i) {
+  }
+  if (loop >= 0) {  // Holds for every call made from main(10); Two() ends the recursion.
+    FunctionRecursiveTwo(loop);
+  }
+}
+
+void FunctionRecursiveTwo(int loop) {  // Busy loop, then calls back into FunctionRecursiveOne().
+  for (volatile int i = 0; i < LOOP_COUNT; ++i) {
+  }
+  if (loop > 0) {  // The recursion stops here once loop reaches 0.
+    FunctionRecursiveOne(loop - 1);
+  }
+}
+
+int main() {
+  FunctionRecursiveOne(10);  // One and Two call each other until loop counts down to 0.
+  return 0;
+}
diff --git a/simpleperf/runtest/function_pthread.cpp b/simpleperf/runtest/function_pthread.cpp
new file mode 100644
index 0000000..02fc0a5
--- /dev/null
+++ b/simpleperf/runtest/function_pthread.cpp
@@ -0,0 +1,33 @@
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+constexpr int LOOP_COUNT = 100000000;
+
+void* ChildThreadFunction(void*) {  // Start routine of the child thread; argument is unused.
+  for (volatile int i = 0; i < LOOP_COUNT; ++i) {
+  }
+  return nullptr;
+}
+
+void MainThreadFunction() {  // Same busy loop, run on the main thread.
+  for (volatile int i = 0; i < LOOP_COUNT; ++i) {
+  }
+}
+
+int main() {  // Runs the busy loop on a child thread and on the main thread.
+  pthread_t thread;
+  int ret = pthread_create(&thread, nullptr, ChildThreadFunction, nullptr);
+  if (ret != 0) {  // pthread functions return the error number rather than setting errno.
+    fprintf(stderr, "pthread_create failed: %s\n", strerror(ret));
+    exit(1);
+  }
+  MainThreadFunction();  // Started after the child thread has been created.
+  ret = pthread_join(thread, nullptr);
+  if (ret != 0) {
+    fprintf(stderr, "pthread_join failed: %s\n", strerror(ret));
+    exit(1);
+  }
+  return 0;
+}
diff --git a/simpleperf/runtest/function_recursive.cpp b/simpleperf/runtest/function_recursive.cpp
new file mode 100644
index 0000000..d8d28bc
--- /dev/null
+++ b/simpleperf/runtest/function_recursive.cpp
@@ -0,0 +1,16 @@
+constexpr int LOOP_COUNT = 5000000;
+
+void FunctionRecursive(int loop) {  // Busy loop before and after the recursive call.
+  for (volatile int i = 0; i < LOOP_COUNT; ++i) {
+  }
+  if (loop > 0) {  // Recurse until loop reaches 0.
+    FunctionRecursive(loop - 1);
+  }
+  for (volatile int i = 0; i < LOOP_COUNT; ++i) {  // Work done while the stack unwinds.
+  }
+}
+
+int main() {
+  FunctionRecursive(10);  // Eleven nested activations: loop = 10 down to 0.
+  return 0;
+}
diff --git a/simpleperf/runtest/one_function.cpp b/simpleperf/runtest/one_function.cpp
new file mode 100644
index 0000000..49090ac
--- /dev/null
+++ b/simpleperf/runtest/one_function.cpp
@@ -0,0 +1,11 @@
+constexpr int LOOP_COUNT = 100000000;
+
+void Function1() {  // Busy loop: the only workload of this program.
+  for (volatile int i = 0; i < LOOP_COUNT; ++i) {  // volatile: the counter is really updated.
+  }
+}
+
+int main() {
+  Function1();  // The program does nothing besides this one call.
+  return 0;
+}
diff --git a/simpleperf/runtest/runtest.conf b/simpleperf/runtest/runtest.conf
new file mode 100644
index 0000000..863ea99
--- /dev/null
+++ b/simpleperf/runtest/runtest.conf
@@ -0,0 +1,205 @@
+<runtests>
+<test name="one_function">
+  <executable name="simpleperf_runtest_one_function"/>
+
+  <symbol_overhead>
+    <symbol name="Function1()" min="90" max="100"/>
+  </symbol_overhead>
+
+  <symbol_children_overhead>
+    <symbol name="main" min="90" max="100"/>
+  </symbol_children_overhead>
+
+  <symbol_callgraph_relation>
+    <symbol name="Function1()">
+      <symbol name="main"/>
+    </symbol>
+  </symbol_callgraph_relation>
+</test>
+
+<test name="two_functions">
+  <executable name="simpleperf_runtest_two_functions"/>
+
+  <symbol_overhead>
+    <symbol name="Function1()" min="30" max="70"/>
+    <symbol name="Function2()" min="30" max="70"/>
+  </symbol_overhead>
+
+  <symbol_children_overhead>
+    <symbol name="main" min="90" max="100"/>
+  </symbol_children_overhead>
+
+  <symbol_callgraph_relation>
+    <symbol name="Function1()">
+      <symbol name="main"/>
+    </symbol>
+    <symbol name="Function2()">
+      <symbol name="main"/>
+    </symbol>
+  </symbol_callgraph_relation>
+</test>
+
+<test name="function_fork">
+  <executable name="simpleperf_runtest_function_fork"/>
+
+  <symbol_overhead>
+    <symbol name="ParentFunction()" min="10" max="90"/>
+    <symbol name="ChildFunction()" min="10" max="90"/>
+  </symbol_overhead>
+
+  <symbol_children_overhead>
+    <symbol name="main" min="10" max="90"/>
+  </symbol_children_overhead>
+
+  <symbol_callgraph_relation>
+    <symbol name="ParentFunction()">
+      <symbol name="main"/>
+    </symbol>
+    <symbol name="ChildFunction()">
+      <symbol name="main"/>
+    </symbol>
+  </symbol_callgraph_relation>
+</test>
+
+<test name="function_pthread">
+  <executable name="simpleperf_runtest_function_pthread"/>
+
+  <symbol_overhead>
+    <symbol name="MainThreadFunction()" min="20" max="80"/>
+    <symbol name="ChildThreadFunction(void*)" min="20" max="80"/>
+  </symbol_overhead>
+
+  <symbol_children_overhead>
+    <symbol name="main" min="20" max="80"/>
+  </symbol_children_overhead>
+
+  <symbol_callgraph_relation>
+    <symbol name="MainThreadFunction()">
+      <symbol name="main"/>
+    </symbol>
+  </symbol_callgraph_relation>
+</test>
+
+<test name="comm_change">
+  <executable name="simpleperf_runtest_comm_change"/>
+
+  <symbol_overhead>
+    <symbol name="Function1()" comm="RUN_COMM1" min="30" max="70"/>
+    <symbol name="Function1()" comm="RUN_COMM2" min="30" max="70"/>
+  </symbol_overhead>
+
+  <symbol_children_overhead>
+    <symbol name="main" comm="RUN_COMM1" min="30" max="70"/>
+    <symbol name="main" comm="RUN_COMM2" min="30" max="70"/>
+  </symbol_children_overhead>
+
+  <symbol_callgraph_relation>
+    <symbol name="Function1()" comm="RUN_COMM1">
+      <symbol name="main"/>
+    </symbol>
+    <symbol name="Function1()" comm="RUN_COMM2">
+      <symbol name="main"/>
+    </symbol>
+  </symbol_callgraph_relation>
+
+</test>
+
+<test name="function_recursive">
+  <executable name="simpleperf_runtest_function_recursive"/>
+
+  <symbol_overhead>
+    <symbol name="FunctionRecursive(int)" min="90"/>
+  </symbol_overhead>
+
+  <symbol_children_overhead>
+    <symbol name="main" min="90"/>
+  </symbol_children_overhead>
+
+  <symbol_callgraph_relation>
+    <symbol name="FunctionRecursive(int)">
+      <symbol name="FunctionRecursive(int)">
+        <symbol name="FunctionRecursive(int)">
+          <symbol name="FunctionRecursive(int)">
+            <symbol name="FunctionRecursive(int)">
+              <symbol name="FunctionRecursive(int)">
+                <symbol name="FunctionRecursive(int)">
+                  <symbol name="FunctionRecursive(int)">
+                    <symbol name="FunctionRecursive(int)">
+                      <symbol name="FunctionRecursive(int)">
+                        <symbol name="FunctionRecursive(int)">
+                          <symbol name="main"/>
+                        </symbol>
+                        <symbol name="main"/>
+                      </symbol>
+                      <symbol name="main"/>
+                    </symbol>
+                    <symbol name="main"/>
+                  </symbol>
+                  <symbol name="main"/>
+                </symbol>
+                <symbol name="main"/>
+              </symbol>
+              <symbol name="main"/>
+            </symbol>
+            <symbol name="main"/>
+          </symbol>
+          <symbol name="main"/>
+        </symbol>
+        <symbol name="main"/>
+      </symbol>
+      <symbol name="main"/>
+    </symbol>
+  </symbol_callgraph_relation>
+</test>
+
+<test name="function_indirect_recursive">
+  <executable name="simpleperf_runtest_function_indirect_recursive"/>
+
+  <symbol_overhead>
+    <symbol name="FunctionRecursiveOne(int)" min="30" max="70"/>
+    <symbol name="FunctionRecursiveTwo(int)" min="30" max="70"/>
+  </symbol_overhead>
+
+  <symbol_children_overhead>
+    <symbol name="FunctionRecursiveOne(int)" min="90"/>
+    <symbol name="FunctionRecursiveTwo(int)" min="80"/>
+  </symbol_children_overhead>
+
+  <symbol_callgraph_relation>
+    <symbol name="FunctionRecursiveOne(int)">
+      <symbol name="FunctionRecursiveTwo(int)">
+        <symbol name="FunctionRecursiveOne(int)">
+          <symbol name="FunctionRecursiveTwo(int)">
+            <symbol name="FunctionRecursiveOne(int)"/>
+          </symbol>
+          <symbol name="main"/>
+        </symbol>
+      </symbol>
+      <symbol name="main"/>
+    </symbol>
+
+    <symbol name="FunctionRecursiveTwo(int)">
+      <symbol name="FunctionRecursiveOne(int)">
+        <symbol name="FunctionRecursiveTwo(int)">
+          <symbol name="FunctionRecursiveOne(int)">
+            <symbol name="FunctionRecursiveTwo(int)">
+            </symbol>
+            <symbol name="main"/>
+          </symbol>
+        </symbol>
+        <symbol name="main"/>
+      </symbol>
+    </symbol>
+  </symbol_callgraph_relation>
+</test>
+
+<test name="selected_comm">
+  <executable name="simpleperf_runtest_comm_change"/>
+  <report option="--comms RUN_COMM1"/>
+
+  <symbol_overhead>
+    <symbol comm="RUN_COMM1" min="100" max="100"/>
+  </symbol_overhead>
+</test>
+
+</runtests>
diff --git a/simpleperf/runtest/runtest.py b/simpleperf/runtest/runtest.py
new file mode 100644
index 0000000..bbfdc48
--- /dev/null
+++ b/simpleperf/runtest/runtest.py
@@ -0,0 +1,615 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Simpleperf runtest runner: run simpleperf runtests on host or on device.
+
+A simpleperf runtest, such as the one_function test, consists of these steps:
+1. Run simpleperf record command to record simpleperf_runtest_one_function's
+   running samples, which are written to perf.data.
+2. Run simpleperf report command to parse perf.data and generate perf.report.
+3. Parse perf.report and see if it matches expectation.
+
+The information of all runtests is stored in runtest.conf.
+"""
+
+import os
+import os.path
+import re
+import subprocess
+import sys
+import xml.etree.ElementTree as ET
+
+
+class CallTreeNode(object):
+
+  def __init__(self, name):
+    self.name = name
+    self.children = []
+
+  def add_child(self, child):
+    self.children.append(child)
+
+  def __str__(self):
+    return 'CallTreeNode:\n' + '\n'.join(self._dump(1))
+
+  def _dump(self, indent):
+    indent_str = '  ' * indent
+    strs = [indent_str + self.name]
+    for child in self.children:
+      strs.extend(child._dump(indent + 1))
+    return strs
+
+
+class Symbol(object):
+
+  def __init__(self, name, comm, overhead, children_overhead):
+    self.name = name
+    self.comm = comm
+    self.overhead = overhead
+    # children_overhead is the overhead sum of this symbol and functions
+    # called by this symbol.
+    self.children_overhead = children_overhead
+    self.call_tree = None
+
+  def set_call_tree(self, call_tree):
+    self.call_tree = call_tree
+
+  def __str__(self):
+    strs = []
+    strs.append('Symbol name=%s comm=%s overhead=%f children_overhead=%f' % (
+        self.name, self.comm, self.overhead, self.children_overhead))
+    if self.call_tree:
+      strs.append('\t%s' % self.call_tree)
+    return '\n'.join(strs)
+
+
+class SymbolOverheadRequirement(object):
+
+  def __init__(self, symbol_name=None, comm=None, min_overhead=None,
+               max_overhead=None):
+    self.symbol_name = symbol_name
+    self.comm = comm
+    self.min_overhead = min_overhead
+    self.max_overhead = max_overhead
+
+  def __str__(self):
+    strs = []
+    strs.append('SymbolOverheadRequirement')
+    if self.symbol_name is not None:
+      strs.append('symbol_name=%s' % self.symbol_name)
+    if self.comm is not None:
+      strs.append('comm=%s' % self.comm)
+    if self.min_overhead is not None:
+      strs.append('min_overhead=%f' % self.min_overhead)
+    if self.max_overhead is not None:
+      strs.append('max_overhead=%f' % self.max_overhead)
+    return ' '.join(strs)
+
+  def is_match(self, symbol):
+    if self.symbol_name is not None:
+      if self.symbol_name != symbol.name:
+        return False
+    if self.comm is not None:
+      if self.comm != symbol.comm:
+        return False
+    return True
+
+  def check_overhead(self, overhead):
+    if self.min_overhead is not None:
+      if self.min_overhead > overhead:
+        return False
+    if self.max_overhead is not None:
+      if self.max_overhead < overhead:
+        return False
+    return True
+
+
+class SymbolRelationRequirement(object):
+
+  def __init__(self, symbol_name, comm=None):
+    self.symbol_name = symbol_name
+    self.comm = comm
+    self.children = []
+
+  def add_child(self, child):
+    self.children.append(child)
+
+  def __str__(self):
+    return 'SymbolRelationRequirement:\n' + '\n'.join(self._dump(1))
+
+  def _dump(self, indent):
+    indent_str = '  ' * indent
+    strs = [indent_str + self.symbol_name +
+            (' ' + self.comm if self.comm else '')]
+    for child in self.children:
+      strs.extend(child._dump(indent + 1))
+    return strs
+
+  def is_match(self, symbol):
+    if symbol.name != self.symbol_name:
+      return False
+    if self.comm is not None:
+      if symbol.comm != self.comm:
+        return False
+    return True
+
+  def check_relation(self, call_tree):
+    if not call_tree:
+      return False
+    if self.symbol_name != call_tree.name:
+      return False
+    for child in self.children:
+      child_matched = False
+      for node in call_tree.children:
+        if child.check_relation(node):
+          child_matched = True
+          break
+      if not child_matched:
+        return False
+    return True
+
+
+class Test(object):
+
+  def __init__(
+          self,
+          test_name,
+          executable_name,
+          report_options,
+          symbol_overhead_requirements,
+          symbol_children_overhead_requirements,
+          symbol_relation_requirements):
+    self.test_name = test_name
+    self.executable_name = executable_name
+    self.report_options = report_options
+    self.symbol_overhead_requirements = symbol_overhead_requirements
+    self.symbol_children_overhead_requirements = (
+        symbol_children_overhead_requirements)
+    self.symbol_relation_requirements = symbol_relation_requirements
+
+  def __str__(self):
+    strs = []
+    strs.append('Test test_name=%s' % self.test_name)
+    strs.append('\texecutable_name=%s' % self.executable_name)
+    strs.append('\treport_options=%s' % (' '.join(self.report_options)))
+    strs.append('\tsymbol_overhead_requirements:')
+    for req in self.symbol_overhead_requirements:
+      strs.append('\t\t%s' % req)
+    strs.append('\tsymbol_children_overhead_requirements:')
+    for req in self.symbol_children_overhead_requirements:
+      strs.append('\t\t%s' % req)
+    strs.append('\tsymbol_relation_requirements:')
+    for req in self.symbol_relation_requirements:
+      strs.append('\t\t%s' % req)
+    return '\n'.join(strs)
+
+
+def load_config_file(config_file):
+  tests = []
+  tree = ET.parse(config_file)
+  root = tree.getroot()
+  assert root.tag == 'runtests'
+  for test in root:
+    assert test.tag == 'test'
+    test_name = test.attrib['name']
+    executable_name = None
+    report_options = []
+    symbol_overhead_requirements = []
+    symbol_children_overhead_requirements = []
+    symbol_relation_requirements = []
+    for test_item in test:
+      if test_item.tag == 'executable':
+        executable_name = test_item.attrib['name']
+      elif test_item.tag == 'report':
+        report_options = test_item.attrib['option'].split()
+      elif (test_item.tag == 'symbol_overhead' or
+              test_item.tag == 'symbol_children_overhead'):
+        for symbol_item in test_item:
+          assert symbol_item.tag == 'symbol'
+          symbol_name = None
+          if 'name' in symbol_item.attrib:
+            symbol_name = symbol_item.attrib['name']
+          comm = None
+          if 'comm' in symbol_item.attrib:
+            comm = symbol_item.attrib['comm']
+          overhead_min = None
+          if 'min' in symbol_item.attrib:
+            overhead_min = float(symbol_item.attrib['min'])
+          overhead_max = None
+          if 'max' in symbol_item.attrib:
+            overhead_max = float(symbol_item.attrib['max'])
+
+          if test_item.tag == 'symbol_overhead':
+            symbol_overhead_requirements.append(
+                SymbolOverheadRequirement(
+                    symbol_name,
+                    comm,
+                    overhead_min,
+                    overhead_max)
+            )
+          else:
+            symbol_children_overhead_requirements.append(
+                SymbolOverheadRequirement(
+                    symbol_name,
+                    comm,
+                    overhead_min,
+                    overhead_max))
+      elif test_item.tag == 'symbol_callgraph_relation':
+        for symbol_item in test_item:
+          req = load_symbol_relation_requirement(symbol_item)
+          symbol_relation_requirements.append(req)
+
+    tests.append(
+        Test(
+            test_name,
+            executable_name,
+            report_options,
+            symbol_overhead_requirements,
+            symbol_children_overhead_requirements,
+            symbol_relation_requirements))
+  return tests
+
+
+def load_symbol_relation_requirement(symbol_item):
+  symbol_name = symbol_item.attrib['name']
+  comm = None
+  if 'comm' in symbol_item.attrib:
+    comm = symbol_item.attrib['comm']
+  req = SymbolRelationRequirement(symbol_name, comm)
+  for item in symbol_item:
+    child_req = load_symbol_relation_requirement(item)
+    req.add_child(child_req)
+  return req
+
+
+class Runner(object):
+
+  def __init__(self, perf_path):
+    self.perf_path = perf_path
+
+  def record(self, test_executable_name, record_file, additional_options=[]):
+    call_args = [self.perf_path,
+                 'record'] + additional_options + ['-e',
+                                                   'cpu-cycles:u',
+                                                   '-o',
+                                                   record_file,
+                                                   test_executable_name]
+    self._call(call_args)
+
+  def report(self, record_file, report_file, additional_options=[]):
+    call_args = [self.perf_path,
+                 'report'] + additional_options + ['-i',
+                                                   record_file]
+    self._call(call_args, report_file)
+
+  def _call(self, args, output_file=None):
+    pass
+
+
+class HostRunner(Runner):
+
+  """Run perf test on host."""
+
+  def _call(self, args, output_file=None):
+    output_fh = None
+    if output_file is not None:
+      output_fh = open(output_file, 'w')
+    subprocess.check_call(args, stdout=output_fh)
+    if output_fh is not None:
+      output_fh.close()
+
+
+class DeviceRunner(Runner):
+
+  """Run perf test on device."""
+
+  def __init__(self, perf_path):
+    self.tmpdir = '/data/local/tmp/'
+    self._download(os.environ['OUT'] + '/system/xbin/' + perf_path, self.tmpdir)
+    self.perf_path = self.tmpdir + perf_path
+
+  def _call(self, args, output_file=None):
+    output_fh = None
+    if output_file is not None:
+      output_fh = open(output_file, 'w')
+    args_with_adb = ['adb', 'shell']
+    args_with_adb.extend(args)
+    subprocess.check_call(args_with_adb, stdout=output_fh)
+    if output_fh is not None:
+      output_fh.close()
+
+  def _download(self, file, to_dir):
+    args = ['adb', 'push', file, to_dir]
+    subprocess.check_call(args)
+
+  def record(self, test_executable_name, record_file, additional_options=[]):
+    self._download(os.environ['OUT'] + '/system/bin/' + test_executable_name,
+                   self.tmpdir)
+    super(DeviceRunner, self).record(self.tmpdir + test_executable_name,
+                                     self.tmpdir + record_file,
+                                     additional_options)
+
+  def report(self, record_file, report_file, additional_options=[]):
+    super(DeviceRunner, self).report(self.tmpdir + record_file,
+                                     report_file,
+                                     additional_options)
+
+class ReportAnalyzer(object):
+
+  """Check if perf.report matches expectation in Configuration."""
+
+  def _read_report_file(self, report_file, has_callgraph):
+    fh = open(report_file, 'r')
+    lines = fh.readlines()
+    fh.close()
+
+    lines = [x.rstrip() for x in lines]
+    blank_line_index = -1
+    for i in range(len(lines)):
+      if not lines[i]:
+        blank_line_index = i
+    assert blank_line_index != -1
+    assert blank_line_index + 1 < len(lines)
+    title_line = lines[blank_line_index + 1]
+    report_item_lines = lines[blank_line_index + 2:]
+
+    if has_callgraph:
+      assert re.search(r'^Children\s+Self\s+Command.+Symbol$', title_line)
+    else:
+      assert re.search(r'^Overhead\s+Command.+Symbol$', title_line)
+
+    return self._parse_report_items(report_item_lines, has_callgraph)
+
+  def _parse_report_items(self, lines, has_callgraph):
+    symbols = []
+    cur_symbol = None
+    call_tree_stack = {}
+    vertical_columns = []
+    last_node = None
+    last_depth = -1
+
+    for line in lines:
+      if not line:
+        continue
+      if not line[0].isspace():
+        if has_callgraph:
+          m = re.search(r'^([\d\.]+)%\s+([\d\.]+)%\s+(\S+).*\s+(\S+)$', line)
+          children_overhead = float(m.group(1))
+          overhead = float(m.group(2))
+          comm = m.group(3)
+          symbol_name = m.group(4)
+          cur_symbol = Symbol(symbol_name, comm, overhead, children_overhead)
+          symbols.append(cur_symbol)
+        else:
+          m = re.search(r'^([\d\.]+)%\s+(\S+).*\s+(\S+)$', line)
+          overhead = float(m.group(1))
+          comm = m.group(2)
+          symbol_name = m.group(3)
+          cur_symbol = Symbol(symbol_name, comm, overhead, 0)
+          symbols.append(cur_symbol)
+        # Each report item can have different column depths.
+        vertical_columns = []
+      else:
+        for i in range(len(line)):
+          if line[i] == '|':
+            if not vertical_columns or vertical_columns[-1] < i:
+              vertical_columns.append(i)
+
+        if not line.strip('| \t'):
+          continue
+        if line.find('-') == -1:
+          function_name = line.strip('| \t')
+          node = CallTreeNode(function_name)
+          last_node.add_child(node)
+          last_node = node
+          call_tree_stack[last_depth] = node
+        else:
+          pos = line.find('-')
+          depth = -1
+          for i in range(len(vertical_columns)):
+            if pos >= vertical_columns[i]:
+              depth = i
+          assert depth != -1
+
+          line = line.strip('|- \t')
+          m = re.search(r'^[\d\.]+%[-\s]+(.+)$', line)
+          if m:
+            function_name = m.group(1)
+          else:
+            function_name = line
+
+          node = CallTreeNode(function_name)
+          if depth == 0:
+            cur_symbol.set_call_tree(node)
+
+          else:
+            call_tree_stack[depth - 1].add_child(node)
+          call_tree_stack[depth] = node
+          last_node = node
+          last_depth = depth
+
+    return symbols
+
+  def check_report_file(self, test, report_file, has_callgraph):
+    symbols = self._read_report_file(report_file, has_callgraph)
+    if not self._check_symbol_overhead_requirements(test, symbols):
+      return False
+    if has_callgraph:
+      if not self._check_symbol_children_overhead_requirements(test, symbols):
+        return False
+      if not self._check_symbol_relation_requirements(test, symbols):
+        return False
+    return True
+
+  def _check_symbol_overhead_requirements(self, test, symbols):
+    result = True
+    matched = [False] * len(test.symbol_overhead_requirements)
+    matched_overhead = [0] * len(test.symbol_overhead_requirements)
+    for symbol in symbols:
+      for i in range(len(test.symbol_overhead_requirements)):
+        req = test.symbol_overhead_requirements[i]
+        if req.is_match(symbol):
+          matched[i] = True
+          matched_overhead[i] += symbol.overhead
+    for i in range(len(matched)):
+      if not matched[i]:
+        print 'requirement (%s) has no matched symbol in test %s' % (
+            test.symbol_overhead_requirements[i], test)
+        result = False
+      else:
+        fulfilled = req.check_overhead(matched_overhead[i])
+        if not fulfilled:
+          print "Symbol (%s) doesn't match requirement (%s) in test %s" % (
+              symbol, req, test)
+          result = False
+    return result
+
+  def _check_symbol_children_overhead_requirements(self, test, symbols):
+    result = True
+    matched = [False] * len(test.symbol_children_overhead_requirements)
+    for symbol in symbols:
+      for i in range(len(test.symbol_children_overhead_requirements)):
+        req = test.symbol_children_overhead_requirements[i]
+        if req.is_match(symbol):
+          matched[i] = True
+          fulfilled = req.check_overhead(symbol.children_overhead)
+          if not fulfilled:
+            print "Symbol (%s) doesn't match requirement (%s) in test %s" % (
+                symbol, req, test)
+            result = False
+    for i in range(len(matched)):
+      if not matched[i]:
+        print 'requirement (%s) has no matched symbol in test %s' % (
+            test.symbol_children_overhead_requirements[i], test)
+        result = False
+    return result
+
+  def _check_symbol_relation_requirements(self, test, symbols):
+    result = True
+    matched = [False] * len(test.symbol_relation_requirements)
+    for symbol in symbols:
+      for i in range(len(test.symbol_relation_requirements)):
+        req = test.symbol_relation_requirements[i]
+        if req.is_match(symbol):
+          matched[i] = True
+          fulfilled = req.check_relation(symbol.call_tree)
+          if not fulfilled:
+            print "Symbol (%s) doesn't match requirement (%s) in test %s" % (
+                symbol, req, test)
+            result = False
+    for i in range(len(matched)):
+      if not matched[i]:
+        print 'requirement (%s) has no matched symbol in test %s' % (
+            test.symbol_relation_requirements[i], test)
+        result = False
+    return result
+
+
+def runtest(host, device, normal, callgraph, selected_tests):
+  tests = load_config_file(os.path.dirname(os.path.realpath(__file__)) + \
+                           '/runtest.conf')
+  host_runner = HostRunner('simpleperf')
+  device_runner = DeviceRunner('simpleperf')
+  report_analyzer = ReportAnalyzer()
+  for test in tests:
+    if selected_tests is not None:
+      if test.test_name not in selected_tests:
+        continue
+    if host and normal:
+      host_runner.record(test.executable_name, 'perf.data')
+      host_runner.report('perf.data', 'perf.report',
+                         additional_options = test.report_options)
+      result = report_analyzer.check_report_file(
+          test, 'perf.report', False)
+      print 'test %s on host %s' % (
+          test.test_name, 'Succeeded' if result else 'Failed')
+      if not result:
+        exit(1)
+
+    if device and normal:
+      device_runner.record(test.executable_name, 'perf.data')
+      device_runner.report('perf.data', 'perf.report',
+                           additional_options = test.report_options)
+      result = report_analyzer.check_report_file(test, 'perf.report', False)
+      print 'test %s on device %s' % (
+          test.test_name, 'Succeeded' if result else 'Failed')
+      if not result:
+        exit(1)
+
+    if host and callgraph:
+      host_runner.record(
+          test.executable_name,
+          'perf_g.data',
+          additional_options=['-g', '-f', '1000'])
+      host_runner.report(
+          'perf_g.data',
+          'perf_g.report',
+          additional_options=['-g', 'callee'] + test.report_options)
+      result = report_analyzer.check_report_file(test, 'perf_g.report', True)
+      print 'call-graph test %s on host %s' % (
+          test.test_name, 'Succeeded' if result else 'Failed')
+      if not result:
+        exit(1)
+
+    if device and callgraph:
+      # Decrease sampling frequency by -f 1000 to avoid losing records
+      # while recording call-graph.
+      device_runner.record(
+          test.executable_name,
+          'perf_g.data',
+          additional_options=['-g', '-f', '1000'])
+      device_runner.report(
+          'perf_g.data',
+          'perf_g.report',
+          additional_options=['-g', 'callee'] + test.report_options)
+      result = report_analyzer.check_report_file(test, 'perf_g.report', True)
+      print 'call-graph test %s on device %s' % (
+          test.test_name, 'Succeeded' if result else 'Failed')
+      if not result:
+        exit(1)
+
+def main():
+  host = True
+  device = True
+  normal = True
+  callgraph = True
+  selected_tests = None
+  i = 1
+  while i < len(sys.argv):
+    if sys.argv[i] == '--host':
+      host = True
+      device = False
+    elif sys.argv[i] == '--device':
+      host = False
+      device = True
+    elif sys.argv[i] == '--normal':
+      normal = True
+      callgraph = False
+    elif sys.argv[i] == '--callgraph':
+      normal = False
+      callgraph = True
+    elif sys.argv[i] == '--test':
+      if i < len(sys.argv):
+        i += 1
+        for test in sys.argv[i].split(','):
+          if selected_tests is None:
+            selected_tests = {}
+          selected_tests[test] = True
+    i += 1
+  runtest(host, device, normal, callgraph, selected_tests)
+
+if __name__ == '__main__':
+  main()
diff --git a/simpleperf/runtest/two_functions.cpp b/simpleperf/runtest/two_functions.cpp
new file mode 100644
index 0000000..1d3e389
--- /dev/null
+++ b/simpleperf/runtest/two_functions.cpp
@@ -0,0 +1,24 @@
+#include <stdlib.h>
+
+constexpr int LOOP_COUNT = 100000000;
+
+volatile int a[2];
+// Stores the loop counter into one element of the global array `a`,
+// LOOP_COUNT times. The element index comes from atoi("0") rather than a
+// literal -- presumably so the compiler cannot resolve the target at compile
+// time (TODO confirm).
+void Function1() {
+  volatile int* p = a + atoi("0");
+  for (int i = 0; i < LOOP_COUNT; ++i) {
+    *p = i;
+  }
+}
+
+// Same loop as Function1, but the index comes from atoi("1"), so the stores
+// go to the second element of `a`.
+void Function2() {
+  volatile int* p = a + atoi("1");
+  for (int i = 0; i < LOOP_COUNT; ++i) {
+    *p = i;
+  }
+}
+
+// Runs Function1 and then Function2 once each; both execute LOOP_COUNT
+// iterations of the same store loop.
+int main() {
+  Function1();
+  Function2();
+  return 0;
+}
diff --git a/simpleperf/sample_tree.h b/simpleperf/sample_tree.h
new file mode 100644
index 0000000..7091ce0
--- /dev/null
+++ b/simpleperf/sample_tree.h
@@ -0,0 +1,324 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_SAMPLE_TREE_H_
+#define SIMPLE_PERF_SAMPLE_TREE_H_
+
+#include "callchain.h"
+#include "dwarf_unwind.h"
+#include "perf_regs.h"
+#include "record.h"
+#include "SampleComparator.h"
+#include "SampleDisplayer.h"
+#include "thread_tree.h"
+
+// A SampleTree is a collection of samples. A profiling report is mainly about
+// constructing a SampleTree and display it. There are three steps involved:
+// build the tree, sort the tree, and display it. For example, if we want to
+// show how many cpu-cycles are spent in different functions, we should do as
+// follows:
+// 1. Build a SampleTree from SampleRecords with each sample containing
+//    (cpu-cycles, function name). When building the tree, we should merge
+//    samples containing the same function name.
+// 2. Sort the SampleTree by cpu-cycles in the sample. As we want to display the
+//    samples in a decreasing order of cpu-cycles, we should sort it like this.
+// 3. Display the SampleTree, each sample prints its (cpu-cycles, function name)
+//    pair.
+//
+// We represent the three steps with three template classes.
+// 1. A SampleTree is built by SampleTreeBuilder. The comparator passed in
+//    SampleTreeBuilder's constructor decides the property of samples should be
+//    merged together.
+// 2. After a SampleTree is built and got from SampleTreeBuilder, it should be
+//    sorted by SampleTreeSorter. The sort result decides the order to show
+//    samples.
+// 3. At last, the sorted SampleTree is passed to SampleTreeDisplayer, which
+//    displays each sample in the SampleTree.
+
+// SampleTreeBuilder turns SampleRecords into a set of merged EntryT samples.
+// Subclasses provide the Create*/Merge/GetThreadOfSample hooks; this class
+// owns every stored entry (sample_storage_) and keeps the sets used to find
+// an existing entry that compares equal under the comparator.
+template <typename EntryT, typename AccumulateInfoT>
+class SampleTreeBuilder {
+ public:
+  explicit SampleTreeBuilder(SampleComparator<EntryT> comparator)
+      : sample_set_(comparator),
+        accumulate_callchain_(false),
+        sample_comparator_(comparator),
+        callchain_sample_set_(comparator),
+        use_branch_address_(false),
+        build_callchain_(false),
+        use_caller_as_callchain_root_(false),
+        strict_unwind_arch_check_(false) {}
+
+  virtual ~SampleTreeBuilder() {}
+
+  void SetBranchSampleOption(bool use_branch_address) {
+    use_branch_address_ = use_branch_address;
+  }
+
+  void SetCallChainSampleOptions(bool accumulate_callchain,
+                                 bool build_callchain,
+                                 bool use_caller_as_callchain_root,
+                                 bool strict_unwind_arch_check) {
+    accumulate_callchain_ = accumulate_callchain;
+    build_callchain_ = build_callchain;
+    use_caller_as_callchain_root_ = use_caller_as_callchain_root;
+    strict_unwind_arch_check_ = strict_unwind_arch_check;
+  }
+
+  // Adds one SampleRecord to the tree.
+  // - With use_branch_address_ set and a branch stack in the record, one
+  //   branch sample is created per stack item whose from and to are both
+  //   non-zero, and nothing else is done.
+  // - Otherwise a sample is created for the record. If accumulate_callchain_
+  //   is set, a callchain sample is also created for each ip on the record's
+  //   callchain (plus ips from UnwindCallChain when user regs and stack data
+  //   are present), and, if build_callchain_ is set, the callchain is attached
+  //   to the entries on it.
+  void ProcessSampleRecord(const SampleRecord& r) {
+    if (use_branch_address_ && (r.sample_type & PERF_SAMPLE_BRANCH_STACK)) {
+      for (uint64_t i = 0; i < r.branch_stack_data.stack_nr; ++i) {
+        auto& item = r.branch_stack_data.stack[i];
+        if (item.from != 0 && item.to != 0) {
+          CreateBranchSample(r, item);
+        }
+      }
+      return;
+    }
+    bool in_kernel = r.InKernel();
+    AccumulateInfoT acc_info;
+    EntryT* sample = CreateSample(r, in_kernel, &acc_info);
+    if (sample == nullptr) {
+      return;
+    }
+    if (accumulate_callchain_) {
+      std::vector<uint64_t> ips;
+      if (r.sample_type & PERF_SAMPLE_CALLCHAIN) {
+        ips.insert(ips.end(), r.callchain_data.ips,
+                   r.callchain_data.ips + r.callchain_data.ip_nr);
+      }
+      const ThreadEntry* thread = GetThreadOfSample(sample);
+      // Use stack_user_data.data.size() instead of stack_user_data.dyn_size, to
+      // make up for the missing kernel patch in N9. See b/22612370.
+      // NOTE(review): the code below calls r.GetValidStackSize() rather than
+      // stack_user_data.data.size(); the comment above may be stale -- confirm.
+      if (thread != nullptr && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
+          (r.regs_user_data.reg_mask != 0) &&
+          (r.sample_type & PERF_SAMPLE_STACK_USER) &&
+          (r.GetValidStackSize() > 0)) {
+        RegSet regs =
+            CreateRegSet(r.regs_user_data.reg_mask, r.regs_user_data.regs);
+        ArchType arch = GetArchForAbi(ScopedCurrentArch::GetCurrentArch(),
+                                      r.regs_user_data.abi);
+        std::vector<uint64_t> unwind_ips =
+            UnwindCallChain(arch, *thread, regs, r.stack_user_data.data,
+                            r.GetValidStackSize(), strict_unwind_arch_check_);
+        if (!unwind_ips.empty()) {
+          ips.push_back(PERF_CONTEXT_USER);
+          ips.insert(ips.end(), unwind_ips.begin(), unwind_ips.end());
+        }
+      }
+
+      // callchain[0] is the sample itself; entries for the ips follow in the
+      // order the ips appear.
+      std::vector<EntryT*> callchain;
+      callchain.push_back(sample);
+
+      bool first_ip = true;
+      for (auto& ip : ips) {
+        if (ip >= PERF_CONTEXT_MAX) {
+          // Values at or above PERF_CONTEXT_MAX are markers, not addresses:
+          // they switch whether the following ips are treated as kernel ips.
+          switch (ip) {
+            case PERF_CONTEXT_KERNEL:
+              in_kernel = true;
+              break;
+            case PERF_CONTEXT_USER:
+              in_kernel = false;
+              break;
+            default:
+              LOG(DEBUG) << "Unexpected perf_context in callchain: " << ip;
+          }
+        } else {
+          if (first_ip) {
+            first_ip = false;
+            // Remove duplication with sampled ip.
+            if (ip == r.ip_data.ip) {
+              continue;
+            }
+          }
+          EntryT* callchain_sample =
+              CreateCallChainSample(sample, ip, in_kernel, callchain, acc_info);
+          if (callchain_sample == nullptr) {
+            // Stop at the first ip for which no entry could be created.
+            break;
+          }
+          callchain.push_back(callchain_sample);
+        }
+      }
+
+      if (build_callchain_) {
+        std::set<EntryT*> added_set;
+        if (use_caller_as_callchain_root_) {
+          std::reverse(callchain.begin(), callchain.end());
+        }
+        // Repeatedly take the front entry off the chain and attach what is
+        // left of the chain to it. The last entry gets no callchain.
+        while (callchain.size() >= 2) {
+          EntryT* sample = callchain[0];
+          callchain.erase(callchain.begin());
+          // Add only once for recursive calls on callchain.
+          if (added_set.find(sample) != added_set.end()) {
+            continue;
+          }
+          added_set.insert(sample);
+          InsertCallChainForSample(sample, callchain, acc_info);
+        }
+      }
+    }
+  }
+
+  // Returns the entries of sample_set_, in the set's iteration order.
+  std::vector<EntryT*> GetSamples() const {
+    std::vector<EntryT*> result;
+    for (auto& entry : sample_set_) {
+      result.push_back(entry);
+    }
+    return result;
+  }
+
+ protected:
+  // Hooks implemented by subclasses. ProcessSampleRecord treats a nullptr
+  // return from CreateSample/CreateCallChainSample as "no entry".
+  virtual EntryT* CreateSample(const SampleRecord& r, bool in_kernel,
+                               AccumulateInfoT* acc_info) = 0;
+  virtual EntryT* CreateBranchSample(const SampleRecord& r,
+                                     const BranchStackItemType& item) = 0;
+  virtual EntryT* CreateCallChainSample(const EntryT* sample, uint64_t ip,
+                                        bool in_kernel,
+                                        const std::vector<EntryT*>& callchain,
+                                        const AccumulateInfoT& acc_info) = 0;
+  virtual const ThreadEntry* GetThreadOfSample(EntryT*) = 0;
+  virtual uint64_t GetPeriodForCallChain(const AccumulateInfoT& acc_info) = 0;
+  // Returns true if the sample should be kept; the default keeps everything.
+  virtual bool FilterSample(const EntryT*) { return true; }
+
+  virtual void UpdateSummary(const EntryT*) {}
+
+  virtual void MergeSample(EntryT* sample1, EntryT* sample2) = 0;
+
+  // Inserts sample into sample_set_, or merges it into the equal entry that
+  // is already there. Returns the entry held by the set, or nullptr if sample
+  // is null or rejected by FilterSample. When merged, the incoming object is
+  // not stored and is destroyed when this function returns.
+  EntryT* InsertSample(std::unique_ptr<EntryT> sample) {
+    if (sample == nullptr || !FilterSample(sample.get())) {
+      return nullptr;
+    }
+    UpdateSummary(sample.get());
+    EntryT* result;
+    auto it = sample_set_.find(sample.get());
+    if (it == sample_set_.end()) {
+      result = sample.get();
+      sample_set_.insert(sample.get());
+      sample_storage_.push_back(std::move(sample));
+    } else {
+      result = *it;
+      MergeSample(*it, sample.get());
+    }
+    return result;
+  }
+
+  // Like InsertSample, but for entries created for callchain ips:
+  // - an entry rejected by FilterSample is kept in callchain_sample_set_
+  //   (never merged) so it can still appear in callchains;
+  // - an entry equal to one already on `callchain` is returned without being
+  //   inserted or merged again.
+  EntryT* InsertCallChainSample(std::unique_ptr<EntryT> sample,
+                                const std::vector<EntryT*>& callchain) {
+    if (sample == nullptr) {
+      return nullptr;
+    }
+    if (!FilterSample(sample.get())) {
+      // Store in callchain_sample_set_ for use in other EntryT's callchain.
+      auto it = callchain_sample_set_.find(sample.get());
+      if (it != callchain_sample_set_.end()) {
+        return *it;
+      }
+      EntryT* result = sample.get();
+      callchain_sample_set_.insert(sample.get());
+      sample_storage_.push_back(std::move(sample));
+      return result;
+    }
+
+    auto it = sample_set_.find(sample.get());
+    if (it != sample_set_.end()) {
+      // Note: this local shadows the unique_ptr parameter of the same name.
+      EntryT* sample = *it;
+      // Process only once for recursive function call.
+      if (std::find(callchain.begin(), callchain.end(), sample) !=
+          callchain.end()) {
+        return sample;
+      }
+    }
+    return InsertSample(std::move(sample));
+  }
+
+  // Adds `callchain` to sample's callchain with the period given by
+  // GetPeriodForCallChain; entries are compared with the sample comparator's
+  // IsSameSample.
+  void InsertCallChainForSample(EntryT* sample,
+                                const std::vector<EntryT*>& callchain,
+                                const AccumulateInfoT& acc_info) {
+    uint64_t period = GetPeriodForCallChain(acc_info);
+    sample->callchain.AddCallChain(
+        callchain, period, [&](const EntryT* s1, const EntryT* s2) {
+          return sample_comparator_.IsSameSample(s1, s2);
+        });
+  }
+
+  std::set<EntryT*, SampleComparator<EntryT>> sample_set_;
+  bool accumulate_callchain_;
+
+ private:
+  const SampleComparator<EntryT> sample_comparator_;
+  // If a CallChainSample is filtered out, it is stored in callchain_sample_set_
+  // and only used in other EntryT's callchain.
+  std::set<EntryT*, SampleComparator<EntryT>> callchain_sample_set_;
+  // Owns every entry referenced by sample_set_ and callchain_sample_set_.
+  std::vector<std::unique_ptr<EntryT>> sample_storage_;
+
+  bool use_branch_address_;
+  bool build_callchain_;
+  bool use_caller_as_callchain_root_;
+  bool strict_unwind_arch_check_;
+};
+
+// SampleTreeSorter orders the samples of a tree with a SampleComparator and
+// can also sort the callchain of each sample by period.
+template <typename EntryT>
+class SampleTreeSorter {
+ public:
+  explicit SampleTreeSorter(SampleComparator<EntryT> comparator)
+      : comparator_(comparator) {}
+
+  virtual ~SampleTreeSorter() {}
+
+  // Sorts v in place. With sort_callchain set, every sample's callchain is
+  // sorted first. The samples themselves are only reordered when the
+  // comparator is not empty.
+  void Sort(std::vector<EntryT*>& v, bool sort_callchain) {
+    if (sort_callchain) {
+      for (EntryT* entry : v) {
+        SortCallChain(entry);
+      }
+    }
+    if (comparator_.empty()) {
+      return;
+    }
+    auto less = [this](const EntryT* lhs, const EntryT* rhs) {
+      return comparator_(lhs, rhs);
+    };
+    std::sort(v.begin(), v.end(), less);
+  }
+
+ protected:
+  void SortCallChain(EntryT* sample) { sample->callchain.SortByPeriod(); }
+
+ private:
+  SampleComparator<EntryT> comparator_;
+};
+
+// SampleTreeDisplayer prints a list of samples through a SampleDisplayer.
+template <typename EntryT, typename InfoT>
+class SampleTreeDisplayer {
+ public:
+  explicit SampleTreeDisplayer(SampleDisplayer<EntryT, InfoT> displayer)
+      : displayer_(displayer) {}
+
+  virtual ~SampleTreeDisplayer() {}
+
+  // Writes samples to fp: the displayer is first shown every sample through
+  // AdjustWidth, then prints the names, then prints each sample in order.
+  void DisplaySamples(FILE* fp, const std::vector<EntryT*>& samples,
+                      const InfoT* info) {
+    displayer_.SetInfo(info);
+    for (EntryT* entry : samples) {
+      displayer_.AdjustWidth(entry);
+    }
+    displayer_.PrintNames(fp);
+    for (EntryT* entry : samples) {
+      displayer_.PrintSample(fp, entry);
+    }
+  }
+
+ private:
+  SampleDisplayer<EntryT, InfoT> displayer_;
+};
+
+#endif  // SIMPLE_PERF_SAMPLE_TREE_H_
diff --git a/simpleperf/sample_tree_test.cpp b/simpleperf/sample_tree_test.cpp
new file mode 100644
index 0000000..e288968
--- /dev/null
+++ b/simpleperf/sample_tree_test.cpp
@@ -0,0 +1,234 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "sample_tree.h"
+#include "thread_tree.h"
+
+namespace {
+
+// Minimal sample record used to exercise SampleTreeBuilder in this file.
+// The tests below build expected values with the positional constructor, so
+// the parameter order is part of the contract.
+struct SampleEntry {
+  int pid;
+  int tid;
+  const char* thread_comm;  // Not owned; compared as a C string in the tests.
+  std::string dso_name;
+  uint64_t map_start_addr;
+  size_t sample_count;  // Summed by TestSampleTreeBuilder::MergeSample().
+
+  // |sample_count| defaults to 1, i.e. a single, not yet merged sample.
+  SampleEntry(int pid, int tid, const char* thread_comm,
+              const std::string& dso_name, uint64_t map_start_addr,
+              size_t sample_count = 1u)
+      : pid(pid),
+        tid(tid),
+        thread_comm(thread_comm),
+        dso_name(dso_name),
+        map_start_addr(map_start_addr),
+        sample_count(sample_count) {}
+};
+
+// Comparison functions over individual SampleEntry fields, registered with
+// TestSampleComparator below.
+// NOTE(review): the BUILD_COMPARE_* macros are defined outside this file;
+// presumably each expands to a function named by its first argument that
+// compares the member expression given as its second argument -- confirm.
+BUILD_COMPARE_VALUE_FUNCTION(TestComparePid, pid);
+BUILD_COMPARE_VALUE_FUNCTION(TestCompareTid, tid);
+BUILD_COMPARE_STRING_FUNCTION(TestCompareDsoName, dso_name.c_str());
+BUILD_COMPARE_VALUE_FUNCTION(TestCompareMapStartAddr, map_start_addr);
+
+// Comparator used by the tests. Compare functions are registered in the
+// order pid, tid, comm, dso name, map start address.
+class TestSampleComparator : public SampleComparator<SampleEntry> {
+ public:
+  TestSampleComparator() {
+    AddCompareFunction(TestComparePid);
+    AddCompareFunction(TestCompareTid);
+    // CompareComm is not defined in this file (it comes from an included
+    // header).
+    AddCompareFunction(CompareComm);
+    AddCompareFunction(TestCompareDsoName);
+    AddCompareFunction(TestCompareMapStartAddr);
+  }
+};
+
+// SampleTreeBuilder used by the tests. Instead of parsing perf records, the
+// tests feed it (pid, tid, ip) values through AddSample(), which resolves
+// them against a ThreadTree and inserts the resulting SampleEntry directly.
+class TestSampleTreeBuilder : public SampleTreeBuilder<SampleEntry, int> {
+ public:
+  // |thread_tree| is not owned and must outlive this builder.
+  explicit TestSampleTreeBuilder(ThreadTree* thread_tree)
+      : SampleTreeBuilder(TestSampleComparator()), thread_tree_(thread_tree) {}
+
+  // Looks up the thread for (pid, tid) (creating it if needed) and the map
+  // containing |ip|, then inserts a SampleEntry describing that thread and
+  // map. |in_kernel| selects the kernel maps instead of the thread's maps.
+  void AddSample(int pid, int tid, uint64_t ip, bool in_kernel) {
+    const ThreadEntry* thread = thread_tree_->FindThreadOrNew(pid, tid);
+    const MapEntry* map = thread_tree_->FindMap(thread, ip, in_kernel);
+    InsertSample(std::unique_ptr<SampleEntry>(new SampleEntry(
+        pid, tid, thread->comm, map->dso->Path(), map->start_addr)));
+  }
+
+ protected:
+  // Stubs for the record-driven hooks of the base class; AddSample() above
+  // calls InsertSample() directly instead of going through them.
+  SampleEntry* CreateSample(const SampleRecord&, bool, int*) override {
+    return nullptr;
+  }
+  SampleEntry* CreateBranchSample(const SampleRecord&,
+                                  const BranchStackItemType&) override {
+    return nullptr;
+  }
+  SampleEntry* CreateCallChainSample(const SampleEntry*, uint64_t, bool,
+                                     const std::vector<SampleEntry*>&,
+                                     const int&) override {
+    return nullptr;
+  }
+  const ThreadEntry* GetThreadOfSample(SampleEntry*) override {
+    return nullptr;
+  }
+  uint64_t GetPeriodForCallChain(const int&) override { return 0; }
+
+  // Folds |sample2| into |sample1| by adding up the sample counts.
+  void MergeSample(SampleEntry* sample1, SampleEntry* sample2) override {
+    sample1->sample_count += sample2->sample_count;
+  }
+
+ private:
+  ThreadTree* thread_tree_;
+};
+
+// Asserts that |sample| equals |expected| field by field.
+// gtest's ASSERT_* macros return from the enclosing function on failure, so
+// *has_error is set to true up front and only cleared once every assertion
+// has passed. The caller inspects it to learn whether the comparison failed.
+static void SampleMatchExpectation(const SampleEntry& sample,
+                                   const SampleEntry& expected,
+                                   bool* has_error) {
+  *has_error = true;
+  ASSERT_EQ(expected.pid, sample.pid);
+  ASSERT_EQ(expected.tid, sample.tid);
+  ASSERT_STREQ(expected.thread_comm, sample.thread_comm);
+  ASSERT_EQ(expected.dso_name, sample.dso_name);
+  ASSERT_EQ(expected.map_start_addr, sample.map_start_addr);
+  ASSERT_EQ(expected.sample_count, sample.sample_count);
+  *has_error = false;
+}
+
+// Asserts that |samples| and |expected_samples| have the same length and
+// match element by element, reporting the position of the first mismatch.
+static void CheckSamples(const std::vector<SampleEntry*>& samples,
+                         const std::vector<SampleEntry>& expected_samples) {
+  ASSERT_EQ(samples.size(), expected_samples.size());
+  size_t pos = 0;
+  for (const SampleEntry* sample : samples) {
+    bool has_error;
+    SampleMatchExpectation(*sample, expected_samples[pos], &has_error);
+    ASSERT_FALSE(has_error) << "Error matching sample at pos " << pos;
+    ++pos;
+  }
+}
+}
+
+// Fixture providing a ThreadTree with three threads, four user maps and one
+// kernel map, plus a TestSampleTreeBuilder bound to that tree.
+class SampleTreeTest : public testing::Test {
+ protected:
+  void SetUp() override {
+    // Threads: (pid, tid, comm).
+    thread_tree.AddThread(1, 1, "p1t1");
+    thread_tree.AddThread(1, 11, "p1t11");
+    thread_tree.AddThread(2, 2, "p2t2");
+
+    // User maps: (pid, tid, start_addr, len, pgoff, time, filename).
+    thread_tree.AddThreadMap(1, 1, 1, 5, 0, 0, "process1_thread1");
+    thread_tree.AddThreadMap(1, 1, 6, 5, 0, 0, "process1_thread1_map2");
+    thread_tree.AddThreadMap(1, 11, 1, 10, 0, 0, "process1_thread11");
+    thread_tree.AddThreadMap(2, 2, 1, 20, 0, 0, "process2_thread2");
+
+    // Kernel map: (start_addr, len, pgoff, time, filename).
+    thread_tree.AddKernelMap(10, 20, 0, 0, "kernel");
+
+    sample_tree_builder.reset(new TestSampleTreeBuilder(&thread_tree));
+  }
+
+  // Compares the builder's current samples with |expected_samples|.
+  void CheckSamples(const std::vector<SampleEntry>& expected_samples) {
+    ::CheckSamples(sample_tree_builder->GetSamples(), expected_samples);
+  }
+
+  ThreadTree thread_tree;
+  std::unique_ptr<TestSampleTreeBuilder> sample_tree_builder;
+};
+
+// Three ips inside the same map [1, 6) collapse into one entry of count 3.
+TEST_F(SampleTreeTest, ip_in_map) {
+  for (uint64_t ip : {1, 2, 5}) {
+    sample_tree_builder->AddSample(1, 1, ip, false);
+  }
+  CheckSamples({
+      SampleEntry(1, 1, "p1t1", "process1_thread1", 1, 3),
+  });
+}
+
+// Samples from different processes stay in separate entries.
+TEST_F(SampleTreeTest, different_pid) {
+  sample_tree_builder->AddSample(1, 1, 1, false);
+  sample_tree_builder->AddSample(2, 2, 1, false);
+  CheckSamples({
+      SampleEntry(1, 1, "p1t1", "process1_thread1", 1, 1),
+      SampleEntry(2, 2, "p2t2", "process2_thread2", 1, 1),
+  });
+}
+
+// Samples from different threads of one process stay in separate entries.
+TEST_F(SampleTreeTest, different_tid) {
+  sample_tree_builder->AddSample(1, 1, 1, false);
+  sample_tree_builder->AddSample(1, 11, 1, false);
+  CheckSamples({
+      SampleEntry(1, 1, "p1t1", "process1_thread1", 1, 1),
+      SampleEntry(1, 11, "p1t11", "process1_thread11", 1, 1),
+  });
+}
+
+// Calling AddThread() again for an existing tid changes its comm; samples
+// taken before and after the change are kept apart.
+TEST_F(SampleTreeTest, different_comm) {
+  sample_tree_builder->AddSample(1, 1, 1, false);
+  thread_tree.AddThread(1, 1, "p1t1_comm2");
+  sample_tree_builder->AddSample(1, 1, 1, false);
+  CheckSamples({
+      SampleEntry(1, 1, "p1t1", "process1_thread1", 1, 1),
+      SampleEntry(1, 1, "p1t1_comm2", "process1_thread1", 1, 1),
+  });
+}
+
+// ip 1 falls in the map starting at 1, ip 6 in the map starting at 6.
+TEST_F(SampleTreeTest, different_map) {
+  sample_tree_builder->AddSample(1, 1, 1, false);
+  sample_tree_builder->AddSample(1, 1, 6, false);
+  CheckSamples({
+      SampleEntry(1, 1, "p1t1", "process1_thread1", 1, 1),
+      SampleEntry(1, 1, "p1t1", "process1_thread1_map2", 6, 1),
+  });
+}
+
+// ips outside every map of thread (1, 1) are all attributed to the unknown
+// map, so they merge into a single entry.
+TEST_F(SampleTreeTest, unmapped_sample) {
+  for (uint64_t ip : {0, 31, 70}) {
+    sample_tree_builder->AddSample(1, 1, ip, false);
+  }
+  // Match the unknown map.
+  CheckSamples({
+      SampleEntry(1, 1, "p1t1", "unknown", 0, 3),
+  });
+}
+
+// The same ip resolves to the kernel map when in_kernel is true and to the
+// thread's own map (the one starting at 6) when it is false.
+TEST_F(SampleTreeTest, map_kernel) {
+  sample_tree_builder->AddSample(1, 1, 10, true);
+  sample_tree_builder->AddSample(1, 1, 10, false);
+  CheckSamples({
+      SampleEntry(1, 1, "p1t1", "kernel", 10, 1),
+      SampleEntry(1, 1, "p1t1", "process1_thread1_map2", 6, 1),
+  });
+}
+
+// Adding a map that overlaps existing ones trims or splits the old maps.
+// After all three AddThreadMap() calls the layout is:
+//   map1 [1, 2), map3 [2, 9), map2 [9, 25).
+// Each sample is attributed to the layout in effect when it was added.
+TEST(sample_tree, overlapped_map) {
+  ThreadTree thread_tree;
+  TestSampleTreeBuilder sample_tree_builder(&thread_tree);
+  thread_tree.AddThread(1, 1, "thread1");
+
+  thread_tree.AddThreadMap(1, 1, 1, 10, 0, 0, "map1");  // Add map 1.
+  sample_tree_builder.AddSample(1, 1, 5, false);        // Hit map 1.
+
+  thread_tree.AddThreadMap(1, 1, 5, 20, 0, 0, "map2");  // Add map 2.
+  sample_tree_builder.AddSample(1, 1, 6, false);        // Hit map 2.
+  sample_tree_builder.AddSample(1, 1, 4, false);        // Hit map 1.
+
+  thread_tree.AddThreadMap(1, 1, 2, 7, 0, 0, "map3");   // Add map 3.
+  sample_tree_builder.AddSample(1, 1, 7, false);        // Hit map 3.
+  sample_tree_builder.AddSample(1, 1, 10, false);       // Hit map 2.
+
+  CheckSamples(sample_tree_builder.GetSamples(),
+               {
+                   SampleEntry(1, 1, "thread1", "map1", 1, 2),
+                   SampleEntry(1, 1, "thread1", "map2", 5, 1),
+                   SampleEntry(1, 1, "thread1", "map2", 9, 1),
+                   SampleEntry(1, 1, "thread1", "map3", 2, 1),
+               });
+}
diff --git a/simpleperf/simpleperf_report.py b/simpleperf/simpleperf_report.py
new file mode 100644
index 0000000..225998a
--- /dev/null
+++ b/simpleperf/simpleperf_report.py
@@ -0,0 +1,260 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Simpleperf gui reporter: provide gui interface for simpleperf report command.
+
+There are two ways to use gui reporter. One way is to pass it a report file
+generated by simpleperf report command, and reporter will display it. The
+other way is to pass it any arguments you want to use when calling
+simpleperf report command. The reporter will call `simpleperf report` to
+generate report file, and display it.
+"""
+
+import os.path
+import re
+import subprocess
+import sys
+from tkFont import *
+from Tkinter import *
+from ttk import *
+
+PAD_X = 3
+PAD_Y = 3
+
+
+class CallTreeNode(object):
+
+  """Representing a node in call-graph."""
+
+  def __init__(self, percentage, function_name):
+    self.percentage = percentage
+    self.call_stack = [function_name]
+    self.children = []
+
+  def add_call(self, function_name):
+    self.call_stack.append(function_name)
+
+  def add_child(self, node):
+    self.children.append(node)
+
+  def __str__(self):
+    strs = self.dump()
+    return '\n'.join(strs)
+
+  def dump(self):
+    strs = []
+    strs.append('CallTreeNode percentage = %.2f' % self.percentage)
+    for function_name in self.call_stack:
+      strs.append(' %s' % function_name)
+    for child in self.children:
+      child_strs = child.dump()
+      strs.extend(['  ' + x for x in child_strs])
+    return strs
+
+
+class ReportItem(object):
+
+  """Representing one item in report, may contain a CallTree."""
+
+  def __init__(self, raw_line):
+    self.raw_line = raw_line
+    self.call_tree = None
+
+  def __str__(self):
+    strs = []
+    strs.append('ReportItem (raw_line %s)' % self.raw_line)
+    if self.call_tree is not None:
+      strs.append('%s' % self.call_tree)
+    return '\n'.join(strs)
+
+
+def parse_report_items(lines):
+  report_items = []
+  cur_report_item = None
+  call_tree_stack = {}
+  vertical_columns = []
+  last_node = None
+
+  for line in lines:
+    if not line:
+      continue
+    if not line[0].isspace():
+      cur_report_item = ReportItem(line)
+      report_items.append(cur_report_item)
+      # Each report item can have different column depths.
+      vertical_columns = []
+    else:
+      for i in range(len(line)):
+        if line[i] == '|':
+          if not vertical_columns or vertical_columns[-1] < i:
+            vertical_columns.append(i)
+
+      if not line.strip('| \t'):
+        continue
+      if line.find('-') == -1:
+        line = line.strip('| \t')
+        function_name = line
+        last_node.add_call(function_name)
+      else:
+        pos = line.find('-')
+        depth = -1
+        for i in range(len(vertical_columns)):
+          if pos >= vertical_columns[i]:
+            depth = i
+        assert depth != -1
+
+        line = line.strip('|- \t')
+        m = re.search(r'^([\d\.]+)%[-\s]+(.+)$', line)
+        if m:
+          percentage = float(m.group(1))
+          function_name = m.group(2)
+        else:
+          percentage = 100.0
+          function_name = line
+
+        node = CallTreeNode(percentage, function_name)
+        if depth == 0:
+          cur_report_item.call_tree = node
+        else:
+          call_tree_stack[depth - 1].add_child(node)
+        call_tree_stack[depth] = node
+        last_node = node
+
+  return report_items
+
+
+class ReportWindow(object):
+
+  """A window used to display report file."""
+
+  def __init__(self, master, report_context, title_line, report_items):
+    frame = Frame(master)
+    frame.pack(fill=BOTH, expand=1)
+
+    font = Font(family='courier', size=10)
+
+    # Report Context
+    for line in report_context:
+      label = Label(frame, text=line, font=font)
+      label.pack(anchor=W, padx=PAD_X, pady=PAD_Y)
+
+    # Space
+    label = Label(frame, text='', font=font)
+    label.pack(anchor=W, padx=PAD_X, pady=PAD_Y)
+
+    # Title
+    label = Label(frame, text='  ' + title_line, font=font)
+    label.pack(anchor=W, padx=PAD_X, pady=PAD_Y)
+
+    # Report Items
+    report_frame = Frame(frame)
+    report_frame.pack(fill=BOTH, expand=1)
+
+    yscrollbar = Scrollbar(report_frame)
+    yscrollbar.pack(side=RIGHT, fill=Y)
+    xscrollbar = Scrollbar(report_frame, orient=HORIZONTAL)
+    xscrollbar.pack(side=BOTTOM, fill=X)
+
+    tree = Treeview(report_frame, columns=[title_line], show='')
+    tree.pack(side=LEFT, fill=BOTH, expand=1)
+    tree.tag_configure('set_font', font=font)
+
+    tree.config(yscrollcommand=yscrollbar.set)
+    yscrollbar.config(command=tree.yview)
+    tree.config(xscrollcommand=xscrollbar.set)
+    xscrollbar.config(command=tree.xview)
+
+    self.display_report_items(tree, report_items)
+
+  def display_report_items(self, tree, report_items):
+    for report_item in report_items:
+      prefix_str = '+ ' if report_item.call_tree is not None else '  '
+      id = tree.insert(
+          '',
+          'end',
+          None,
+          values=[
+              prefix_str +
+              report_item.raw_line],
+          tag='set_font')
+      if report_item.call_tree is not None:
+        self.display_call_tree(tree, id, report_item.call_tree, 1)
+
+  def display_call_tree(self, tree, parent_id, node, indent):
+    id = parent_id
+    indent_str = '  ' * indent
+
+    if node.percentage != 100.0:
+      percentage_str = '%.2f%%' % node.percentage
+    else:
+      percentage_str = ''
+    first_open = True if node.percentage == 100.0 else False
+
+    for i in range(len(node.call_stack)):
+      s = indent_str
+      s += '+ ' if node.children else '  '
+      s += percentage_str if i == 0 else ' ' * len(percentage_str)
+      s += node.call_stack[i]
+      child_open = first_open if i == 0 else True
+      id = tree.insert(id, 'end', None, values=[s], open=child_open,
+                       tag='set_font')
+
+    for child in node.children:
+      self.display_call_tree(tree, id, child, indent + 1)
+
+
+def display_report_file(report_file):
+  fh = open(report_file, 'r')
+  lines = fh.readlines()
+  fh.close()
+
+  lines = [x.rstrip() for x in lines]
+
+  blank_line_index = -1
+  for i in range(len(lines)):
+    if not lines[i]:
+      blank_line_index = i
+      break
+  assert blank_line_index != -1
+  assert blank_line_index + 1 < len(lines)
+
+  report_context = lines[:blank_line_index]
+  title_line = lines[blank_line_index + 1]
+  report_items = parse_report_items(lines[blank_line_index + 2:])
+
+  root = Tk()
+  ReportWindow(root, report_context, title_line, report_items)
+  root.mainloop()
+
+
+def call_simpleperf_report(args, report_file):
+  output_fh = open(report_file, 'w')
+  args = ['simpleperf', 'report'] + args
+  subprocess.check_call(args, stdout=output_fh)
+  output_fh.close()
+
+
+def main():
+  if len(sys.argv) == 2 and os.path.isfile(sys.argv[1]):
+    display_report_file(sys.argv[1])
+  else:
+    call_simpleperf_report(sys.argv[1:], 'perf.report')
+    display_report_file('perf.report')
+
+
+if __name__ == '__main__':
+  main()
diff --git a/simpleperf/test_util.h b/simpleperf/test_util.h
new file mode 100644
index 0000000..15d11cb
--- /dev/null
+++ b/simpleperf/test_util.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_TEST_UTIL_H_
+#define SIMPLE_PERF_TEST_UTIL_H_
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "read_elf.h"
+#include "workload.h"
+
+// NOTE(review): presumably the duration argument handed to a `sleep`
+// workload in tests -- confirm against the callers.
+static const std::string SLEEP_SEC = "0.001";
+
+// Helpers shared by the unit tests; their definitions live outside this
+// header.
+void CreateProcesses(size_t count, std::vector<std::unique_ptr<Workload>>* workloads);
+
+void ParseSymbol(const ElfFileSymbol& symbol, std::map<std::string, ElfFileSymbol>* symbols);
+void CheckElfFileSymbols(const std::map<std::string, ElfFileSymbol>& symbols);
+
+bool IsRoot();
+
+// Executes TestStatement only when IsRoot() returns true; otherwise logs an
+// INFO message naming the skipped statement. The includer must provide gtest
+// (for GTEST_LOG_).
+#define TEST_IN_ROOT(TestStatement)                                                                \
+  do {                                                                                             \
+    if (IsRoot()) {                                                                                \
+      TestStatement;                                                                               \
+    } else {                                                                                       \
+      GTEST_LOG_(INFO) << "Didn't test \"" << #TestStatement << "\" requires root privileges";     \
+    }                                                                                              \
+  } while (0)
+
+#endif  // SIMPLE_PERF_TEST_UTIL_H_
diff --git a/simpleperf/testdata/data/app/com.example.hellojni-1/base.apk b/simpleperf/testdata/data/app/com.example.hellojni-1/base.apk
new file mode 100644
index 0000000..95ea93a
--- /dev/null
+++ b/simpleperf/testdata/data/app/com.example.hellojni-1/base.apk
Binary files differ
diff --git a/simpleperf/testdata/data/correct_symfs_for_build_id_check/elf_for_build_id_check b/simpleperf/testdata/data/correct_symfs_for_build_id_check/elf_for_build_id_check
new file mode 100755
index 0000000..5c1a9dd
--- /dev/null
+++ b/simpleperf/testdata/data/correct_symfs_for_build_id_check/elf_for_build_id_check
Binary files differ
diff --git a/simpleperf/testdata/data/symfs_for_no_symbol_table_warning/elf b/simpleperf/testdata/data/symfs_for_no_symbol_table_warning/elf
new file mode 100644
index 0000000..a92e41f
--- /dev/null
+++ b/simpleperf/testdata/data/symfs_for_no_symbol_table_warning/elf
Binary files differ
diff --git a/simpleperf/testdata/data/symfs_for_read_elf_file_warning/elf b/simpleperf/testdata/data/symfs_for_read_elf_file_warning/elf
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/simpleperf/testdata/data/symfs_for_read_elf_file_warning/elf
diff --git a/simpleperf/testdata/data/wrong_symfs_for_build_id_check/elf_for_build_id_check b/simpleperf/testdata/data/wrong_symfs_for_build_id_check/elf_for_build_id_check
new file mode 100755
index 0000000..0489a22
--- /dev/null
+++ b/simpleperf/testdata/data/wrong_symfs_for_build_id_check/elf_for_build_id_check
Binary files differ
diff --git a/simpleperf/testdata/elf b/simpleperf/testdata/elf
new file mode 100644
index 0000000..f63c25c
--- /dev/null
+++ b/simpleperf/testdata/elf
Binary files differ
diff --git a/simpleperf/testdata/elf_file_source.cpp b/simpleperf/testdata/elf_file_source.cpp
new file mode 100644
index 0000000..3cfd00b
--- /dev/null
+++ b/simpleperf/testdata/elf_file_source.cpp
@@ -0,0 +1,20 @@
+#include <pthread.h>
+
+// volatile so that each increment below is an actual memory access.
+volatile int GlobalVar;
+
+// Increments GlobalVar once. extern "C" keeps the symbol name unmangled.
+extern "C" void CalledFunc() {
+  GlobalVar++;
+}
+
+// Calls CalledFunc() one million times. extern "C" keeps the symbol name
+// unmangled.
+extern "C" void GlobalFunc() {
+  for (int i = 0; i < 1000000; ++i) {
+    CalledFunc();
+  }
+}
+
+// Calls GlobalFunc() forever; the return statement is never reached.
+// NOTE(review): presumably the source of a checked-in testdata binary, so
+// keep code changes in sync with it -- confirm.
+int main() {
+  while (true) {
+    GlobalFunc();
+  }
+  return 0;
+}
diff --git a/simpleperf/testdata/elf_with_mini_debug_info b/simpleperf/testdata/elf_with_mini_debug_info
new file mode 100644
index 0000000..b3aa967
--- /dev/null
+++ b/simpleperf/testdata/elf_with_mini_debug_info
Binary files differ
diff --git a/simpleperf/testdata/has_embedded_native_libs_apk_perf.data b/simpleperf/testdata/has_embedded_native_libs_apk_perf.data
new file mode 100644
index 0000000..fafbbbc
--- /dev/null
+++ b/simpleperf/testdata/has_embedded_native_libs_apk_perf.data
Binary files differ
diff --git a/simpleperf/testdata/perf.data b/simpleperf/testdata/perf.data
new file mode 100644
index 0000000..64a59da
--- /dev/null
+++ b/simpleperf/testdata/perf.data
Binary files differ
diff --git a/simpleperf/testdata/perf_b.data b/simpleperf/testdata/perf_b.data
new file mode 100644
index 0000000..e514944
--- /dev/null
+++ b/simpleperf/testdata/perf_b.data
Binary files differ
diff --git a/simpleperf/testdata/perf_for_build_id_check.data b/simpleperf/testdata/perf_for_build_id_check.data
new file mode 100644
index 0000000..1012d4b
--- /dev/null
+++ b/simpleperf/testdata/perf_for_build_id_check.data
Binary files differ
diff --git a/simpleperf/testdata/perf_g_fp.data b/simpleperf/testdata/perf_g_fp.data
new file mode 100644
index 0000000..de9cf53
--- /dev/null
+++ b/simpleperf/testdata/perf_g_fp.data
Binary files differ
diff --git a/simpleperf/testdata/perf_with_kernel_symbol.data b/simpleperf/testdata/perf_with_kernel_symbol.data
new file mode 100644
index 0000000..8b1fda1
--- /dev/null
+++ b/simpleperf/testdata/perf_with_kernel_symbol.data
Binary files differ
diff --git a/simpleperf/testdata/perf_with_kmem_slab_callgraph.data b/simpleperf/testdata/perf_with_kmem_slab_callgraph.data
new file mode 100644
index 0000000..cdb691f
--- /dev/null
+++ b/simpleperf/testdata/perf_with_kmem_slab_callgraph.data
Binary files differ
diff --git a/simpleperf/testdata/perf_with_mini_debug_info.data b/simpleperf/testdata/perf_with_mini_debug_info.data
new file mode 100644
index 0000000..0b02b3b
--- /dev/null
+++ b/simpleperf/testdata/perf_with_mini_debug_info.data
Binary files differ
diff --git a/simpleperf/testdata/perf_with_multiple_pids_and_tids.data b/simpleperf/testdata/perf_with_multiple_pids_and_tids.data
new file mode 100644
index 0000000..ef4f0d4
--- /dev/null
+++ b/simpleperf/testdata/perf_with_multiple_pids_and_tids.data
Binary files differ
diff --git a/simpleperf/testdata/perf_with_symbols.data b/simpleperf/testdata/perf_with_symbols.data
new file mode 100644
index 0000000..8571a96
--- /dev/null
+++ b/simpleperf/testdata/perf_with_symbols.data
Binary files differ
diff --git a/simpleperf/testdata/perf_with_symbols_for_nonzero_minvaddr_dso.data b/simpleperf/testdata/perf_with_symbols_for_nonzero_minvaddr_dso.data
new file mode 100644
index 0000000..cf45d3a
--- /dev/null
+++ b/simpleperf/testdata/perf_with_symbols_for_nonzero_minvaddr_dso.data
Binary files differ
diff --git a/simpleperf/testdata/perf_with_two_event_types.data b/simpleperf/testdata/perf_with_two_event_types.data
new file mode 100644
index 0000000..ba9a606
--- /dev/null
+++ b/simpleperf/testdata/perf_with_two_event_types.data
Binary files differ
diff --git a/simpleperf/thread_tree.cpp b/simpleperf/thread_tree.cpp
new file mode 100644
index 0000000..981a5dd
--- /dev/null
+++ b/simpleperf/thread_tree.cpp
@@ -0,0 +1,307 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "thread_tree.h"
+
+#include <inttypes.h>
+
+#include <limits>
+
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
+
+#include "environment.h"
+#include "perf_event.h"
+#include "record.h"
+
+namespace simpleperf {
+
+// Strict ordering of maps by (start_addr, len, time), in that priority.
+bool MapComparator::operator()(const MapEntry* map1,
+                               const MapEntry* map2) const {
+  if (map1->start_addr < map2->start_addr) {
+    return true;
+  }
+  if (map1->start_addr > map2->start_addr) {
+    return false;
+  }
+  // Compare map->len instead of map->get_end_addr() here. Because we set map's
+  // len to std::numeric_limits<uint64_t>::max() in FindMapByAddr(), which makes
+  // map->get_end_addr() overflow.
+  if (map1->len < map2->len) {
+    return true;
+  }
+  if (map1->len > map2->len) {
+    return false;
+  }
+  // Same range: the earlier map sorts first; identical maps are equivalent.
+  return map1->time < map2->time;
+}
+
+// Sets the comm of thread |tid| to |comm|, creating the thread entry (with
+// no maps) if it does not exist yet. Every comm string is kept alive in
+// thread_comm_storage_, so pointers handed out earlier remain valid.
+void ThreadTree::AddThread(int pid, int tid, const std::string& comm) {
+  auto it = thread_tree_.find(tid);
+  if (it == thread_tree_.end()) {
+    std::unique_ptr<ThreadEntry> entry(new ThreadEntry{
+        pid, tid,
+        "unknown",                             // comm
+        std::set<MapEntry*, MapComparator>(),  // maps
+    });
+    auto inserted = thread_tree_.insert(std::make_pair(tid, std::move(entry)));
+    CHECK(inserted.second);
+    it = inserted.first;
+  }
+  std::unique_ptr<std::string> stored_comm(new std::string(comm));
+  thread_comm_storage_.push_back(std::move(stored_comm));
+  it->second->comm = thread_comm_storage_.back()->c_str();
+}
+
+// Makes thread (pid, tid) start out with the comm and the maps of its parent
+// (ppid, ptid). Either thread is created if it is not known yet; the parent
+// is looked up first. The child gets its own copy of the parent's map set,
+// while the MapEntry objects the set points to are shared.
+void ThreadTree::ForkThread(int pid, int tid, int ppid, int ptid) {
+  ThreadEntry* parent = FindThreadOrNew(ppid, ptid);
+  ThreadEntry* child = FindThreadOrNew(pid, tid);
+  child->comm = parent->comm;
+  child->maps = parent->maps;
+}
+
+// Returns the entry for thread |tid|, creating it with comm "unknown" when
+// it does not exist. Threads are keyed by tid only; a pid that differs from
+// the stored one is logged at DEBUG level and otherwise ignored.
+ThreadEntry* ThreadTree::FindThreadOrNew(int pid, int tid) {
+  auto it = thread_tree_.find(tid);
+  if (it == thread_tree_.end()) {
+    AddThread(pid, tid, "unknown");
+    return thread_tree_.find(tid)->second.get();
+  }
+  ThreadEntry* thread = it->second.get();
+  if (pid != thread->pid) {
+    // TODO: b/22185053.
+    LOG(DEBUG) << "unexpected (pid, tid) pair: expected ("
+               << thread->pid << ", " << tid << "), actual (" << pid
+               << ", " << tid << ")";
+  }
+  return thread;
+}
+
+// Registers a kernel map [start_addr, start_addr + len) backed by
+// |filename|. Existing kernel maps overlapping the new one are trimmed or
+// split first. Zero-length maps are ignored.
+void ThreadTree::AddKernelMap(uint64_t start_addr, uint64_t len, uint64_t pgoff,
+                              uint64_t time, const std::string& filename) {
+  // kernel map len can be 0 when record command is not run in supervisor mode.
+  if (len == 0) {
+    return;
+  }
+  Dso* kernel_dso = FindKernelDsoOrNew(filename);
+  MapEntry* new_map =
+      AllocateMap(MapEntry(start_addr, len, pgoff, time, kernel_dso, true));
+  FixOverlappedMap(&kernel_map_tree_, new_map);
+  const bool inserted = kernel_map_tree_.insert(new_map).second;
+  CHECK(inserted);
+}
+
+// Returns the Dso for a kernel mapping: the kernel Dso itself for
+// DEFAULT_KERNEL_MMAP_NAME, otherwise the kernel-module Dso registered under
+// |filename|, which is created on first use.
+Dso* ThreadTree::FindKernelDsoOrNew(const std::string& filename) {
+  if (filename == DEFAULT_KERNEL_MMAP_NAME) {
+    return kernel_dso_.get();
+  }
+  auto it = module_dso_tree_.find(filename);
+  if (it != module_dso_tree_.end()) {
+    return it->second.get();
+  }
+  module_dso_tree_[filename] = Dso::CreateDso(DSO_KERNEL_MODULE, filename);
+  return module_dso_tree_.find(filename)->second.get();
+}
+
+// Registers a user-space map [start_addr, start_addr + len) backed by
+// |filename| for thread (pid, tid), creating the thread if necessary.
+// Existing maps of that thread overlapping the new one are trimmed or split
+// first.
+void ThreadTree::AddThreadMap(int pid, int tid, uint64_t start_addr,
+                              uint64_t len, uint64_t pgoff, uint64_t time,
+                              const std::string& filename) {
+  ThreadEntry* owner = FindThreadOrNew(pid, tid);
+  Dso* user_dso = FindUserDsoOrNew(filename);
+  MapEntry* new_map =
+      AllocateMap(MapEntry(start_addr, len, pgoff, time, user_dso, false));
+  FixOverlappedMap(&owner->maps, new_map);
+  const bool inserted = owner->maps.insert(new_map).second;
+  CHECK(inserted);
+}
+
+// Returns the user-space Dso registered under |filename|, creating it as a
+// DSO_ELF_FILE on first use.
+Dso* ThreadTree::FindUserDsoOrNew(const std::string& filename) {
+  auto it = user_dso_tree_.find(filename);
+  if (it != user_dso_tree_.end()) {
+    return it->second.get();
+  }
+  user_dso_tree_[filename] = Dso::CreateDso(DSO_ELF_FILE, filename);
+  return user_dso_tree_.find(filename)->second.get();
+}
+
+// Copies |value| into a heap-allocated MapEntry owned by map_storage_ and
+// returns a pointer to that copy.
+MapEntry* ThreadTree::AllocateMap(const MapEntry& value) {
+  map_storage_.push_back(std::unique_ptr<MapEntry>(new MapEntry(value)));
+  return map_storage_.back().get();
+}
+
+// Removes from |map_set| every address range that overlaps |map|, so that
+// |map| can be inserted afterwards without overlapping anything.
+// An overlapped entry is erased from the set; the parts of it that lie
+// before and/or after |map| are re-added as newly allocated entries. |map|
+// itself is not inserted here.
+void ThreadTree::FixOverlappedMap(std::set<MapEntry*, MapComparator>* map_set,
+                                  const MapEntry* map) {
+  for (auto it = map_set->begin(); it != map_set->end();) {
+    // The set is ordered by start address, so the first entry starting at or
+    // after the end of |map| ends the scan.
+    if ((*it)->start_addr >= map->get_end_addr()) {
+      // No more overlapped maps.
+      break;
+    }
+    if ((*it)->get_end_addr() <= map->start_addr) {
+      // Entirely before |map|: keep it untouched.
+      ++it;
+    } else {
+      MapEntry* old = *it;
+      if (old->start_addr < map->start_addr) {
+        // Keep the head of |old| that precedes |map|.
+        MapEntry* before = AllocateMap(
+            MapEntry(old->start_addr, map->start_addr - old->start_addr,
+                     old->pgoff, old->time, old->dso, old->in_kernel));
+        map_set->insert(before);
+      }
+      if (old->get_end_addr() > map->get_end_addr()) {
+        // Keep the tail of |old| that follows |map|; its pgoff is advanced by
+        // the number of bytes cut off the front of |old|.
+        MapEntry* after = AllocateMap(MapEntry(
+            map->get_end_addr(), old->get_end_addr() - map->get_end_addr(),
+            map->get_end_addr() - old->start_addr + old->pgoff, old->time,
+            old->dso, old->in_kernel));
+        map_set->insert(after);
+      }
+
+      // Drop the overlapped entry from the set and continue with its
+      // successor. The MapEntry object itself is not freed here.
+      it = map_set->erase(it);
+    }
+  }
+}
+
+// True when |addr| lies in the half-open range [start_addr, end_addr) of
+// |map|.
+static bool IsAddrInMap(uint64_t addr, const MapEntry* map) {
+  return map->start_addr <= addr && addr < map->get_end_addr();
+}
+
+// Returns the entry of |maps| containing |addr|, or nullptr if there is none.
+static MapEntry* FindMapByAddr(const std::set<MapEntry*, MapComparator>& maps,
+                               uint64_t addr) {
+  // Construct a map_entry which is strictly after the searched map_entry, based
+  // on MapComparator.
+  const uint64_t kMax = std::numeric_limits<uint64_t>::max();
+  MapEntry probe(addr, kMax, 0, kMax, nullptr, false);
+  auto it = maps.upper_bound(&probe);
+  if (it == maps.begin()) {
+    // No entry sorts before the probe, so nothing can contain |addr|.
+    return nullptr;
+  }
+  // The only candidate is the last entry sorting before the probe.
+  --it;
+  return IsAddrInMap(addr, *it) ? *it : nullptr;
+}
+
+// Finds the map containing |ip|, searching the kernel maps when |in_kernel|
+// is true and the maps of |thread| otherwise. |thread| is only dereferenced
+// in the latter case. Falls back to the unknown map; never returns nullptr.
+const MapEntry* ThreadTree::FindMap(const ThreadEntry* thread, uint64_t ip,
+                                    bool in_kernel) {
+  MapEntry* found = in_kernel ? FindMapByAddr(kernel_map_tree_, ip)
+                              : FindMapByAddr(thread->maps, ip);
+  if (found == nullptr) {
+    return &unknown_map_;
+  }
+  return found;
+}
+
+// Finds the map containing |ip|, trying the maps of |thread| first and the
+// kernel maps second. Falls back to the unknown map; never returns nullptr.
+const MapEntry* ThreadTree::FindMap(const ThreadEntry* thread, uint64_t ip) {
+  MapEntry* found = FindMapByAddr(thread->maps, ip);
+  if (found == nullptr) {
+    found = FindMapByAddr(kernel_map_tree_, ip);
+  }
+  if (found == nullptr) {
+    return &unknown_map_;
+  }
+  return found;
+}
+
+// Resolves |ip|, which must belong to |map|, to a symbol.
+// If |pvaddr_in_file| is not null it receives the address that was used for
+// the final lookup. Never returns nullptr: when nothing matches, the result
+// is either a synthesized "<file>[+<addr>]" symbol inserted into the dso (if
+// show_ip_for_unknown_symbol_ is set) or the shared unknown symbol.
+const Symbol* ThreadTree::FindSymbol(const MapEntry* map, uint64_t ip,
+                                     uint64_t* pvaddr_in_file) {
+  Dso* dso = map->dso;
+  const bool is_kernel_dso = (dso == kernel_dso_.get());
+
+  // The kernel dso is looked up by the raw ip; every other dso by the
+  // offset into the map plus the dso's minimum virtual address.
+  uint64_t vaddr_in_file;
+  if (is_kernel_dso) {
+    vaddr_in_file = ip;
+  } else {
+    vaddr_in_file = ip - map->start_addr + dso->MinVirtualAddress();
+  }
+
+  const Symbol* symbol = dso->FindSymbol(vaddr_in_file);
+  if (symbol == nullptr && map->in_kernel && !is_kernel_dso) {
+    // It is in a kernel module, but we can't find the kernel module file, or
+    // the kernel module file contains no symbol. Try finding the symbol in
+    // /proc/kallsyms.
+    dso = kernel_dso_.get();
+    vaddr_in_file = ip;
+    symbol = dso->FindSymbol(vaddr_in_file);
+  }
+
+  if (symbol == nullptr && !show_ip_for_unknown_symbol_) {
+    symbol = &unknown_symbol_;
+  } else if (symbol == nullptr) {
+    // Synthesize a one-byte symbol named after the file and address, so that
+    // later lookups of the same address find it.
+    std::string name = android::base::StringPrintf(
+        "%s%s[+%" PRIx64 "]", (show_mark_for_unknown_symbol_ ? "*" : ""),
+        dso->FileName().c_str(), vaddr_in_file);
+    dso->InsertSymbol(Symbol(name, vaddr_in_file, 1));
+    symbol = dso->FindSymbol(vaddr_in_file);
+    CHECK(symbol != nullptr);
+  }
+
+  if (pvaddr_in_file != nullptr) {
+    *pvaddr_in_file = vaddr_in_file;
+  }
+  return symbol;
+}
+
+// Resolves a kernel |ip| to a symbol. No thread is needed, because the
+// in-kernel lookup of FindMap() only consults the kernel maps.
+const Symbol* ThreadTree::FindKernelSymbol(uint64_t ip) {
+  return FindSymbol(FindMap(nullptr, ip, true), ip, nullptr);
+}
+
+// Drops all threads, comm strings, kernel maps and map storage. The Dso
+// tables are not touched here.
+void ThreadTree::ClearThreadAndMap() {
+  thread_tree_.clear();
+  thread_comm_storage_.clear();
+  kernel_map_tree_.clear();
+  map_storage_.clear();
+}
+
+// Applies one record to the tree: MMAP/MMAP2 add a kernel or thread map,
+// COMM sets a thread's comm, FORK copies comm and maps from the parent, and
+// the simpleperf-specific KERNEL_SYMBOL/DSO/SYMBOL records feed symbol data
+// into the Dso layer. Any other record type is ignored.
+void ThreadTree::Update(const Record& record) {
+  const auto type = record.type();
+
+  if (type == PERF_RECORD_MMAP) {
+    const auto& r = static_cast<const MmapRecord&>(record);
+    if (r.InKernel()) {
+      AddKernelMap(r.data->addr, r.data->len, r.data->pgoff,
+                   r.sample_id.time_data.time, r.filename);
+    } else {
+      AddThreadMap(r.data->pid, r.data->tid, r.data->addr, r.data->len,
+                   r.data->pgoff, r.sample_id.time_data.time, r.filename);
+    }
+    return;
+  }
+
+  if (type == PERF_RECORD_MMAP2) {
+    const auto& r = static_cast<const Mmap2Record&>(record);
+    if (r.InKernel()) {
+      AddKernelMap(r.data->addr, r.data->len, r.data->pgoff,
+                   r.sample_id.time_data.time, r.filename);
+    } else {
+      // The default exec name is reported as "[unknown]" instead.
+      std::string filename = (r.filename == DEFAULT_EXECNAME_FOR_THREAD_MMAP)
+                                 ? "[unknown]"
+                                 : r.filename;
+      AddThreadMap(r.data->pid, r.data->tid, r.data->addr, r.data->len,
+                   r.data->pgoff, r.sample_id.time_data.time, filename);
+    }
+    return;
+  }
+
+  if (type == PERF_RECORD_COMM) {
+    const auto& r = static_cast<const CommRecord&>(record);
+    AddThread(r.data->pid, r.data->tid, r.comm);
+    return;
+  }
+
+  if (type == PERF_RECORD_FORK) {
+    const auto& r = static_cast<const ForkRecord&>(record);
+    ForkThread(r.data->pid, r.data->tid, r.data->ppid, r.data->ptid);
+    return;
+  }
+
+  if (type == SIMPLE_PERF_RECORD_KERNEL_SYMBOL) {
+    const auto& r = static_cast<const KernelSymbolRecord&>(record);
+    // NOTE(review): |r| is const, so this std::move presumably degrades to a
+    // copy -- confirm against Dso::SetKallsyms().
+    Dso::SetKallsyms(std::move(r.kallsyms));
+    return;
+  }
+
+  if (type == SIMPLE_PERF_RECORD_DSO) {
+    const auto& r = static_cast<const DsoRecord&>(record);
+    const bool is_kernel =
+        r.dso_type == DSO_KERNEL || r.dso_type == DSO_KERNEL_MODULE;
+    Dso* dso = is_kernel ? FindKernelDsoOrNew(r.dso_name)
+                         : FindUserDsoOrNew(r.dso_name);
+    dso->SetMinVirtualAddress(r.min_vaddr);
+    // Remember the id so that later SYMBOL records can find this dso.
+    dso_id_to_dso_map_[r.dso_id] = dso;
+    return;
+  }
+
+  if (type == SIMPLE_PERF_RECORD_SYMBOL) {
+    const auto& r = static_cast<const SymbolRecord&>(record);
+    Dso* dso = dso_id_to_dso_map_[r.dso_id];
+    CHECK(dso != nullptr);
+    dso->InsertSymbol(Symbol(r.name, r.addr, r.len));
+  }
+}
+
+}  // namespace simpleperf
diff --git a/simpleperf/thread_tree.h b/simpleperf/thread_tree.h
new file mode 100644
index 0000000..5498df5
--- /dev/null
+++ b/simpleperf/thread_tree.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_THREAD_TREE_H_
+#define SIMPLE_PERF_THREAD_TREE_H_
+
+#include <stdint.h>
+
+#include <limits>
+#include <memory>
+#include <set>
+
+#include "dso.h"
+#include "environment.h"
+
+struct Record;
+
+namespace simpleperf {
+
+// One mapped address range [start_addr, start_addr + len) together with the
+// Dso it is associated with.
+struct MapEntry {
+  uint64_t start_addr;
+  uint64_t len;
+  uint64_t pgoff;
+  uint64_t time;  // Map creation time.
+  Dso* dso;
+  bool in_kernel;
+
+  // Builds a fully specified entry; every member is copied from the argument
+  // of the same name.
+  MapEntry(uint64_t start_addr, uint64_t len, uint64_t pgoff, uint64_t time,
+           Dso* dso, bool in_kernel)
+      : start_addr(start_addr),
+        len(len),
+        pgoff(pgoff),
+        time(time),
+        dso(dso),
+        in_kernel(in_kernel) {}
+
+  // Builds an empty entry. Members are zeroed so that a default-constructed
+  // entry never carries indeterminate values (reading those is undefined).
+  MapEntry()
+      : start_addr(0),
+        len(0),
+        pgoff(0),
+        time(0),
+        dso(nullptr),
+        in_kernel(false) {}
+
+  // Returns the first address past the end of the range.
+  uint64_t get_end_addr() const { return start_addr + len; }
+};
+
+// Ordering functor used by the std::set<MapEntry*, MapComparator> containers
+// in this header. The comparison itself is defined out of line.
+struct MapComparator {
+  bool operator()(const MapEntry* map1, const MapEntry* map2) const;
+};
+
+// Per-thread bookkeeping: ids, current command name and the set of maps
+// recorded for the thread.
+struct ThreadEntry {
+  int pid;
+  int tid;
+  const char* comm;  // It always refers to the latest comm.
+  // Not owning pointers as far as this header shows; ordered by MapComparator.
+  std::set<MapEntry*, MapComparator> maps;
+};
+
+// ThreadTree contains thread information (in ThreadEntry) and mmap information
+// (in MapEntry) of the monitored threads. It also has interface to access
+// symbols in executable binaries mapped in the monitored threads.
+class ThreadTree {
+ public:
+  // Creates the placeholder "unknown" symbol, dso and map (the latter two
+  // spanning the whole address range) and the default kernel dso.
+  ThreadTree()
+      : show_ip_for_unknown_symbol_(false),
+        show_mark_for_unknown_symbol_(false),
+        unknown_symbol_("unknown", 0,
+                        std::numeric_limits<unsigned long long>::max()) {
+    unknown_dso_ = Dso::CreateDso(DSO_ELF_FILE, "unknown");
+    unknown_map_ = MapEntry(0, std::numeric_limits<unsigned long long>::max(),
+                            0, 0, unknown_dso_.get(), false);
+    kernel_dso_ = Dso::CreateDso(DSO_KERNEL, DEFAULT_KERNEL_MMAP_NAME);
+  }
+
+  void AddThread(int pid, int tid, const std::string& comm);
+  void ForkThread(int pid, int tid, int ppid, int ptid);
+  ThreadEntry* FindThreadOrNew(int pid, int tid);
+  void AddKernelMap(uint64_t start_addr, uint64_t len, uint64_t pgoff,
+                    uint64_t time, const std::string& filename);
+  void AddThreadMap(int pid, int tid, uint64_t start_addr, uint64_t len,
+                    uint64_t pgoff, uint64_t time, const std::string& filename);
+  const MapEntry* FindMap(const ThreadEntry* thread, uint64_t ip,
+                          bool in_kernel);
+  // Find map for an ip address when we don't know whether it is in kernel.
+  const MapEntry* FindMap(const ThreadEntry* thread, uint64_t ip);
+  const Symbol* FindSymbol(const MapEntry* map, uint64_t ip,
+                           uint64_t* pvaddr_in_file);
+  const Symbol* FindKernelSymbol(uint64_t ip);
+  // Returns the placeholder symbol owned by this tree.
+  const Symbol* UnknownSymbol() const { return &unknown_symbol_; }
+
+  void ShowIpForUnknownSymbol() { show_ip_for_unknown_symbol_ = true; }
+  // Switches the placeholder symbol's name to "*unknown".
+  void ShowMarkForUnknownSymbol() {
+    show_mark_for_unknown_symbol_ = true;
+    // Same length as used in the constructor. Spelled with std::numeric_limits
+    // (from <limits>, included above) instead of ULLONG_MAX, whose header
+    // <limits.h> this file does not include.
+    unknown_symbol_ = Symbol("*unknown", 0,
+                             std::numeric_limits<unsigned long long>::max());
+  }
+  // Clear thread and map information, but keep loaded dso information. It saves
+  // the time to reload dso information.
+  void ClearThreadAndMap();
+
+  // Update thread tree with information provided by record.
+  void Update(const Record& record);
+
+ private:
+  Dso* FindKernelDsoOrNew(const std::string& filename);
+  Dso* FindUserDsoOrNew(const std::string& filename);
+  MapEntry* AllocateMap(const MapEntry& value);
+  void FixOverlappedMap(std::set<MapEntry*, MapComparator>* map_set,
+                        const MapEntry* map);
+
+  // NOTE(review): keyed by int; presumably the thread id -- confirm in the
+  // implementation file.
+  std::unordered_map<int, std::unique_ptr<ThreadEntry>> thread_tree_;
+  std::vector<std::unique_ptr<std::string>> thread_comm_storage_;
+
+  std::set<MapEntry*, MapComparator> kernel_map_tree_;
+  std::vector<std::unique_ptr<MapEntry>> map_storage_;
+  MapEntry unknown_map_;
+
+  std::unique_ptr<Dso> kernel_dso_;
+  std::unordered_map<std::string, std::unique_ptr<Dso>> module_dso_tree_;
+  std::unordered_map<std::string, std::unique_ptr<Dso>> user_dso_tree_;
+  std::unique_ptr<Dso> unknown_dso_;
+  bool show_ip_for_unknown_symbol_;
+  bool show_mark_for_unknown_symbol_;
+  Symbol unknown_symbol_;
+  // Filled from SIMPLE_PERF_RECORD_DSO records and consulted for
+  // SIMPLE_PERF_RECORD_SYMBOL records in Update().
+  std::unordered_map<uint64_t, Dso*> dso_id_to_dso_map_;
+};
+
+}  // namespace simpleperf
+
+// Re-export the simpleperf types into the global namespace so code outside
+// the namespace can name them without qualification.
+using MapEntry = simpleperf::MapEntry;
+using ThreadEntry = simpleperf::ThreadEntry;
+using ThreadTree = simpleperf::ThreadTree;
+
+#endif  // SIMPLE_PERF_THREAD_TREE_H_
diff --git a/simpleperf/tracing.cpp b/simpleperf/tracing.cpp
new file mode 100644
index 0000000..884a883
--- /dev/null
+++ b/simpleperf/tracing.cpp
@@ -0,0 +1,426 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tracing.h"
+
+#include <string.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include <android-base/file.h>
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
+#include <android-base/strings.h>
+
+#include "perf_event.h"
+#include "utils.h"
+
+// Ten-byte signature stored at the start of the serialized tracing data:
+// three binary bytes followed by the ASCII text "tracing".
+const char TRACING_INFO_MAGIC[10] = {
+    0x17, 0x08, 0x44,                    // 23, 8, 68
+    't',  'r',  'a',  'c', 'i', 'n', 'g',
+};
+
+// Appends the sizeof(T) raw bytes that make up |s| to the end of |data|.
+template <class T>
+void AppendData(std::vector<char>& data, const T& s) {
+  const char* first = reinterpret_cast<const char*>(&s);
+  const char* last = first + sizeof(T);
+  data.insert(data.end(), first, last);
+}
+
+// Appends the C string |s| to |data|, terminating '\0' included.
+static void AppendData(std::vector<char>& data, const char* s) {
+  const size_t bytes_with_nul = strlen(s) + 1;
+  data.insert(data.end(), s, s + bytes_with_nul);
+}
+
+// std::string specialization: writes the characters of |s| followed by a
+// terminating '\0' instead of the string object's own bytes.
+template <>
+void AppendData(std::vector<char>& data, const std::string& s) {
+  const char* text = s.c_str();
+  data.insert(data.end(), text, text + s.size() + 1);
+}
+
+// std::string specialization: reads a '\0'-terminated string starting at |p|
+// into |data| and leaves |p| just past the terminator.
+template <>
+void MoveFromBinaryFormat(std::string& data, const char*& p) {
+  const size_t length = strlen(p);
+  data.assign(p, length);
+  p += length + 1;
+}
+
+// Appends |file| to |data| preceded by its length, stored in
+// |file_size_bytes| bytes. Only widths 8 and 4 write a length; any other
+// width appends the contents with no length prefix.
+static void AppendFile(std::vector<char>& data, const std::string& file,
+                       uint32_t file_size_bytes = 8) {
+  switch (file_size_bytes) {
+    case 8: {
+      uint64_t length = file.size();
+      AppendData(data, length);
+      break;
+    }
+    case 4: {
+      uint32_t length = file.size();
+      AppendData(data, length);
+      break;
+    }
+    default:
+      break;
+  }
+  data.insert(data.end(), file.begin(), file.end());
+}
+
+// Reads a length of |file_size_bytes| bytes at |p|, then that many bytes of
+// contents into |file|, and advances |p| past both. No bounds are checked
+// here; the caller must supply enough data.
+static void DetachFile(const char*& p, std::string& file,
+                       uint32_t file_size_bytes = 8) {
+  const uint64_t length = ConvertBytesToValue(p, file_size_bytes);
+  const char* contents_begin = p + file_size_bytes;
+  const char* contents_end = contents_begin + length;
+  file.assign(contents_begin, contents_end);
+  p = contents_end;
+}
+
+// A tracepoint split into its two path components, as used under
+// /sys/kernel/debug/tracing/events/<system>/<name>/.
+struct TraceType {
+  std::string system;
+  std::string name;
+};
+
+// In-memory form of the tracing data blob: header fields plus the contents of
+// the tracing format files. It can be filled from the files under
+// /sys/kernel/debug/tracing (Record* methods) or from a serialized blob
+// (LoadFromBinary), and serialized again with BinaryFormat.
+class TracingFile {
+ public:
+  TracingFile();
+  // Reads events/header_page and events/header_event; false on failure.
+  bool RecordHeaderFiles();
+  // Reads events/ftrace/<name>/format for each type; unreadable files are
+  // skipped.
+  void RecordFtraceFiles(const std::vector<TraceType>& trace_types);
+  // Reads events/<system>/<name>/format for each type; false on first failure.
+  bool RecordEventFiles(const std::vector<TraceType>& trace_types);
+  // NOTE(review): declared here but no definition is visible in this file.
+  bool RecordKallsymsFile();
+  // Reads printk_formats; false on failure.
+  bool RecordPrintkFormatsFile();
+  // Serializes all members into one byte vector.
+  std::vector<char> BinaryFormat() const;
+  // Replaces all members with the contents of a blob made by BinaryFormat.
+  void LoadFromBinary(const std::vector<char>& data);
+  // Prints the members to stdout with the given indentation level.
+  void Dump(size_t indent) const;
+  // Parses the stored event format files into TracingFormat objects.
+  std::vector<TracingFormat> LoadTracingFormatsFromEventFiles() const;
+  const std::string& GetKallsymsFile() const { return kallsyms_file; }
+  uint32_t GetPageSize() const { return page_size; }
+
+ private:
+  char magic[10];
+  std::string version;
+  char endian;
+  uint8_t size_of_long;
+  uint32_t page_size;
+  std::string header_page_file;
+  std::string header_event_file;
+
+  std::vector<std::string> ftrace_format_files;
+  // pair of system, format_file_data.
+  std::vector<std::pair<std::string, std::string>> event_format_files;
+
+  std::string kallsyms_file;
+  std::string printk_formats_file;
+};
+
+// Fills the header fields with defaults: the magic signature, version "0.5",
+// endian 0, and this machine's sizeof(long) and page size.
+TracingFile::TracingFile()
+    : version("0.5"),
+      endian(0),
+      size_of_long(static_cast<int>(sizeof(long))),
+      page_size(static_cast<uint32_t>(::GetPageSize())) {
+  memcpy(magic, TRACING_INFO_MAGIC, sizeof(TRACING_INFO_MAGIC));
+}
+
+// Loads the header_page and header_event files, in that order. Logs and
+// returns false at the first file that cannot be read.
+bool TracingFile::RecordHeaderFiles() {
+  struct HeaderFile {
+    const char* path;
+    std::string* contents;
+  };
+  const HeaderFile header_files[] = {
+      {"/sys/kernel/debug/tracing/events/header_page", &header_page_file},
+      {"/sys/kernel/debug/tracing/events/header_event", &header_event_file},
+  };
+  for (const HeaderFile& header : header_files) {
+    if (!android::base::ReadFileToString(header.path, header.contents)) {
+      PLOG(ERROR) << "failed to read " << header.path;
+      return false;
+    }
+  }
+  return true;
+}
+
+// Collects the ftrace format file for every entry of |trace_types| that has
+// one; entries whose file cannot be read are silently skipped.
+void TracingFile::RecordFtraceFiles(const std::vector<TraceType>& trace_types) {
+  for (const TraceType& trace_type : trace_types) {
+    const std::string path = android::base::StringPrintf(
+        "/sys/kernel/debug/tracing/events/ftrace/%s/format",
+        trace_type.name.c_str());
+    std::string contents;
+    if (!android::base::ReadFileToString(path, &contents)) {
+      continue;
+    }
+    ftrace_format_files.push_back(std::move(contents));
+  }
+}
+
+// Collects the event format file for every entry of |trace_types|, stored
+// together with its system name. Logs and returns false at the first file
+// that cannot be read; files read before that stay recorded.
+bool TracingFile::RecordEventFiles(const std::vector<TraceType>& trace_types) {
+  for (const TraceType& trace_type : trace_types) {
+    const std::string path = android::base::StringPrintf(
+        "/sys/kernel/debug/tracing/events/%s/%s/format",
+        trace_type.system.c_str(), trace_type.name.c_str());
+    std::string contents;
+    const bool ok = android::base::ReadFileToString(path, &contents);
+    if (!ok) {
+      PLOG(ERROR) << "failed to read " << path;
+      return false;
+    }
+    event_format_files.emplace_back(trace_type.system, std::move(contents));
+  }
+  return true;
+}
+
+// Loads the printk_formats file. Logs and returns false when it cannot be
+// read.
+bool TracingFile::RecordPrintkFormatsFile() {
+  if (android::base::ReadFileToString(
+          "/sys/kernel/debug/tracing/printk_formats", &printk_formats_file)) {
+    return true;
+  }
+  PLOG(ERROR) << "failed to read /sys/kernel/debug/tracing/printk_formats";
+  return false;
+}
+
+// Serializes the object. Layout, in order: magic, version string, endian
+// byte, size_of_long, page_size, "header_page" + file, "header_event" + file,
+// ftrace format count + files, event format count + (system name, 1, file)
+// triples, then kallsyms and printk_formats with 4-byte lengths.
+std::vector<char> TracingFile::BinaryFormat() const {
+  std::vector<char> out(magic, magic + sizeof(magic));
+  AppendData(out, version);
+  out.push_back(endian);
+  AppendData(out, size_of_long);
+  AppendData(out, page_size);
+
+  AppendData(out, "header_page");
+  AppendFile(out, header_page_file);
+  AppendData(out, "header_event");
+  AppendFile(out, header_event_file);
+
+  const int ftrace_count = static_cast<int>(ftrace_format_files.size());
+  AppendData(out, ftrace_count);
+  for (size_t i = 0; i < ftrace_format_files.size(); ++i) {
+    AppendFile(out, ftrace_format_files[i]);
+  }
+
+  const int event_count = static_cast<int>(event_format_files.size());
+  AppendData(out, event_count);
+  // Each entry is written as its own system holding exactly one format file.
+  const int formats_in_system = 1;
+  for (size_t i = 0; i < event_format_files.size(); ++i) {
+    AppendData(out, event_format_files[i].first);
+    AppendData(out, formats_in_system);
+    AppendFile(out, event_format_files[i].second);
+  }
+
+  AppendFile(out, kallsyms_file, 4);
+  AppendFile(out, printk_formats_file, 4);
+  return out;
+}
+
+// Rebuilds the object from a blob in the layout written by BinaryFormat().
+// Aborts (CHECK) when the blob is too short to hold the magic, the magic or
+// the header names do not match, or the parse does not end exactly at the end
+// of |data|. Reads inside the individual fields are not bounds-checked.
+void TracingFile::LoadFromBinary(const std::vector<char>& data) {
+  const char* p = data.data();
+  const char* end = data.data() + data.size();
+  // Never compare the magic against memory beyond a short or empty buffer.
+  CHECK(data.size() >= sizeof(magic));
+  CHECK(memcmp(p, magic, sizeof(magic)) == 0);
+  p += sizeof(magic);
+  MoveFromBinaryFormat(version, p);
+  MoveFromBinaryFormat(endian, p);
+  MoveFromBinaryFormat(size_of_long, p);
+  MoveFromBinaryFormat(page_size, p);
+  std::string filename;
+  MoveFromBinaryFormat(filename, p);
+  CHECK_EQ(filename, "header_page");
+  DetachFile(p, header_page_file);
+  MoveFromBinaryFormat(filename, p);
+  CHECK_EQ(filename, "header_event");
+  DetachFile(p, header_event_file);
+  uint32_t count;
+  MoveFromBinaryFormat(count, p);
+  ftrace_format_files.resize(count);
+  for (uint32_t i = 0; i < count; ++i) {
+    DetachFile(p, ftrace_format_files[i]);
+  }
+  MoveFromBinaryFormat(count, p);
+  event_format_files.clear();
+  for (uint32_t i = 0; i < count; ++i) {
+    std::string system;
+    MoveFromBinaryFormat(system, p);
+    uint32_t count_in_system;
+    MoveFromBinaryFormat(count_in_system, p);
+    // Separate index name: the inner loop must not shadow the outer |i|.
+    for (uint32_t j = 0; j < count_in_system; ++j) {
+      std::string format;
+      DetachFile(p, format);
+      event_format_files.push_back(std::make_pair(system, std::move(format)));
+    }
+  }
+  DetachFile(p, kallsyms_file, 4);
+  DetachFile(p, printk_formats_file, 4);
+  CHECK_EQ(p, end);
+}
+
+// Prints a human-readable listing of the tracing data to stdout. The first
+// three magic bytes are shown in hex, the remaining ones as characters.
+void TracingFile::Dump(size_t indent) const {
+  const size_t inner = indent + 1;
+  PrintIndented(indent, "tracing data:\n");
+  PrintIndented(inner, "magic: ");
+  for (size_t i = 0; i < sizeof(magic); ++i) {
+    if (i < 3u) {
+      printf("0x%x ", magic[i]);
+    } else {
+      printf("%c", magic[i]);
+    }
+  }
+  printf("\n");
+  PrintIndented(inner, "version: %s\n", version.c_str());
+  PrintIndented(inner, "endian: %d\n", endian);
+  PrintIndented(inner, "header_page:\n%s\n\n", header_page_file.c_str());
+  PrintIndented(inner, "header_event:\n%s\n\n", header_event_file.c_str());
+
+  const size_t ftrace_total = ftrace_format_files.size();
+  for (size_t i = 0; i < ftrace_total; ++i) {
+    PrintIndented(inner, "ftrace format file %zu/%zu:\n%s\n\n", i + 1,
+                  ftrace_total, ftrace_format_files[i].c_str());
+  }
+
+  const size_t event_total = event_format_files.size();
+  for (size_t i = 0; i < event_total; ++i) {
+    const std::pair<std::string, std::string>& entry = event_format_files[i];
+    PrintIndented(inner, "event format file %zu/%zu %s:\n%s\n\n", i + 1,
+                  event_total, entry.first.c_str(), entry.second.c_str());
+  }
+
+  PrintIndented(inner, "kallsyms:\n%s\n\n", kallsyms_file.c_str());
+  PrintIndented(inner, "printk_formats:\n%s\n\n",
+                printk_formats_file.c_str());
+}
+
+// Parser states used while scanning one event format file line by line.
+// NOTE(review): READ_PRINTFMT is never entered by the parser in this file.
+enum class FormatParsingState {
+  READ_NAME,
+  READ_ID,
+  READ_FIELDS,
+  READ_PRINTFMT,
+};
+
+// Parse lines like: field:char comm[16]; offset:8; size:16;  signed:1;
+//
+// Every "name:value;" pair is handled when its ';' is reached. |value| is the
+// last whitespace-separated token in front of the ';', so for
+// "field:char comm[16]" it is "comm[16]". Members the line does not mention
+// keep the defaults assigned below instead of indeterminate values.
+static TracingField ParseTracingField(const std::string& s) {
+  TracingField field;
+  field.offset = 0;
+  field.elem_size = 0;
+  field.elem_count = 1;
+  field.is_signed = false;
+  size_t start = 0;
+  std::string name;
+  std::string value;
+  for (size_t i = 0; i < s.size(); ++i) {
+    // <ctype.h> classifiers require an unsigned char value; a plain char may
+    // be negative.
+    const bool at_token_start =
+        !isspace(static_cast<unsigned char>(s[i])) &&
+        (i == 0 || isspace(static_cast<unsigned char>(s[i - 1])));
+    if (at_token_start) {
+      start = i;
+    } else if (s[i] == ':') {
+      name = s.substr(start, i - start);
+      start = i + 1;
+    } else if (s[i] == ';') {
+      value = s.substr(start, i - start);
+      if (name == "field") {
+        size_t pos = value.find_first_of('[');
+        if (pos == std::string::npos) {
+          field.name = value;
+          field.elem_count = 1;
+        } else {
+          field.name = value.substr(0, pos);
+          // The bracket content may be empty or not a decimal number, in
+          // which case strtoull() yields 0. Treat that as a single element so
+          // the size computation below never divides by zero.
+          const size_t count = static_cast<size_t>(
+              strtoull(value.c_str() + pos + 1, nullptr, 10));
+          field.elem_count = (count == 0) ? 1 : count;
+        }
+      } else if (name == "offset") {
+        field.offset =
+            static_cast<size_t>(strtoull(value.c_str(), nullptr, 10));
+      } else if (name == "size") {
+        size_t size = static_cast<size_t>(strtoull(value.c_str(), nullptr, 10));
+        CHECK_EQ(size % field.elem_count, 0u);
+        field.elem_size = size / field.elem_count;
+      } else if (name == "signed") {
+        int is_signed = static_cast<int>(strtoull(value.c_str(), nullptr, 10));
+        field.is_signed = (is_signed == 1);
+      }
+    }
+  }
+  return field;
+}
+
+// Builds one TracingFormat per stored event format file. Each file is scanned
+// line by line: first for a "name:" line, then an "ID:" line, and after that
+// every line containing "field:" is parsed into a TracingField.
+std::vector<TracingFormat> TracingFile::LoadTracingFormatsFromEventFiles()
+    const {
+  std::vector<TracingFormat> formats;
+  for (const auto& pair : event_format_files) {
+    TracingFormat format;
+    format.system_name = pair.first;
+    // Keep the id well defined even when the file has no "ID:" line.
+    format.id = 0;
+    std::vector<std::string> strs = android::base::Split(pair.second, "\n");
+    FormatParsingState state = FormatParsingState::READ_NAME;
+    for (const auto& s : strs) {
+      // find() looks for the whole keyword. find_first_of() would match any
+      // single character of it, so lines such as "format:" or "print fmt:"
+      // would be mistaken for field lines.
+      if (state == FormatParsingState::READ_NAME) {
+        size_t pos = s.find("name:");
+        if (pos != std::string::npos) {
+          format.name = android::base::Trim(s.substr(pos + strlen("name:")));
+          state = FormatParsingState::READ_ID;
+        }
+      } else if (state == FormatParsingState::READ_ID) {
+        size_t pos = s.find("ID:");
+        if (pos != std::string::npos) {
+          format.id =
+              strtoull(s.substr(pos + strlen("ID:")).c_str(), nullptr, 10);
+          state = FormatParsingState::READ_FIELDS;
+        }
+      } else if (state == FormatParsingState::READ_FIELDS) {
+        size_t pos = s.find("field:");
+        if (pos != std::string::npos) {
+          TracingField field = ParseTracingField(s);
+          format.fields.push_back(field);
+        }
+      }
+    }
+    formats.push_back(format);
+  }
+  return formats;
+}
+
+// Allocates the backing TracingFile and fills it from the serialized |data|.
+Tracing::Tracing(const std::vector<char>& data)
+    : tracing_file_(new TracingFile) {
+  tracing_file_->LoadFromBinary(data);
+}
+
+// Releases the TracingFile allocated in the constructor.
+Tracing::~Tracing() {
+  delete tracing_file_;
+}
+
+// Prints the tracing data to stdout; forwards to TracingFile::Dump.
+void Tracing::Dump(size_t indent) {
+  tracing_file_->Dump(indent);
+}
+
+// Returns a copy of the format whose id equals |trace_event_id|. The formats
+// are parsed on first use. Aborts via LOG(FATAL) when no format matches.
+TracingFormat Tracing::GetTracingFormatHavingId(uint64_t trace_event_id) {
+  if (tracing_formats_.empty()) {
+    tracing_formats_ = tracing_file_->LoadTracingFormatsFromEventFiles();
+  }
+  for (size_t i = 0; i < tracing_formats_.size(); ++i) {
+    const TracingFormat& candidate = tracing_formats_[i];
+    if (candidate.id == trace_event_id) {
+      return candidate;
+    }
+  }
+  LOG(FATAL) << "no tracing format for id " << trace_event_id;
+  return TracingFormat();
+}
+
+// Returns "<system>:<name>" for the format whose id equals |trace_event_id|,
+// or an empty string when no format matches. The formats are parsed on first
+// use.
+std::string Tracing::GetTracingEventNameHavingId(uint64_t trace_event_id) {
+  if (tracing_formats_.empty()) {
+    tracing_formats_ = tracing_file_->LoadTracingFormatsFromEventFiles();
+  }
+  for (size_t i = 0; i < tracing_formats_.size(); ++i) {
+    const TracingFormat& candidate = tracing_formats_[i];
+    if (candidate.id != trace_event_id) {
+      continue;
+    }
+    return android::base::StringPrintf("%s:%s", candidate.system_name.c_str(),
+                                       candidate.name.c_str());
+  }
+  return "";
+}
+
+// Returns the kallsyms text held by the underlying TracingFile.
+const std::string& Tracing::GetKallsyms() const {
+  const std::string& kallsyms = tracing_file_->GetKallsymsFile();
+  return kallsyms;
+}
+
+// Returns the page size stored in the tracing data.
+uint32_t Tracing::GetPageSize() const {
+  return tracing_file_->GetPageSize();
+}
+
+// Collects the tracing files needed for |event_types| (all of which must be
+// tracepoint events named "<system>:<name>") and stores their serialized form
+// in |data|. Returns false, leaving |data| empty, when a required file cannot
+// be read.
+bool GetTracingData(const std::vector<const EventType*>& event_types,
+                    std::vector<char>* data) {
+  data->clear();
+  std::vector<TraceType> trace_types;
+  for (const EventType* event_type : event_types) {
+    CHECK_EQ(PERF_TYPE_TRACEPOINT, event_type->type);
+    const std::string& full_name = event_type->name;
+    const size_t colon = full_name.find(':');
+    TraceType trace_type;
+    trace_type.system = full_name.substr(0, colon);
+    trace_type.name = full_name.substr(colon + 1);
+    trace_types.push_back(trace_type);
+  }
+
+  TracingFile tracing_file;
+  if (!tracing_file.RecordHeaderFiles()) {
+    return false;
+  }
+  tracing_file.RecordFtraceFiles(trace_types);
+  // Don't record /proc/kallsyms here, as it will be contained in
+  // KernelSymbolRecord.
+  if (!tracing_file.RecordEventFiles(trace_types) ||
+      !tracing_file.RecordPrintkFormatsFile()) {
+    return false;
+  }
+  *data = tracing_file.BinaryFormat();
+  return true;
+}
diff --git a/simpleperf/tracing.h b/simpleperf/tracing.h
new file mode 100644
index 0000000..e4c375b
--- /dev/null
+++ b/simpleperf/tracing.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_TRACING_H_
+#define SIMPLE_PERF_TRACING_H_
+
+#include <vector>
+
+#include <android-base/logging.h>
+
+#include "event_type.h"
+#include "utils.h"
+
+// One field of a trace event format: its name, where it starts, and its size
+// expressed as elem_count elements of elem_size each.
+struct TracingField {
+  std::string name;
+  size_t offset;
+  size_t elem_size;
+  size_t elem_count;
+  bool is_signed;
+};
+
+// Location of a field inside raw event data: byte offset and size.
+struct TracingFieldPlace {
+  uint32_t offset;
+  uint32_t size;
+
+  // Reads the |size|-byte value found |offset| bytes into |raw_data|.
+  uint64_t ReadFromData(const char* raw_data) {
+    const char* field_start = raw_data + offset;
+    return ConvertBytesToValue(field_start, size);
+  }
+};
+
+// Description of one trace event: owning system, event name, numeric id and
+// the list of its fields.
+struct TracingFormat {
+  std::string system_name;
+  std::string name;
+  uint64_t id;
+  std::vector<TracingField> fields;
+
+  // Fills |place| with the offset and per-element size of the field called
+  // |name|. Aborts via LOG(FATAL) when there is no such field.
+  void GetField(const std::string& name, TracingFieldPlace& place) {
+    const TracingField& field = GetField(name);
+    place.offset = field.offset;
+    place.size = field.elem_size;
+  }
+
+ private:
+  // Returns the field called |name|, or aborts via LOG(FATAL).
+  const TracingField& GetField(const std::string& name) {
+    for (const auto& field : fields) {
+      if (field.name == name) {
+        return field;
+      }
+    }
+    // A space before "in" keeps the field name readable in the message.
+    LOG(FATAL) << "Couldn't find field " << name << " in TracingFormat of "
+               << this->name;
+    // Not reached when LOG(FATAL) aborts; present to satisfy the return type.
+    return fields[0];
+  }
+};
+
+// Defined in tracing.cpp; only used through a pointer here.
+class TracingFile;
+
+// Read-side view of serialized tracing data: dumps it and looks up event
+// formats and names by trace event id.
+class Tracing {
+ public:
+  // Parses |data|, which must be in the layout produced by GetTracingData().
+  explicit Tracing(const std::vector<char>& data);
+  ~Tracing();
+  void Dump(size_t indent);
+  // Aborts when no format has the given id.
+  TracingFormat GetTracingFormatHavingId(uint64_t trace_event_id);
+  // Returns "<system>:<name>", or an empty string when no format matches.
+  std::string GetTracingEventNameHavingId(uint64_t trace_event_id);
+  const std::string& GetKallsyms() const;
+  uint32_t GetPageSize() const;
+
+ private:
+  // Owned: allocated in the constructor and deleted in the destructor.
+  // NOTE(review): copy operations are not disabled, so copying a Tracing
+  // would delete this pointer twice -- confirm no caller copies it.
+  TracingFile* tracing_file_;
+  // Filled on first lookup.
+  std::vector<TracingFormat> tracing_formats_;
+};
+
+// Gathers the tracing files for |event_types| and writes their serialized
+// form to |data|. Returns false when a required file cannot be read.
+bool GetTracingData(const std::vector<const EventType*>& event_types,
+                    std::vector<char>* data);
+
+#endif  // SIMPLE_PERF_TRACING_H_
diff --git a/simpleperf/utils.cpp b/simpleperf/utils.cpp
new file mode 100644
index 0000000..e2d25aa
--- /dev/null
+++ b/simpleperf/utils.cpp
@@ -0,0 +1,340 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils.h"
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <map>
+#include <string>
+
+#include <android-base/file.h>
+#include <android-base/logging.h>
+
+#include <7zCrc.h>
+#include <Xz.h>
+#include <XzCrc64.h>
+
+// Frees every chunk allocated so far and resets the allocator to its empty
+// state. Strings handed out earlier become invalid.
+void OneTimeFreeAllocator::Clear() {
+  for (char* chunk : v_) {
+    delete[] chunk;
+  }
+  v_.clear();
+  cur_ = end_ = nullptr;
+}
+
+// Copies |s| (as a C string) into allocator-owned storage and returns a
+// pointer to the copy. The copy stays valid until Clear() or destruction.
+// A new chunk of at least unit_size_ bytes is started when the current one
+// cannot hold the string; the unused tail of the old chunk is abandoned.
+const char* OneTimeFreeAllocator::AllocateString(const std::string& s) {
+  size_t size = s.size() + 1;
+  // Compare against the remaining capacity instead of forming cur_ + size:
+  // that pointer may lie beyond end_ (or be based on a null cur_), which is
+  // undefined behaviour. end_ - cur_ is 0 while both are still null.
+  const size_t remaining = static_cast<size_t>(end_ - cur_);
+  if (size > remaining) {
+    size_t alloc_size = std::max(size, unit_size_);
+    char* p = new char[alloc_size];
+    v_.push_back(p);
+    cur_ = p;
+    end_ = p + alloc_size;
+  }
+  strcpy(cur_, s.c_str());
+  const char* result = cur_;
+  cur_ += size;
+  return result;
+}
+
+
+// Opens |filename| for reading in binary mode. The returned helper wraps the
+// descriptor returned by open(), which is -1 on failure.
+FileHelper FileHelper::OpenReadOnly(const std::string& filename) {
+    const int flags = O_RDONLY | O_BINARY;
+    return FileHelper(TEMP_FAILURE_RETRY(open(filename.c_str(), flags)));
+}
+
+// Opens |filename| for writing in binary mode, creating it with mode 0644 if
+// needed. The returned helper wraps the descriptor returned by open(), which
+// is -1 on failure.
+FileHelper FileHelper::OpenWriteOnly(const std::string& filename) {
+    const int flags = O_WRONLY | O_BINARY | O_CREAT;
+    return FileHelper(TEMP_FAILURE_RETRY(open(filename.c_str(), flags, 0644)));
+}
+
+// Closes the wrapped descriptor, if there is one.
+FileHelper::~FileHelper() {
+  if (fd_ == -1) {
+    return;
+  }
+  close(fd_);
+}
+
+// Opens the zip archive behind |fd|. On failure the helper is left invalid
+// and an error naming |debug_filename| is logged.
+ArchiveHelper::ArchiveHelper(int fd, const std::string& debug_filename) : valid_(false) {
+  const int rc = OpenArchiveFd(fd, "", &handle_, false);
+  if (rc != 0) {
+    LOG(ERROR) << "Failed to open archive " << debug_filename << ": " << ErrorCodeString(rc);
+    return;
+  }
+  valid_ = true;
+}
+
+// Closes the archive if it was opened successfully.
+ArchiveHelper::~ArchiveHelper() {
+  if (!valid_) {
+    return;
+  }
+  CloseArchive(handle_);
+}
+
+// printf-style output to stdout, preceded by two spaces per |indent| level.
+void PrintIndented(size_t indent, const char* fmt, ...) {
+  const int padding = static_cast<int>(indent * 2);
+  printf("%*s", padding, "");
+  va_list args;
+  va_start(args, fmt);
+  vprintf(fmt, args);
+  va_end(args);
+}
+
+// fprintf-style output to |fp|, preceded by two spaces per |indent| level.
+void FprintIndented(FILE* fp, size_t indent, const char* fmt, ...) {
+  const int padding = static_cast<int>(indent * 2);
+  fprintf(fp, "%*s", padding, "");
+  va_list args;
+  va_start(args, fmt);
+  vfprintf(fp, fmt, args);
+  va_end(args);
+}
+
+// Returns true when |value| has exactly one bit set. Zero is not a power of
+// two.
+bool IsPowerOfTwo(uint64_t value) {
+  if (value == 0) {
+    return false;
+  }
+  // Clearing the lowest set bit leaves nothing only for a single-bit value.
+  return (value & (value - 1)) == 0;
+}
+
+// Lists the names of all entries in |dirpath| except "." and "..". Returns an
+// empty list (and logs at DEBUG level) when the directory cannot be opened.
+std::vector<std::string> GetEntriesInDir(const std::string& dirpath) {
+  std::vector<std::string> names;
+  DIR* dir = opendir(dirpath.c_str());
+  if (dir == nullptr) {
+    PLOG(DEBUG) << "can't open dir " << dirpath;
+    return names;
+  }
+  for (dirent* entry = readdir(dir); entry != nullptr; entry = readdir(dir)) {
+    const std::string name = entry->d_name;
+    if (name == "." || name == "..") {
+      continue;
+    }
+    names.push_back(name);
+  }
+  closedir(dir);
+  return names;
+}
+
+// Lists the names of the entries in |dirpath| that are directories.
+std::vector<std::string> GetSubDirs(const std::string& dirpath) {
+  std::vector<std::string> subdirs;
+  std::vector<std::string> entries = GetEntriesInDir(dirpath);
+  for (std::string& entry : entries) {
+    if (!IsDir(dirpath + "/" + entry)) {
+      continue;
+    }
+    subdirs.push_back(std::move(entry));
+  }
+  return subdirs;
+}
+
+// Returns true when stat() succeeds on |dirpath| and reports a directory.
+bool IsDir(const std::string& dirpath) {
+  struct stat st;
+  const bool stat_ok = (stat(dirpath.c_str(), &st) == 0);
+  return stat_ok && S_ISDIR(st.st_mode);
+}
+
+// Returns true when stat() succeeds on |filename| and reports a regular file.
+bool IsRegularFile(const std::string& filename) {
+  struct stat st;
+  const bool stat_ok = (stat(filename.c_str(), &st) == 0);
+  return stat_ok && S_ISREG(st.st_mode);
+}
+
+// Returns the size stat() reports for |filename|, or 0 when stat() fails.
+uint64_t GetFileSize(const std::string& filename) {
+  struct stat st;
+  if (stat(filename.c_str(), &st) != 0) {
+    return 0;
+  }
+  return static_cast<uint64_t>(st.st_size);
+}
+
+// Creates every missing directory named by a prefix of |path| that ends just
+// before a '/' (a '/' at index 0 is not considered). The component after the
+// last '/' is not created. Returns false, after logging, when a mkdir()
+// fails.
+bool MkdirWithParents(const std::string& path) {
+  for (size_t slash = path.find('/', 1); slash != std::string::npos;
+       slash = path.find('/', slash + 1)) {
+    const std::string dir_path = path.substr(0, slash);
+    if (IsDir(dir_path)) {
+      continue;
+    }
+#if defined(_WIN32)
+    const int ret = mkdir(dir_path.c_str());
+#else
+    const int ret = mkdir(dir_path.c_str(), 0755);
+#endif
+    if (ret != 0) {
+      PLOG(ERROR) << "failed to create dir " << dir_path;
+      return false;
+    }
+  }
+  return true;
+}
+
+// Allocation hook for the xz decoder; the first argument is unused.
+static void* xz_alloc(void* /* unused */, size_t size) {
+  void* memory = malloc(size);
+  return memory;
+}
+
+// Deallocation hook for the xz decoder; the first argument is unused.
+static void xz_free(void* /* unused */, void* address) {
+  free(address);
+}
+
+// Decompresses the xz stream in |compressed_data| into |decompressed_data|.
+// Returns false (after logging) when the decoder reports an error or the
+// stream is not finished; |decompressed_data| is only written on success.
+bool XzDecompress(const std::string& compressed_data, std::string* decompressed_data) {
+  ISzAlloc alloc;
+  CXzUnpacker state;
+  alloc.Alloc = xz_alloc;
+  alloc.Free = xz_free;
+  XzUnpacker_Construct(&state, &alloc);
+  CrcGenerateTable();
+  Crc64GenerateTable();
+  size_t src_offset = 0;
+  size_t dst_offset = 0;
+  // The output buffer starts at the compressed size and is doubled at the top
+  // of every loop iteration.
+  std::string dst(compressed_data.size(), ' ');
+
+  ECoderStatus status = CODER_STATUS_NOT_FINISHED;
+  while (status == CODER_STATUS_NOT_FINISHED) {
+    dst.resize(dst.size() * 2);
+    // On input these hold the available sizes; the offsets below treat them
+    // as the amounts consumed/produced after the call.
+    size_t src_remaining = compressed_data.size() - src_offset;
+    size_t dst_remaining = dst.size() - dst_offset;
+    int res = XzUnpacker_Code(&state, reinterpret_cast<Byte*>(&dst[dst_offset]), &dst_remaining,
+                              reinterpret_cast<const Byte*>(&compressed_data[src_offset]),
+                              &src_remaining, CODER_FINISH_ANY, &status);
+    if (res != SZ_OK) {
+      LOG(ERROR) << "LZMA decompression failed with error " << res;
+      XzUnpacker_Free(&state);
+      return false;
+    }
+    src_offset += src_remaining;
+    dst_offset += dst_remaining;
+  }
+  // NOTE(review): the unpacker is freed before it is queried below; this
+  // presumably relies on the query not touching freed memory -- confirm
+  // against the LZMA SDK.
+  XzUnpacker_Free(&state);
+  if (!XzUnpacker_IsStreamWasFinished(&state)) {
+    LOG(ERROR) << "LZMA decompresstion failed due to incomplete stream";
+    return false;
+  }
+  // Trim the buffer to the bytes actually produced.
+  dst.resize(dst_offset);
+  *decompressed_data = std::move(dst);
+  return true;
+}
+
+// Translates a lowercase severity name ("verbose", "debug", "info",
+// "warning", "error", "fatal") into |severity|. Returns false and leaves
+// |severity| untouched for any other name.
+bool GetLogSeverity(const std::string& name, android::base::LogSeverity* severity) {
+  static const struct {
+    const char* name;
+    android::base::LogSeverity severity;
+  } kSeverities[] = {
+      {"verbose", android::base::VERBOSE},
+      {"debug", android::base::DEBUG},
+      {"info", android::base::INFO},
+      {"warning", android::base::WARNING},
+      {"error", android::base::ERROR},
+      {"fatal", android::base::FATAL},
+  };
+  for (const auto& entry : kSeverities) {
+    if (name == entry.name) {
+      *severity = entry.severity;
+      return true;
+    }
+  }
+  return false;
+}
+
+// Reports whether the process runs with uid 0. The answer is computed on the
+// first call and cached; on non-Linux builds it is always false.
+bool IsRoot() {
+#if defined(__linux__)
+  static const bool is_root = (getuid() == 0);
+#else
+  static const bool is_root = false;
+#endif
+  return is_root;
+}
+
+// Parses |symbol_data| (kallsyms text, one symbol per line) and calls
+// |callback| for every line that yields at least an address, a type and a
+// name. Stops and returns true as soon as the callback returns true;
+// returns false when all lines were processed without that happening.
+//
+// |symbol_data| is modified temporarily while a line is parsed and restored
+// afterwards. The name/module pointers in the KernelSymbol passed to the
+// callback refer to local buffers and are only valid during that call.
+bool ProcessKernelSymbols(std::string& symbol_data,
+                          const std::function<bool(const KernelSymbol&)>& callback) {
+  char* p = &symbol_data[0];
+  char* data_end = p + symbol_data.size();
+  // Heap-backed scratch buffers reused for every line. Variable-length stack
+  // arrays would have zero length for an empty line (making the initial
+  // module write out of bounds) and could overflow the stack on long lines.
+  std::vector<char> name;
+  std::vector<char> module;
+  while (p < data_end) {
+    char* line_end = strchr(p, '\n');
+    if (line_end != nullptr) {
+      *line_end = '\0';
+    }
+    size_t line_size = (line_end != nullptr) ? (line_end - p) : (data_end - p);
+    // Parse line like: ffffffffa005c4e4 d __warned.41698       [libsas]
+    // A token is never longer than the line, so line_size + 1 bytes always
+    // leave room for the terminating '\0'.
+    name.assign(line_size + 1, '\0');
+    module.assign(line_size + 1, '\0');
+
+    KernelSymbol symbol;
+    int ret = sscanf(p, "%" PRIx64 " %c %s%s", &symbol.addr, &symbol.type, name.data(),
+                     module.data());
+    if (line_end != nullptr) {
+      *line_end = '\n';
+      p = line_end + 1;
+    } else {
+      p = data_end;
+    }
+    if (ret >= 3) {
+      symbol.name = name.data();
+      size_t module_len = strlen(module.data());
+      if (module_len > 2 && module[0] == '[' && module[module_len - 1] == ']') {
+        // Strip the surrounding brackets.
+        module[module_len - 1] = '\0';
+        symbol.module = &module[1];
+      } else {
+        symbol.module = nullptr;
+      }
+
+      if (callback(symbol)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// Returns the system page size as reported by sysconf() on Linux, and a fixed
+// 4096 elsewhere.
+size_t GetPageSize() {
+#if defined(__linux__)
+  const long page_size = sysconf(_SC_PAGE_SIZE);
+  return page_size;
+#else
+  return 4096;
+#endif
+}
+
+// Reads an unsigned integer of |size| bytes (1, 2, 4 or 8) in host byte order
+// from |bytes| and returns it widened to 64 bits. Any other size aborts via
+// LOG(FATAL).
+//
+// The value is copied out with memcpy because |bytes| may point anywhere in a
+// byte buffer; dereferencing it through a wider pointer type would be a
+// misaligned, aliasing-violating access.
+uint64_t ConvertBytesToValue(const char* bytes, uint32_t size) {
+  switch (size) {
+    case 1: {
+      uint8_t value;
+      memcpy(&value, bytes, sizeof(value));
+      return value;
+    }
+    case 2: {
+      uint16_t value;
+      memcpy(&value, bytes, sizeof(value));
+      return value;
+    }
+    case 4: {
+      uint32_t value;
+      memcpy(&value, bytes, sizeof(value));
+      return value;
+    }
+    case 8: {
+      uint64_t value;
+      memcpy(&value, bytes, sizeof(value));
+      return value;
+    }
+  }
+  LOG(FATAL) << "unexpected size " << size << " in ConvertBytesToValue";
+  return 0;
+}
+
+// Splits a duration in seconds into whole seconds and the remaining
+// microseconds (both truncated).
+timeval SecondToTimeval(double time_in_sec) {
+  timeval result;
+  result.tv_sec = static_cast<time_t>(time_in_sec);
+  const double fraction = time_in_sec - result.tv_sec;
+  result.tv_usec = static_cast<int>(fraction * 1000000);
+  return result;
+}
diff --git a/simpleperf/utils.h b/simpleperf/utils.h
new file mode 100644
index 0000000..a032681
--- /dev/null
+++ b/simpleperf/utils.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_UTILS_H_
+#define SIMPLE_PERF_UTILS_H_
+
+#include <stddef.h>
+#include <string.h>
+#include <time.h>
+
+#include <string>
+#include <vector>
+
+#include <android-base/logging.h>
+#include <android-base/macros.h>
+#include <ziparchive/zip_archive.h>
+
+static inline uint64_t Align(uint64_t value, uint64_t alignment) {
+  return (alignment - 1 + value) & ~(alignment - 1);  // Rounds up; for power-of-two alignment.
+}
+
+#ifndef _WIN32
+#define CLOSE_ON_EXEC_MODE "e"
+#else
+#define CLOSE_ON_EXEC_MODE ""  // Empty on Windows.
+#endif
+
+// OneTimeFreeAllocator is used to allocate memory many times and free it only once at the
+// end, which avoids the cost of freeing each allocation individually.
+class OneTimeFreeAllocator {
+ public:
+  explicit OneTimeFreeAllocator(size_t unit_size = 8192u) : unit_size_(unit_size) {
+  }
+
+  ~OneTimeFreeAllocator() {
+    Clear();
+  }
+
+  void Clear();
+  const char* AllocateString(const std::string& s);
+
+ private:
+  // State used by Clear() and AllocateString(), which are defined outside this header.
+  const size_t unit_size_;
+  std::vector<char*> v_;
+  char* cur_ = nullptr;
+  char* end_ = nullptr;
+};
+
+// Move-only holder of a file descriptor; an fd_ of -1 means that no file is held.
+class FileHelper {
+ public:
+  static FileHelper OpenReadOnly(const std::string& filename);
+  static FileHelper OpenWriteOnly(const std::string& filename);
+
+  // Takes over |other|'s descriptor and leaves |other| holding none.
+  FileHelper(FileHelper&& other) : fd_(other.fd_) {
+    other.fd_ = -1;
+  }
+  ~FileHelper();
+
+  explicit operator bool() const {  // True while a descriptor is held.
+    return fd_ != -1;
+  }
+
+  int fd() const {
+    return fd_;
+  }
+
+ private:
+  explicit FileHelper(int fd) : fd_(fd) {}
+  int fd_;
+
+  DISALLOW_COPY_AND_ASSIGN(FileHelper);
+};
+
+// Non-copyable holder of a ZipArchiveHandle; the constructor and destructor are defined
+// out of line. Converts to true only when valid_ is set.
+class ArchiveHelper {
+ public:
+  ArchiveHelper(int fd, const std::string& debug_filename);
+  ~ArchiveHelper();
+
+  explicit operator bool() const { return valid_; }
+
+  // Gives mutable access to the underlying handle.
+  ZipArchiveHandle& archive_handle() { return handle_; }
+
+ private:
+  ZipArchiveHandle handle_;
+  bool valid_;
+
+  DISALLOW_COPY_AND_ASSIGN(ArchiveHelper);
+};
+
+template <class T>  // Copies one T out of the bytes at |p| and advances |p| past them.
+void MoveFromBinaryFormat(T& data, const char*& p) {
+  memcpy(&data, p, sizeof(T));  // Not a cast-and-load: |p| may be unaligned for T.
+  p += sizeof(T);
+}
+
+void PrintIndented(size_t indent, const char* fmt, ...);  // Variadic; presumably printf-style.
+void FprintIndented(FILE* fp, size_t indent, const char* fmt, ...);  // Same, but takes |fp|.
+
+bool IsPowerOfTwo(uint64_t value);
+
+std::vector<std::string> GetEntriesInDir(const std::string& dirpath);
+std::vector<std::string> GetSubDirs(const std::string& dirpath);
+bool IsDir(const std::string& dirpath);
+bool IsRegularFile(const std::string& filename);
+uint64_t GetFileSize(const std::string& filename);
+bool MkdirWithParents(const std::string& path);
+
+bool XzDecompress(const std::string& compressed_data, std::string* decompressed_data);
+
+bool GetLogSeverity(const std::string& name, android::base::LogSeverity* severity);
+
+bool IsRoot();
+
+struct KernelSymbol {  // One parsed line of kernel symbol text, passed to the callback.
+  uint64_t addr;  // Parsed from the leading hexadecimal column.
+  char type;  // The single character that follows the address.
+  const char* name;  // Like module, points into a per-line buffer; copy it to keep it.
+  const char* module;  // If nullptr, the symbol is not in a kernel module.
+};
+
+bool ProcessKernelSymbols(std::string& symbol_data,  // True once |callback| returns true.
+                          const std::function<bool(const KernelSymbol&)>& callback);
+
+size_t GetPageSize();  // sysconf(_SC_PAGE_SIZE) on Linux, 4096 otherwise.
+
+uint64_t ConvertBytesToValue(const char* bytes, uint32_t size);  // |size| must be 1, 2, 4 or 8.
+
+timeval SecondToTimeval(double time_in_sec);  // Fraction of a second goes to tv_usec.
+
+#endif  // SIMPLE_PERF_UTILS_H_
diff --git a/simpleperf/utils_test.cpp b/simpleperf/utils_test.cpp
new file mode 100644
index 0000000..23c669e
--- /dev/null
+++ b/simpleperf/utils_test.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "utils.h"
+
+// Compares two optional module names: they match when both are nullptr or when both are
+// non-null and hold equal strings.
+static bool ModulesMatch(const char* p, const char* q) {
+  if (p == nullptr || q == nullptr) {
+    // At least one side is missing, so they only match if both are.
+    return p == q;
+  }
+  return strcmp(p, q) == 0;
+}
+
+// True when address, type, name and module of the two symbols all agree.
+static bool KernelSymbolsMatch(const KernelSymbol& sym1, const KernelSymbol& sym2) {
+  const bool same_location = sym1.addr == sym2.addr && sym1.type == sym2.type;
+  const bool same_name = same_location && strcmp(sym1.name, sym2.name) == 0;
+  return same_name && ModulesMatch(sym1.module, sym2.module);
+}
+
+TEST(environment, ProcessKernelSymbols) {
+  std::string data =
+      "ffffffffa005c4e4 d __warned.41698   [libsas]\n"
+      "aaaaaaaaaaaaaaaa T _text\n"
+      "cccccccccccccccc c ccccc\n";
+  auto data_has = [&data](const KernelSymbol& want) {
+    return ProcessKernelSymbols(
+        data, [&want](const KernelSymbol& sym) { return KernelSymbolsMatch(sym, want); });
+  };
+  // A symbol that belongs to a module.
+  KernelSymbol expected_symbol;
+  expected_symbol.addr = 0xffffffffa005c4e4ULL;
+  expected_symbol.type = 'd';
+  expected_symbol.name = "__warned.41698";
+  expected_symbol.module = "libsas";
+  ASSERT_TRUE(data_has(expected_symbol));
+
+  // A symbol without a module.
+  expected_symbol.addr = 0xaaaaaaaaaaaaaaaaULL;
+  expected_symbol.type = 'T';
+  expected_symbol.name = "_text";
+  expected_symbol.module = nullptr;
+  ASSERT_TRUE(data_has(expected_symbol));
+
+  expected_symbol.name = "non_existent_symbol";
+  ASSERT_FALSE(data_has(expected_symbol));
+}
diff --git a/simpleperf/workload.cpp b/simpleperf/workload.cpp
new file mode 100644
index 0000000..1d34c11
--- /dev/null
+++ b/simpleperf/workload.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "workload.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <android-base/logging.h>
+
+std::unique_ptr<Workload> Workload::CreateWorkload(const std::vector<std::string>& args) {
+  std::unique_ptr<Workload> created(new Workload(args));
+  if (created == nullptr || !created->CreateNewProcess()) {
+    return nullptr;  // No object is handed out unless its child process was created.
+  }
+  return created;
+}
+
+Workload::~Workload() {  // Reaps the child (killing it if needed) and closes both pipe ends.
+  if (work_pid_ != -1 && work_state_ != NotYetCreateNewProcess) {
+    if (!Workload::WaitChildProcess(false, false)) {  // Non-blocking: is the child gone?
+      kill(work_pid_, SIGKILL);  // It was not reaped above, so kill it...
+      Workload::WaitChildProcess(true, true);  // ...and block until it has been reaped.
+    }
+  }
+  if (start_signal_fd_ != -1) {
+    close(start_signal_fd_);
+  }
+  if (exec_child_fd_ != -1) {
+    close(exec_child_fd_);
+  }
+}
+
+static void ChildProcessFn(std::vector<std::string>& args, int start_signal_fd, int exec_child_fd);
+
+bool Workload::CreateNewProcess() {  // Forks the child; it then waits for Start() before exec.
+  CHECK_EQ(work_state_, NotYetCreateNewProcess);
+
+  int start_signal_pipe[2];  // Parent -> child: carries the start byte written by Start().
+  if (pipe2(start_signal_pipe, O_CLOEXEC) != 0) {
+    PLOG(ERROR) << "pipe2() failed";
+    return false;
+  }
+
+  int exec_child_pipe[2];  // Child -> parent: written to only when execvp() fails.
+  if (pipe2(exec_child_pipe, O_CLOEXEC) != 0) {
+    PLOG(ERROR) << "pipe2() failed";
+    close(start_signal_pipe[0]);
+    close(start_signal_pipe[1]);
+    return false;
+  }
+
+  pid_t pid = fork();
+  if (pid == -1) {
+    PLOG(ERROR) << "fork() failed";
+    close(start_signal_pipe[0]);
+    close(start_signal_pipe[1]);
+    close(exec_child_pipe[0]);
+    close(exec_child_pipe[1]);
+    return false;
+  } else if (pid == 0) {
+    // In child process.
+    close(start_signal_pipe[1]);
+    close(exec_child_pipe[0]);
+    ChildProcessFn(args_, start_signal_pipe[0], exec_child_pipe[1]);
+    _exit(0);  // Only reached when ChildProcessFn() returns, i.e. nothing was exec'ed.
+  }
+  // In parent process.
+  close(start_signal_pipe[0]);
+  close(exec_child_pipe[1]);
+  start_signal_fd_ = start_signal_pipe[1];
+  exec_child_fd_ = exec_child_pipe[0];
+  work_pid_ = pid;
+  work_state_ = NotYetStartNewProcess;
+  return true;
+}
+
+static void ChildProcessFn(std::vector<std::string>& args, int start_signal_fd, int exec_child_fd) {
+  // Runs in the forked child. Die if parent exits.
+  prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
+  std::vector<char*> argv(args.size() + 1);  // execvp() wants a null-terminated char* array.
+  for (size_t i = 0; i < args.size(); ++i) {
+    argv[i] = &args[i][0];
+  }
+  argv[args.size()] = nullptr;
+
+  char start_signal = 0;
+  ssize_t nread = TEMP_FAILURE_RETRY(read(start_signal_fd, &start_signal, 1));
+  if (nread == 1 && start_signal == 1) {  // Workload::Start() wrote the start byte.
+    close(start_signal_fd);
+    execvp(argv[0], argv.data());
+    // If execvp() succeeds, we will not arrive here. But if it failed, we need to
+    // report the failure to the parent process by writing 1 to exec_child_fd.
+    int saved_errno = errno;
+    char exec_child_failed = 1;
+    TEMP_FAILURE_RETRY(write(exec_child_fd, &exec_child_failed, 1));
+    close(exec_child_fd);
+    errno = saved_errno;  // Restored so that PLOG reports the execvp() error.
+    PLOG(ERROR) << "child process failed to execvp(" << argv[0] << ")";
+  } else {
+    PLOG(ERROR) << "child process failed to receive start_signal, nread = " << nread;
+  }
+}
+
+bool Workload::Start() {  // Tells the waiting child to exec; true only if no failure is seen.
+  CHECK_EQ(work_state_, NotYetStartNewProcess);
+  char start_signal = 1;
+  ssize_t nwrite = TEMP_FAILURE_RETRY(write(start_signal_fd_, &start_signal, 1));
+  if (nwrite != 1) {
+    PLOG(ERROR) << "write start signal failed";
+    return false;
+  }
+  char exec_child_failed;
+  ssize_t nread = TEMP_FAILURE_RETRY(read(exec_child_fd_, &exec_child_failed, 1));
+  if (nread != 0) {  // Only EOF (0) is success: a successful exec closes the O_CLOEXEC pipe.
+    if (nread == -1) {
+      PLOG(ERROR) << "failed to receive error message from child process";
+    } else {
+      LOG(ERROR) << "received error message from child process";
+    }
+    return false;
+  }
+  work_state_ = Started;
+  return true;
+}
+
+bool Workload::WaitChildProcess(bool wait_forever, bool is_child_killed) {  // Reaps the child.
+  int status;
+  pid_t reaped = TEMP_FAILURE_RETRY(waitpid(work_pid_, &status, (wait_forever ? 0 : WNOHANG)));
+  if (reaped != work_pid_) {  // Nothing was reaped: still running (WNOHANG) or waitpid() failed.
+    if (reaped == -1) {
+      PLOG(ERROR) << "waitpid() failed";
+    }
+    return false;
+  }
+  if (WIFSIGNALED(status)) {  // Stay quiet only about a SIGKILL that the caller sent itself.
+    if (!is_child_killed || WTERMSIG(status) != SIGKILL) {
+      LOG(WARNING) << "child process was terminated by signal " << strsignal(WTERMSIG(status));
+    }
+  } else if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
+    LOG(WARNING) << "child process exited with exit code " << WEXITSTATUS(status);
+  }
+  return true;  // The child has been reaped.
+}
diff --git a/simpleperf/workload.h b/simpleperf/workload.h
new file mode 100644
index 0000000..fa754b5
--- /dev/null
+++ b/simpleperf/workload.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_WORKLOAD_H_
+#define SIMPLE_PERF_WORKLOAD_H_
+
+#include <sys/types.h>
+#include <chrono>
+#include <string>
+#include <vector>
+
+#include <android-base/macros.h>
+
+// Owns a forked child process that runs the command in |args|. The child is created by
+// CreateWorkload() but only calls execvp() after Start() has sent it the start signal.
+class Workload {
+ public:
+  static std::unique_ptr<Workload> CreateWorkload(const std::vector<std::string>& args);
+
+  ~Workload();
+  bool Start();
+  pid_t GetPid() {
+    return work_pid_;
+  }
+
+ private:
+  enum WorkState {
+    NotYetCreateNewProcess,
+    NotYetStartNewProcess,
+    Started,
+  };
+
+  explicit Workload(const std::vector<std::string>& args)
+      : work_state_(NotYetCreateNewProcess),
+        args_(args),
+        work_pid_(-1),
+        start_signal_fd_(-1),
+        exec_child_fd_(-1) {
+  }
+
+  bool CreateNewProcess();
+  bool WaitChildProcess(bool wait_forever, bool is_child_killed);
+
+  WorkState work_state_;
+  std::vector<std::string> args_;
+  pid_t work_pid_;
+  int start_signal_fd_;  // The parent process writes 1 to start workload in the child process.
+  int exec_child_fd_;    // The child process writes 1 to notify that execvp() failed.
+
+  DISALLOW_COPY_AND_ASSIGN(Workload);
+};
+
+#endif  // SIMPLE_PERF_WORKLOAD_H_
diff --git a/simpleperf/workload_test.cpp b/simpleperf/workload_test.cpp
new file mode 100644
index 0000000..9824143
--- /dev/null
+++ b/simpleperf/workload_test.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <signal.h>
+
+#include "IOEventLoop.h"
+#include "utils.h"
+#include "workload.h"
+
+TEST(workload, success) {
+  // The SIGCHLD callback asks the loop to exit.
+  IOEventLoop loop;
+  auto on_sigchld = [&loop]() { return loop.ExitLoop(); };
+  ASSERT_TRUE(loop.AddSignalEvent(SIGCHLD, on_sigchld));
+  auto workload = Workload::CreateWorkload({"sleep", "1"});
+  ASSERT_TRUE(workload != nullptr);
+  ASSERT_TRUE(workload->GetPid() != 0);
+  ASSERT_TRUE(workload->Start());
+  ASSERT_TRUE(loop.RunLoop());
+}
+
+TEST(workload, execvp_failure) {
+  auto workload = Workload::CreateWorkload({"/dev/null"});  // Not something execvp() can run.
+  ASSERT_TRUE(workload != nullptr);  // The fork itself still succeeds.
+  ASSERT_FALSE(workload->Start());  // The exec failure is reported back through Start().
+}
+
+static void run_signaled_workload() {
+  {
+    IOEventLoop loop;
+    auto on_sigchld = [&loop]() { return loop.ExitLoop(); };
+    ASSERT_TRUE(loop.AddSignalEvent(SIGCHLD, on_sigchld));
+    auto workload = Workload::CreateWorkload({"sleep", "10"});
+    ASSERT_TRUE(workload != nullptr);
+    ASSERT_TRUE(workload->Start());
+    // Kill the child ourselves so that the Workload destructor reports the signal.
+    ASSERT_EQ(0, kill(workload->GetPid(), SIGKILL));
+    ASSERT_TRUE(loop.RunLoop());
+  }
+  // Make sure all destructors are called before exit().
+  exit(0);
+}
+
+TEST(workload, signaled_warning) {  // Expects the warning logged for a signal-killed child.
+  ASSERT_EXIT(run_signaled_workload(), testing::ExitedWithCode(0),
+              "child process was terminated by signal");
+}
+
+static void run_exit_nonzero_workload() {
+  {
+    IOEventLoop loop;
+    auto on_sigchld = [&loop]() { return loop.ExitLoop(); };
+    ASSERT_TRUE(loop.AddSignalEvent(SIGCHLD, on_sigchld));
+    // Listing a missing directory is expected to make ls exit with a non-zero code.
+    auto workload = Workload::CreateWorkload({"ls", "nonexistdir"});
+    ASSERT_TRUE(workload != nullptr);
+    ASSERT_TRUE(workload->Start());
+    ASSERT_TRUE(loop.RunLoop());
+  }
+  // Make sure all destructors are called before exit().
+  exit(0);
+}
+
+TEST(workload, exit_nonzero_warning) {  // Expects the warning logged for a non-zero exit code.
+  ASSERT_EXIT(run_exit_nonzero_workload(), testing::ExitedWithCode(0),
+              "child process exited with exit code");
+}