blob: 1a861f7bf4fdea0fff1bc9b64cc97dc5dd72278c [file] [log] [blame]
/*
* @file pe_profiling/operf_process_info.cpp
* This file contains functions for storing process information,
* handling executable mmappings, etc.
*
* @remark Copyright 2011 OProfile authors
* @remark Read the file COPYING
*
* Created on: Dec 13, 2011
* @author Maynard Johnson
* (C) Copyright IBM Corp. 2011
*
* Modified by Maynard Johnson <maynardj@us.ibm.com>
* (C) Copyright IBM Corporation 2013
*
*/
#include <stdio.h>
#include <unistd.h>
#include <iostream>
#include <sstream>
#include <map>
#include <string.h>
#include <errno.h>
#include "operf_process_info.h"
#include "file_manip.h"
#include "operf_utils.h"
using namespace std;
using namespace OP_perf_utils;
/* Build the bookkeeping object for the process whose thread-group id is 'tgid'.
 *
 * tgid                 process id stored in 'pid'
 * appname              candidate application name (may be NULL); forwarded to set_appname()
 * app_arg_is_fullname  true if 'appname' is a full pathname; forwarded to set_appname()
 * is_valid             initial value of the 'valid' flag
 */
operf_process_info::operf_process_info(pid_t tgid, const char * appname,
                                       bool app_arg_is_fullname, bool is_valid)
	: pid(tgid), valid(is_valid), appname_valid(false), forked(false),
	  look_for_appname_match(false), appname_is_fullname(NOT_FULLNAME),
	  num_app_chars_matched(-1)
{
	/* No fork parent is known at construction time. */
	parent_of_fork = NULL;

	/* Begin with an empty stored name so that set_appname() does not
	 * take its "already been here" path on this first call.
	 */
	_appname.clear();
	set_appname(appname, app_arg_is_fullname);
}
/* Drop this process's view of its mmappings.
 *
 * Only the map entries are removed; the operf_mmap objects they point to
 * are not freed here.
 */
operf_process_info::~operf_process_info()
{
	mmappings.clear();
}
void operf_process_info::set_appname(const char * appname, bool app_arg_is_fullname)
{
char exe_symlink[64];
char exe_realpath[PATH_MAX];
/* A combination of non-null appname and app_arg_is_fullname==true may be passed
* from various locations. But a non-null appname and app_arg_is_fullname==false
* may only be passed as a result of a PERF_RECORD_COMM event.
*/
bool from_COMM_event = (appname && !app_arg_is_fullname);
if (appname_valid)
return;
/* If stored _appname is not empty, it implies we've been through this function before
* (and would have tried the readlink method or, perhaps, fallen back to some other
* method to set the stored _appname). If we're here because of something other than
* a COMM event (e.g. MMAP event), then we should compare our stored _appname with our
* collection of mmapping basenames to see if we can find an appname match; otherwise,
* if the passed appname is NULL, we just return, since a NULL appname won't help us here.
*/
if (_appname.length()) {
if (look_for_appname_match && !from_COMM_event)
return find_best_match_appname_all_mappings();
else if (!appname)
return;
}
snprintf(exe_symlink, 64, "/proc/%d/exe", pid);
memset(exe_realpath, '\0', PATH_MAX);
/* If the user is running a command via taskset, the kernel will send us a PERF_RECORD_COMM
* for both comm=taskset and comm=<user_command> for the same process ID !!
* The user will not be interested in taskset samples; thus, we ignore such COMM events.
* This is a hack, but there doesn't seem to be a better way around the possibility of having
* application samples attributed to "taskset" instead of the application.
*/
if (readlink(exe_symlink, exe_realpath, sizeof(exe_realpath)-1) > 0) {
_appname = exe_realpath;
app_basename = op_basename(_appname);
if (!strncmp(app_basename.c_str(), "taskset", strlen("taskset"))) {
_appname = "unknown";
app_basename = "unknown";
} else {
appname_valid = true;
}
} else {
/* Most likely that the process has ended already, so we'll need to determine
* the appname through different means.
*/
if (cverb << vmisc) {
ostringstream message;
message << "PID: " << hex << pid << " Unable to obtain appname from " << exe_symlink << endl
<< "\t" << strerror(errno) << endl;
cerr << message.str();
}
if (appname && strcmp(appname, "taskset")) {
_appname = appname;
if (app_arg_is_fullname) {
appname_valid = true;
} else {
look_for_appname_match = true;
}
} else {
_appname = "unknown";
}
app_basename = _appname;
}
ostringstream message;
message << "PID: " << hex << pid << " appname is set to "
<< _appname << endl;
cverb << vmisc << message.str();
if (look_for_appname_match)
find_best_match_appname_all_mappings();
}
/* This operf_process_info object may be a parent to processes that it has forked.
* If the forked process has not done an 'exec' yet (i.e., we've not received a
* COMM event for it), then it's still a dependent process of its parent.
* If so, it will be in the parent's collection of forked processes. So,
* when adding a new mapping, we should copy that mapping to each forked
* child's operf_process_info object. Then, if samples are taken for that
* mapping for that forked process, the samples can be correctly attributed.
*/
void operf_process_info::process_mapping(struct operf_mmap * mapping, bool do_self)
{
if (!appname_valid && !is_forked()) {
if (look_for_appname_match)
check_mapping_for_appname(mapping);
else
set_appname(NULL, false);
}
set_new_mapping_recursive(mapping, do_self);
}
/* Count how many leading characters of the current application name
 * match the basename of 'mapped_filename'.
 *
 * mapped_filename  pathname of a mapped file
 * basename         [out] receives op_basename(mapped_filename)
 *
 * Returns the length of the common prefix, or -1 if the application name
 * is longer than the basename or no characters match at all.
 */
int operf_process_info::get_num_matching_chars(string mapped_filename, string & basename)
{
	basename = op_basename(mapped_filename);

	string candidate;
	if (appname_is_fullname == NOT_FULLNAME) {
		// This implies _appname is storing a short name from a COMM event
		candidate = _appname;
	} else {
		candidate = op_basename(_appname);
	}

	const size_t candidate_len = candidate.length();
	if (candidate_len > basename.length())
		return -1;

	/* Walk the common prefix; candidate_len <= basename.length() here. */
	size_t matched = 0;
	while (matched < candidate_len && candidate[matched] == basename[matched])
		matched++;

	if (matched == 0)
		return -1;
	return static_cast<int>(matched);
}
/* If we do not know the full pathname of our app yet,
* let's try to determine if the passed filename is a good
* candidate appname.
* ASSUMPTION: This function is called only when look_for_appname_match==true.
*/
void operf_process_info::check_mapping_for_appname(struct operf_mmap * mapping)
{
	/* Anonymous mappings have no backing file that could name the app. */
	if (mapping->is_anon_mapping)
		return;

	string basename;
	int num_matched_chars = get_num_matching_chars(mapping->filename, basename);

	/* Only a strictly better match than the best seen so far is of interest. */
	if (num_matched_chars <= num_app_chars_matched)
		return;

	/* num_matched_chars exceeds num_app_chars_matched, which starts at -1,
	 * so it is non-negative here and the cast to size_t is value-preserving.
	 */
	if (static_cast<size_t>(num_matched_chars) == app_basename.length()) {
		/* Every character of the stored basename matched: accept as the app. */
		appname_is_fullname = YES_FULLNAME;
		look_for_appname_match = false;
		appname_valid = true;
	} else {
		appname_is_fullname = MAYBE_FULLNAME;
	}

	/* Remember this mapping as the best candidate so far. */
	_appname = mapping->filename;
	app_basename = basename;
	num_app_chars_matched = num_matched_chars;
	cverb << vmisc << "Best appname match is " << _appname << endl;
}
void operf_process_info::find_best_match_appname_all_mappings(void)
{
map<u64, struct operf_mmap *>::iterator it;
// We may not even have a candidate shortname (from a COMM event) for the app yet
if (_appname == "unknown")
return;
it = mmappings.begin();
while (it != mmappings.end()) {
check_mapping_for_appname(it->second);
it++;
}
}
/* Look up the mapping whose [start_addr, end_addr] range (both ends
 * inclusive) contains 'sample_addr'.
 *
 * Mappings are visited in ascending key order and the first one that
 * contains the address is returned; NULL is returned if none does.
 */
const struct operf_mmap * operf_process_info::find_mapping_for_sample(u64 sample_addr)
{
	const struct operf_mmap * found = NULL;

	for (map<u64, struct operf_mmap *>::iterator cur = mmappings.begin();
	     cur != mmappings.end() && !found; ++cur) {
		const struct operf_mmap * candidate = cur->second;
		if (candidate->start_addr <= sample_addr && sample_addr <= candidate->end_addr)
			found = candidate;
	}
	return found;
}
/**
* Hypervisor samples cannot be attributed to any real binary, so we synthesize
* an operf_mmap object with the name of "[hypervisor_bucket]". We mark this
* mmapping as "is_anon" so that hypervisor samples are handled in the same way as
* anon samples (and vdso, heap, and stack) -- i.e., a sample file is created
* with the following pieces of information in its name:
* - [hypervisor_bucket]
* - PID
* - address range
*
* The address range part is problematic for hypervisor samples, since we don't
* know the range of sample addresses until we process all the samples. This is
* why we need to adjust the hypervisor mmapping when we detect an ip that's
* outside of the current address range. This is also why we defer processing
* hypervisor samples the first time through the processing of sample
* data. See operf_utils::__handle_sample_event for details relating to how we
* defer processing of such samples.
*/
void operf_process_info::process_hypervisor_mapping(u64 ip)
{
	u64 prev_end = ~0ULL;   /* ~0ULL means "no hypervisor mapping seen" */
	bool need_new_bucket = true;

	/* Locate the first hypervisor mapping, if there is one. */
	map<u64, struct operf_mmap *>::iterator pos = mmappings.begin();
	while (pos != mmappings.end() && !pos->second->is_hypervisor)
		++pos;

	if (pos != mmappings.end()) {
		struct operf_mmap * bucket = pos->second;
		prev_end = bucket->end_addr;
		if (bucket->start_addr > ip) {
			/* The map is keyed by start address, so a lower start means
			 * the entry must be replaced rather than adjusted in place.
			 * NOTE(review): forked children receive a copy of this pointer
			 * via set_new_mapping_recursive(); confirm none still hold it
			 * when it is deleted here.
			 */
			mmappings.erase(pos);
			delete bucket;
		} else {
			/* ip is at or above the current start: grow the end if needed. */
			need_new_bucket = false;
			if (prev_end <= ip)
				bucket->end_addr = ip;
		}
	}

	if (!need_new_bucket)
		return;

	struct operf_mmap * hypervisor_mmap = new struct operf_mmap;
	memset(hypervisor_mmap, 0, sizeof(struct operf_mmap));
	hypervisor_mmap->start_addr = ip;
	/* Keep the previous end address when one exists and lies at or beyond ip. */
	if ((prev_end == ~0ULL) || (prev_end < ip))
		hypervisor_mmap->end_addr = ip;
	else
		hypervisor_mmap->end_addr = prev_end;
	strcpy(hypervisor_mmap->filename, "[hypervisor_bucket]");
	hypervisor_mmap->is_anon_mapping = true;
	hypervisor_mmap->pgoff = 0;
	hypervisor_mmap->is_hypervisor = true;

	if (cverb << vmisc) {
		ostringstream message;
		message << "Synthesize mmapping for " << hypervisor_mmap->filename << endl;
		message << "\tstart_addr: " << hex << hypervisor_mmap->start_addr;
		message << "; end addr: " << hypervisor_mmap->end_addr << endl;
		cout << message.str();
	}
	process_mapping(hypervisor_mmap, false);
}
/* Hand every mapping currently recorded for this process to 'forked_pid'
 * by calling its process_mapping() with do_self==true.
 */
void operf_process_info::copy_mappings_to_forked_process(operf_process_info * forked_pid)
{
	for (map<u64, struct operf_mmap *>::iterator cur = mmappings.begin();
	     cur != mmappings.end(); ++cur) {
		/* We can pass just the pointer of the operf_mmap object because the
		 * original object is created in operf_utils:__handle_mmap_event and
		 * is saved in the global all_images_map.
		 */
		forked_pid->process_mapping(cur->second, true);
	}
}
/* Mark this process as forked from 'parent', register it with the parent,
 * and receive a copy of the parent's current mappings.
 */
void operf_process_info::set_fork_info(operf_process_info * parent)
{
	parent_of_fork = parent;
	/* Must be set before the mappings are copied: process_mapping()
	 * consults is_forked() when deciding whether to resolve the appname.
	 */
	forked = true;

	parent->add_forked_pid_association(this);
	parent->copy_mappings_to_forked_process(this);
}
/* ASSUMPTION: This function should only be called during reprocessing phase
* since we blindly set the _appname to that of the parent. If this function
* were called from elsewhere, the parent's _appname might not yet be fully baked.
*/
void operf_process_info::connect_forked_process_to_parent(void)
{
	/* Report the parent's pid (not the pointer value), matching the
	 * message format used by try_disassociate_from_parent().
	 */
	if (cverb << vmisc)
		cout << "Connecting forked proc " << pid << " to parent " << parent_of_fork->pid << endl;

	/* Adopt the parent's application name and treat it as final. */
	valid = true;
	_appname = parent_of_fork->get_app_name();
	app_basename = op_basename(_appname);
	appname_valid = true;
}
void operf_process_info::remove_forked_process(pid_t forked_pid)
{
std::vector<operf_process_info *>::iterator it = forked_processes.begin();
while (it != forked_processes.end()) {
operf_process_info * p = *it;
if (p->pid == forked_pid) {
forked_processes.erase(it);
break;
}
it++;
}
}
/* See comment in operf_utils::__handle_comm_event for conditions under
* which this function is called.
*/
void operf_process_info::try_disassociate_from_parent(char * app_shortname)
{
if (parent_of_fork && (parent_of_fork->pid == this->pid))
return;
if (cverb << vmisc && parent_of_fork)
cout << "Dis-associating forked proc " << pid
<< " from parent " << parent_of_fork->pid << endl;
valid = true;
set_appname(app_shortname, false);
map<u64, struct operf_mmap *>::iterator it = mmappings.begin();
while (it != mmappings.end()) {
operf_mmap * cur = it->second;
/* mmappings from the parent may have been added to this proc info prior
* to this proc info becoming valid since we could not know at the time if
* this proc would ever be valid. But now we know it's valid (which is why
* we're dis-associating from the parent), so we remove these unnecessary
* parent mmappings.
*/
if (mmappings_from_parent[cur->start_addr]) {
mmappings_from_parent[cur->start_addr] = false;
mmappings.erase(it++);
} else {
process_mapping(cur, false);
it++;
}
}
if (parent_of_fork) {
parent_of_fork->remove_forked_process(this->pid);
parent_of_fork = NULL;
}
forked = false;
}
/* This function adds a new mapping to the current operf_process_info
* and then calls the same function on each of its forked children.
* If do_self==true, it means this function is being called by a parent
* on a forked child's operf_process_info. Then, if the mapping already
* exists, we do not set the corresponding mmappings_from_parent since we
* want to retain the knowledge that the mapping had already been added for
* this process versus from the parent. If do_self==false, it means this
* operf_process_info is the top-level parent and should set the corresponding
* mmappings_from_parent to false. The mmappings_from_parent map allows us to
* know whether to keep or discard the mapping if/when we dis-associate from
* the parent.
*/
void operf_process_info::set_new_mapping_recursive(struct operf_mmap * mapping, bool do_self)
{
if (do_self) {
map<u64, struct operf_mmap *>::iterator it = mmappings.find(mapping->start_addr);
if (it == mmappings.end())
mmappings_from_parent[mapping->start_addr] = true;
else
mmappings_from_parent[mapping->start_addr] = false;
} else {
mmappings_from_parent[mapping->start_addr] = false;
}
mmappings[mapping->start_addr] = mapping;
std::vector<operf_process_info *>::iterator it = forked_processes.begin();
while (it != forked_processes.end()) {
operf_process_info * fp = *it;
fp->set_new_mapping_recursive(mapping, true);
cverb << vmisc << "Copied new parent mapping for " << mapping->filename
<< " for forked process " << fp->pid << endl;
it++;
}
}