Revert "Fix getStats() freeze bug affecting Chromium but not WebRTC standalone."

This reverts commit 05d43c6f7f.

Reason for revert: It breaks some Chromium trybots:
https://ci.chromium.org/p/chromium/builders/luci.chromium.try/linux_chromium_asan_rel_ng/206387
https://ci.chromium.org/p/chromium/builders/luci.chromium.try/linux_chromium_tsan_rel_ng/207737
https://ci.chromium.org/p/chromium/builders/luci.chromium.try/win10_chromium_x64_rel_ng/202283

Original change's description:
> Fix getStats() freeze bug affecting Chromium but not WebRTC standalone.
> 
> PeerConnection::Close() is, per-spec, a blocking operation.
> Unfortunately, PeerConnection is implemented to own resources used by
> the network thread, and Close() - on the signaling thread - destroys
> these resources. As such, tasks run in parallel like getStats() get into
> race conditions with Close() unless synchronized. The mechanism in place
> is RTCStatsCollector::WaitForPendingRequest(), which waits until the
> network thread is done with the in-parallel stats request.
> 
> Prior to this CL, this was implemented by performing
> rtc::Thread::ProcessMessages() in a loop until the network thread had
> posted a task on the signaling thread to say that it was done, which
> would then get processed by ProcessMessages(). In WebRTC this works, and
> the test is RTCStatsIntegrationTest.GetsStatsWhileClosingPeerConnection.
> 
> But because Chromium's thread wrapper does not support
> ProcessMessages(), calling getStats() followed by close() in Chrome
> resulted in waiting forever (https://crbug.com/850907).
> 
> In this CL, the process messages loop is removed. Instead, the shared
> resources are guarded by an rtc::Event. WaitForPendingRequest() still
> blocks the signaling thread, but only while shared resources are in use
> by the network thread. After this CL, calling WaitForPendingRequest() no
> longer has any unexpected side-effects since it no longer processes
> other messages that might have been posted on the thread.
> 
> The resource ownership and threading model of WebRTC deserves to be
> revisited, but this fixes a common Chromium crash without redesigning
> PeerConnection, in a way that does not cause more blocking than what
> the other PeerConnection methods are already doing.
> 
> Note: An alternative to using rtc::Event is to use resource locks and
> to not perform the stats collection on the network thread if the
> request was cancelled before the start of processing, but this has very
> little benefit in terms of performance: once the network thread starts
> collecting the stats, it would use the lock until collection is
> completed, blocking the signaling thread trying to acquire that lock
> anyway. This defeats the purpose and is a riskier change, since
> cancelling partial collection in this inherently racy edge-case would
> have observable differences from the returned stats, which may cause
> more regressions.
> 
> Bug: chromium:850907
> Change-Id: Idceeee0bddc0c9d5518b58a2b263abb2bbf47cff
> Reviewed-on: https://webrtc-review.googlesource.com/c/121567
> Commit-Queue: Henrik Boström <hbos@webrtc.org>
> Reviewed-by: Steve Anton <steveanton@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#26707}

TBR=steveanton@webrtc.org,hbos@webrtc.org

Change-Id: Icd82cdd5bd086a90999f7fd5f8616e1f2d2153bf
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: chromium:850907
Reviewed-on: https://webrtc-review.googlesource.com/c/123225
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Commit-Queue: Mirko Bonadei <mbonadei@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#26721}
This commit is contained in:
Mirko Bonadei 2019-02-15 21:10:40 +00:00 committed by Commit Bot
parent ca3c8017e5
commit ca890ee582
4 changed files with 86 additions and 143 deletions

View file

@ -217,7 +217,6 @@ rtc_static_library("peerconnection") {
"../rtc_base:checks",
"../rtc_base:rtc_base",
"../rtc_base:rtc_base_approved",
"../rtc_base:rtc_post_message_with_functor",
"../rtc_base/system:rtc_export",
"../rtc_base/third_party/base64",
"../rtc_base/third_party/sigslot",

View file

@ -25,7 +25,6 @@
#include "pc/peer_connection.h"
#include "pc/rtc_stats_traversal.h"
#include "rtc_base/checks.h"
#include "rtc_base/post_message_with_functor.h"
#include "rtc_base/strings/string_builder.h"
#include "rtc_base/time_utils.h"
#include "rtc_base/trace_event.h"
@ -754,8 +753,6 @@ RTCStatsCollector::RTCStatsCollector(PeerConnectionInternal* pc,
network_thread_(pc->network_thread()),
num_pending_partial_reports_(0),
partial_report_timestamp_us_(0),
network_report_event_(true /* manual_reset */,
true /* initially_signaled */),
cache_timestamp_us_(0),
cache_lifetime_us_(cache_lifetime_us) {
RTC_DCHECK(pc_);
@ -802,7 +799,7 @@ void RTCStatsCollector::GetStatsReportInternal(
// reentrancy problems.
std::vector<RequestInfo> requests;
requests.swap(requests_);
rtc::PostMessageWithFunctor(
invoker_.AsyncInvoke<void>(
RTC_FROM_HERE, signaling_thread_,
rtc::Bind(&RTCStatsCollector::DeliverCachedReport, this, cached_report_,
std::move(requests)));
@ -833,14 +830,10 @@ void RTCStatsCollector::GetStatsReportInternal(
// network thread, where it more naturally belongs.
call_stats_ = pc_->GetCallStats();
// Don't touch |network_report_| on the signaling thread until
// ProducePartialResultsOnNetworkThread() has signaled the
// |network_report_event_|.
network_report_event_.Reset();
rtc::PostMessageWithFunctor(
invoker_.AsyncInvoke<void>(
RTC_FROM_HERE, network_thread_,
rtc::Bind(&RTCStatsCollector::ProducePartialResultsOnNetworkThread,
this, timestamp_us));
rtc::scoped_refptr<RTCStatsCollector>(this), timestamp_us));
ProducePartialResultsOnSignalingThread(timestamp_us);
}
}
@ -852,117 +845,89 @@ void RTCStatsCollector::ClearCachedStatsReport() {
void RTCStatsCollector::WaitForPendingRequest() {
RTC_DCHECK(signaling_thread_->IsCurrent());
// If a request is pending, blocks until the |network_report_event_| is
// signaled and then delivers the result. Otherwise this is a NO-OP.
MergeNetworkReport_s();
if (num_pending_partial_reports_) {
rtc::Thread::Current()->ProcessMessages(0);
while (num_pending_partial_reports_) {
rtc::Thread::Current()->SleepMs(1);
rtc::Thread::Current()->ProcessMessages(0);
}
}
}
void RTCStatsCollector::ProducePartialResultsOnSignalingThread(
int64_t timestamp_us) {
RTC_DCHECK(signaling_thread_->IsCurrent());
partial_report_ = RTCStatsReport::Create(timestamp_us);
rtc::scoped_refptr<RTCStatsReport> report = RTCStatsReport::Create(
timestamp_us);
ProducePartialResultsOnSignalingThreadImpl(timestamp_us,
partial_report_.get());
ProduceDataChannelStats_s(timestamp_us, report.get());
ProduceMediaStreamStats_s(timestamp_us, report.get());
ProduceMediaStreamTrackStats_s(timestamp_us, report.get());
ProducePeerConnectionStats_s(timestamp_us, report.get());
// ProducePartialResultsOnSignalingThread() is running synchronously on the
// signaling thread, so it is always the first partial result delivered on the
// signaling thread. The request is not complete until MergeNetworkReport_s()
// happens; we don't have to do anything here.
RTC_DCHECK_GT(num_pending_partial_reports_, 1);
--num_pending_partial_reports_;
}
void RTCStatsCollector::ProducePartialResultsOnSignalingThreadImpl(
int64_t timestamp_us,
RTCStatsReport* partial_report) {
RTC_DCHECK(signaling_thread_->IsCurrent());
ProduceDataChannelStats_s(timestamp_us, partial_report);
ProduceMediaStreamStats_s(timestamp_us, partial_report);
ProduceMediaStreamTrackStats_s(timestamp_us, partial_report);
ProducePeerConnectionStats_s(timestamp_us, partial_report);
AddPartialResults(report);
}
void RTCStatsCollector::ProducePartialResultsOnNetworkThread(
int64_t timestamp_us) {
RTC_DCHECK(network_thread_->IsCurrent());
// Touching |network_report_| on this thread is safe by this method because
// |network_report_event_| is reset before this method is invoked.
network_report_ = RTCStatsReport::Create(timestamp_us);
rtc::scoped_refptr<RTCStatsReport> report = RTCStatsReport::Create(
timestamp_us);
std::map<std::string, cricket::TransportStats> transport_stats_by_name =
pc_->GetTransportStatsByNames(transport_names_);
std::map<std::string, CertificateStatsPair> transport_cert_stats =
PrepareTransportCertificateStats_n(transport_stats_by_name);
ProducePartialResultsOnNetworkThreadImpl(
timestamp_us, transport_stats_by_name, transport_cert_stats,
network_report_.get());
// Signal that it is now safe to touch |network_report_| on the signaling
// thread, and post a task to merge it into the final results.
network_report_event_.Set();
rtc::PostMessageWithFunctor(
RTC_FROM_HERE, signaling_thread_,
rtc::Bind(&RTCStatsCollector::MergeNetworkReport_s, this));
}
void RTCStatsCollector::ProducePartialResultsOnNetworkThreadImpl(
int64_t timestamp_us,
const std::map<std::string, cricket::TransportStats>&
transport_stats_by_name,
const std::map<std::string, CertificateStatsPair>& transport_cert_stats,
RTCStatsReport* partial_report) {
RTC_DCHECK(network_thread_->IsCurrent());
ProduceCertificateStats_n(timestamp_us, transport_cert_stats, partial_report);
ProduceCodecStats_n(timestamp_us, transceiver_stats_infos_, partial_report);
ProduceCertificateStats_n(timestamp_us, transport_cert_stats, report.get());
ProduceCodecStats_n(timestamp_us, transceiver_stats_infos_, report.get());
ProduceIceCandidateAndPairStats_n(timestamp_us, transport_stats_by_name,
call_stats_, partial_report);
ProduceRTPStreamStats_n(timestamp_us, transceiver_stats_infos_,
partial_report);
call_stats_, report.get());
ProduceRTPStreamStats_n(timestamp_us, transceiver_stats_infos_, report.get());
ProduceTransportStats_n(timestamp_us, transport_stats_by_name,
transport_cert_stats, partial_report);
transport_cert_stats, report.get());
AddPartialResults(report);
}
void RTCStatsCollector::MergeNetworkReport_s() {
RTC_DCHECK(signaling_thread_->IsCurrent());
// The |network_report_event_| must be signaled for it to be safe to touch
// |network_report_|. This is normally not blocking, but if
// WaitForPendingRequest() is called while a request is pending, we might have
// to wait until the network thread is done touching |network_report_|.
network_report_event_.Wait(rtc::Event::kForever);
if (!network_report_) {
// Normally, MergeNetworkReport_s() is executed because it is posted from
// the network thread. But if WaitForPendingRequest() is called while a
// request is pending, an early call to MergeNetworkReport_s() is made,
// merging the report and setting |network_report_| to null. If so, when the
// previously posted MergeNetworkReport_s() is later executed, the report is
// already null and nothing needs to be done here.
void RTCStatsCollector::AddPartialResults(
const rtc::scoped_refptr<RTCStatsReport>& partial_report) {
if (!signaling_thread_->IsCurrent()) {
invoker_.AsyncInvoke<void>(RTC_FROM_HERE, signaling_thread_,
rtc::Bind(&RTCStatsCollector::AddPartialResults_s,
rtc::scoped_refptr<RTCStatsCollector>(this),
partial_report));
return;
}
RTC_DCHECK_GT(num_pending_partial_reports_, 0);
RTC_DCHECK(partial_report_);
partial_report_->TakeMembersFrom(network_report_);
network_report_ = nullptr;
--num_pending_partial_reports_;
// |network_report_| is currently the only partial report collected
// asynchronously, so |num_pending_partial_reports_| must now be 0 and we are
// ready to deliver the result.
RTC_DCHECK_EQ(num_pending_partial_reports_, 0);
cache_timestamp_us_ = partial_report_timestamp_us_;
cached_report_ = partial_report_;
partial_report_ = nullptr;
transceiver_stats_infos_.clear();
// Trace WebRTC Stats when getStats is called on Javascript.
// This allows access to WebRTC stats from trace logs. To enable them,
// select the "webrtc_stats" category when recording traces.
TRACE_EVENT_INSTANT1("webrtc_stats", "webrtc_stats", "report",
cached_report_->ToJson());
AddPartialResults_s(partial_report);
}
// Deliver report and clear |requests_|.
std::vector<RequestInfo> requests;
requests.swap(requests_);
DeliverCachedReport(cached_report_, std::move(requests));
void RTCStatsCollector::AddPartialResults_s(
rtc::scoped_refptr<RTCStatsReport> partial_report) {
RTC_DCHECK(signaling_thread_->IsCurrent());
RTC_DCHECK_GT(num_pending_partial_reports_, 0);
if (!partial_report_)
partial_report_ = partial_report;
else
partial_report_->TakeMembersFrom(partial_report);
--num_pending_partial_reports_;
if (!num_pending_partial_reports_) {
cache_timestamp_us_ = partial_report_timestamp_us_;
cached_report_ = partial_report_;
partial_report_ = nullptr;
transceiver_stats_infos_.clear();
// Trace WebRTC Stats when getStats is called on Javascript.
// This allows access to WebRTC stats from trace logs. To enable them,
// select the "webrtc_stats" category when recording traces.
TRACE_EVENT_INSTANT1("webrtc_stats", "webrtc_stats", "report",
cached_report_->ToJson());
// Deliver report and clear |requests_|.
std::vector<RequestInfo> requests;
requests.swap(requests_);
DeliverCachedReport(cached_report_, std::move(requests));
}
}
void RTCStatsCollector::DeliverCachedReport(

View file

@ -27,7 +27,7 @@
#include "pc/data_channel.h"
#include "pc/peer_connection_internal.h"
#include "pc/track_media_info_map.h"
#include "rtc_base/event.h"
#include "rtc_base/async_invoker.h"
#include "rtc_base/ref_count.h"
#include "rtc_base/ssl_identity.h"
#include "rtc_base/third_party/sigslot/sigslot.h"
@ -77,21 +77,14 @@ class RTCStatsCollector : public virtual rtc::RefCountInterface,
RTCStatsCollector(PeerConnectionInternal* pc, int64_t cache_lifetime_us);
~RTCStatsCollector();
struct CertificateStatsPair {
std::unique_ptr<rtc::SSLCertificateStats> local;
std::unique_ptr<rtc::SSLCertificateStats> remote;
};
// Stats gathering on a particular thread. Calls |AddPartialResults| before
// returning. Virtual for the sake of testing.
virtual void ProducePartialResultsOnSignalingThread(int64_t timestamp_us);
virtual void ProducePartialResultsOnNetworkThread(int64_t timestamp_us);
// Stats gathering on a particular thread. Virtual for the sake of testing.
virtual void ProducePartialResultsOnSignalingThreadImpl(
int64_t timestamp_us,
RTCStatsReport* partial_report);
virtual void ProducePartialResultsOnNetworkThreadImpl(
int64_t timestamp_us,
const std::map<std::string, cricket::TransportStats>&
transport_stats_by_name,
const std::map<std::string, CertificateStatsPair>& transport_cert_stats,
RTCStatsReport* partial_report);
// Can be called on any thread.
void AddPartialResults(
const rtc::scoped_refptr<RTCStatsReport>& partial_report);
private:
class RequestInfo {
@ -137,6 +130,11 @@ class RTCStatsCollector : public virtual rtc::RefCountInterface,
void GetStatsReportInternal(RequestInfo request);
struct CertificateStatsPair {
std::unique_ptr<rtc::SSLCertificateStats> local;
std::unique_ptr<rtc::SSLCertificateStats> remote;
};
// Structure for tracking stats about each RtpTransceiver managed by the
// PeerConnection. This can either be a Plan B style or Unified Plan style
// transceiver (i.e., can have 0 or many senders and receivers).
@ -152,6 +150,7 @@ class RTCStatsCollector : public virtual rtc::RefCountInterface,
std::unique_ptr<TrackMediaInfoMap> track_media_info_map;
};
void AddPartialResults_s(rtc::scoped_refptr<RTCStatsReport> partial_report);
void DeliverCachedReport(
rtc::scoped_refptr<const RTCStatsReport> cached_report,
std::vector<RequestInfo> requests);
@ -212,13 +211,6 @@ class RTCStatsCollector : public virtual rtc::RefCountInterface,
std::vector<RtpTransceiverStatsInfo> PrepareTransceiverStatsInfos_s() const;
std::set<std::string> PrepareTransportNames_s() const;
// Stats gathering on a particular thread.
void ProducePartialResultsOnSignalingThread(int64_t timestamp_us);
void ProducePartialResultsOnNetworkThread(int64_t timestamp_us);
// Merges |network_report_| into |partial_report_| and completes the request.
// This is a NO-OP if |network_report_| is null.
void MergeNetworkReport_s();
// Slots for signals (sigslot) that are wired up to |pc_|.
void OnDataChannelCreated(DataChannel* channel);
// Slots for signals (sigslot) that are wired up to |channel|.
@ -229,24 +221,12 @@ class RTCStatsCollector : public virtual rtc::RefCountInterface,
rtc::Thread* const signaling_thread_;
rtc::Thread* const worker_thread_;
rtc::Thread* const network_thread_;
rtc::AsyncInvoker invoker_;
int num_pending_partial_reports_;
int64_t partial_report_timestamp_us_;
// Reports that are produced on the signaling thread or the network thread are
// merged into this report. It is only touched on the signaling thread. Once
// all partial reports are merged this is the result of a request.
rtc::scoped_refptr<RTCStatsReport> partial_report_;
std::vector<RequestInfo> requests_;
// Holds the result of ProducePartialResultsOnNetworkThread(). It is merged
// into |partial_report_| on the signaling thread and then nulled by
// MergeNetworkReport_s(). Thread-safety is ensured by using
// |network_report_event_|.
rtc::scoped_refptr<RTCStatsReport> network_report_;
// If set, it is safe to touch the |network_report_| on the signaling thread.
// This is reset before async-invoking ProducePartialResultsOnNetworkThread()
// and set when ProducePartialResultsOnNetworkThread() is complete, after it
// has updated the value of |network_report_|.
rtc::Event network_report_event_;
// Set in |GetStatsReport|, read in |ProducePartialResultsOnNetworkThread| and
// |ProducePartialResultsOnSignalingThread|, reset after work is complete. Not

View file

@ -2249,9 +2249,7 @@ class FakeRTCStatsCollector : public RTCStatsCollector,
worker_thread_(pc->worker_thread()),
network_thread_(pc->network_thread()) {}
void ProducePartialResultsOnSignalingThreadImpl(
int64_t timestamp_us,
RTCStatsReport* partial_report) override {
void ProducePartialResultsOnSignalingThread(int64_t timestamp_us) override {
EXPECT_TRUE(signaling_thread_->IsCurrent());
{
rtc::CritScope cs(&lock_);
@ -2259,15 +2257,13 @@ class FakeRTCStatsCollector : public RTCStatsCollector,
++produced_on_signaling_thread_;
}
partial_report->AddStats(std::unique_ptr<const RTCStats>(
rtc::scoped_refptr<RTCStatsReport> signaling_report =
RTCStatsReport::Create(0);
signaling_report->AddStats(std::unique_ptr<const RTCStats>(
new RTCTestStats("SignalingThreadStats", timestamp_us)));
AddPartialResults(signaling_report);
}
void ProducePartialResultsOnNetworkThreadImpl(
int64_t timestamp_us,
const std::map<std::string, cricket::TransportStats>&
transport_stats_by_name,
const std::map<std::string, CertificateStatsPair>& transport_cert_stats,
RTCStatsReport* partial_report) override {
void ProducePartialResultsOnNetworkThread(int64_t timestamp_us) override {
EXPECT_TRUE(network_thread_->IsCurrent());
{
rtc::CritScope cs(&lock_);
@ -2275,8 +2271,11 @@ class FakeRTCStatsCollector : public RTCStatsCollector,
++produced_on_network_thread_;
}
partial_report->AddStats(std::unique_ptr<const RTCStats>(
rtc::scoped_refptr<RTCStatsReport> network_report =
RTCStatsReport::Create(0);
network_report->AddStats(std::unique_ptr<const RTCStats>(
new RTCTestStats("NetworkThreadStats", timestamp_us)));
AddPartialResults(network_report);
}
private: