@@ -14,6 +14,14 @@ limitations under the License.
1414==============================================================================*/
1515
#include "tensorflow/tsl/platform/cloud/gcs_dns_cache.h"

#include <cerrno>
#include <cstring>

#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "tensorflow/tsl/platform/errors.h"
#include "tensorflow/tsl/platform/retrying_utils.h"
#include "tensorflow/tsl/platform/status.h"
1725#ifndef _WIN32
1826#include < arpa/inet.h>
1927#include < netdb.h>
@@ -33,19 +41,9 @@ namespace {
3341const std::vector<string>& kCachedDomainNames =
3442 *new std::vector<string>{" www.googleapis.com" , " storage.googleapis.com" };
3543
36- inline void print_getaddrinfo_error (const string& name, int error_code) {
37- #ifndef _WIN32
38- if (error_code == EAI_SYSTEM) {
39- LOG (ERROR) << " Error resolving " << name
40- << " (EAI_SYSTEM): " << strerror (errno);
41- } else {
42- LOG (ERROR) << " Error resolving " << name << " : "
43- << gai_strerror (error_code);
44- }
45- #else
46- // TODO:WSAGetLastError is better than gai_strerror
47- LOG (ERROR) << " Error resolving " << name << " : " << gai_strerror (error_code);
48- #endif
// Logs, at ERROR severity, that resolving `name` failed, followed by the
// Status that describes the failure.
//
//   name:          host name whose resolution failed.
//   return_status: failure status streamed into the log message. Taken by
//                  const reference so that logging does not copy the Status.
inline void print_getaddrinfo_error(const string& name,
                                    const Status& return_status) {
  // Status doesn't map well to EAI type errors.
  LOG(ERROR) << " Error resolving " << name << " : " << return_status;
}
5048
5149// Selects one item at random from a vector of items, using a uniform
@@ -101,10 +99,88 @@ void GcsDnsCache::AnnotateRequest(HttpRequest* request) {
10199 hints.ai_family = AF_INET; // Only use IPv4 for now.
102100 hints.ai_socktype = SOCK_STREAM;
103101 addrinfo* result = nullptr ;
104- int return_code = getaddrinfo (name.c_str (), nullptr , &hints, &result);
RetryConfig retryConfig(
    /* init_delay_time_us = */ 5000,
    /* max_delay_time_us = */ 50 * 1000 * 5000,
    /* max_retries = */ 5);

// Calls getaddrinfo() for `name` through RetryingUtils::CallWithRetries,
// translating the numeric return code into a Status on every attempt. On
// success `result` holds the address list filled in by getaddrinfo().
const Status getaddrinfo_status = RetryingUtils::CallWithRetries(
    [&name, &hints, &result]() {
      int return_code = getaddrinfo(name.c_str(), nullptr, &hints, &result);
#ifndef _WIN32
      // Snapshot errno immediately: it is only meaningful for EAI_SYSTEM and
      // must not be clobbered by anything called while building the Status.
      const int saved_errno = errno;
#endif
      absl::Status return_status;
      switch (return_code) {
        case 0:
          return_status = OkStatus();
          break;
#ifndef _WIN32
#ifdef EAI_ADDRFAMILY  // Obsoleted by RFC 3493; not defined by every libc.
        case EAI_ADDRFAMILY:
#endif
        case EAI_SERVICE:
        case EAI_SOCKTYPE:
        case EAI_NONAME:
          return_status = absl::FailedPreconditionError(
              absl::StrCat(" System in invalid state for getaddrinfo call: ",
                           gai_strerror(return_code)));
          break;
        case EAI_AGAIN:
#ifdef EAI_NODATA  // Obsoleted by RFC 3493; not defined by every libc.
        case EAI_NODATA:  // lump nodata in here - the domains being resolved
                          // should always have data
#endif
          return_status = absl::UnavailableError(absl::StrCat(
              " Resolving ", name, " is temporarily unavailable"));
          break;
        case EAI_BADFLAGS:
        case EAI_FAMILY:
          return_status = absl::InvalidArgumentError(absl::StrCat(
              " Bad arguments for getaddrinfo: ", gai_strerror(return_code)));
          break;
        case EAI_FAIL:
          return_status = absl::NotFoundError(
              absl::StrCat(" Permanent failure resolving ", name, " : ",
                           gai_strerror(return_code)));
          break;
        case EAI_MEMORY:
          return_status = absl::ResourceExhaustedError(" Out of memory");
          break;
        case EAI_SYSTEM:
          // EAI_SYSTEM only says "look at errno"; the EAI code itself is not
          // an errno value, so strerror() must be given the saved errno.
          return_status = absl::UnknownError(strerror(saved_errno));
          break;
        default:
          // Any other EAI_* code is described by gai_strerror(), not by
          // strerror(), which expects an errno value.
          return_status = absl::UnknownError(gai_strerror(return_code));
          break;
#else
        // mapping from
        // https://learn.microsoft.com/en-us/windows/win32/api/ws2tcpip/nf-ws2tcpip-getaddrinfo#return-value
        case WSATYPE_NOT_FOUND:
        case WSAESOCKTNOSUPPORT:
        case WSAHOST_NOT_FOUND:
          return_status = absl::FailedPreconditionError(
              absl::StrCat(" System in invalid state for getaddrinfo call: ",
                           gai_strerror(return_code)));
          break;
        case WSATRY_AGAIN:
          return_status = absl::UnavailableError(absl::StrCat(
              " Resolving ", name, " is temporarily unavailable"));
          break;
        case WSAEINVAL:
        case WSAEAFNOSUPPORT:
          return_status = absl::InvalidArgumentError(absl::StrCat(
              " Bad arguments for getaddrinfo: ", gai_strerror(return_code)));
          break;
        case WSANO_RECOVERY:
          return_status = absl::NotFoundError(
              absl::StrCat(" Permanent failure resolving ", name, " : ",
                           gai_strerror(return_code)));
          break;
        case WSA_NOT_ENOUGH_MEMORY:
          return_status = absl::ResourceExhaustedError(" Out of memory");
          break;
        default:
          // WSA error codes are not errno values; describe them the same way
          // as the other cases in this branch instead of via strerror().
          return_status = absl::UnknownError(gai_strerror(return_code));
          break;
#endif
      }

      return Status(return_status);
    },
    retryConfig);
105181
106182 std::vector<string> output;
107- if (return_code == 0 ) {
183+ if (getaddrinfo_status. ok () ) {
108184 for (const addrinfo* i = result; i != nullptr ; i = i->ai_next ) {
109185 if (i->ai_family != AF_INET || i->ai_addr ->sa_family != AF_INET) {
110186 LOG (WARNING) << " Non-IPv4 address returned. ai_family: " << i->ai_family
@@ -125,7 +201,7 @@ void GcsDnsCache::AnnotateRequest(HttpRequest* request) {
125201 }
126202 }
127203 } else {
128- print_getaddrinfo_error (name, return_code );
204+ print_getaddrinfo_error (name, getaddrinfo_status );
129205 }
130206 if (result != nullptr ) {
131207 freeaddrinfo (result);
0 commit comments