doq.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. package dns
  2. import (
  3. "context"
  4. "crypto/tls"
  5. "encoding/binary"
  6. "errors"
  7. "fmt"
  8. "net"
  9. "runtime"
  10. "strconv"
  11. "sync"
  12. "time"
  13. "github.com/metacubex/mihomo/component/ca"
  14. C "github.com/metacubex/mihomo/constant"
  15. "github.com/metacubex/mihomo/log"
  16. "github.com/metacubex/quic-go"
  17. D "github.com/miekg/dns"
  18. )
  // NextProtoDQ is the ALPN protocol identifier placed in the TLS NextProtos
  // list when a DNS-over-QUIC connection is opened.
  19. const NextProtoDQ = "doq"
  20. const (
  21. // QUICCodeNoError is used when the connection or stream needs to be closed,
  22. // but there is no error to signal.
  23. QUICCodeNoError = quic.ApplicationErrorCode(0)
  24. // QUICCodeInternalError signals that the DoQ implementation encountered
  25. // an internal error and is incapable of pursuing the transaction or the
  26. // connection.
  27. QUICCodeInternalError = quic.ApplicationErrorCode(1)
  28. // QUICKeepAlivePeriod is the value that we pass to *quic.Config and that
  29. // controls the period with which keep-alive frames are being sent to the
  30. // connection. We set it to 20s as it would be in the quic-go@v0.27.1 with
  31. // KeepAlive field set to true. This value is specified in
  32. // https://pkg.go.dev/github.com/metacubex/quic-go/internal/protocol#MaxKeepAliveInterval.
  33. //
  34. // TODO(ameshkov): Consider making it configurable.
  35. QUICKeepAlivePeriod = time.Second * 20
  // DefaultTimeout is a 5-second duration. It is not referenced in the
  // visible part of this file.
  // NOTE(review): confirm which callers in the package rely on it.
  36. DefaultTimeout = time.Second * 5
  37. )
  38. // dnsOverQUIC is a struct that implements the Upstream interface for the
  39. // DNS-over-QUIC protocol (spec: https://www.rfc-editor.org/rfc/rfc9250.html).
  40. type dnsOverQUIC struct {
  41. // quicConfig is the QUIC configuration that is used for establishing
  42. // connections to the upstream. This configuration includes the TokenStore
  43. // that needs to be stored for the lifetime of dnsOverQUIC since we can
  44. // re-create the connection.
  45. quicConfig *quic.Config
  // quicConfigGuard is held while quicConfig is read or replaced.
  46. quicConfigGuard sync.Mutex
  47. // conn is the current active QUIC connection. It can be closed and
  48. // re-opened when needed.
  49. conn quic.Connection
  // connMu is held while conn is read, replaced or closed.
  50. connMu sync.RWMutex
  51. // bytesPool is a *sync.Pool we use to store byte buffers in. These byte
  52. // buffers are used to read responses from the upstream.
  53. bytesPool *sync.Pool
  // bytesPoolGuard is held while bytesPool is lazily created.
  54. bytesPoolGuard sync.Mutex
  // addr is the upstream address in host:port form. It is what Address
  // returns, what gets dialed, and its host part is used as the TLS server
  // name.
  55. addr string
  // dialer performs the UDP dial and packet-listen calls needed to open a
  // QUIC connection.
  56. dialer *dnsDialer
  57. }
  58. // Compile-time check that *dnsOverQUIC satisfies the dnsClient interface.
  59. var _ dnsClient = (*dnsOverQUIC)(nil)
  60. // newDoQ returns the DNS-over-QUIC Upstream.
  61. func newDoQ(resolver *Resolver, addr string, proxyAdapter C.ProxyAdapter, proxyName string) (dnsClient, error) {
  62. doq := &dnsOverQUIC{
  63. addr: addr,
  64. dialer: newDNSDialer(resolver, proxyAdapter, proxyName),
  65. quicConfig: &quic.Config{
  66. KeepAlivePeriod: QUICKeepAlivePeriod,
  67. TokenStore: newQUICTokenStore(),
  68. },
  69. }
  70. runtime.SetFinalizer(doq, (*dnsOverQUIC).Close)
  71. return doq, nil
  72. }
  73. // Address implements the Upstream interface for *dnsOverQUIC.
  74. func (doq *dnsOverQUIC) Address() string { return doq.addr }
  75. func (doq *dnsOverQUIC) ExchangeContext(ctx context.Context, m *D.Msg) (msg *D.Msg, err error) {
  76. // When sending queries over a QUIC connection, the DNS Message ID MUST be
  77. // set to zero.
  78. m = m.Copy()
  79. id := m.Id
  80. m.Id = 0
  81. defer func() {
  82. // Restore the original ID to not break compatibility with proxies.
  83. m.Id = id
  84. if msg != nil {
  85. msg.Id = id
  86. }
  87. }()
  88. // Check if there was already an active conn before sending the request.
  89. // We'll only attempt to re-connect if there was one.
  90. hasConnection := doq.hasConnection()
  91. // Make the first attempt to send the DNS query.
  92. msg, err = doq.exchangeQUIC(ctx, m)
  93. // Make up to 2 attempts to re-open the QUIC connection and send the request
  94. // again. There are several cases where this workaround is necessary to
  95. // make DoQ usable. We need to make 2 attempts in the case when the
  96. // connection was closed (due to inactivity for example) AND the server
  97. // refuses to open a 0-RTT connection.
  98. for i := 0; hasConnection && doq.shouldRetry(err) && i < 2; i++ {
  99. log.Debugln("re-creating the QUIC connection and retrying due to %v", err)
  100. // Close the active connection to make sure we'll try to re-connect.
  101. doq.closeConnWithError(err)
  102. // Retry sending the request.
  103. msg, err = doq.exchangeQUIC(ctx, m)
  104. }
  105. if err != nil {
  106. // If we're unable to exchange messages, make sure the connection is
  107. // closed and signal about an internal error.
  108. doq.closeConnWithError(err)
  109. }
  110. return msg, err
  111. }
  112. // Close implements the Upstream interface for *dnsOverQUIC.
  113. func (doq *dnsOverQUIC) Close() (err error) {
  114. doq.connMu.Lock()
  115. defer doq.connMu.Unlock()
  116. runtime.SetFinalizer(doq, nil)
  117. if doq.conn != nil {
  118. err = doq.conn.CloseWithError(QUICCodeNoError, "")
  119. }
  120. return err
  121. }
  122. // exchangeQUIC attempts to open a QUIC connection, send the DNS message
  123. // through it and return the response it got from the server.
  124. func (doq *dnsOverQUIC) exchangeQUIC(ctx context.Context, msg *D.Msg) (resp *D.Msg, err error) {
  125. var conn quic.Connection
  126. conn, err = doq.getConnection(ctx, true)
  127. if err != nil {
  128. return nil, err
  129. }
  130. var buf []byte
  131. buf, err = msg.Pack()
  132. if err != nil {
  133. return nil, fmt.Errorf("failed to pack DNS message for DoQ: %w", err)
  134. }
  135. var stream quic.Stream
  136. stream, err = doq.openStream(ctx, conn)
  137. if err != nil {
  138. return nil, err
  139. }
  140. _, err = stream.Write(AddPrefix(buf))
  141. if err != nil {
  142. return nil, fmt.Errorf("failed to write to a QUIC stream: %w", err)
  143. }
  144. // The client MUST send the DNS query over the selected stream, and MUST
  145. // indicate through the STREAM FIN mechanism that no further data will
  146. // be sent on that stream. Note, that stream.Close() closes the
  147. // write-direction of the stream, but does not prevent reading from it.
  148. _ = stream.Close()
  149. return doq.readMsg(stream)
  150. }
  // AddPrefix returns a new slice holding b preceded by its length encoded as a
  // 2-byte big-endian integer. b itself is not modified. The length is
  // converted to uint16, so only its low 16 bits end up in the prefix.
  func AddPrefix(b []byte) (m []byte) {
  	size := len(b)

  	// Allocate room for prefix and payload up front, fill in the prefix, then
  	// append the payload behind it.
  	m = make([]byte, 2, 2+size)
  	binary.BigEndian.PutUint16(m, uint16(size))

  	return append(m, b...)
  }
  158. // shouldRetry checks what error we received and decides whether it is required
  159. // to re-open the connection and retry sending the request.
  160. func (doq *dnsOverQUIC) shouldRetry(err error) (ok bool) {
  161. return isQUICRetryError(err)
  162. }
  163. // getBytesPool returns (creates if needed) a pool we store byte buffers in.
  164. func (doq *dnsOverQUIC) getBytesPool() (pool *sync.Pool) {
  165. doq.bytesPoolGuard.Lock()
  166. defer doq.bytesPoolGuard.Unlock()
  167. if doq.bytesPool == nil {
  168. doq.bytesPool = &sync.Pool{
  169. New: func() interface{} {
  170. b := make([]byte, MaxMsgSize)
  171. return &b
  172. },
  173. }
  174. }
  175. return doq.bytesPool
  176. }
  177. // getConnection opens or returns an existing quic.Connection. useCached
  178. // argument controls whether we should try to use the existing cached
  179. // connection. If it is false, we will forcibly create a new connection and
  180. // close the existing one if needed.
  181. func (doq *dnsOverQUIC) getConnection(ctx context.Context, useCached bool) (quic.Connection, error) {
  182. var conn quic.Connection
  183. doq.connMu.RLock()
  184. conn = doq.conn
  185. if conn != nil && useCached {
  186. doq.connMu.RUnlock()
  187. return conn, nil
  188. }
  189. if conn != nil {
  190. // we're recreating the connection, let's create a new one.
  191. _ = conn.CloseWithError(QUICCodeNoError, "")
  192. }
  193. doq.connMu.RUnlock()
  194. doq.connMu.Lock()
  195. defer doq.connMu.Unlock()
  196. var err error
  197. conn, err = doq.openConnection(ctx)
  198. if err != nil {
  199. return nil, err
  200. }
  201. doq.conn = conn
  202. return conn, nil
  203. }
  204. // hasConnection returns true if there's an active QUIC connection.
  205. func (doq *dnsOverQUIC) hasConnection() (ok bool) {
  206. doq.connMu.Lock()
  207. defer doq.connMu.Unlock()
  208. return doq.conn != nil
  209. }
  210. // getQUICConfig returns the QUIC config in a thread-safe manner. Note, that
  211. // this method returns a pointer, it is forbidden to change its properties.
  212. func (doq *dnsOverQUIC) getQUICConfig() (c *quic.Config) {
  213. doq.quicConfigGuard.Lock()
  214. defer doq.quicConfigGuard.Unlock()
  215. return doq.quicConfig
  216. }
  217. // resetQUICConfig re-creates the tokens store as we may need to use a new one
  218. // if we failed to connect.
  219. func (doq *dnsOverQUIC) resetQUICConfig() {
  220. doq.quicConfigGuard.Lock()
  221. defer doq.quicConfigGuard.Unlock()
  222. doq.quicConfig = doq.quicConfig.Clone()
  223. doq.quicConfig.TokenStore = newQUICTokenStore()
  224. }
  225. // openStream opens a new QUIC stream for the specified connection.
  226. func (doq *dnsOverQUIC) openStream(ctx context.Context, conn quic.Connection) (quic.Stream, error) {
  227. ctx, cancel := context.WithCancel(ctx)
  228. defer cancel()
  229. stream, err := conn.OpenStreamSync(ctx)
  230. if err == nil {
  231. return stream, nil
  232. }
  233. // We can get here if the old QUIC connection is not valid anymore. We
  234. // should try to re-create the connection again in this case.
  235. newConn, err := doq.getConnection(ctx, false)
  236. if err != nil {
  237. return nil, err
  238. }
  239. // Open a new stream.
  240. return newConn.OpenStreamSync(ctx)
  241. }
  242. // openConnection opens a new QUIC connection.
  243. func (doq *dnsOverQUIC) openConnection(ctx context.Context) (conn quic.Connection, err error) {
  244. // we're using bootstrapped address instead of what's passed to the function
  245. // it does not create an actual connection, but it helps us determine
  246. // what IP is actually reachable (when there're v4/v6 addresses).
  247. rawConn, err := doq.dialer.DialContext(ctx, "udp", doq.addr)
  248. if err != nil {
  249. return nil, fmt.Errorf("failed to open a QUIC connection: %w", err)
  250. }
  251. addr := rawConn.RemoteAddr().String()
  252. // It's never actually used
  253. _ = rawConn.Close()
  254. ip, port, err := net.SplitHostPort(addr)
  255. if err != nil {
  256. return nil, err
  257. }
  258. p, err := strconv.Atoi(port)
  259. udpAddr := net.UDPAddr{IP: net.ParseIP(ip), Port: p}
  260. udp, err := doq.dialer.ListenPacket(ctx, "udp", addr)
  261. if err != nil {
  262. return nil, err
  263. }
  264. host, _, err := net.SplitHostPort(doq.addr)
  265. if err != nil {
  266. return nil, err
  267. }
  268. tlsConfig := ca.GetGlobalTLSConfig(
  269. &tls.Config{
  270. ServerName: host,
  271. InsecureSkipVerify: false,
  272. NextProtos: []string{
  273. NextProtoDQ,
  274. },
  275. SessionTicketsDisabled: false,
  276. })
  277. transport := quic.Transport{Conn: udp}
  278. transport.SetCreatedConn(true) // auto close conn
  279. transport.SetSingleUse(true) // auto close transport
  280. conn, err = transport.Dial(ctx, &udpAddr, tlsConfig, doq.getQUICConfig())
  281. if err != nil {
  282. return nil, fmt.Errorf("opening quic connection to %s: %w", doq.addr, err)
  283. }
  284. return conn, nil
  285. }
  286. // closeConnWithError closes the active connection with error to make sure that
  287. // new queries were processed in another connection. We can do that in the case
  288. // of a fatal error.
  289. func (doq *dnsOverQUIC) closeConnWithError(err error) {
  290. doq.connMu.Lock()
  291. defer doq.connMu.Unlock()
  292. if doq.conn == nil {
  293. // Do nothing, there's no active conn anyways.
  294. return
  295. }
  296. code := QUICCodeNoError
  297. if err != nil {
  298. code = QUICCodeInternalError
  299. }
  300. if errors.Is(err, quic.Err0RTTRejected) {
  301. // Reset the TokenStore only if 0-RTT was rejected.
  302. doq.resetQUICConfig()
  303. }
  304. err = doq.conn.CloseWithError(code, "")
  305. if err != nil {
  306. log.Errorln("failed to close the conn: %v", err)
  307. }
  308. doq.conn = nil
  309. }
  310. // readMsg reads the incoming DNS message from the QUIC stream.
  311. func (doq *dnsOverQUIC) readMsg(stream quic.Stream) (m *D.Msg, err error) {
  312. pool := doq.getBytesPool()
  313. bufPtr := pool.Get().(*[]byte)
  314. defer pool.Put(bufPtr)
  315. respBuf := *bufPtr
  316. n, err := stream.Read(respBuf)
  317. if err != nil && n == 0 {
  318. return nil, fmt.Errorf("reading response from %s: %w", doq.Address(), err)
  319. }
  320. // All DNS messages (queries and responses) sent over DoQ connections MUST
  321. // be encoded as a 2-octet length field followed by the message content as
  322. // specified in [RFC1035].
  323. // IMPORTANT: Note, that we ignore this prefix here as this implementation
  324. // does not support receiving multiple messages over a single connection.
  325. m = new(D.Msg)
  326. err = m.Unpack(respBuf[2:])
  327. if err != nil {
  328. return nil, fmt.Errorf("unpacking response from %s: %w", doq.Address(), err)
  329. }
  330. return m, nil
  331. }
  332. // newQUICTokenStore creates a new quic.TokenStore that is necessary to have
  333. // in order to benefit from 0-RTT.
  334. func newQUICTokenStore() (s quic.TokenStore) {
  335. // You can read more on address validation here:
  336. // https://datatracker.ietf.org/doc/html/rfc9000#section-8.1
  337. // Setting maxOrigins to 1 and tokensPerOrigin to 10 assuming that this is
  338. // more than enough for the way we use it (one connection per upstream).
  339. return quic.NewLRUTokenStore(1, 10)
  340. }
  341. // isQUICRetryError checks the error and determines whether it may signal that
  342. // we should re-create the QUIC connection. This requirement is caused by
  343. // quic-go issues, see the comments inside this function.
  344. // TODO(ameshkov): re-test when updating quic-go.
  345. func isQUICRetryError(err error) (ok bool) {
  346. var qAppErr *quic.ApplicationError
  347. if errors.As(err, &qAppErr) && qAppErr.ErrorCode == 0 {
  348. // This error is often returned when the server has been restarted,
  349. // and we try to use the same connection on the client-side. It seems,
  350. // that the old connections aren't closed immediately on the server-side
  351. // and that's why one can run into this.
  352. // In addition to that, quic-go HTTP3 client implementation does not
  353. // clean up dead connections (this one is specific to DoH3 upstream):
  354. // https://github.com/metacubex/quic-go/issues/765
  355. return true
  356. }
  357. var qIdleErr *quic.IdleTimeoutError
  358. if errors.As(err, &qIdleErr) {
  359. // This error means that the connection was closed due to being idle.
  360. // In this case we should forcibly re-create the QUIC connection.
  361. // Reproducing is rather simple, stop the server and wait for 30 seconds
  362. // then try to send another request via the same upstream.
  363. return true
  364. }
  365. var resetErr *quic.StatelessResetError
  366. if errors.As(err, &resetErr) {
  367. // A stateless reset is sent when a server receives a QUIC packet that
  368. // it doesn't know how to decrypt. For instance, it may happen when
  369. // the server was recently rebooted. We should reconnect and try again
  370. // in this case.
  371. return true
  372. }
  373. var qTransportError *quic.TransportError
  374. if errors.As(err, &qTransportError) && qTransportError.ErrorCode == quic.NoError {
  375. // A transport error with the NO_ERROR error code could be sent by the
  376. // server when it considers that it's time to close the connection.
  377. // For example, Google DNS eventually closes an active connection with
  378. // the NO_ERROR code and "Connection max age expired" message:
  379. // https://github.com/AdguardTeam/dnsproxy/issues/283
  380. return true
  381. }
  382. if errors.Is(err, quic.Err0RTTRejected) {
  383. // This error happens when we try to establish a 0-RTT connection with
  384. // a token the server is no more aware of. This can be reproduced by
  385. // restarting the QUIC server (it will clear its tokens cache). The
  386. // next connection attempt will return this error until the client's
  387. // tokens cache is purged.
  388. return true
  389. }
  390. return false
  391. }