diff options
| author | Markus Pettersson <markus.pettersson@mullvad.net> | 2025-05-06 11:28:19 +0200 |
|---|---|---|
| committer | Joakim Hulthe <joakim.hulthe@mullvad.net> | 2025-05-14 18:00:24 +0200 |
| commit | 5997e11b2eb109baad9cec317e232807bb9f92b8 (patch) | |
| tree | 8751e323ed8c0de9e7f3fad506b39cef57645505 | |
| parent | d17158a3384e3fd2aa7f2e06ff6aa5bfb728eb66 (diff) | |
| download | mullvadvpn-5997e11b2eb109baad9cec317e232807bb9f92b8.tar.xz mullvadvpn-5997e11b2eb109baad9cec317e232807bb9f92b8.zip | |
Use local DNS resolver on macOS by default
- Enable the local dns resolver in [talpid_core::resolver] by default
on macOS.
- Change the local dns resolver to not run on a non-standard port.
To avoid conflicting with any other service running on 127.0.0.1:53,
we instead try to bind to a random address in the 127/24 range. This
requires configuring an IP address "alias" on the `lo0` network
device, but should be fairly non-invasive. The alias is removed when
the daemon is shut down.
- Add env var `TALPID_DISABLE_LOCAL_DNS_RESOLVER` which lets you
disable the local resolver.
Co-authored-by: Joakim Hulthe <joakim.hulthe@mullvad.net>
| -rw-r--r-- | README.md | 2 | ||||
| -rw-r--r-- | talpid-core/src/resolver.rs | 251 | ||||
| -rw-r--r-- | talpid-core/src/tunnel_state_machine/connected_state.rs | 6 | ||||
| -rw-r--r-- | talpid-core/src/tunnel_state_machine/connecting_state.rs | 6 | ||||
| -rw-r--r-- | talpid-core/src/tunnel_state_machine/disconnected_state.rs | 8 | ||||
| -rw-r--r-- | talpid-core/src/tunnel_state_machine/error_state.rs | 8 | ||||
| -rw-r--r-- | talpid-types/src/drop_guard.rs | 39 | ||||
| -rw-r--r-- | talpid-types/src/lib.rs | 2 |
8 files changed, 223 insertions, 99 deletions
@@ -165,6 +165,8 @@ See [this](Release.md) for instructions on how to make a new release. * `netsh`: use the `netsh` program * `tcpip`: set TCP/IP parameters in the registry +- `TALPID_DISABLE_LOCAL_DNS_RESOLVER` - Set this variable to `1` to disable the local DNS resolver (macOS only). + * `TALPID_FORCE_USERSPACE_WIREGUARD` - Forces the daemon to use the userspace implementation of WireGuard on Linux. diff --git a/talpid-core/src/resolver.rs b/talpid-core/src/resolver.rs index 77b416fb95..15b018c3df 100644 --- a/talpid-core/src/resolver.rs +++ b/talpid-core/src/resolver.rs @@ -41,7 +41,14 @@ use hickory_server::{ server::{Request, RequestHandler, ResponseHandler, ResponseInfo}, ServerFuture, }; +use rand::random; use std::sync::LazyLock; +use talpid_types::drop_guard::{on_drop, OnDrop}; +use tokio::{ + net::{self, UdpSocket}, + process::Command, + task::JoinHandle, +}; /// If a local DNS resolver should be used at all times. /// @@ -49,34 +56,27 @@ use std::sync::LazyLock; /// the local DNS resoler to work around Apple's captive portals check. Exactly how this is done is /// documented elsewhere. pub static LOCAL_DNS_RESOLVER: LazyLock<bool> = LazyLock::new(|| { - use talpid_platform_metadata::MacosVersion; - let version = MacosVersion::new().expect("Could not detect macOS version"); - let v = |s| MacosVersion::from_raw_version(s).unwrap(); - // Apple services tried to perform DNS lookups on the physical interface on some macOS - // versions, so we added redirect rules to always redirect DNS to our local DNS resolver. - // This seems to break some apps which do not like that we redirect DNS on port 53 to our local - // DNS resolver running on some other, arbitrary port, and so we disable this behaviour on - // macOS versions that are unaffected by this naughty bug. - // - // The workaround should only be applied to the affected macOS versions because some programs - // set the `skip filtering` pf flag on loopback, which meant that the pf filtering would break - // unexpectedly. We could clear the `skip filtering` flag to force pf filtering on loopback, - // but apparently it is good practice to enable `skip filtering` on loopback so we decided - // against this. Source: https://www.openbsd.org/faq/pf/filter.html - // - // It should be noted that most programs still works fine with this workaround enabled. Notably - // programs that use `getaddrinfo` would behave correctly when we redirect DNS to our local - // resolver, while some programs always used port 53 no matter what (nslookup for example). - // Also, most programs don't set the `skip filtering` pf flag on loopback, but some notable - // ones do for some reason. Orbstack is one such example, which meant that people running - // containers would run into the aforementioned issue. - let use_local_dns_resolver = v("14.6") <= version && version < v("15.1"); - if use_local_dns_resolver { + let disable_local_dns_resolver = std::env::var("TALPID_DISABLE_LOCAL_DNS_RESOLVER") + .map(|v| v != "0") + // Use the local DNS resolver by default. + .unwrap_or(false); + + if !disable_local_dns_resolver { log::debug!("Using local DNS resolver"); } - use_local_dns_resolver + !disable_local_dns_resolver }); +// Name of the loopback network device. +const LOOPBACK: &str = "lo0"; + +/// The port we should bind the local DNS resolver to. +const DNS_PORT: u16 = if cfg!(test) { + 1053 // use a value above 1000 to allow for running the tests without root privileges +} else { + 53 +}; + const ALLOWED_RECORD_TYPES: &[RecordType] = &[RecordType::A, RecordType::CNAME]; const CAPTIVE_PORTAL_DOMAINS: &[&str] = &["captive.apple.com", "netcts.cdn-apple.com"]; @@ -105,19 +105,20 @@ pub async fn start_resolver() -> Result<ResolverHandle, Error> { pub enum Error { /// Failed to bind UDP socket #[error("Failed to bind UDP socket")] - UdpBindError(#[source] io::Error), + UdpBind, /// Failed to get local address of a bound UDP socket #[error("Failed to get local address of a bound UDP socket")] - GetSocketAddrError(#[source] io::Error), + GetSocketAddr(#[source] io::Error), } /// A DNS resolver that forwards queries to some other DNS server /// /// Is controlled by commands sent through [ResolverHandle]s. +/// When all [ResolverHandle]s are dropped, [Self::rx] will close and [Self::run] will exit. struct LocalResolver { rx: mpsc::UnboundedReceiver<ResolverMessage>, - dns_server: Option<(tokio::task::JoinHandle<()>, oneshot::Receiver<()>)>, + dns_server_task: JoinHandle<()>, inner_resolver: Resolver, } @@ -171,7 +172,7 @@ impl From<Config> for Resolver { dns_servers.retain(|addr| !addr.is_loopback()); let forward_server_config = - NameServerConfigGroup::from_ips_clear(dns_servers, 53, true); + NameServerConfigGroup::from_ips_clear(dns_servers, DNS_PORT, true); let forward_config = ResolverConfig::from_parts(None, vec![], forward_server_config); @@ -259,17 +260,17 @@ impl Resolver { #[derive(Clone)] pub struct ResolverHandle { tx: Arc<mpsc::UnboundedSender<ResolverMessage>>, - listening_port: u16, + listening_addr: SocketAddr, } impl ResolverHandle { - fn new(tx: Arc<mpsc::UnboundedSender<ResolverMessage>>, listening_port: u16) -> Self { - Self { tx, listening_port } + fn new(tx: Arc<mpsc::UnboundedSender<ResolverMessage>>, listening_addr: SocketAddr) -> Self { + Self { tx, listening_addr } } - /// Get listening port for resolver handle - pub fn listening_port(&self) -> u16 { - self.listening_port + /// Get socket address associated with the running DNS resolver. + pub fn listening_addr(&self) -> SocketAddr { + self.listening_addr } /// Set the DNS server to forward queries to `dns_servers` @@ -298,69 +299,159 @@ impl ResolverHandle { impl LocalResolver { /// Constructs a new filtering resolver and it's handle. async fn new() -> Result<(Self, ResolverHandle), Error> { - let (tx, rx) = mpsc::unbounded(); - let command_tx = Arc::new(tx); - + let (command_tx, command_rx) = mpsc::unbounded(); + let command_tx = Arc::new(command_tx); let weak_tx = Arc::downgrade(&command_tx); - let (mut server, port) = Self::new_server(0, weak_tx.clone()).await?; - let (server_done_tx, server_done_rx) = oneshot::channel(); - let server_handle = tokio::spawn(async move { + let (socket, cleanup_ifconfig) = Self::new_random_socket().await?; + let resolver_addr = socket.local_addr().map_err(Error::GetSocketAddr)?; + let mut server = Self::new_server(socket, weak_tx.clone())?; + + let dns_server_task = tokio::spawn(async move { + // This drop guard will clean up the loopback IP addr alias when the task exits. + let _cleanup_ifconfig = cleanup_ifconfig; + + log::info!("Running DNS resolver on {resolver_addr}"); + loop { - if let Err(err) = server.block_until_done().await { - log::error!("DNS server unexpectedly stopped: {}", err); + let Err(err) = server.block_until_done().await else { + break; // Graceful shutdown + }; + + log::error!("DNS server unexpectedly stopped: {}", err); + drop(server); // drop the old server since we need to create a new one - if weak_tx.strong_count() > 0 { - log::debug!("Attempting restart server"); - match Self::new_server(port, weak_tx.clone()).await { - Ok((new_server, _port)) => { - server = new_server; - continue; - } - Err(error) => { - log::error!("Failed to restart DNS server: {error}"); - } - } + // Exit if `command_tx` has been dropped. + if weak_tx.strong_count() == 0 { + break; + } + + log::debug!("Attempting to restart server"); + + let socket = match net::UdpSocket::bind(resolver_addr).await { + Ok(socket) => socket, + Err(e) => { + log::error!("Failed to bind DNS server to {resolver_addr}: {e}"); + break; + } + }; + + match Self::new_server(socket, weak_tx.clone()) { + Ok(new_server) => server = new_server, + Err(error) => { + log::error!("Failed to restart DNS server: {error}"); + break; } } - break; } - - let _ = server_done_tx.send(()); }); let resolver = Self { - rx, - dns_server: Some((server_handle, server_done_rx)), + rx: command_rx, + dns_server_task, inner_resolver: Resolver::from(Config::Blocking), }; - Ok((resolver, ResolverHandle::new(command_tx, port))) + Ok((resolver, ResolverHandle::new(command_tx, resolver_addr))) } - async fn new_server( - port: u16, + fn new_server( + socket: UdpSocket, command_tx: Weak<mpsc::UnboundedSender<ResolverMessage>>, - ) -> Result<(ServerFuture<ResolverImpl>, u16), Error> { + ) -> Result<ServerFuture<ResolverImpl>, Error> { let mut server = ServerFuture::new(ResolverImpl { tx: command_tx }); - let server_listening_socket = - tokio::net::UdpSocket::bind(SocketAddr::new(Ipv4Addr::LOCALHOST.into(), port)) + server.register_socket(socket); + + Ok(server) + } + + /// Create a new [net::UdpSocket] bound to port 53 on loopback. + /// + /// This socket will try to bind to the following IPs in sequential order: + /// - random ip in the range 127.1-255.0-255.0-255 : 53 + /// - random ip in the range 127.1-255.0-255.0-255 : 53 + /// - random ip in the range 127.1-255.0-255.0-255 : 53 + /// - 127.0.0.1 : 53 + /// + /// We do this to try and avoid collisions with other DNS servers running on the same system. + /// + /// # Returns + /// - The first successfully bound [UdpSocket] + /// - An [OnDrop] guard that will delete the IP aliases added, if any. + /// If the guard is dropped while the socket is in use, calls to read/write will likely fail. + async fn new_random_socket() -> Result<(UdpSocket, OnDrop), Error> { + use std::net::Ipv4Addr; + + let random_loopback = || async move { + let addr = Ipv4Addr::new(127, 1u8.max(random()), random(), random()); + + // TODO: this command requires root privileges and will thus not work in `cargo test`. + // This means that the tests will fall back to 127.0.0.1, and will not assert that the + // ifconfig stuff actually works. We probably do want to test this, so what do? + let output = Command::new("ifconfig") + .args([LOOPBACK, "alias", &format!("{addr}"), "up"]) + .output() .await - .map_err(Error::UdpBindError)?; - let port = server_listening_socket - .local_addr() - .map_err(Error::GetSocketAddrError)? - .port(); - server.register_socket(server_listening_socket); + .inspect_err(|e| { + log::warn!("Failed to spawn `ifconfig {LOOPBACK} alias {addr} up`: {e}") + }) + .ok()?; + + if !output.status.success() { + log::warn!("Non-zero exit code from ifconfig: {}", output.status); + return None; + } + + log::debug!("Created loopback address {addr}"); + + // Clean up ip address when stopping the resolver + let cleanup_ifconfig = on_drop(move || { + tokio::task::spawn(async move { + log::debug!("Cleaning up loopback address {addr}"); + + let result = Command::new("ifconfig") + .args([LOOPBACK, "delete", &format!("{addr}")]) + .output() + .await; + + if let Err(e) = result { + log::warn!("Failed to clean up {LOOPBACK} alias {addr}: {e}"); + } + }); + }) + .boxed(); + + Some((addr, cleanup_ifconfig)) + }; + + for attempt in 0.. { + let (socket_addr, on_drop) = match attempt { + ..3 => match random_loopback().await { + Some(random) => random, + None => continue, + }, + 3 => (Ipv4Addr::LOCALHOST, OnDrop::noop()), + 4.. => break, + }; + + match net::UdpSocket::bind((socket_addr, DNS_PORT)).await { + Ok(socket) => return Ok((socket, on_drop)), + Err(err) => log::warn!("Failed to bind DNS server to {socket_addr}: {err}"), + } + } - Ok((server, port)) + // See logs for details. + Err(Error::UdpBind) } /// Runs the filtering resolver as an actor, listening for new queries instances. When all /// related [ResolverHandle] instances are dropped, this function will return, closing the DNS /// server. async fn run(mut self) { + let abort_handle = self.dns_server_task.abort_handle(); + let _abort_dns_server_task = on_drop(|| abort_handle.abort()); + while let Some(request) = self.rx.next().await { match request { ResolverMessage::SetConfig { @@ -381,11 +472,6 @@ impl LocalResolver { } } } - - if let Some((server_handle, done_rx)) = self.dns_server.take() { - server_handle.abort(); - let _ = done_rx.await; - } } } @@ -541,11 +627,11 @@ mod test { super::start_resolver().await.unwrap() } - fn get_test_resolver(port: u16) -> hickory_server::resolver::TokioAsyncResolver { + fn get_test_resolver(addr: SocketAddr) -> hickory_server::resolver::TokioAsyncResolver { let resolver_config = ResolverConfig::from_parts( None, vec![], - NameServerConfigGroup::from_ips_clear(&[Ipv4Addr::LOCALHOST.into()], port, true), + NameServerConfigGroup::from_ips_clear(&[addr.ip()], addr.port(), true), ); TokioAsyncResolver::tokio(resolver_config, ResolverOpts::default()) } @@ -554,7 +640,7 @@ mod test { fn test_successful_lookup() { let rt = tokio::runtime::Runtime::new().unwrap(); let handle = rt.block_on(start_resolver()); - let test_resolver = get_test_resolver(handle.listening_port()); + let test_resolver = get_test_resolver(handle.listening_addr()); rt.block_on(async move { for domain in &*ALLOWED_DOMAINS { @@ -570,7 +656,7 @@ mod test { let rt = tokio::runtime::Runtime::new().unwrap(); let handle = rt.block_on(start_resolver()); - let test_resolver = get_test_resolver(handle.listening_port()); + let test_resolver = get_test_resolver(handle.listening_addr()); let captive_portal_domain = LowerName::from(Name::from_str("apple.com").unwrap()); let resolver_result = rt.block_on(async move { @@ -589,10 +675,9 @@ mod test { let rt = tokio::runtime::Runtime::new().unwrap(); let handle = rt.block_on(start_resolver()); - let port = handle.listening_port(); + let addr = handle.listening_addr(); mem::drop(handle); thread::sleep(Duration::from_millis(300)); - UdpSocket::bind((Ipv4Addr::LOCALHOST, port)) - .expect("Failed to bind to a port that should have been removed"); + UdpSocket::bind(addr).expect("Failed to bind to a port that should have been removed"); } } diff --git a/talpid-core/src/tunnel_state_machine/connected_state.rs b/talpid-core/src/tunnel_state_machine/connected_state.rs index 05024554a3..d64d5033e3 100644 --- a/talpid-core/src/tunnel_state_machine/connected_state.rs +++ b/talpid-core/src/tunnel_state_machine/connected_state.rs @@ -142,7 +142,7 @@ impl ConnectedState { #[cfg(target_os = "macos")] redirect_interface, #[cfg(target_os = "macos")] - dns_redirect_port: shared_values.filtering_resolver.listening_port(), + dns_redirect_port: shared_values.filtering_resolver.listening_addr().port(), } } @@ -187,8 +187,8 @@ impl ConnectedState { ); // Set system DNS to our local DNS resolver let system_dns = DnsConfig::default().resolve( - &[std::net::Ipv4Addr::LOCALHOST.into()], - shared_values.filtering_resolver.listening_port(), + &[shared_values.filtering_resolver.listening_addr().ip()], + shared_values.filtering_resolver.listening_addr().port(), ); shared_values .dns_monitor diff --git a/talpid-core/src/tunnel_state_machine/connecting_state.rs b/talpid-core/src/tunnel_state_machine/connecting_state.rs index 66534f6081..92f19ca956 100644 --- a/talpid-core/src/tunnel_state_machine/connecting_state.rs +++ b/talpid-core/src/tunnel_state_machine/connecting_state.rs @@ -59,8 +59,8 @@ impl ConnectingState { if *LOCAL_DNS_RESOLVER { // Set system DNS to our local DNS resolver let system_dns = DnsConfig::default().resolve( - &[std::net::Ipv4Addr::LOCALHOST.into()], - shared_values.filtering_resolver.listening_port(), + &[shared_values.filtering_resolver.listening_addr().ip()], + shared_values.filtering_resolver.listening_addr().port(), ); let _ = shared_values .dns_monitor @@ -201,7 +201,7 @@ impl ConnectingState { #[cfg(target_os = "macos")] redirect_interface, #[cfg(target_os = "macos")] - dns_redirect_port: shared_values.filtering_resolver.listening_port(), + dns_redirect_port: shared_values.filtering_resolver.listening_addr().port(), }; shared_values .firewall diff --git a/talpid-core/src/tunnel_state_machine/disconnected_state.rs b/talpid-core/src/tunnel_state_machine/disconnected_state.rs index de1d4822cb..7f6e6e79ec 100644 --- a/talpid-core/src/tunnel_state_machine/disconnected_state.rs +++ b/talpid-core/src/tunnel_state_machine/disconnected_state.rs @@ -8,8 +8,6 @@ use crate::firewall::FirewallPolicy; use crate::{dns, tunnel_state_machine::ErrorState}; use futures::StreamExt; #[cfg(target_os = "macos")] -use std::net::Ipv4Addr; -#[cfg(target_os = "macos")] use talpid_types::tunnel::ErrorStateCause; use talpid_types::ErrorExt; @@ -81,7 +79,7 @@ impl DisconnectedState { allow_lan: shared_values.allow_lan, allowed_endpoint: Some(shared_values.allowed_endpoint.clone()), #[cfg(target_os = "macos")] - dns_redirect_port: shared_values.filtering_resolver.listening_port(), + dns_redirect_port: shared_values.filtering_resolver.listening_addr().port(), }; shared_values.firewall.apply_policy(policy).map_err(|e| { @@ -149,8 +147,8 @@ impl DisconnectedState { shared_values.dns_monitor.set( "lo", dns::DnsConfig::default().resolve( - &[Ipv4Addr::LOCALHOST.into()], - shared_values.filtering_resolver.listening_port(), + &[shared_values.filtering_resolver.listening_addr().ip()], + shared_values.filtering_resolver.listening_addr().port(), ), ) } diff --git a/talpid-core/src/tunnel_state_machine/error_state.rs b/talpid-core/src/tunnel_state_machine/error_state.rs index 0cf392a0e5..c525f1398a 100644 --- a/talpid-core/src/tunnel_state_machine/error_state.rs +++ b/talpid-core/src/tunnel_state_machine/error_state.rs @@ -7,8 +7,6 @@ use crate::dns::DnsConfig; #[cfg(not(target_os = "android"))] use crate::firewall::FirewallPolicy; use futures::StreamExt; -#[cfg(target_os = "macos")] -use std::net::Ipv4Addr; use talpid_types::{ tunnel::{ErrorStateCause, FirewallPolicyError}, ErrorExt, @@ -38,8 +36,8 @@ impl ErrorState { if !block_reason.prevents_filtering_resolver() { // Set system DNS to our local DNS resolver let system_dns = DnsConfig::default().resolve( - &[Ipv4Addr::LOCALHOST.into()], - shared_values.filtering_resolver.listening_port(), + &[shared_values.filtering_resolver.listening_addr().ip()], + shared_values.filtering_resolver.listening_addr().port(), ); if let Err(err) = shared_values.dns_monitor.set("lo", system_dns) { log::error!( @@ -81,7 +79,7 @@ impl ErrorState { allow_lan: shared_values.allow_lan, allowed_endpoint: Some(shared_values.allowed_endpoint.clone()), #[cfg(target_os = "macos")] - dns_redirect_port: shared_values.filtering_resolver.listening_port(), + dns_redirect_port: shared_values.filtering_resolver.listening_addr().port(), }; #[cfg(target_os = "linux")] diff --git a/talpid-types/src/drop_guard.rs b/talpid-types/src/drop_guard.rs new file mode 100644 index 0000000000..1327bcf1f3 --- /dev/null +++ b/talpid-types/src/drop_guard.rs @@ -0,0 +1,39 @@ +/// Create a value that executes function `F` when dropped. +#[must_use = "Should be assigned to a variable and implicitly or explicitly dropped"] +pub fn on_drop<F: FnOnce()>(f: F) -> OnDrop<F> { + OnDrop(Some(f)) +} + +/// A type that executes a function when dropped. +/// +/// The default type of `F` is a boxed function. It's also [Send] in order to be useful with async. +/// If you need a less restrictive type for `F` you can specify it explicitly. +pub struct OnDrop<F: FnOnce() = Box<dyn FnOnce() + Send>>(Option<F>); + +impl<F: FnOnce()> OnDrop<F> { + /// Map the wrapped function into some other function. + pub fn map<F2: FnOnce()>(mut self, map: impl FnOnce(F) -> F2) -> OnDrop<F2> { + let f = self.0.take(); + OnDrop(f.map(map)) + } + + /// A drop guard that does nothing + pub fn noop() -> Self { + OnDrop(None) + } +} + +impl<F: FnOnce() + Send + 'static> OnDrop<F> { + /// Box the inner function to erase its type. + pub fn boxed(self) -> OnDrop { + self.map(|f| Box::new(f) as Box<_>) + } +} + +impl<F: FnOnce()> Drop for OnDrop<F> { + fn drop(&mut self) { + if let Some(f) = self.0.take() { + f() + } + } +} diff --git a/talpid-types/src/lib.rs b/talpid-types/src/lib.rs index 34581f074a..df6396ad10 100644 --- a/talpid-types/src/lib.rs +++ b/talpid-types/src/lib.rs @@ -9,5 +9,7 @@ pub mod cgroup; #[cfg(target_os = "windows")] pub mod split_tunnel; +pub mod drop_guard; + mod error; pub use error::*; |
