kochab/src/types/request.rs

404 lines
16 KiB
Rust

use std::{
fmt::Write,
ops,
};
#[cfg(feature = "gemini_srv")]
use std::convert::TryInto;
#[cfg(feature = "scgi_srv")]
use std::{
collections::HashMap,
convert::TryFrom,
path::Path,
};
use anyhow::*;
use percent_encoding::percent_decode_str;
use uriparse::URIReference;
#[cfg(feature="user_management")]
use serde::{Serialize, de::DeserializeOwned};
#[cfg(feature = "gemini_srv")]
use ring::digest;
#[cfg(feature="user_management")]
use crate::user_management::{UserManager, User};
#[derive(Clone)]
/// A request from a Gemini client to the app
///
/// When originally sent out by a client, a request is literally just a URL, and honestly,
/// if you want to use it as just a URL, that'll work fine!
///
/// That said, kochab and any proxies the request might hit add a little bit more
/// information that you can use, like
/// * [What TLS certificate (if any) did the client use](Self::certificate)
/// * [What part of the path is relevant (ie, everything after the route)](Self::trailing_segments)
/// * [Is the user registered with the user database?](Self::user)
///
/// The only way to get your hands on one of these bad boys is when you register an [`Fn`]
/// based handler to a [`Server`](crate::Server), and a user makes a request to the
/// endpoint.
pub struct Request {
/// The URI reference of the request, normalized when the request is constructed
uri: URIReference<'static>,
/// The percent-decoded query string, or [`None`] if the query was missing or empty
input: Option<String>,
/// The 32 byte SHA256 fingerprint of the client certificate, if one is known
certificate: Option<[u8; 32]>,
/// The path segments following the bound route; [`None`] until `set_trailing()` is called
trailing_segments: Option<Vec<String>>,
/// The user manager handed to the constructor
#[cfg(feature="user_management")]
manager: &'static UserManager,
/// The headers handed to the constructor in SCGI mode
#[cfg(feature = "scgi_srv")]
headers: HashMap<String, String>,
/// The value of the `SCRIPT_PATH` (or, failing that, `SCRIPT_NAME`) header, if either was sent
#[cfg(feature = "scgi_srv")]
script_path: Option<String>,
}
impl Request {
/// Construct a new request
///
/// In `gemini_srv` mode the request is built from a URL. A request built this way
/// starts out without a certificate, so remember to attach one afterwards with
/// [`Request::set_cert()`].
///
/// In `scgi_srv` mode the request is built from the headers sent by the SCGI client,
/// and the certificate fingerprint is read straight out of those headers, so nothing
/// further needs to be done.
///
/// When in SCGI mode, the following headers are expected:
///
/// * `PATH_INFO`: The part of the path following the route the app is bound to
/// * `QUERY_STRING`: The part of the request following ?, url encoded.  Will produce
///   an error if it contains invalid UTF-8.  No error if missing
/// * `TLS_CLIENT_HASH`: Optional.  The base64 or hex encoded SHA256 sum of the DER
///   certificate of the requester.
/// * `SCRIPT_PATH` or `SCRIPT_NAME`: The base path the app is mounted on
///
/// # Errors
///
/// Produces an error if:
/// * The SCGI server didn't include the mandatory `PATH_INFO` header
/// * The provided URI reference is invalid, including if the SCGI server sent an
///   invalid `PATH_INFO`
/// * The `TLS_CLIENT_HASH` sent by the SCGI server isn't sha256, or is encoded with
///   something other than base64 or hexadecimal
/// * The query string is not valid UTF-8 once percent decoded
pub (crate) fn new(
    #[cfg(feature = "gemini_srv")]
    mut uri: URIReference<'static>,
    #[cfg(feature = "scgi_srv")]
    headers: HashMap<String, String>,
    #[cfg(feature="user_management")]
    manager: &'static UserManager,
) -> Result<Self> {
    // In SCGI mode the URI is reassembled from PATH_INFO and the optional QUERY_STRING
    #[cfg(feature = "scgi_srv")]
    #[allow(clippy::or_fun_call)] // Lay off it's a macro
    let mut uri = {
        let path_info = headers.get("PATH_INFO")
            .context("PATH_INFO header not received from SCGI client")?;
        let raw_uri = match headers.get("QUERY_STRING") {
            Some(query) => format!("{}?{}", path_info, query),
            None => path_info.clone(),
        };
        URIReference::try_from(raw_uri.as_str())
            .context("Request URI is invalid")?
            .into_owned()
    };

    // A missing hash is fine; a hash that is present but undecodable is an error
    #[cfg(feature = "scgi_srv")]
    let certificate = match headers.get("TLS_CLIENT_HASH") {
        Some(encoded) => match hash_decode(encoded) {
            decoded @ Some(_) => decoded,
            None => bail!("Received malformed TLS client hash from upstream. Expected 256 bit hex or b64 encoded"),
        },
        None => None,
    };

    // SCRIPT_PATH takes precedence, SCRIPT_NAME is the fallback
    #[cfg(feature = "scgi_srv")]
    let script_path = headers.get("SCRIPT_PATH")
        .or_else(|| headers.get("SCRIPT_NAME"))
        .cloned();

    // Send out a warning if the server did not specify a SCRIPT_PATH.
    // This should only be done once to avoid spamming log files
    #[cfg(feature = "scgi_srv")]
    if script_path.is_none() {
        static WARN: std::sync::Once = std::sync::Once::new();
        WARN.call_once(|| {
            warn!(concat!(
                "The SCGI server did not send a SCRIPT_PATH, indicating that it",
                " doesn't comply with Gemini's SCGI spec.  This will cause a problem",
                " if the app needs to rewrite a URL.  Please consult the proxy server",
                " to identify why this is."
            ))
        });
    }

    uri.normalize();

    // An empty query is treated exactly like a missing one
    let input = uri
        .query()
        .filter(|query| !query.is_empty())
        .map(|query| {
            percent_decode_str(query.as_str())
                .decode_utf8()
                .map(|decoded| decoded.into_owned())
                .context("Request URI query contains invalid UTF-8")
        })
        .transpose()?;

    Ok(Self {
        uri,
        input,
        #[cfg(feature = "scgi_srv")]
        certificate,
        #[cfg(feature = "gemini_srv")]
        certificate: None,
        trailing_segments: None,
        #[cfg(feature = "scgi_srv")]
        headers,
        #[cfg(feature = "scgi_srv")]
        script_path,
        #[cfg(feature="user_management")]
        manager,
    })
}
/// The URI reference requested by the user
///
/// Although they are not exactly the same thing, it is generally preferred to use the
/// [`Request::trailing_segments()`] method if possible.
///
/// Returns the URIReference requested by the user. **If running in SCGI mode, this
/// will contain only the parts of the URIReference that were relevant to the app.**
/// This means you will get `/path`, not `/app/path`.
///
/// When running in `scgi_srv` mode, this is guaranteed to be a relative reference.
/// When running in `gemini_srv` mode, clients are obliged by the spec to send a full
/// URI, but if a client fails to respect this, kochab will still accept and pass on
/// the relative reference.
pub const fn uri(&self) -> &URIReference {
&self.uri
}
#[allow(clippy::missing_const_for_fn)]
/// The path segments that come after the route this request was bound to
///
/// For example, if this handler was bound to the `/api` route, and a request was
/// received to `/api/v1/endpoint`, then this value would be `["v1", "endpoint"]`.
/// This should not be confused with [`path_segments()`](Self::path_segments()), which
/// contains *all* of the segments, not just those trailing the route.
///
/// # Panics
///
/// Panics if the trailing segments have not been set on this request yet.
pub fn trailing_segments(&self) -> &Vec<String> {
    let segments = self.trailing_segments.as_ref();
    segments.unwrap()
}
/// Every segment of the request path, percent decoded
///
/// For example, for a request to `/api/v1/endpoint`, this would return `["api", "v1",
/// "endpoint"]`, no matter what route the handler that received this request was
/// bound to.  This is not to be confused with
/// [`trailing_segments()`](Self::trailing_segments), which contains only the segments
/// following the bound route.
///
/// Additionally, unlike `trailing_segments()`, this method percent decodes the path.
/// Byte sequences that are not valid UTF-8 after decoding are replaced lossily.
pub fn path_segments(&self) -> Vec<String> {
    let raw_segments = self.uri().path().segments();
    let mut decoded = Vec::with_capacity(raw_segments.len());
    for segment in raw_segments {
        let text = percent_decode_str(segment.as_str()).decode_utf8_lossy();
        decoded.push(text.into_owned());
    }
    decoded
}
/// View any input sent by the user in the query string
///
/// Any zero-length input is treated as no input at all, and will be reported as
/// [`None`]. This is done in order to provide compatibility with the SCGI header
/// common practice of reporting no query string as a blank input.
pub fn input(&self) -> Option<&str> {
self.input.as_deref()
}
#[cfg(feature="scgi_srv")]
/// Borrow the headers that the SCGI client sent along with this request
///
/// When an SCGI client delivers a request (e.g. when your gemini server sends a
/// request to this app), it includes many headers which aren't always included in
/// the request otherwise.  Bear in mind that **not all SCGI clients send the same
/// headers**, and these are *never* available when operating in `gemini_srv` mode.
///
/// By using this method, you are almost certainly reducing the number of proxy
/// servers your app supports, and you are strongly encouraged to find a different
/// method.
///
/// Some examples of headers mollybrown sets are:
/// - `REMOTE_ADDR` (The user's IP address and port)
/// - `TLS_CLIENT_SUBJECT_CN` (The CommonName on the user's certificate, when present)
/// - `SERVER_NAME` (The host name of the server the request was received on)
/// - `SERVER_SOFTWARE` (= "MOLLY_BROWN")
/// - `SCRIPT_PATH` (The prefix the script is being served on)
pub const fn headers(&self) -> &HashMap<String, String> {
    let Self { headers, .. } = self;
    headers
}
#[cfg(feature = "gemini_srv")]
/// Record the client certificate used for this request
///
/// Only the SHA256 digest of the certificate bytes is stored.  Passing [`None`]
/// clears any previously stored fingerprint.
pub (crate) fn set_cert(&mut self, cert: Option<rustls::Certificate>) {
    let fingerprint = cert.map(|rustls::Certificate(der)| -> [u8; 32] {
        let sha256 = digest::digest(&digest::SHA256, der.as_ref());
        sha256
            .as_ref()
            .try_into()
            .expect("SHA256 didn't return 256 bits")
    });
    self.certificate = fingerprint;
}
/// Store the segments that [`Request::trailing_segments()`] will hand out
///
/// Any previously stored segments are replaced.
pub (crate) fn set_trailing(&mut self, segments: Vec<String>) {
    self.trailing_segments = Option::from(segments);
}
#[allow(clippy::missing_const_for_fn)]
/// Get the fingerprint of the certificate the user is connecting with
///
/// Please not that this is **not** the full certificate, just it's fingerprint
/// represented as bytes. The full certificate is not currently exposed, since some
/// SCGI servers may not receive it.
///
/// If you are planning on displaying the certificate to the user, you may want to
/// consider using [`fingerprint()`], which stringifies the output of this method.
///
/// [`fingerprint()`]: Request::fingerprint
pub fn certificate(&self) -> Option<&[u8; 32]> {
self.certificate.as_ref()
}
/// Get the user's certificate as a [`String`] containing the hex fingerprint
///
/// This is a convenience method for stringifying the certificate fingerprint from the
/// [`certificate()`] method.  If you're using this fingerprint as a key for some user
/// data, you may want to prefer the former method.  This method should be used when
/// the fingerprint is being displayed to the user.
///
/// The returned fingerprint will always be a 64 character string containing lowercase
/// hex digits, such as
/// `5e7097dc25dc62867ee4e0d3214a74b83156e613fdf92ca05e08c79efb14b90e`
///
/// Returns [`None`] when no certificate fingerprint is attached to the request.
///
/// [`certificate()`]: Request::certificate
pub fn fingerprint(&self) -> Option<String> {
    self.certificate.as_ref().map(|hash| {
        let mut hex = String::with_capacity(hash.len() * 2);
        for byte in hash {
            // `{:02x}` zero-pads each byte.  A bare `{:x}` would print bytes below
            // 0x10 as a single digit, making the output shorter than 64 characters
            // and ambiguous.
            write!(&mut hex, "{:02x}", byte).unwrap();
        }
        hex
    })
}
#[cfg(feature="user_management")]
/// Attempt to determine the user who sent this request
///
/// May return a variant depending on if the client used a client certificate, and if
/// they've registered as a user yet.
///
/// # Errors
///
/// Passes on any error produced by the user manager while looking the user up.
pub fn user<UserData>(&self) -> Result<User<UserData>>
where
    UserData: Serialize + DeserializeOwned
{
    let fingerprint = self.certificate();
    let user = self.manager.get_user_by_cert(fingerprint)?;
    Ok(user)
}
#[cfg(feature="user_management")]
/// Get a handle to the server's UserManager
///
/// Can be used to query users, or directly access the database
pub fn user_manager(&self) -> &'static UserManager {
    let Self { manager, .. } = self;
    *manager
}
/// Attempt to rewrite an absolute URL against the base path of the SCGI script
///
/// When writing an SCGI script, you cannot assume that your script is mounted on the
/// base path of "/".  For example, a request to the gemini server for "/app/path"
/// might be received by your script as "/path" if your script is mounted on "/app/".
/// In this situation, if you linked to "/", you would be sending users to "/", which
/// is not handled by your app, instead of "/app/", where you probably intended to
/// send the user.
///
/// This method attempts to infer where the script is mounted, and rewrite an absolute
/// url relative to that.  For example, if the application was mounted on "/app/", and
/// you passed "/path", the result would be "/app/path".  Any number of leading slashes
/// on the passed path are ignored, so the result always stays below the mount point.
///
/// When running in `gemini_srv` mode, the application is always mounted at the base
/// path, so this will always return the path unchanged.
///
/// Not all SCGI clients will correctly report the application's path, so this may
/// fail if unable to infer the correct path.  If this is the case, None will be
/// returned.  Currently, the SCGI headers checked are:
///
/// * `SCRIPT_PATH` (Used by [mollybrown] and [stargazer])
/// * `SCRIPT_NAME` (Used by [GLV-1.12556])
///
/// [mollybrown]: https://tildegit.org/solderpunk/molly-brown
/// [stargazer]: https://git.sr.ht/~zethra/stargazer/
/// [GLV-1.12556]: https://github.com/spc476/GLV-1.12556
///
/// For an overview of methods for rewriting links, see [`Server::set_autorewrite()`].
///
/// [`Server::set_autorewrite()`]: crate::Server::set_autorewrite()
pub fn rewrite_path(&self, path: impl AsRef<str>) -> Option<String> {
    #[cfg(feature = "scgi_srv")] {
        self.script_path.as_ref().map(|base| {
            let base: &Path = base.as_ref();
            // Make the path relative by dropping *every* leading slash.  Joining a
            // path that still starts with a slash would replace the base instead of
            // extending it, which is what used to happen for inputs like "//path".
            // Working on the string also avoids relying on the platform-specific
            // meaning of `Path::is_absolute()`.
            let relative: &Path = path.as_ref().trim_start_matches('/').as_ref();
            base.join(relative).display().to_string()
        })
    }
    #[cfg(feature = "gemini_srv")] {
        Some(path.as_ref().to_string())
    }
}
}
#[allow(clippy::ptr_arg)] // This is a single use function that expects a &String
#[cfg(feature = "scgi_srv")]
/// Attempt to decode a 256 bit hash
///
/// A 64 character input is decoded as hexadecimal, and a 44 character input is decoded
/// as standard base64.  Returns [`None`] if the input has any other length, contains
/// characters that are invalid for the chosen encoding, or does not decode to exactly
/// 32 bytes.
fn hash_decode(hash: &String) -> Option<[u8; 32]> {
    let mut buffer = [0u8; 32];
    if hash.len() == 64 { // Looks like a hex
        // Lifted (lightly modified) from ring::test::from_hex
        for (i, digits) in hash.as_bytes().chunks(2).enumerate() {
            let hi = from_hex_digit(digits[0])?;
            let lo = from_hex_digit(digits[1])?;
            buffer[i] = (hi * 0x10) | lo;
        }
        Some(buffer)
    } else if hash.len() == 44 { // Look like base64
        // 44 base64 characters can carry up to 33 bytes (when unpadded), so decode into
        // a scratch buffer of that size instead of risking an overrun of the 32 byte
        // output, then insist on exactly 32 decoded bytes.  Shorter results (e.g. 31
        // bytes from an input ending in "==") are not valid SHA256 sums either.
        let mut decoded = [0u8; 33];
        match base64::decode_config_slice(hash, base64::STANDARD, &mut decoded) {
            Ok(len) if len == buffer.len() => {
                buffer.copy_from_slice(&decoded[..len]);
                Some(buffer)
            }
            _ => None,
        }
    } else {
        None
    }
}
#[cfg(feature = "scgi_srv")]
/// Convert a single ASCII hex character into the nibble it stands for
///
/// Both lowercase and uppercase digits are accepted.  Returns [`None`] if the byte is
/// not a valid hex character.
fn from_hex_digit(d: u8) -> Option<u8> {
    // A radix-16 digit is always below 16, so narrowing back to u8 cannot truncate
    char::from(d).to_digit(16).map(|nibble| nibble as u8)
}
// Dereferencing a `Request` yields its URI reference, so the URI's methods can be called
// directly on the request
impl ops::Deref for Request {
    type Target = URIReference<'static>;

    fn deref(&self) -> &Self::Target {
        let Self { uri, .. } = self;
        uri
    }
}