From 4ce602c14a1626613f90fd747417cff991be69f2 Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Thu, 21 Oct 2021 14:37:37 -0400 Subject: [PATCH] Add weighted tables, use better time lib, use b32 --- Cargo.toml | 7 +++- doc/User-Preference-String-Spec.txt | 2 +- src/lib.rs | 6 +++ src/user_preferences/mod.rs | 37 +++++++---------- src/user_preferences/v0.rs | 11 +++-- src/weighted_table.rs | 63 +++++++++++++++++++++++++++++ 6 files changed, 97 insertions(+), 29 deletions(-) create mode 100644 src/weighted_table.rs diff --git a/Cargo.toml b/Cargo.toml index 33f8340..287f7a6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,9 +11,12 @@ license-file = "LICENSE.md" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -chrono = "0.4.19" -base64 = "0.13.0" +data-encoding = "2.3.2" [dependencies.serde] version = "1.0" features = ["derive"] + +[dependencies.time] +version = "0.3" +features = ["local-offset"] diff --git a/doc/User-Preference-String-Spec.txt b/doc/User-Preference-String-Spec.txt index 3a1a1b4..692f5a8 100644 --- a/doc/User-Preference-String-Spec.txt +++ b/doc/User-Preference-String-Spec.txt @@ -1,4 +1,4 @@ -User preferences are an unpadded base64 string, whose contents are defined in this +User preferences are an unpadded base32 string, whose contents are defined in this document. Because the pref string is passed through the page URL, a small size is a top priority. diff --git a/src/lib.rs b/src/lib.rs index 59d7bf4..1e95384 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,13 @@ pub mod user_preferences; +mod weighted_table; + +use std::fmt; use serde::{Serialize, Deserialize, self}; +pub use weighted_table::WeightedTable; +pub use user_preferences::UserPreferences; + /// Runtime-constant setting that apply to an entire pronouns.today instance /// /// These are values specified by the instance operator through the pronouns.today config file. 
diff --git a/src/user_preferences/mod.rs b/src/user_preferences/mod.rs index f1622a6..3d358e5 100644 --- a/src/user_preferences/mod.rs +++ b/src/user_preferences/mod.rs @@ -6,16 +6,15 @@ pub mod v0; -use crate::{InstanceSettings, Pronoun}; +use crate::{InstanceSettings, WeightedTable}; -use base64::{decode, encode}; -use chrono::{Local, NaiveDate}; +use data_encoding::{BASE32_NOPAD, DecodeError}; /// A user's preferences for the probabilities of certain pronouns /// /// This is the parsed version of a prefstring. The actual implementation details may vary across -/// versions, but universally they must be able to at least randomly select a pronoun set unique to -/// a given date and name. +/// versions, but universally they must be able to at least produce a weighted list of pronouns, +/// representing the probabilities the user wants for each pronoun set. /// /// To this end, all versions of the user preferences implement [`Preference`]. For convenience, /// `UserPreferences` also implements [`Preference`]. @@ -32,19 +31,12 @@ pub enum UserPreferences<'a> { /// See also: [`UserPreferences`] pub trait Preference<'a> { - /// Randomly select a pronoun set for a given date and name. + /// Produce a weighted list of pronouns based on these preferences /// - /// This function should be *pure*, and any randomness must be emulating using PRNG. That is - /// to say, for any given date and name, this preference object must always produce the same - /// pronoun set. - fn select_pronouns_on_date(&self, date: NaiveDate, name: Option<&str>) -> &'a Pronoun; - - /// A shorthand for calling [`Preference::select_pronouns_on_date()`] with today's date - /// - /// The date is generated for the system's time and timezone - fn select_pronouns(&self, name: Option<&str>) -> &'a Pronoun { - self.select_pronouns_on_date(Local::today().naive_local(), name) - } + /// This is a one-directional conversion to a [`WeightedList`]. 
This method is a crucial step + /// to randomly selecting a pronoun set based on a user's preferences, as any selection is done + /// by using a [`WeightedList`]. All preference versions must implement this method. + fn into_weighted_table(&self) -> WeightedTable; /// Parse a given prefstring, after it's extraction from base64 /// @@ -67,8 +59,9 @@ pub trait Preference<'a> { /// /// This is the primary method of creating a `Preference` object from a prefstring. The /// default implementation calls the underlying [`Preference::from_prefstring_bytes()`] method. - fn from_prefstring(prefstring: &str, settings: &'a InstanceSettings) -> Result where Self: Sized { - decode(prefstring).map(|ps| Self::from_prefstring_bytes(ps.as_ref(), settings)) + fn from_prefstring(prefstring: &str, settings: &'a InstanceSettings) -> Result where Self: Sized { + BASE32_NOPAD.decode(prefstring.as_ref()) + .map(|ps| Self::from_prefstring_bytes(&ps, settings)) } /// Serialize into a base64 prefstring @@ -76,15 +69,15 @@ pub trait Preference<'a> { /// This is the primary method of creating a prefstring from a `Preference` object. The /// default implementation calls the underlying [`Preference::into_prefstring_bytes()`] method. fn into_prefstring(&self) -> String { - encode(self.into_prefstring_bytes()) + BASE32_NOPAD.encode(&self.into_prefstring_bytes()) } } impl<'a> Preference<'a> for UserPreferences<'a> { - fn select_pronouns_on_date(&self, date: NaiveDate, name: Option<&str>) -> &'a Pronoun { + fn into_weighted_table(&self) -> WeightedTable { match self { UserPreferences::V0(pref) => pref, - }.select_pronouns_on_date(date, name) + }.into_weighted_table() } fn from_prefstring_bytes(bytes: &[u8], settings: &'a InstanceSettings) -> Self { diff --git a/src/user_preferences/v0.rs b/src/user_preferences/v0.rs index 4d4115b..1025164 100644 --- a/src/user_preferences/v0.rs +++ b/src/user_preferences/v0.rs @@ -1,11 +1,14 @@ //! 
Version 0 Prefstrings -use crate::{InstanceSettings, Pronoun, user_preferences::Preference}; +use crate::{ + InstanceSettings, + Pronoun, + user_preferences::Preference, + WeightedTable, +}; use std::collections::HashMap; -use chrono::{NaiveDate}; - /// A parsed version of the V0 prefstring /// /// See the [prefstring specification][1] for more information about how this is interpretted. @@ -18,7 +21,7 @@ pub struct UserPreferencesV0<'a> { } impl<'a> Preference<'a> for UserPreferencesV0<'a> { - fn select_pronouns_on_date(&self, date: NaiveDate, name: Option<&str>) -> &'a Pronoun { + fn into_weighted_table(&self) -> WeightedTable { todo!() } diff --git a/src/weighted_table.rs b/src/weighted_table.rs new file mode 100644 index 0000000..30f7316 --- /dev/null +++ b/src/weighted_table.rs @@ -0,0 +1,63 @@ +use crate::Pronoun; + +use time::{Date, Month, OffsetDateTime}; + +/// The start of the COVID-19 lockdowns +/// +/// This is used as an epoch in order to convert from a given date to an integer seed. This is +/// specified as part of the algorithm for randomly selecting from a weighted list. +pub const COVID_EPOCH: Date = match Date::from_calendar_date(2020, Month::January, 26) { + Ok(d) => d, + Err(_) => Date::MIN, // This never runs, but we can't unwrap, so this is what we're stuck with +}; + +/// A list of pronouns and their associated weights, used for random selection +/// +/// Weights are typically representative of a user's preference towards a pronoun. A pronoun with +/// a weight of 10 is twice as likely to be selected as a pronoun with a weight of 5. +/// +/// This struct is use to represent these weights before they are used to randomly select a +/// pronoun. Additional methods are provided to perform this random selection on a weighted list, +/// using as a seed both an arbitrary string of bytes and a Date. 
+pub struct WeightedTable<'a>(pub Vec<(&'a Pronoun, u8)>);
+
+impl<'a> WeightedTable<'a> {
+
+    /// A shorthand for calling [`WeightedTable::select_on_date()`] with today's date
+    ///
+    /// The date uses the system's local timezone, or UTC if the local offset is unavailable
+    pub fn select_today(&self, seed: &[u8]) -> &Pronoun {
+        self.select_on_date(
+            seed,
+            OffsetDateTime::now_local()
+                .unwrap_or_else(|_| OffsetDateTime::now_utc())
+                .date()
+        )
+    }
+
+    /// Randomly select a pronoun set for a given date and seed.
+    ///
+    /// Is a wrapper for calling [`WeightedTable::select`] with the given date mixed into the seed.
+    ///
+    /// The number of whole days between `date` and [`COVID_EPOCH`] is prepended to `seed` as four
+    /// little-endian bytes, so that the combined seed changes from one day to the next.
+    pub fn select_on_date(&self, seed: &[u8], date: Date) -> &Pronoun {
+        // Days since the epoch, truncated to 32 bits, prefix the caller's seed.
+        let days = (date - COVID_EPOCH).whole_days() as u32;
+        let mut new_seed: Vec<u8> = Vec::with_capacity(seed.len() + 4);
+        new_seed.extend(days.to_le_bytes());
+        new_seed.extend(seed);
+        // Select with the date-mixed seed; passing `seed` alone would ignore `date` entirely.
+        self.select(&new_seed)
+    }
+
+    /// Randomly select a pronoun set for a given seed
+    ///
+    /// This function is *pure*, and any randomness is produced internally using PRNG seeded with
+    /// the given seed.  That is to say, for any given seed, this table must always
+    /// produce the same pronoun set.
+    pub fn select(&self, seed: &[u8]) -> &Pronoun {
+        todo!()
+    }
+
+}