From 43c2a1a3267064a6ab1c715c7cc9d47eea69d1c3 Mon Sep 17 00:00:00 2001 From: Dominic Zimmer Date: Wed, 14 Jun 2023 11:00:25 +0200 Subject: [PATCH] Update text-gen --- hp/hodbook.txt | 12326 +++++++++++++++++++++++++++++++++++++++++++++++ hp/hp.py | 8 +- 2 files changed, 12333 insertions(+), 1 deletion(-) create mode 100644 hp/hodbook.txt diff --git a/hp/hodbook.txt b/hp/hodbook.txt new file mode 100644 index 0000000..da74286 --- /dev/null +++ b/hp/hodbook.txt @@ -0,0 +1,12326 @@ + U00: Let’s Rust + +Welcome at DSys GmbH, we are happy to have you as a new junior engineer. + +On your first day, we present this onboarding slide deck to you and let +you set up your rusty workstation for the time ahead. + +We are looking forward to this, as you are going to work on some +exciting projects and we help you to learn about dependability & +dependable systems and software. + + Rust + +We at DSys believe that Rust (the programming language and the +ecosystem) is the future for dependable software systems. + +We are particularly inclined to the mission of the Rust project, namely +to be “a language empowering everyone to build reliable and efficient +software” — including you! + +Hence, you have to learn Rust in the following weeks. But do not be +afraid, we are here to help. + +So let us dive right in and get our hands rusty. + +Setup Your Development System + +First, make sure you have Rust installed; the project has an excellent +guide to doing so… if you struggle, let us know! + +For development tools, we highly recommend: + +- Visual Studio Code (VSCode) + + - including the VSCode exentsions: rust-analyzer and BetterToml + +- A decent terminal emulator: + + - on Linux/Mac, you are covered already + + - on Windows, we recommend Windows Terminal + +Finally, you must use Git, as software development cannot be dependable +without a version control system. How to set it up is described on their +website. + +Optionally, you might already create an account on GitLab.com. 
We do not +use it right away, but it is required later on. It could also help you +store the software you produce in a safe place starting from day 1. + +Hello World + +Now with Rust installed on your system, you can run the famous Hello +World program: + + fn main() { + println!("Hello World"); + } + +You can run it right inside this book, but you’re here to build +something, so do the following: + +- In a new folder, run cargo new --bin hello. +- Enter the folder hello, run code . to edit things. +- Run cargo check when you are done (e.g., by putting the above + snippet into main.rs). +- Ideally, this succeeded, and your program is accepted by the + compiler. If not, we hope that the compiler provided you with some + helpful error messages. +- Now use cargo build, which might take a little bit of time, and + afterwards, you have an executable binary in target/debug. Have a + look and execute the binary from your console. +- Back in your project folder, make a change to main.rs and, e.g., + change the text. Now type cargo run and see what happens. +- Interesting! Apparently, run first checks, then builds, and finally + executes your binary. +- Why didn’t we show you run in the first place? Due to Rust being a + compiled language, it often makes sense to only do check while you + work on the project. When you want to share your code, build is the + way. + +println!() + +Let’s have a closer look at this program and, in particular, println!(). +To be precise, this is a macro as indicated by the ! + +The macro prints its arguments to stdout. There is also eprintln!() to +print to stderr. 
+ +Here are some ways to use the macro to format arguments in different +ways: + + println!("Hello"); // no args + println!("Hello {}", "world"); // simple + println!("Hello {1} {0}", "world", 1); // positional + println!("{value}", value=4); // named + println!("Hello {:?}", ("world", 5)); // debug + println!("Hello {:#?}", ("world", 5)); // pretty-print + + More details on formatting in std::fmt. + +Fibonacci + +Now let’s do something more sensible and compute a Fibonacci number: + +{{#playground fib.rs}} + +Here, you see how you 1) define a function, 2) use control flow (if, +else), and 3) call a function (recursively). + +Cargo.toml and Cargo.lock + +The following shows our hello world’s Cargo.toml, specifying package’s +name, the version, and the used Rust edition (see below). There could +also be third-party crates to be imported under [dependencies]: + + [package] + name = "helloworld" + version = "0.1.0" + edition = "2018" + + [dependencies] + ... + +In general, reproducible builds (i.e., building code produces identical +output) are getting more relevant to counter, e.g., security and +consistency problems. + +The file Cargo.lock is created when the current dependencies are present +and cargo build is invoked. Thereby, versions are entirely fixed and +reproduced when another developer reuses this Cargo.lock file. + + # This file is automatically @generated by Cargo. + # It is not intended for manual editing. + version = 3 + + [[package]] + name = "helloworld" + version = "0.1.0" + dependencies = [ + "foobar", + ] + + + [[package]] + name = "foobar" + version = "0.42.1" + source = "registry+https://github.com/rust-lang/crates.io-index" + checksum = "..." + dependencies = [ + ... + ] + +Maintaining Rust + + There won’t be a Rust 2.0 + + Versions + +- Rust 1.0 was released in May 2015. +- New version every 6 weeks. +- The latest Rust version can be found on What Rust is it?. 
+ + Editions (Theme) + +- 2015: Stability +- 2018: ProductivityIntroduced keywords (async, await, try) +- 2021: Sustainability +- 2024: Scale Empowerment + + Editions are compatible and opt-in. Use 2015 crate X in your 2018 + crate Y (and vice versa). + + More details are in the Edition Guide. + + S00: Sample Solution + +Getting Rusty + +- https://www.rust-lang.org/tools/install + +- fn hello(name: &str, age: i32) { + println!( + "Hello, my name is {} and I am {} year{} old.", + name, + age, + if age == 1 { "" } else { "s" } + ); + } + + fn main() { + let name = "Ferris"; + let age = 11; + hello(name, age); + } + +- fn fib(n: u8) -> u16 { + if n == 0 || n == 1 { + return 1; + } + + let mut res = 0; + let mut last = 1; + let mut curr = 1; + for _i in 1..n { + res = last + curr; + last = curr; + curr = res; + } + res + } + + fn main() { + let n = 9; + let res = fib(n); + println!("fib({}) = {}", n, res); + } + +Dependability + +- Discussed in plenum. + + Summary + + What did you learn? + +- Why Rust and Dependability are important and the topic of this + course. +- How to set up Rust on your system. +- How to write first programs in Rust. + + Where can you learn more? + +- Rust Basics: + - Rust Book: Ch. 01 + 02 + - Programming Rust: Ch. 01 + 02 + - Rust in Action: Ch. 01 + - cheats.rs: Hello Rust +- Rust Way of Life: + - Rust for Rustaceans: Ch. 01 + - Rustacean Principles +- Dependability Basics: + - “Basic Concepts and Taxonomy of Dependable and Secure Computing” + by Laprie et al. +- “Are We Really Engineers?” by Hillel Wayne (interesting comparison + of software- and other engineers) +- Modern Unix (a collection of CLI utilities, many of them written in + Rust) + - atuin - 🐢 magical shell history +- Sustainability with Rust + + W00: Work Sheet + +Getting Rusty + +- Setup Rust on your system. + +- Modify Hello World to print your name and age, both provided as + arguments. + +- Rewrite fib to compute the value using a for loop. 
+ +- Setup Rustlings on your system, ideally with Rust-Analyzer support. + You will need this in the upcoming units. + +Dependability + +- Do an online search for definitions of the term dependability and + its attributes. Bring them to the next plenum. + + Computing with Rust + +We already covered how to print on the console and how to compute the +Fibonacci number. In this section, we have a detailed look at how +programs in Rust can look like. + + This section is intentionally kept brief and you should read the + excellent 3rd chapter of the Rust book if you have any doubts or want + a more in-depth introduction to the common programming concepts. + + Variables + +First, let’s have a look at how variables are declared in Rust. We +declare variables using let and can assign a type with : Type: + + let variable : Type = something(); // Type could, e.g., be u16, i64, bool, String, ... + +Type can be omitted if the compiler can infer[1] it, i.e. it is +unambiguous. + + let variable = something(); // fn something() -> Type + + Mutability + +Now, let’s try to change the variable: + + let variable = 3; + variable = 5; + +When we run the code, we get: + + error[E0384]: cannot assign twice to immutable variable `variable` + --> src/main.rs:5:1 + | + 4 | let variable = 3; + | -------- + | | + | first assignment to `variable` + | help: consider making this binding mutable: `mut variable` + 5 | variable = 5; + | ^^^^^^^^^^^^ cannot assign twice to immutable variable + + error: aborting due to previous error + + For more information about this error, try `rustc --explain E0384`. + error: could not compile `playground` + + To learn more, run the command again with --verbose. + +We learn that variables can be immutable (which they are by default) or +mutable. We can specify that using mut: + + let mut variable = 3; + variable = 5; + +This is an aspect where Rust is different from many other languages. 
+First, by making mutability explicit, it requires the programmer to +state her intents (to others AND herself). Second, by making +immutability the default, it takes a safe route. This is because it is +easier to reason about immutable variables and you immediately detect +the rare mutable variables due to the keyword mut. Later, when we cover +functional programming, you see that you can get far without using any +mutable variables. We can consider the mut annotations as a feature of +Rust that encourages the creation of dependable code. + + Constants + +Rust also allows declaring constants using const instead of let and +specifying the type. The value you assign to them also have to be +constant, i.e. is fixed at compile time. Here is an example: + + const PI: f32 = 3.14; + + Shadowing + +Finally, Rust is different from many other languages in that it supports +shadowing, i.e. a variable name can be reused in a code block. This is +particularly helpful when parsing: + + let mut guess = String::new(); + // ... read from stdin into guess + let guess : i32 = guess.parse().unwrap(); + +This means there is no need to invent variable types or use type +suffixes, e.g. input_str, input_i32 (which is common in older +languages). Thanks to type inference and picking a normal name, the name +is always accurate and refactorings do not lead to having to change the +variable name. + + Functions + +Apart from variables, we also need functions to build reusable blocks of +code. One of the most important functions is main, which serves as the +entry point to programs that compile to executable binaries. You already +saw them in the previous unit. Here is another example: + + fn mul(x: i32, y: i32) -> i32 { + x * y + } + +Function bodies contain a series of statements (none in this case) and +optionally an ending expression that defines the return value (x * y in +this case). 
Here is how such a declaration is decomposed: + + + Parameters + Identifier | | Return Type + | | | | + --- ------ ------ --- + fn mul(x: i32, y: i32) -> i32 { + -- - --- -- - + | | | | | + Keyword | Type return | + | | + Identifier Begin Function + Body + +In Rust’s function signatures, you MUST declare all parameter types and +they are not inferred by the compiler. + + Recursion + +In Rust you can call functions recursively, just like in this function +for computing the greatest common divisor using Euclid’s algorithm: + + fn gcd(m: i32, n: i32) -> i32 { + if m == 0 { + n.abs() + } else { + gcd(n % m, m) + } + } + + Namespaces + +When you start producing more and more code, you certainly run into the +following issues: + +1. you want to reuse a name (e.g. parse might be defined for multiple + types) +2. you want to group things together +3. you want to hide certain functionality for others + +For this, Rust provides you with several means: + +- Crates: confined collection of functionality by a single vendor. So + far, you have created a single crate and used other crates (if you + experimented). + +- Modules: layered (sub-)sets of functionality within a crate. + +The latter is created and used like this: + + mod math { + pub fn gcd(m: i32, n: i32) -> i32 { + // ... + } + + pub fn fib(n: u32) -> u32 { + // ... + } + } + + fn main() { + use math::fib; + let gcd = math::gcd(30, 12); + let f = fib(3); + println!("gcd: {}, fib: {}", gcd, f); + } + +The mod keyword adds a module (module / package in Python / Java). With +pub we allow gcd and fib to be accessed from the parent module. +Everything in mod math must be accessed via math::. With use, a binding +can be introduced that allows to shorten a path (see use math::fib and +the usage fib above). + +We could have put math into a separate file math.rs and use it like +this: + + mod math; + + fn main() { + // ... + } + +This way of structuring your Rust programs is further discussed in a +later unit. 
+ + Control Flow + +Finally, we need to introduce control flow constructs to allow +conditions, loops, etc. + +Rust is expression-based, which means that control flow expressions have +a value, like here: + + let condition = true; + let number = if condition { 5 } else { 6 }; + println!("The value of number is: {}", number); + + Loops + +loop + +With loop the block of code is executed over and over again (in other +languages this is done using while true, but this is not idiomatic Rust +code). The only way to stop it is with a panic (where the whole program +ends) or a break statement. + + loop { + let interval = time::Duration::from_seconds(1) + match send_heartbeat() { + Heartbeat::Success => { + thread::sleep(interval); + }, + Heartbeat::Timeout => { + break; + }, + Heartbeat::Error => panic!("unexpected condition"), + } + } + // handle reconnection in case of timeout + +Ignore the details of match for now and look at the structure: On +success, the thread waits until one interval has passed. On timeout, the +loop is exited and reconnection happens. Only on error, the program +halts. + +while + +Here is how to compute the greatest common divisor using the iterative +Euclidean algorithm: + + fn gcd(mut m: i32, mut n: i32) -> i32 { + while m != 0 { + let old_m = m; + m = n % m; + n = old_m; + } + n.abs() + } + +for + +Finally, we have the for loop that works on iterators (they are covered +in a later unit). For now, consider (n..m), which gives you a range from +\(n\) to \(m-1\) (i.e. exclusive range). Here, this is used to compute +the Fibonacci number with a loop: + + fn fib(n: u8) -> u16 { + let mut fib = (1, 1); + for _ in 0..n { + fib = (fib.1, fib.0 + fib.1) + } + fib.0 + } + + fn main() { + let n = 4; + let res = fib(n); + println!("fib({}) = {}", n, res); + } + + Fundamentals of Dependability + +Dependability is a broad term with lots of different meanings and +subsumes a large set of properties that all contribute to a system that +one can depend on. 
The goal of this section is to a) give you an +intuitive introduction to the different terms, using everyday examples, +and b) show you how these terms are defined. These definitions are +important, as developing dependable system often involves people from +very different backgrounds (e.g. safety, security, psychology, +philosophy, …) and is applied in different domains (e.g. medical, +transport, manufacturing, energy, …). + +But first, let’s start from scratch with some psychology. Safety and +security are — relatively fundamental — human needs: + +{{#include img/maslow.svg }} + +Androidmarsexpress, Maslow’s Hierarchy of Needs2, colors changed by +Andreas Schmidt, CC BY-SA 4.0 + +Hence, we as humans long for our environment to be safe and secure, +i.e. that it can be depended upon for living. In the following, we look +at various everyday situations, where concepts are put in place to +provide us with safety and security. + + At Home + +Everyone knows that “most accidents happen at home”. But what is an +accident? + + Definition: Accident is an undesired and unplanned (but not + necessarily unexpected) event that results in a specified level of + loss. - Safeware + +At this stage, this might sound rather cryptic to you. Speaking of our +household example, a loss could be that you cut your finger when using a +knife — which is undesired and unplanned as you have to put something on +the wound and have to stop cutting stuff for a while. + +If at some point you almost cut your finger, that is an incident and +might tell you that you should concentrate more to avoid this… or ask +someone else to do it. + + Definition: Incident or near-miss is an event that involves no loss + (or only minor loss) but with the potential for loss under different + circumstances. - Safeware + +From this definition, it could also be that your small cut is an +incident, while chopping your finger off is an accident. 
As you see, +these definitions lead to subjective results — a pattern that you +encounter throughout safety considerations. + +Now apart from knives, what else do you have at home that is dangerous? +Consider the following: + +- Electric sockets +- Hot liquids +- Slippery floor +- Toxic material (medicines, sanitizers) +- Sharp edges + +These things cause hazards and expose risks: + + Definition: Hazard is a state or set of conditions of a system (or an + object) that, together with other conditions in the environment of the + system (or object), will lead inevitably to an accident (loss + event). - Safeware + + Definition: Risk is the hazard level combined with (1) the likelihood + of the hazard leading to an accident (sometimes called danger) and (2) + hazard exposure or duration (sometimes called latency). - Safeware + +Now you know what hazards and risks are, we ask you in the work sheet to +find safety concepts. But what is such a concept? + + Definition: A safety concept is a measure taken to remove, contain, or + reduce a hazard. - Own + +But hold on, we have not defined Safety yet? + + Definition: Safety is freedom from accident or losses. - Safeware + +We also have to make a distinction: + +- Safety is freedom from negative impact from the environmente.g. not + getting hurt by a falling roof tile +- Security is freedom from negative impact by a hostile persone.g. not + getting hurt by a falling piano that was pushed to hurt us + +But instead of walking the streets in the city and keeping close to +buildings from which tiles and pianos can fall, we leave the house +towards the train station. + + Construction Site + +When we cross a construction site, we realize that there are concepts +that are used to provide safety. Consider the following: + +- A fence surrounds the place. +- People wear helmets. +- There are warning signs all over the place. + +On the work sheet, we ask you to think about the hazards that lead to +these concepts. 
+ + Train Station + +Arriving at the train station, we ask ourselves what are objects / +concepts that are related to our current idea of dependability (safety + +security)? + +- Safety window glasses. +- Doors only open when the train stops. +- Metro walls between landing and vehicle (in metropolitan areas). +- Staff members with pepper spray walking the place. + +Note that, at the train station, we also care that the train comes on +time, so that we don’t get a delay in our journey’s schedule. This leads +us to reliability: + + Definition: Reliability is the probability that a piece of equipment + or component will perform its intended function satisfactorily for a + prescribed time and under stipulated environmental conditions. - + Safeware + +Before you are allowed to enter a train, you have to purchase a ticket +at the ticket machine. Occasionally, this machine is defective and needs +to be repaired by staff members — it is unavailable. This leads us to +availability: + + Definition: + + Availability is the readiness for correct service. - Laprie et al. + + Availability is the fraction of time the system is operational. - + Better Embedded System Software + + Onward with Dependability + +With these intuitive definitions and examples from the previous section +in mind, we want to stress that for the rest of the course, we are +following: + +- “Basic Concepts and Taxonomy of Dependable and Secure Computing” by + Laprie et al. +- Safeware (engineering terms are prefered over taking misused terms + by computer scientists) + +They define a set of dimensions that form dependability. 
+ +Dependability Dimensions + +- Availability: readiness for correct service +- Reliability: continuity of correct service +- Safety: absence of catastrophic consequences on the user(s) and the + environment +- Integrity: absence of improper system alterations +- Maintainability: ability to undergo modifications and repairs +- Confidentiality: the absence of unauthorized disclosure of + information +- Security: concurrent existence of confidentiality and integrity and + availability +- Survivability: chance of surviving a catastrophic failure + +As you see, all dimensions are about a service provided. The dimensions +are orthogonal to each other and you should not assume any relationship +between them. For instance, a system might be highly available, but +totally unmaintainable because the inventor ceased to exist. Similarly, +a system might be perfectly safe, but not perform its original service +(i.e. it is unavailable). As with other engineering problems that are +quantified with respect to different dimensions, one cannot maximize all +of them simultaneously – hence trade-offs are required. + +During this course, we regularly refer back to these dimensions and +highlight which tool, process, or language construct has an effect on +which dependability dimension. + +The Eternal Chain of Events + +Before we dive into detail, we also look at faults that interfere with +these dimensions: + +Fault (active / dormant) Error Failure Activate next fault … + + Definition: Fault is the adjudged or hypothesized cause of an error. - + Taxonomy of Dependable Computing + + Definition: Error is a design flaw or deviation from a desired or + intended state. - Safeware + + Definition: Failure is the nonperformance or inability of the system + or component to perform its intended function for a specified time + under specified environmental conditions. - Safeware + +Dependability Means + +In essence, achieving dependability is about dealing with faults. 
This +can be achieved both at system design-time and operation-time using the +following classes of approaches: + +- Fault prevention is about avoiding the occurrence or introduction of + faults in the first place. +- Fault tolerance is about keeping the service operational, even if a + fault happens. +- Fault removal is about reducing the number and decreasing the + severity of faults. +- Fault forecasting is about estimating the current number of faults, + in order to predict future faults. + +These faults can be further divided, depending on when they occur: + +- Development faults may occur while a system is envisioned and + created. +- Physical faults include everything that involves hardware (and + non-electric parts too). +- Interaction faults are everything where the external environment is + the cause. + +For the remainder of the course, we encounter approaches to improve the +different dependability dimensions of a service and improve our systems +and software by tackling faults. 
+ + Dependability Process + +The following diagram shows the development cycle used to produce +dependable products — commonly known as the V (or “Vee”) Model due to +its shape: + + +--------------+ +--------------+ + | Specify |<----- Traceability & Verification ----->| Acceptance |-> Product + | Product | Test Plan & Results | Test | + +--------------+ +--------------+ + Product | ^ Software + Requirements | | Test + V | Results + +--------------+ +--------------+ + | Specify |<--------------------------->| Software | + | Software | Test Plan & Results | Test | + +--------------+ +--------------+ + Software | ^ Integration + Requirements | | Test + V | Results + +--------------+ +--------------+ + | Create SW |<--------------->| Integration | + | Architecture | Test Plan & | Test | + +--------------+ Results +--------------+ + High | ^ Unit + Level | | Test + Design V | Results + +--------------+ +--------------+ + | Design | <-> | Unit | + | Modules | | Test | + +--------------+ +--------------+ + Detailed | ^ + Design | | Source + V | Code + +--------------+ + | Implement | + +--------------+ + +What we see from this diagram are multiple things: + +- in the left half, we go from high-level product specification down + to the minutiae of implementing software code +- in the right half, we go from pieces of source code to a + full-fledged product +- on each horizontal layer, we have a specification on the left and a + verification means on the right — both having the same abstraction + level + +The V process is, quite helpfully in terms of abbreviations, amended by +so-called verification & validation (V & V) activities. Note that these +two V-terms are often used in confusing or even wrong ways — even by +laws and standards. We use the following (German) article as a basis for +this course. + + Definition: Verification is the check, using objective means, that + specified properties (of products or components) are fulfilled. 
- + Translation of Johner-Institute Definition + +In our diagram, verification activities deal with the horizontal, +left-to-right, double-ended arrows. Hence, a verification always deals +with a single layer in the V-model, e.g. correctness of software modules +is proven by unit tests. + +The article further defines validation: + + Definition: Validation is the check, using objective means, that the + specified users can, in a specified context, reach specified usage + goals. - Translation of Johner-Institute Definition + +Note that this is a high-level activity where (the whole / one iteration +of the) V process has been executed. We often find another definition of +validation in everyday dependability conversation, which works as +follows: + + Definition: Validation is the check that a step in the development + process produces the intended outputs. + +Looking at the diagram, this means that validation activities deal with +the top-down/bottom-up, single-ended arrows. For instance, peer-review +can be used as a means to validate the transformation of software +requirements into a high-level design. + + U01: Computing Dependably + +Now that you have your system up and running, we want to get our hands +dirty with Rust by learning how to compute with Rust. But we also have a +bit of brain work to do by digging into what dependability is. + + S01: Sample Solution + +Rust + + fn is_prime(n: u32) -> bool { + let limit = (n as f64).sqrt() as u32; + !(2..limit).any(|a| n % a == 0) + } + +Dependability + +- Household Safety Concepts: + - Knives are stored in a drawer; sharp knives have a sheath. + - Electric sockets are connected to a fuse. + - Slippery floors get warning signs. + - Toxic materials are stored behind locked doors. + - On edges you put on bumpers. +- Journey reliability concepts: + - Aim for an earlier train. If there is a delay, you might still + be on time. + - Be early at the train station to make sure you don’t miss the + departure. 
+- Kitchen availability concepts: + - Have a french press if you Kaffeevollautomat fails. + - Have a microwave to prepare food in case your oven is broken. + - Have more knives than you need, so that more people can work. + + Summary + + What did you learn? + +- Rust: + - How variables, statements and expressions form functions. + - How control flow can be specified. + - How modules allow to group related code together. +- Dependability: + - What dependability, safety, security, … are and why they are + important. + - How the world around you is full of hazards, risks, as well as + accidents and incidents. + - How faults lead to errors to failures and potentially repeat. + + Where can you learn more? + +- Rust: + - Rust Book: Ch. 02, 03, 07.2 + - Programming Rust: Ch. 03 + 06 + - Rust in Action: Ch. 02 + - cheats.rs: Control Flow + - Exercism Rust Track +- Dependability: + - Embedded Software Development for Safety-Critical Systems: Ch. + 02 + - Safeware: Ch. 08 + 09 + - Safety is a System Property, not a Software Property + + W01: Work Sheet + +Rust + +- The section on Rust programming concepts is intentionally kept + brief. Make sure you read the associated Rust Book chapter if you + couldn’t follow or have doubts. This allows you to answer the + following questions: + - How are immutable variables different from constants? + - How is shadowing different from reassignment of a mut variable? + - Why does let number = if condition { 5 } else { "six" }; not + compile? +- Implement a function for computing if a number is a prime number: + fn is_prime(n: u32) -> bool. % is the modulo operator, which should + be helpful. + +Rustlings + +The Rustlings project provides small exercises to practice specific +features of the Rust language. + +- Setup rustlings following this tutorial. + +- Do the Rustlings exercises variables, functions, and if. 
+ +Dependability + +- Based on the hazards we identified at home, name safety mechanisms + that avoid that these hazards cause harm to a human. State if they + prevent/tolerate/remove/predict faults. + +- For the construction site, you learned about safety concepts. Which + hazard are they tackling? Do they prevent/tolerate/remove/predict + faults? + +- Your journey involves using the train. What can you do to improve + the reliability of your journey (i.e. the odds of you reaching the + destination on time)? + +- Consider your kitchen. Where do you have availability concepts? + + cargo Tools + +When you develop code, there are many things that can bug you: + +- Broken (aka non-compiling) code on main/development branches. + +- Badly formatted code. + +- Smelly code (e.g. unnecessary mutability, &Vec). + +- Inappropriate 3rd party licenses.You maintain a permissive FOSS + project and someone adds a GPL3 dependency. + +- Uncovered code.Code not covered by tests. + +- Undocumented code. + +- Manual builds. + +- No cross-platform support. + +- Manual releases… sent via email. + +The good news is, Rust’s cargo is here to help with its many functions +(advanced tools in brackets): + +- cargo check + +- cargo-about + +- cargo-udep + +- cargo clippy + +- cargo fmt + +- cargo test (cargo-tarpaulin) + +- cargo doc + +- cargo build (cargo-cross) + +- cargo publish + + check + + Comes with your rustup installation. + +First and foremost, the cargo command you will probably use the most: +check. This command checks your code and all of its dependencies for +errors (type system, ownership, …). At the same time, it does not create +compiled artifacts, which means it completes very quickly so you have +rapid feedback. + + about + + cargo install cargo-about + +Some of the software we develop at DSys is open source software. This +means that it must be appropriately licensed and we have to track the +licenses of our third-party libraries as well (more on this later). 
+cargo-about helps you by: * listing crates you use * collecting their +licenses * checking for acceptance + +To support this, there is a about.toml configuration file that +defines: * which licenses are [accepted] * [[DEPENDENCY.additional]] if +the license is not discoverable by cargo + +Finally, there is a about.hbs template HTML file to generate a webpage +that contains all licenses of third-party crates. cargo-about exits with +non-zero when a crate uses a non-accepted license, which makes it ideal +for continuous integration tests. + +You can set up cargo-about for your project with cargo about init. +Afterwards, the following let’s you generate the licenses page: + + cargo about generate about.hbs > license.html + + udeps + + cargo install cargo-udeps --locked + +During development, it can happen that you add a crate that later +becomes unused, i.e. you are no longer using any of its functionality. +cargo-udeps helps you identify exactly these crates and makes your +Cargo.tomls more cleaned up. It requires nightly, so you typically run +it like this: + + cargo +nightly udeps --all-targets + +Note however, that it does not recognize an unused dependency that is +relevant transitively. + + clippy + + rustup component add clippy + +Remember Karl Klammer? He is back in Rust and way less annoying. clippy +works similar to check, but provides more information, e.g. warnings for +common mistakes: + +Note that after check you need to clean before clippy (so it is advised +to use clippy over check when you are interested in hints). + + fmt + +cargo fmt allows to automatically & consistently format all the files in +your crate. Furthermore, it can be used as a linter, indicating if the +crate fulfills all formatting rules or not. + +When reading code, formatting can help or impede understanding what is +going on. While a particular formatting rule might not be measurably +better than another (i.e. having all language elements of a certain type +in camelCase vs. 
snake_case makes no difference), it is important that +the formatting is consistent, so that readers can focus on the code +itself and not the formatting. With Rust, we tend to build systems out +of many third party dependencies, which means that the total number of +different authors that contribute to the code used to compile a single +piece of software can easily be in the 10s or even beyond 100. Hence, +cargo fmt is a valuable tool, as it comes with a default configuration +that is the convention for most Rust developed (and published as FOSS). +While you can configure it using rustfmt.toml, you should not and +instead stick to the default configuration. This should also help to +keep yourself out of religious discussions that sometimes emerge in +communities where there is no well-established standard. + + build + +So far, we have only checked our code for functional or esthetical +issues, but never actually created working software. With cargo build +you can build a binary or library. If you want to use it productively, +add the --release flag, which tells the compiler to optimize: + + cargo build --release + + cross-compilation made easy + + cargo install cross + +If you want to create software for multiple target platforms (Windows, +Linux, different architectures, …), you can use cross, behaves as a 1:1 +replacement for cargo (i.e. it uses the same CLI parameters). cross +makes use of Docker to pull in appropriate build environments. If you, +for instance, want to create a standalone linux binary (using musl) you +can do so like this: + + cross build --release --target x86_64-unknown-linux-musl + + publish + +You have already worked with many other crates that you have downloaded +from crates.io. Now you might ask yourself how you can publish something +there? In order to learn this (and not pollute crates.io with our +experiments), we provide you with a private crate registry based on +kellnr. 
To work with this, you have to execute the following steps: + +- Log in at kellnr.hod.cs.uni-saarland.de using the credentials + provided to you. +- Create an Authentication Token by going to Settings. Keep that token + somewhere, it is only displayed once. +- Change your local ~/.cargo/config.tomland add the following: + + [net] + git-fetch-with-cli = true + + [registries] + kellnr = { index = "git://kellnr.hod.cs.uni-saarland.de/index", token = "YOURTOKEN" } + +- Alternatively, you can use cargo login to connect to the registry or + use --token when you publish. +- Now in the crate you want to publish, make sure the Cargo.toml looks + like this: + + [package] + # ... + publish = ["kellnr"] + +Now you are ready to publish. But keep in mind: + + This is irrevocable! Once published, forever it shall remain! + Probably. + +cargo publish will only work if some requirements are met: + +1. The name is not taken +2. Your crate can be built +3. Your Cargo.toml does not prohibit publishing +4. You specified the authors, license, homepage, documentation, + repository, and readme file plus provided a description in your + Cargo.toml. (only true for crates.io) +5. Your local files do not diverge from the ones in the repository + +A dry run performs all checks without publishing and does not require a +login → perfect for continuous integration tests. + + Always check first with cargo publish --dry-run. + + GitLab + + GitLab is open source software to collaborate on code. + +GitLab offers: + +- Git repositories and source code management +- Continuous integration and deployment +- Issue trackers +- Wikis +- Hosting static websites +- Package registries + +In Free Open Source Software (FOSS) jargon, platforms such as GitLab, +GitHub, BitBucket are called  software forges. + + In case you do not know Git, please check out Learn Git Branching and + this chapter of the Missing Semester. 
+ +For the sake of this course, you are going to use the first two +features, as they relate the most to dependability. Notably, the +repository creates a traceable history of changes to files that are part +of the repository. + + Projects + +Projects can be created on the projects view and hit the New Project +button. Afterwards go for Create blank project, pick a name and +description. It is good practice to initialize the project with a +README.md. This file can be used to store helpful information that +first-time users of your repo see immediately. During the course, we ask +you to either 1) turn some of your projects public or 2) give a special +user access to the project, so that we can access them. + + Continuous Software Development + +There are various terms you find online (like Continuous Integration, +CI/CD, DevOps) that relate to the following practice: + + When developing code collaboratively, regularly merge, check, test, + build, and even deploy your software in a shared environment. In this + context, regular means once per day or even multiple times a day. + +The idea behind this is that the changes a developer makes only deviate +from the mainline (the shared ground truth) for a short period of time +(while developing fixes or new features). Afterwards, the code is merged +with changes by others, and it is checked if the changes still conform +with good practice in the project (e.g. they always build successfully, +don’t introduce failing tests, …). + +If such a regular integration happens, we speak about continuous +integration (CI). Some companies even take one step further, i.e. when +an integration is successful, the changed code is released +(e.g. deployed to production environment, packaged, containerized, …). +The latter is called continuous delivery (CD). + +With GitLab and CI/CD, every time you push your Git commits, a set of +jobs (called pipeline) is executed to integrate and deploy your +software. 
+ +Using continuous methods is recommended when developing dependable +software. This approach ensures a sufficient level of quality for new +commits that get pushed or merged to the mainline — in an automated +fashion. Depending on the tools used in the pipelines (compilers, static +checkers, linters, …), different qualities can be assessed. + + gitlab-ci.yml + +In GitLab, the .gitlab-ci.yml in the root of your project declares +almost everything related to your CI/CD pipeline: + + image: registry.gitlab.com/hands-on-dependability/docker-rust:latest + + stages: + - check + - test + - deploy + ... + + check: + stage: check + tags: + - docker + before_script: + - rustc --version + - cargo --version + - mkdir -p .cargo_cache + - export CARGO_HOME="${PWD}/.cargo_cache" + script: + - cargo check + cache: + key: ${CI_COMMIT_REF_SLUG} + paths: + - .cargo_cache/ + - target/ + +When using private GitLab repositories as cargo dependencies within your +CI/CD pipeline, create a deploy token and use it like this: + + before_script: + - git config --global url."https://gitlab-ci-token:${REPO_ACCESS_TOKEN}@${CI_SERVER_HOST}/".insteadOf "https://git.example.com/" + + There’s much to learn about CI/CD, check it out. + + Upload & Release + +Before, we learned how to publish crates. Another common form of +releasing your software is by providing a release in your software +forge. For Gitlab, you can use the Package Registry for various package +managers. There is no crates support yet, so we upload generic files. 
+ +After we have called cross (for our fancy CLI app fcapp), we also set +the following environment variable: + + export LINUX_X86_64_ASSET="fcapp-v${PACKAGE_VERSION}-x86_64-unknown-linux-musl.tar.gz" + +Afterwards, our upload job looks like this: + + upload: + stage: upload + image: curlimages/curl:latest + needs: + - job: build + artifacts: true + rules: + - if: $CI_COMMIT_TAG + script: + - | + tar -czvf ${LINUX_X86_64_ASSET} -C target/${LIN_TARGET}/release fcapp + - | + curl --upload-file ${LINUX_X86_64_ASSET} \ + --header "JOB-TOKEN: ${CI_JOB_TOKEN}" ${PACKAGE_REGISTRY_URL}/${LINUX_X86_64_ASSET} + +To add a release to GitLab’s Release section (/-/releases), we +do the following: + + release: + stage: release + image: + registry.gitlab.com/gitlab-org/release-cli + needs: + - job: build + artifacts: true + - job: upload + artifacts: false + rules: + - if: $CI_COMMIT_TAG + script: + - | + release-cli create --name "Release $PACKAGE_VERSION" --tag-name v$PACKAGE_VERSION \ + --assets-link "{\"name\":\"${LINUX_X86_64_ASSET}\",\"url\":\"${PACKAGE_REGISTRY_URL}/${LINUX_X86_64_ASSET}\"}" + + U02: Fill Your Toolbox + +Excellent, you got the first pieces of Rust software running on your +system. But as you can imagine, your job as DSys involves more than only +running your software on your own system. + +Hence, this unit gives you a deep dive into Rust’s swiss knife cargo +that helps in many everyday activities (like testing, linting, …). +Further, we introduce the GitLab collaboration software that allows you +to work together with the other DSys engineers. + +As you are now setup to contribute to production code, we have to +introduce you to Test-First Coding, as that is the way DSys implements +new features (our goal is to have almost all code covered by unit +tests). In our experience, this paradigm leads to more dependable code +and made us more productive when turning requirements into code. 
+ +Test-First in Action + +- Project Template +- Consider Task +- Setup Testing Architecture +- Develop Logic Test-First +- Discuss about Testing Dimensions (Qualitative, Quantitative) + + S02: Sample Solution + +Test-First Rust Coding & Source Control + +- Problem Domain: + + - FizzBuzz algorithm (complicated), as it is not perfectly clear + how this should be done. + - CLI (complicated). + +- cargo tarpaulin --verbose --all-features --ignore-tests --workspace --timeout 120 --out Xml + +- Consider fizzbuzz.zip in dCMS. + +GitLab CI + +- Straightforward. + + Summary + + What did you learn? + +- How cargo’s various commands help you in your process of developing + software (building, checking, formatting, releasing). +- How GitLab provides you with a place to store and work on your code + projects; including ways to automatically run cargo and other jobs. +- How test-first coding helps to produce dependable code that is + testable and well-structured. + + Where can you learn more? + +- Rust & cargo: + - Rust Book: Ch. 11 + - Programming Rust: Ch. 02 + 08 + - Rust for Rustaceans: Ch. 06 + - cheats.rs: Cargo +- GitLab + - GitLab Documentation + - Netways GitLab Training +- Test-First Coding: + - Test-First Coding by Ralf Westphal (in German) + - Test-Driven Development Munich School (in German) + - Effective Software Testing + + Test-First Coding + +You might have heard about legacy code. According to Michael +Feathers[2], this is code that is not covered by tests to check for +correct behaviour — hence it is not dependable as the maintainability is +lacking. + + There is also the notion of ancient code. Code created by one or more + people that have left the organization that maintains the code. + +If you strive for dependable systems, it is important to avoid both +legacy and ancient code. Avoiding ancient code is an organizational +matter, i.e. making sure that multiple people know the code well and +that information is available in the organization[3]. 
Avoiding legacy +code is done by writing tests. In the domain of safety-critical +software, tests are even checked during certification activities to +prove that the code is dependable. + +In practice, there are multiple approaches as to when to write tests. +Some argue that all tests must be specified before any coding starts, +while a large portion of industry writes tests after the code was +produced or to reproduce a bug that has been found in production. + +In this section, we have a look at test-first coding, a practice that +helps you develop dependable code irrespectively of where you work… and +ensures you do not produce legacy code. + +Motivation + +But before we get started, let’s think about why we would write +automated tests. There are lots of good reasons to do so: + +- Comfortable: Automated tests are easy to run and require no manual + effort. +- Reliable: There is no way to introduce manual errors while testing. +- Traceable: Requirements are documented, as tests are executable + specifications. +- Usable: Usage of code is documented, as tests are examples. +- Cheap: Tests have low costs, particularly lower than having a bug in + production code. +- Stable: Acceptance tests become regression (i.e. the behaviour has + changed in an increment) tests over time, making software less + brittle. +- Automatable: Tests can integrate into a larger automation framework + (CI). +- Observable: Code test coverage can be observed. +- Ordered: Code has more order as test automation requires code to be + ready for testing. + +Now that you are convinced that you must write tests, the question is +why should you write them first? + +- With test-first, our mind is still in conceptual solution mode and + not in technical coding mode. Hence we think about the problem and + not the concrete approach to solve it — leading to more expressive + solutions. +- Test-first ensures that no feature is added without tests, making + sure that logic is not an accident. 
+- Test-first enables better interfaces, as we approach a problem from + the user perspective of an interface and not from the solution + provider. + + The ideal starting point for implementing logic is, when you have an + explicit function signature and a set of acceptance test cases. + +Everything else is premature coding — creating production code without +having at least one “red” (failing) acceptance test. + +Problem Complexity Continuum + +Before we dig into writing tests, we want to have a look at problems of +varying difficulty. We start with the domain of travelling as an anology +and head over to coding problems right away. + + Traveling Problems + +Here are four tasks, with increasing difficulty: + +- Commute to your school.You almost do it without thinking, as you did + it every day. + +- Travel to Norddeich Mole.You (probably) weren’t there yet, but know + how to drive a car or book a train. + +- Travel to Chhatrapati Shivaji Maharaj Vastu Sangrahalaya(formerly: + Prince of Wales museum in Mumbai).Even if you know how to book + international flights, using the Indian local transport is novel to + you. + +- Travel to Mars.Nobody did that before… + + Coding Problems + +Assume your supervisors asks you to: + +- Implement a Fibonacci function.You might have to look it up, but + there is a best practice for writing it. + +- Implement a French Deck of Cards data structure and methods + (supporting sorting, shuffling, …).Using Ord, rand::SliceRandom and + other traits, you can make it work. + +- Implement a ToDo app.Though this is the typical “Hello World” + example for MVC frameworks, the customer might have special things + in mind… you have to figure out things while you go. + +- Implement a Corona Warning app.Assume for a moment it is March 2020… + nobody has done it before and there are tons of technical and legal + challenges ahead. + + Cynefin + +The previous examples show different groups of problems, depending on +their complexity/difficulty/novelty. 
We consider the Cynefin framework +(Welsh for “habitat”), which can also be used for non-coding tasks: + + + +---------------------------+---------------------------+ + | - Complex - | - Complicated - | + | | | + | Enabling constraints | Governing constraints | + | Loosely coupled | Tightly coupled | + | Probe-Sense-Respond | Sense-Analyse-Respond | + | Emergent Practice | Good Practice | + | +------+-------+ | + +--------------------| - Disorder - |-------------------+ + | - Chaotic - +------+-------+ - Clear - | + | | | + | Lacking constraints | Tightly constrained | + | Decoupled | No degrees of freedom | + | Act-Sense-Respond | Sense-Categorise-Respond | + | Novel Practice | Best Practice | + | | | + +---------------------------+---------------------------+ + +Depending on the habitat in which your problem lies, you change your +behaviour when coding: + +- If you are in “clear” habitat, start coding immediately based on the + tests. The problem is trivial, i.e. you know exactly what code to + write right away. Note that even in this case, tests are a must. If + you leave them out, you risk leaving logic uncovered that might at a + later point grow to non-trivial size. +- If you are in the “complicated” habitat, try decomposing your + problem step by step. If you are successful, partial problems are in + the clear habitat and composing them again leads to a solution for a + complicated problem. +- If you are in the “complex” habitat, use trial-and-error to learn + more about the problem. Do not touch production code, but rather + experiment in the testing code. +- If you are in “chaos” habitat, don’t work in your normal codebase, + rather create prototypes (standalone project, paper) to come up with + acceptance tests. “Chaos” is also the habitat in which legacy code + lives: no one knows what effect a change causes. +- If you are in “disorder” habitat, try segmenting your problem into + domains where you know what the habitats are and continue from + there. 
+ +Step-Wise Coding in the Clear + +In the clear domain, one distinguishes between trivial problems (writing +the logic is totally straightforward) and simple (it is not 100% +straightforward). A simple problem is when it is straightforward to +derive test cases from the requirements with increasing difficulty (baby +steps). + +This stepwise/nested approach tries to trivialize the simple problem. +The incremental test cases form a strict total order on difficulty, +i.e. a more difficult problem subsumes the less difficult one. All tests +are associated with a single API function. + +Variation Dimensions + +When writing incremental tests, we look at the problem from two types of +dimensions: a) qualitative, b) quantitative. These dimensions have +effects in our solution like this with respect to the employed data +structures and algorithms: + +- Qualitative: handling different problem aspects + - Data: structs, enums + - Logic: cases +- Quantitative: handling different problem sizes + - Data: arrays, lists, iterators + - Logic: loops + +In order to achieve increasing difficulty, the steps along a dimension +must be ordered: + +- Quantitative: 0, 1, 2, many +- Qualitative: whatever suits the dimension; it is non-trivial to + decide which is harder + +In the example at the end of this section, we specify these domains and +give the increasing difficulty steps. + + The remaining domains “complex” and “complicated” are not tackled in + this section as they require more advanced techniques. + +Testing in Rust + +Now with this theoretical knowledge, we start doing some actual testing +in Rust. First, we learn how to run and implement tests. + + cargo test or cargo tarpaulin + +With cargo test, all your tests are executed in parallel. If you add +more text behind cargo test, it filters for these tests by name. + +If you are also concerned for test coverage (how much of your code is +examined by a test), cargo-tarpaulin provides this (on x86_64 and +Linux). 
#[cfg(not(tarpaulin_include))] helps to ignore parts where you +definitely don’t want/need coverage, e.g. getters/setters. + + cargo install cargo-tarpaulin + + cargo tarpaulin --verbose --all-features --ignore-tests --workspace --timeout 120 --out Xml + + Writing Unit Tests + +Unit tests are used to check a single unit of functionality (often one +or more functions). They are defined alongside the code, usually inside +the module like this: + + // code under test + fn function(n: u32) -> u32 { + // ... + } + + #[cfg(test)] + mod tests { + use super::*; + + #[test] + fn test_something() { + assert_eq!(function(31), 42); + } + } + + Writing Integration Tests + +In contrast to unit tests, integration test check the interaction of +functional units in an end-to-end fashion. These are defined in .rs +files in /tests and not part of the normal source code. +The tests are external, i.e. they have to import the crate that they are +testing and they can only access public APIs. The integration test file +usually looks like this: + + use crate_under_test::function; + + #[test] + fn test_something() { + assert_eq!(function(31), 42); + } + + Assertions + +Core to your tests are assertions that separate passing from failing +tests: + +- assert!(arg), check for arg to be true. +- assert_eq!(left, right), check for left to be equal to right. +- assert_ne!(left, right), check for non-equal +- #[should_panic], annotate the test to expect a panic + +Also consider pretty_assertions as a drop-in replacement to make test +failures and causes better visible. + + Writing Documentation Tests + +Eventually, Rust’s documentation allows to include code examples with +assertions. These are called documentation tests and make sure that your +documentation and code stay in sync. + + /// # Fibonacci + /// Generates the n-th fibonacci number. 
+ /// + /// fib(n) = fib(n-1) + fib(n-2) + /// + /// + /// Example usage: + /// ```rust + /// let n = 5; + /// + /// assert_eq!(fib(n), 8); + /// ``` + pub fn fib(n: u32) -> u32 { + if n == 0 || n == 1 { + 1 + } else { + fib(n-1) + fib(n-2) + } + } + +The resulting testable documentation looks like this when accessed via a +web interface: + +[Documentation] + + Table-Based Testing + + Here, make use of macros, which will be explained later in U10. + +Often, you have a certain pattern to your test cases, i.e. you have a +string that gets converted to a well-known value, like this: + + acceptance_test!(simple, + first: "XIV", 24, + second: "MCDIX", 1409, + third: "MMXXII", 2022, + ); + +The approach is that we pick a module name, a test-case name and then a +list of input and output values. In Rust, this kind of table-based +testing is implemented using macro_rules!: + + use crate_under_test::function; + + macro_rules! acceptance_test { + ($suite:ident, $($name:ident: $input:expr, $output:expr,)*) => { + mod $suite { + use super::*; + $( + #[test] + fn $name() -> () { + let out = function($input); + assert_eq!($output, out); + } + )* + } + }; + } + + Roman Numbers Hands-On + +In the following video, we put this practice in action to solve the +following problem: + + Develop a library function that converts a roman number (e.g. XIV) to + a decimal number (e.g. 14) — and vice-versa. + + GitLab & Testing + +Test coverage results can be observed by GitLab. In -/settings/ci_cd, go +to “Test Coverage parsing” and enter ^\d+.\d+% coverage. The resulting +chart under -/graphs//charts looks like this: + + W02: Work Sheet + +Test-First Rust Coding and Source Control + +Develop Fizz buzz test-first and using a Git repository. Here are the +requirements in prose: + + fizzbuzz is a command-line utility that takes a command-line argument + n and prints all numbers 1 to n (each on a separate line) while + following the Fizz Buzz rules. 
Every number that is divisible by 3 is + replaced with “Fizz”. Every number that is divisible by 5 is replaced + with “Buzz”. If it is divisible by both, print “FizzBuzz”. + +- Think about which habitat this problem has (consider the Cynefin + model). Explain your choice. + +- Make sure you watched the “Roman Numbers Hands-On” video, showing + you the test-driven development process. + +- Create a GitLab project with a Git repository, named “Fizz Buzz”. + Add the template code to the repository. + +- Optionally set up cargo-tarpaulin (if you are on x86 Linux) and + track your coverage while your code tests and algorithm. Check what + happens if you disable certain tests. + +- Create acceptance tests for a function fizzbuzz(n: u32) -> String. + For each case of the requirements (actual number, Fizz, Buzz, + FizzBuzz), create a dedicated #[test] function. You might also use + the macro-based approach for table-based testing. Each commit should + add either a test or respective incremental code changes (and have a + special form for the commit message, you need that for a later + unit). Use increments, where in each increment you: + + - add test for one more requirement case (commit with message + starting with “test: …”) or + - change the code to make the test pass (commit with message + starting with “feat: …”). + +- Finally, implement the full program that reads the CLI argument and + prints to stdout (commits should again start with “feat: …”). + +GitLab Continuous Integration + +- Extend your fizzbuzz project by your first CI Pipeline with + individual jobs that do the following: + - cargo tarpaulin + - cargo fmt + - cargo clippy +- Verify that they work by temporarily introducing code changes that + make the jobs fail. + + Learning from the Borrow Checker + +The previous sections already showed that the borrow checker might be +strict, but its help is highly appreciated as it ensures memory and +thread safety. 
So keep in mind: + + The borrow checker is your friend, not your foe. + +In addition to helping with safety, it helps to make programs more +structured. + + Sea or Forest? + +(Source: Programming Rust) + +With the ownership system, Rust discourages the Sea of Objects that is +common in other languages: + + | + V +-------+ + +-------+ +------->| |-------------+ + | |----+ +-------+ V + +-------+ +-------+ + | +------------->| |-------> + | +-------+ | +-------+ + +->| |--+ +-------+ | + +-------+ +--->| |<-------------+ + +-------+ + +In this situation, testing gets hard, as does creation of objects, +following interactions, … + +Rust instead, through ownership, encourages Trees of Objects[4] which +are much easier to reason about, change, and in general: maintain. +Hence, the software can be more dependable, as it’s easier to verify and +adapt. + + +-------+ + | | + +-------+ + | + +------------------+-------------------+ + V V + +-------+ +-------+ + | | | | + +-------+ +-------+ + | + +------------------+-------------------+ + V V + +-------+ +-------+ + | | | | + +-------+ +-------+ + | + +-------------+--------------+ + V V + +-------+ +-------+ + | | | | + +-------+ +-------+ + + Coupling Components + +(Source: Florian Gilcher’s Talk “Ownership and Borrowing from a systems +construction point of view”.) + +When we write software, we develop different components (could be as +simple as a function for now) that are dependent on each other — they +are coupled. You also learned that the borrow checker takes care of +resources (files, sockets, …), making sure that they are dropped when +they are no longer in use. With function signatures, we make it very +clear how the coupling between the caller and the callee is and we +define the handover mechanism for the function parameters. + +Now assume we implement a function that writes a string to a file, and +returns success/error when completed. 
Let us also assume that the +function is called from some other part of our code e.g. + + // ... mystery code before + let writeResult = write(exampleFile, exampleStringBuffer); + // ... mystery code after + +The location in code from which the write function is called is the call +site, whereas the overall function calling write is the caller. In this +case, write is the callee i.e. the function being called. + +We can come up with at least three different variants: + + fn write(file: File, string_buffer: String) + -> Result { + } + +This variant is called the independent, as caller and callee are not +coupled. Instead, the callee gets both the file and the string and is by +itself responsible for cleaning up (i.e. closing the file eventually and +releasing the string buffer). + + fn write(file: File, string_buffer: &str) + -> Result { + } + +This variant is called the coupled, as the caller maintains ownership of +the string buffer but passes (moves) the file to the callee. However, +the callee can break the coupling as &str can be copied into a String. +So the write function could create its own copy and become independent +from the caller. + + fn write(file: &mut File, string_buffer: &str) + -> Result { + } + +This variant is called the tightly coupled, as File is neither Clone nor +Copy. Hence the callee is dependent on the caller to borrow the file and +maintain ownership. + +Apart from coupled functions, there are also examples in the Rust +standard library where we have coupled types, i.e. a type is depending +on another. An example (about which we learn more in U04) is Vec to +with Iter can be coupled. + +In summary, the ownership and type systems go a long way in making +component coupling clear — and not relying on natural language +explanation in the documentation that is easy to miss/misunderstand. 
+ + U03: Own Your Memory and More + +Are you ready for a short, but highly important, excursion into the one +language feature that sets Rust really apart from other programming +languages? Yes? Ok, then let’s first have a look at memory management +and its pitfalls. With these challenges in mind, Rust’s dependable +Ownership Model will be eye-opening. Its “enforcer”, the so-called +Borrow Checker is a tool to learn from, allowing you to write more +dependable code. + + Memory and its Management in a Nutshell + +Before we look into how Rust enables automatic & safe memory management, +we first have to understand what can go wrong with memory in the first +place. + +Here is a view into a 16-bit- / 2-byte-aligned memory[5] region (each . +is a bit): + + | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 | + +--------------------------------------------------| + 0 | . . . . . . . . . . . . . . . . | + 16 | . . . . . . . . . . . . . . . . | + 32 | . . . . . . . . . . . . . . . . | + +An ‘aligned’ memory address is for example 16, which points to the byte +marked with x in the following: + + | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 | + +--------------------------------------------------| + 0 | . . . . . . . . . . . . . . . . | + 16 | x x x x x x x x . . . . . . . . | + 32 | . . . . . . . . . . . . . . . . | + +Memory is provided to your program from, for instance, an operating +system or a different lower layer. Working with memory appropriately was +— and is still today — a challenging task. In languages such as C, C++, +and others, it is the software developer’s task to handle memory. This +means to allocate memory (known as malloc) when needed, read and write +to appropriate memory locations and free/deallocate memory when it is no +longer needed. In these languages, you can also use pointers to refer to +memory — even if the location pointed to + +1. does not belong to the program (i.e. cannot be read/written or + both), +2. has not yet been allocated by the program, +3. 
has been deallocated by the program, or +4. does not exist at all (e.g. pointer to 4711 if you only have 2k = + 2048 bytes of memory or pointer to NULL) + +In computing, some of these mistakes in memory management have special +names. Let’s have a look at each of them individually: + +Using Uninitialized Memory + +Assume that you are allowed to use the following region of memory, but +it has not been initialized. This means that nobody took the effort to +bring it to a well-defined state (e.g. all bits set to 0). Instead, we +find the following seemingly random memory content: + + | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 | + +--------------------------------------------------| + 0 | 0 1 0 0 1 1 1 0 0 1 1 0 1 1 0 1 | + 16 | 0 1 0 1 0 1 0 0 1 1 0 1 0 1 0 1 | + 32 | 1 0 0 1 1 1 0 1 1 1 1 1 0 0 1 1 | + +Now assume we say that our variable a is at location 16, hence it has +the bit pattern (right to left = high to low) of 00101010 = 42 [6]. If +we get another region of uninitialized memory, the value might differ. +Hence if our program relies on the value being 0 on initialization, we +are in a bad situation. + +Use After Free / Double Free + +Assume you use the byte following location 32 to store foo in this code: + + #[derive(Debug)] + struct Foo { + bar: u16, + } + + let foo = Foo { bar: 5 }; + println!("{:?}", foo); + drop(foo); // `foo` is freed + println!("{:?}", foo); // `foo` is used after free + +Note that this Rust code does not compile for a reason you learn later. +For now, you should notice that the println! after the drop would be a +use after free. If this were allowed it could happen that the freed +memory is used by someone else and filled with another value than 5, +leading to surprising results. + +A similar situation is caused when a region of memory is freed twice, +which can (in languages such as C) lead to invalid state of memory +allocations. This is called a double free and can lead to security +issues. 
+

Buffer Over- or Underflow

While we will look in more detail at arrays later, for now just imagine
that they are a fixed number of elements of same type. Let’s take for
instance foo : [u8; 3], so three bytes located at 16 and marked with 0,
1, 2 in the following memory diagram.

     | 0  1  2  3  4  5  6  7  8  9  10 11 12 13 14 15 |
     +--------------------------------------------------|
  0  | .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  |
  16 | 0  0  0  0  0  0  0  0  1  1  1  1  1  1  1  1  |
  32 | 2  2  2  2  2  2  2  2  .  .  .  .  .  .  .  .  |

If we access foo[n], the compiler translates this into reading bits
\(16 + 8n\) to \(16 + 8(n+1) - 1\), as each element is 8 bits wide. Now
with this formula, it is certainly possible to compute bit ranges for
\(n = -1 \) or \(n = 5\). However, if we do this, we access memory that
is outside of the region allocated for foo — a buffer under- or
overflow. In memory-safe languages, this causes an index-out-of-bounds
error. In languages such as C/C++, this is not checked automatically and
it is the job of the developer to ensure the index never leaves the
valid range.

Null Dereferences

For a long time, NULL has been known to be a dangerous idea[7], however,
we still face it in many popular programming languages. The issue is the
following: If you have a pointer that should point to an object, but,
e.g., does not yet do so, Hoare decided that one would give it the value
of NULL (0 in most languages) to make it clear that it is not yet there.
If a program is to access it, one would first need to check for NULL and
depending on the result do this or that. However, this check is not
mandatory or enforced in many languages. In memory-managed languages,
this leads, e.g., to a NullReferenceException, which is safe but might
crash your program — and can be particularly hard to debug (i.e. finding
out where it became or should become non-null).
+ +Data Races in Concurrent Access + +Assume for a moment that two threads share this region of memory: + + | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 | + +--------------------------------------------------| + 0 | 0 1 0 0 1 1 1 0 0 1 1 0 1 1 0 1 | + 16 | 0 1 0 1 0 1 0 0 1 1 0 1 0 1 0 1 | + 32 | 1 0 0 1 1 1 0 1 1 1 1 1 0 0 1 1 | + +At location 16, a counter variable is stored, which is initially 42. +Both threads now have the task of incrementing it by 10, which should +eventually lead to the counter being 62. The naive version for this +looks like this: + + use std::thread; + + fn increment(mut counter: Counter) { + for _ in 0..10 { + counter.count += 1; + } + } + + #[derive(Debug)] + struct Counter { + count: u32, + } + + fn main() { + let mut counter = Counter { count: 42 }; + let t1 = thread::spawn(|| increment(counter)); + let t2 = thread::spawn(|| increment(counter)); + t1.join().unwrap(); + t2.join().unwrap(); + println!("{:#?}", counter); + } + +At this point, it is essential to tell you that the += operation is +composed of at least three operations: + +- register = load(memory) +- increment(register) +- store(memory, register) + +In a concurrent setting, the three operations for both threads can +interleave in arbitrary order. For example, thread 2 could read 42, then +thread 1 executes fully, and then thread 2 continues. What would be the +result? We assure you that 62 is certainly not the answer. + +A Sidenote on Garbage Collectors + +As of today, there are two approaches to memory management: manual +management and garbage collection. While the former puts a focus on +control, the latter puts it on safety. With Rust, you get both as we see +in the next section. Now why is control important? If you are writing +systems that should impose dependable timing, it is imperative that they +allocate and free memory in an automated and deterministic fashion or +provide you with primitives that allow you to make it deterministic. 
In
+C/C++ these primitives are provided, but the compiler drops all safety
guarantees. In Java, safety is provided, but the compiler drops all
timing guarantees as a piece of memory can be freed at any time after
the last reference to it was invalidated. In the past, there has been
work on real-time garbage collection, but this hasn’t made it into
readily available technology stacks. So Rust provides an interesting
trade-off here so that you neither miss the predictable timing of manual
memory management, nor the safety of garbage collection. This leads to
automatic dependable memory management.

Onward

With these five dangerous memory operations in mind, we are ready to
look at Rust’s ownership model as well as other language features that
make these five causes of bugs impossible.

 Ownership in Rust

Ownership Model and Borrowing

In Rust, any piece of data (typically called value) is owned by exactly
one owner (i.e., variable or other data structure). When the owner of a
value goes out of scope, the value is Dropped.

Ownership Trees

The variables of your program act as roots of ownership trees. Let’s
consider the following program:

    fn main() {
        let a : (u8, u8) = (5, 7);
    }

Here is what this tuple looks like in memory (we do not show the byte
values):

     | 0  1  2  3  4  5  6  7  8  9  10 11 12 13 14 15 |
     +--------------------------------------------------|
  0  | .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  |
  16 | +--------------------------------------------+  |
  32 | |                     a                      |  |
  48 | +--------------------------------------------+  |
  64 | .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  |

With a.0 we can access the 0th element of the tuple a. So another view
would be:

     | 0  1  2  3  4  5  6  7  8  9  10 11 12 13 14 15 |
     +--------------------------------------------------|
  0  | .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  |
  16 | +--------------------+  +---------------------+ |
  32 | |         a.0        |  |         a.1         | |
  48 | +--------------------+  +---------------------+ |
  64 | .  .  . 
. . . . . . . . . . . . . | + +Note that in this example, a tree is constructed. a is the root and a.0 +as well as a.1 are children. We can also display it like this: + + Stack of main() + └── a + ├── .0 + └── .1 + +Why Less Power is Sometimes Better + +With the ownership system, Rust becomes less powerful than other +languages, i.e., there are algorithms and data structures you can +express in other languages that you cannot in Rust. In practice, less +power is not always a bad thing. In fact, with the restrictions Rust +imposes, we rule out a lot of programs that are hard to analyze for +correctness or are even fundamentally broken. So you see that this +limited power contributes to our software’s dependability. Later in U13 +we look at unsafe Rust, a superset of safe Rust that allows certain +operations which could (but should not) circumvent ownership. + +Now if we only had single owners for values and they could not be +changed for safety reasons, Rust would indeed be rather limited in its +functionality. Instead, Rust allows the following four operations to +increase its power again: + +- ownership can be moved, i.e. the ownership can be transferred from + one variable to another, e.g. in assignments or function calls +- primitive types that can be copied, allowing for functions to be + called by value +- it is possible to borrow a reference to a value +- the standard library contains generic, reference-counted types + (e.g. Rc) + +move vs. Clone vs. Copy + +As mentioned before, ownership in Rust does not need to be static. The +value can move from one owner to another. In this case, the old place +becomes uninitialized and can no longer be used. Rust checks for this by +disallowing access to the previous owner after the move. + +A type can implement the Copy trait, which indicates that one can create +a duplicate of the original value by copying it bit-by-bit. If a type +implements Copy, moves become copies (e.g. 
a function parameter that is +Copy is copied when the function is called). This also means that +copying happens implicitly — we never explicitly call a function to do +so (Copy is a marker trait, having no functionality except giving +information to the compiler). The copy is complete (often called deep) +and independent of the original value — changing your copy does not +affect the original. + +A type can implement the Clone trait, which allows us to create +duplicates of types that cannot be copied bit-by-bit. The duplication +logic is implemented in a custom clone function or it can be +automatically derived (if all elements of a type are Clone themselves). +Afterwards, a user can do this explicitly by calling value.clone() and +continuing to work with the return value. Whether the duplicate is +deep/independent is governed by the type for which Clone is +implemented (this is not formalized; you must consider the type +documentation). For String, a clone creates a deep copy that acts +independently from the original. For Rc (reference counter), a clone +creates a shallow copy that stays connected to the other instances. + +Finally, there are types that are neither Copy nor Clone. The major +reason is that safe duplication cannot be done within Rust alone or it +would be misleading/unidiomatic. For example, a File does not implement +either of the traits: Copy does not work as a bit-by-bit copy of the +File struct would not create an independent file on the file system. +Clone could technically work, but what would be the exact semantics? If +we file.clone() what file name would the duplicate have? + +You will learn more about traits later. For now, note that Clone is a +supertrait of Copy, so everything that is Copy must be Clone as well. +While Clone is a trait with an implementation you must derive or +implement, Copy only marks the type. Hence, it should be used with care +and only added to types that really fulfil the bit-by-bit copy-ability. 
+ +Owning Heap-Values + +When you declare variables, the value that they are assigned to +typically lives on the stack. The stack is the area of your memory where +data related to the current scope is stored (e.g. the current function’s +body). However, if you plan to have values that live longer or they are +too large to store and move around on the stack, you must place them on +the heap. In Rust, you can do so by using various types, the easiest of +which is Box. With Box::new(42u16), Rust allocates enough memory on +the heap to store a u16 and returns a Box pointer. Box implements Deref, +which means that in many cases you use it like you would use a u16 +(e.g. calling methods on it). If you want to use it in an operation +(e.g. addition), you have to dereference explicitly using *. Another +example is Vec, which stores a continuous collection of Ts on the +heap (cf. ArrayList in Java). Such continuous memory sections are also +referred to as being contiguous. + +Borrowing References + +With move, Clone and Copy, we are moving and copying data around, which +is not what we want in all cases. Particularly, when the data we operate +on is large and the function we give it to does not need ownership +(because it does not manipulate it in a way that requires ownership). +For these use cases, Rust provides references. You might have heard +about a pointer before in languages such as C/C++. A pointer is a value +that is interpreted as a memory location. These languages also give the +pointer a type to allow for compile-time checks for compatibility +(though in C, e.g., it is easy to cast a pointer to a different type +which is why this is forbidden in MISRA C). + +In Rust, references represent non-owning pointers to data. Doing math on +them is not possible (e.g. shifting it by a couple of bytes) as this can +lead to memory issues. References (e.g. to Point) come in two +flavours: * Shared references, indicated by &, can be used to access the +data read-only. 
* Exclusive references, indicated by &mut, can be used
+to mutate the data.

Furthermore, at compile-time, they are associated with a lifetime.
Lifetime is a concept within the Rust compiler that tracks the “time”
(portion of the program, actually) between a value being created and
dropped. The borrow checker enforces the following about references: * A
reference cannot be created to null (or any other invalid memory
region). * No reference may outlive its referent (this avoids
dangling pointers). * At one point in “time”, there can only ever be
either an arbitrary number of shared references or exactly one exclusive
reference. * As long as there is an exclusive reference, the original
owner cannot do anything with the data.

Note that at runtime, the reference again is a memory address
(i.e. pointer), as the lifetimes (and types) are only used at
compile-time and dropped afterwards. As soon as safety has been checked,
there is no need to redo this at runtime.

You might have heard that lifetimes are hard to understand and an aspect
that really sets Rust apart from other languages. For now, you should
not care too much about lifetimes, because you will not need to
explicitly use them. There are multiple reasons behind this:

1. When you write application code (not libraries), you are in full
   control and don’t have to accommodate various use cases of your code.
2. If you run into lifetime issues by the borrow checker, you can
   often cheat by .clone()ing the value. This is not ideal in terms of
   performance (you might not really need a clone), but can help you
   make progress. Later, you can do performance profiling and figure
   out if this clone is really a bottleneck.
3. Lifetime Elision leads to the situation where Rust can infer the
   lifetimes for many common use cases.
4. If you want to write a high-performance library, e.g. some zero-copy
   data processing, you should learn in detail about lifetimes.
But
+this is out of the scope of this course.

Ownership by Example

Now, let’s put ownership into practice: Say we have a collection of
numbers that we want to square (we discuss collections in more detail in
U04). A first attempt would look like this:

    fn square_list(list: Vec<u8>) -> Vec<u8> {
        let mut squares = vec![];
        for item in list { // item: u8
            squares.push(item.pow(2))
        }
        squares
    }

    fn main() {
        let list = vec![2,3,4];
        let squares = square_list(list);
        // println!("{:#?}", list); <-- does not work as square_list takes ownership of list
        println!("{:#?}", squares);
    }

What you see is that we move the list into the function (list parameter
has no & and accessing it afterwards fails to compile). As the result of
our function is a list as well, we return a new Vec<u8> (squares) and
list is dropped at the end of square_list. This seems to be rather
complicated, given that we only want to generate a list of squares based
on an existing list.

First, we remove the cannot access list after square_list() issue by
using a reference instead of a move:

    fn square_list(list: &Vec<u8>) -> Vec<u8> {
        let mut squares = vec![];
        for item in list { // item: &u8
            squares.push(item.pow(2))
        }
        squares
    }

    fn main() {
        let list = vec![2,3,4];
        let squares = square_list(&list);
        println!("{:#?}", list); // list is no longer moved into square_list
        println!("{:#?}", squares);
    }

Instead of using &Vec<u8>, we can use &[T], which is a shared Slice, a
special form of reference (there is also a &mut [T] exclusive slice). A
slice has a type, a start of a memory region and a count of elements. 
+
Hence, we can do the following (which also allows us to call square_list
with arrays of numbers):

    fn square_slice(list: &[u8]) -> Vec<u8> {
        let mut squares = vec![];
        for item in list { // item: &u8
            squares.push(item.pow(2))
        }
        squares
    }

    fn main() {
        let list = vec![2,3,4];
        let array = &[5,6,7];
        let squares = square_slice(&list);
        println!("{:#?}", list);
        println!("{:#?}", squares);
        let squares = square_slice(array);
        println!("{:#?}", array);
        println!("{:#?}", squares);
    }

At this point, the users of our function complain about its performance.
When calling it with large quantities of data, the algorithm seems to
be slow. They also mention that when calling the function, they are only
interested in the result and do not care about the original list. We
look at the function and see that we create a new Vec to insert the data
instead of manipulating the existing data. So we decide to change the
function as follows:

    fn square_slice(list: &mut [u8]) {
        for item in list { // item: &mut u8
            *item = item.pow(2);
        }
    }

    fn main() {
        let mut list = vec![2,3,4];
        let array = &mut [5,6,7];
        square_slice(&mut list);
        println!("{:#?}", list);
        square_slice(array);
        println!("{:#?}", array);
    }

Here, we used two additional pieces of syntax:

- with ., we can interact with the reference and Rust automatically
  borrows/dereferences the data.
- with *, we explicitly dereference the mutable borrow so that we can
  assign the value to the original memory location.

A Visual Overview of Ownership

The concept and syntax associated with Ownership is visualized in the
following diagram:

{{#include img/rust-move-copy-borrow.svg }}

Rufflewind, Graphical depiction of ownership and borrowing in Rust, CC
BY 4.0

Legend:

- Move ()
- Copy ()
- Locked (): original object is locked while borrowed — nothing can be
  done with it. 
+
- Frozen (): original object is frozen: non-mutable references can be
  taken (but no mutable references and it cannot be moved).

Revisiting Memory Management Issues in Rust

Now that we are equipped with some knowledge about the Ownership Model
and the Borrow Checker, we can revisit the memory issues we identified
before. Note that most of these checks are executed at compile-time,
making sure you can never ship software with these issues.

Uninitialized Memory

In Rust, you are not allowed to read from a variable that has not been
initialized:

    let v : u32;
    println!("{}", v);

Hence before you read from a variable, you have to first assign it an
initial value. Some data types work in a way that they have a
well-defined initial (or default) value, in which case you are not
required to specify it. In summary, you can rely on the fact that you
are never accessing uninitialized memory and never have this memory
safety issue.

Use After Free / Double Free

In Rust, if a variable is moved or dropped, the original variable
becomes no longer usable. The following example does not compile:

    #[derive(Debug)]
    struct Foo {
        bar: u16,
    }

    let foo = Foo { bar: 5 };
    println!("{:?}", foo);
    drop(foo); // `foo` is freed
    println!("{:?}", foo); // `foo` would be used after free

As the compiler states, the drop function takes its parameter by move,
so foo is no longer valid after the call to drop — use after free is
impossible. This also means that a second drop(foo) fails for the same
reason, hence a double free is impossible as well.

Buffer Over- or Underflow

In contrast to the other checks, this one is done at run-time —
particularly because the index into a buffer is most of the time dynamic
and not known at compile-time. As opposed to other languages, Rust adds
bound-checking code to all accesses of buffers. Depending on which
access method you use, an out-of-bounds access could either trigger a
panic!() or yield an Option::None. 
+ + let v = vec![5, 7, 8]; + let oob = v.get(4); + println!("{:#?}", oob); + println!("{}", v[4]); + +Note that the runtime cost of this check is often negligible as branch +prediction of modern CPUs can often figure out whether the bounds check +succeeds or not. + +Null References + +In Rust, there is no such thing as a NULL constant that can be used. +Instead, null-able references come as Option<&T>, which are None if they +are non-existent. Hence, a developer has to write code in a way that the +None case is handled. There is no way to, by accident, work with a +reference if there is none. + +Data Races in Concurrent Access + +If we take the code from before, consider that we want to work on +mutable references in increment, and have a conversation with the +compiler (implementing suggested fixes iteratively), we eventually +arrive at this: + + use std::thread; + + fn increment(counter: &mut Counter) { + for _ in 0..10 { + counter.count += 1; + } + } + + #[derive(Debug)] + struct Counter { + count: u32, + } + + fn main() { + let mut counter = Counter { count: 42 }; + let t1 = thread::spawn(move || increment(&mut counter)); + let t2 = thread::spawn(move || increment(&mut counter)); + t1.join().unwrap(); + t2.join().unwrap(); + println!("{:#?}", counter); + } + +Here, we again see ownership at work: Rust mandates the move keyword to +bring the counter as a reference to the threads[8]. However, you cannot +move it twice (and eventually try to print it) and violate the only one +single write access rule mentioned above. Also note that the closure +move || ... &mut counter does not make much sense, as you do not have to +move something to get a mutable reference to it. How we can write a +thread-safe variant of this, which counts to 62 as expected, will be +discussed later in U11. 
+ +Other Resources than Memory + +Note that the concept of ownership also helps with other resources that +are not memory: If a variable owns, for instance, a file or network +socket, the ownership enforces safe access and makes sure that the +resource is released on drop. + + Summary + + What did you learn? + +- How Rust’s automated memory management can save you work and avoid + mistakes. +- What ownership means, how it is enforced and how your code is + affected by it. +- How the concept of ownership can contribute to software that is + clearer and easier to maintain. + + Where can you learn more? + +- Rust Book: Ch. 4.1 & 4.2 +- Programming Rust: Ch. 4, 5 +- Rust in Action: Ch. 4.3 & 4.4 +- Rust for Rustaceans: Ch. 2 +- cheats.rs: References & Pointers, Memory & Lifetimes +- Compile-Time Social Coordination (RustConf2021): “This is the story + of how [Zac] stopped stepping on everyone’s toes and learned to love + the borrow checker” +- RAII: Compile-Time Memory Management in C++ and Rust +- Memory Safety Project is about rewriting core Internet technology in + Rust + + W03: Work Sheet + +- Practice Ownership Rules using the Rustlings move_semantics and + primitive_types. + + Closures + +In the previous unit, you have already seen closures in action, often in +the form of helper functions: + + let pow_of_2 = std::iter::successors(Some(1u8), + |n| n.checked_mul(2) // <--- closure + ); + +Closures are anonymous functions with a distinct type and potentially +state associated with them. They are commonly used in iterator methods +(see above), for threading (std::thread::spawn(|| ...)), or default +value methods: + + use std::collections::HashMap; + + let mut map = HashMap::new(); + map.insert("Ferris", 42); + map.entry("Crab").or_insert_with(|| 47); + println!("{:#?}", map); + + Save the Environment + +Closures have a special power, namely that they are able to save their +environment[9]. 
Again, we already had an example for this in the +previous unit: + + fn fib_iter(n: usize) -> impl Iterator { + let mut state = (1,1); + std::iter::from_fn(move || { + let current = state.0; + state = (state.1, state.0 + state.1); + Some(current) + }).take(n) + } + + fn main() { + for i in fib_iter(5) { + println!("{}", i); + } + } + +Here, from_fn takes a closure. The closure steals the state variable, +which is for now stored next to it and updated whenever the closure’s +code is executed. For the iterator, every time next() is called, the +closure is executed. Note that we have to write move before the closure +to indicate that we want the closure to steal the environment. Without, +the closure is only allowed to borrow its environment (i.e. get & and +&mut references to variables). In this case, it must be ensured that the +closure does not outlive the variables to which it holds references. + +Closures also, practically, save the environment because they are fast +and safe to be used. The compiler is allowed to inline them, achieving +zero overhead costs. + + Function and Closure Types + +Every closure has a distinct type (i.e. two closures with identical +input-output types are still considered different). All closures +implement the FnOnce trait. For reference, all functions are of type +fn(??) -> ?? (lower case) and one can obtain a function pointer for +them. + +As you might already anticipate, there are more traits a closure can +implement. First, let’s look at a closure that drops something it stole +from the environment: + + let v : Vec = vec![]; + let f = || drop(v); + +This closure implements FnOnce because it can only be called once +(otherwise, it would cause a double-free error). Pseudocode for this +trait would look like this: + + trait FnOnce() -> R { + fn call_once(self) -> R; + } + +So self is moved and hence consumed. 
A different closure is one that
+only modifies the environment:

    let mut i = 0;
    let mut incr = || {
        i += 1;
        println!("Incremented! i is now {}", i);
    };
    incr();
    incr();

This closure implements FnMut, as it can mutate the environment. The
pseudocode looks like this:

    trait FnMut() -> R {
        fn call_mut(&mut self) -> R;
    }

Finally, a closure that only reads from the environment is a Fn:

    trait Fn() -> R {
        fn call(&self) -> R;
    }

Here is a Venn Diagram of closure traits:

    +-------------------------------------+
    | FnOnce(), e.g. || drop(v)           |
    | +---------------------------------+ |
    | | FnMut(), e.g. |arg| v.push(arg) | |
    | | +-----------------------------+ | |
    | | | Fn (),                      | | |
    | | |  e.g. |arg| arg + 1         | | |
    | | |  or   |arg| v.contains(arg) | | |
    | | +-----------------------------+ | |
    | +---------------------------------+ |
    +-------------------------------------+

What we can deduce from this is that it is possible to pass a Fn to a
function that takes a FnOnce, but the opposite does not work.

Closures are like any other value, hence they can be assigned to
variables, as you have seen above. They can also be moved/copied or
cloned, depending on their type. If a closure only holds references and
does not mutate (Fn), it can be copied and cloned. If it mutates, it can
be neither Clone nor Copy, because we would then have multiple mutable
references and violate memory safety guarantees. For move closures, it
depends on the type of values that are moved into the closure. If they
are all Clone or Copy, the closure is Clone or Copy, respectively:

    let mut greeting = String::from("Hello, ");
    let greet = move |name| {
        greeting.push_str(name);
        println!("{}", greeting);
    };
    greet.clone()("Ferris");
    greet.clone()("Hobbes");

Orthogonal to the traits a closure implements, the lifetime of the
closure is also part of its type. 
Hence, you can have 'static Fn (which
+can be called everywhere as its lifetime is the whole program) or
'a FnOnce (which can only be called as long as 'a lives and only once)
as well as all other permutations of lifetimes and closure traits.

 Collections

In this section, we take a closer look at three common collections that
help you work with multiple items at the same time.

 Vector Vec<T>

  This section is intentionally kept brief and you should read the
  excellent chapter 8.1 of the Rust book if you have any doubts or want
  a more in-depth introduction to vectors.

Our first type is the vector Vec<T>, which can be created and updated as
follows:

    struct Point {
        x: u8,
        y: u8
    }

    let points: Vec<Point> = Vec::new();

    let mut points: Vec<Point> = vec![Point { x: 0, y: 1 }, Point { x: 2, y: 3 }];
    points.push(Point { x: 0, y: 0 });

A vector represents a continuous memory region on the heap, consisting
of elements of type T:

               +---------+--------------+---------+
    Stack: v = | buffer  | capacity = 4 | len = 3 |
               +----+----+--------------+---------+
                    |
                    V
    Heap:      +----+----+----+----+
               | 27 | 31 | 42 |    |
               +----+----+----+----+

In contrast to LinkedList, vectors are known to be more efficient as
fewer pointers must be dereferenced and fewer random accesses happen.
When we access element with index i in the vector, we can use either
v[i] (which panics if the index is out of bounds) or the more robust
v.get(i) that returns an Option, which is None if the index is out of
bounds. These index-based accesses are very efficient due to the fact
that the elements are stored contiguously. The same holds for iteration,
which can be easily done with for element in v.

The vector also supports adding elements at the end using
push(element: T) and removing elements from the end with
pop() -> Option<T>. Further, swap(a: usize, b: usize) is efficient as
the two memory regions can be moved. 
Note that, when you insert or +remove elements, the data structure can do “reallocations” (e.g. when +the capacity is reached and another element is added, or when we remove +from the front). Hence, it is good practice to: + +- Not use this data structure if you often remove from the front. A + better choice would be VecDeque in this case. +- Use capacity information whenever it is available. For instance, + when you create a new vector to put a list of elements into it, + initialize the Vec::with_capacity, avoiding reallocations. + +Finally, a vector implements the following useful methods: + +- with join(&self, sep: Separator), we can flatten the vector, + inserting a separator in between elements + (e.g. ["Hey", "Ferris"].join(" ") -> "Hey Ferris") +- we can sort and search a vector +- using the third party rand crate, it is easily possible to shuffle + or choose from a vector. + +In other languages, you might have already encountered iterator +invalidation errors (in Java this is known as the runtime +ConcurrentModificationException) which happen when you attempt to +manipulate an iterator while you iterate over it. Consider the following +attempt to extend a list of even numbers by the missing odd numbers: + + let mut v = vec![0,2,4,6]; + for element in v { + v.push(element + 1) + } + +Note that in this situation we have undefined behaviour… how would you +handle adding the element to the list? Would it become part of the +iteration, which in this case would lead to an infinite loop? Or would +you keep the old iterator and the new elements separate? + +Fortunately, Rust prevents this behaviour using its ownership system: +The code does not compile, which is advantageous over Java’s runtime +error. 
To understand this, let’s have a closer look at the expansion of +the for-loop: + + let mut iterator = (v).into_iter(); + while let Some(element) = iterator.next() { + v.push(element + 1) + } + +We see two accesses to v with the following function signatures: + +- fn into_iter(self) -> Self::IntoIter which takes v as self +- fn push(&mut self, value: T) which takes v as &mut self + +Due to the move in into_iter, v can no longer be borrowed mutably for +push. rustc suggests to borrow v instead of move, which leads to the +following situation: + + let mut iterator = (&v).into_iter(); + while let Some(element) = iterator.next() { + v.push(element + 1) + } + +Now we hold an immutable reference to v, which disallows to get another +mutable reference to v to execute push. So whatever we do, iterator +invalidation is not possible. + + Dictionary HashMap + + This section is intentionally kept brief and you should read the + excellent chapter 8.3 of the Rust book if you have any doubts or want + a more in-depth introduction to hash maps. + +For use cases where each element of type V has an associated key of type +K, we can employ a HashMap that acts as a lookup table or dictionary. +This data structure is particularly efficient when we want to look up a +value with a special key. 
+ +In memory, a HashMap looks like this: + + +---------+---------------+-------+ + Stack: v = | len = 4 | table_size: 8 | table | + +---------+---------------+---+---+ + | + +-------------------------+ + V + Heap: +------+------+------+------+------+------+------+------+ + Hash Code: | cafe | 0 | c0de | dead | 0 | 0 | 0 | 4b1d | + +------+------+------+------+------+------+------+------+ + Key: | 7 | | -3 | 42 | | | | 28 | + | | | | | | | | | + Value: | H | | e | H | | | | o | + +------+------+------+------+------+------+------+------+ + +Similar to vectors, we can collect into HashMaps and add elements like +this: + + use std::collections::HashMap; + + let key_values = vec![(7, 'H'), (-3, 'e')]; + let mut map : HashMap<_, _> = key_values.into_iter().collect(); + map.insert(42, 'H'); + println!("{:#?}", map); + +Again, if we know how many elements we are going to have, initializing +with_capacity is more efficient. + +What is special about HashMaps is how to get elements. While get is +implemented similar to Vec::get, the entry() API is more commonly used: + + use std::collections::HashMap; + + let mut letters = HashMap::new(); + + for ch in "a practical course for computer scientists".chars() { + let counter = letters.entry(ch).or_insert(0); + *counter += 1; + } + println!("{:#?}", letters); + +Here, the entry() call returns either a Occupied or Vacant variant. This +makes it very easy to initialize an entry with a default value, +e.g. using the or_insert(self, default: V) or or_default(self) methods +as shown above. 
+ +Finally, we can iterate over a HashMap, which gives us both keys and +values: + + use std::collections::HashMap; + + let key_values = vec![(7, 'H'), (-3, 'e')]; + let mut map : HashMap<_, _> = key_values.into_iter().collect(); + map.insert(42, 'H'); + + for (k,v) in map { + println!("K: {}, V: {}", k, v); + } + + Set HashSet + +Finally, we look at HashSet, which is used for situations where you want +to have set semantics: any instance of type T can be in the set only +once. A major benefit of sets is their fast membership testing function +contains. A set can be used as follows: + + use std::collections::HashSet; + + let mut set : HashSet<_> = [4,5,4].into_iter().collect(); // duplicates are removed + set.insert(5); + set.insert(8); + println!("{:#?}", set); + set.extend(vec![7, 5, 3].into_iter()); + println!("{:#?}", set); + +Sets also support typical set operations such as intersection, union, +and difference, and we can also iterate over sets: + + use std::collections::HashSet; + + let setA : HashSet<_> = [1,2,3].into_iter().collect(); + let setB : HashSet<_> = [2,3,4].into_iter().collect(); + for i in setA.intersection(&setB) { + print!("{} ", i); + } + println!(""); + + for i in setA.union(&setB) { + print!("{} ", i); + } + println!(""); + + for i in setA.difference(&setB) { + print!("{} ", i); + } + println!(""); + + BTrees + +Finally, it should be noted that there are also collections that +leverage B-trees, namely BTreeMap and BTreeSet. While the Hash* +variants require you to implement Hash for T, the BTree* variants +require the Ord trait. Depending on your usecase and performance +considerations, one might be better suited than the other. + + Enumerations + +While structures serve to group behaviour and data, this section covers +enumerations (also known as sum types, discriminated unions, or +algebraic data types) that groups variants and behaviour. 
First, we +cover C-style enumerations that only cover variants, while later, we see +that Rust also allows variants to carry data. + + This section is intentionally kept brief and you should read the + excellent 6th chapter of the Rust book if you have any doubts or want + a more in-depth introduction to enumerations. ## C-Style Enumerations + +Here is how you can define a simple enum: + + enum Ordering { + Less, + Equal, + Greater, + } + +In memory, these values are stored as integers. You can also pick +distinct values for it: + + enum HttpStatus { + Ok = 200, + NotModified = 304, + NotFound = 404, + ... + } + +When you want to convert, you can use the as syntax: + + assert_eq!(HttpStatus::NotFound as i32, 404); + +The other direction, however, is not allowed easily, as you could +attempt to convert a number that has no matching enum variant. Instead, +you have to write your own checked conversion: + + fn http_status_from_u32(n: u32) -> Option { + match n { + 200 => Some(HttpStatus::Ok), + 304 => Some(HttpStatus::NotModified), + 404 => Some(HttpStatus::NotFound), + ... + _ => None, + } + } + +The enum_primitive crate provides similar functionality. + +Similar to deriving traits for structs, you can also derive traits for +enums. Finally, you can also implement methods on enums as you will see +in the next section. + + Enum Variants with Data + +Adding data to enum variants can use tuples or structs (and even +arbitrary combinations of the two). Here is how to declare enum tuple +variants: + + enum HttpMessage { + Empty(HttpStatus), + Content(HttpStatus, String) + } + +Certain HTTP messages do not contain a body (e.g. 
Not Modified), while +others carry both a status and the content: + + let awesome = HttpMessage::Content(HttpStatus::Ok, "Ferris is awesome!".to_string()); + +Here is how to declare structure variants; the major benefit being that +fields are named: + + enum Shape { + Rectangle { width: u32, height: u32 }, + Square { side_length: u32 }, + } + + Generic Enums + +While you learn about generics in a later unit, assume for now that +generic enums can be defined once and are instantiated for different +types. You already met two of these: + + enum Option { + Some(T), + None, + } + + enum Result { + Ok(T), + Err(E), + } + +These two types are common in the Rust standard library and are covered +in detail in a later unit. + +Let’s define a generic list that can store any type T: + + enum List { + Empty, + NonEmpty(Box>), + } + + struct ListNode { + element: T, + next: List, + } + +Each list is either empty or non-empty. If it is non-empty, it contains +a heap-allocated ListNode. Each list node has an element of type T and a +next list. Here is how we build a list: + + use self::List::*; + let cah = NonEmpty(Box::new(ListNode { + element: "Calvin & Hobbes", + next: Empty, + })); + let peanuts = NonEmpty(Box::new(ListNode { + element: "Peanuts", + next: cah, + })); + +As soon as we know more about pattern matching, we learn how to create a +convenient add method. + + Enums for Dependability + +Enumerations support dependable code in at least two ways: + +1. Misuse-resistant storing of data in related variants. +2. Misuse-resistant encoding of boolean values. + +Store data where it belongs + +By allowing to store data in an enum variant, we get the opportunity to +only store it where it is needed. Languages that do not provide enums +with data often resort to solutions that are not safe to use by a +developer. 
This safe solution: + + enum Variants { + First(boolean), + Second(i32), + } + +is then replaced with an easy-to-misuse solution: + + enum Variant { + First, + Second + } + + struct Variants { + variant: Variant, + first_boolean: boolean, + second_i32: i32, + } + +In this solution, the variant is decoupled from the data that is stored +inside, leading potentially to invalid accesses (variant is First, but +access second_i32). + +Boolean values revisited + +Another use case of enums are boolean values. In languages where enums +are not commonplace, you often run into the following issue. Assume your +hardware access library has the following method defined: + + fn configure_pin(is_disabled: boolean, is_output: boolean); + +Assume it is used here: + + configure_pin(false, false) + +Now, as a developer, it is your job to quickly and faithfully state if +the pin is enabled and an output pin. As you might realize, you easily +get confused with the negations (enabled = (is_disabled == false)). +Often, people argue that this is the only way to do it for efficiency +reasons (i.e. bools are cheaper to store than other types). On most +systems, this is non-sense as booleans are put into the smallest unit of +memory, which is often a byte. Hence, we can afford to replace boolean +with expressive enums: + + enum Status { + Enabled, + Disabled, + } + + enum Mode { + Output, + Input, + } + + fn configure_pin(status: Status, mode: Mode); + +The equivalent usage to the statement above then reads like: + + configure_pin(Status::Enabled, Mode::Input); + +making it crystal clear what the developer intended — without +compromising on efficiency (enum size is still a byte as this is enough +to express two variants). + + Iterators + +Ever since the creation of LISP (short for LISt Processor), developers +have been concerned with effective ways to work on lists of things. 
+Nowadays, we often talk about streams or iterators, which are a +generalization of lists; an iterator produces elements until it is +exhausted. A list could be the source of an iterator (we iterate over a +list) or the target of an iterator (we collect an iterator into a list). + +In general, iterator pipelines have the following shape: + + + ++=========++ +--------+ ++==========++ + || Produce ++---+> Adapt +---++> Consume || + ++=========++ +--------+ ++==========++ + +First, items of an iterator are produced (e.g., using a range or +collection). Afterwards, they might be adapted through one or more steps +(e.g., filtered, mapped, …). Eventually, they must be consumed (i.e., +touching each item or storing it into a value). + +The last step is extremely important, as iterators in Rust are lazy. +This means that without a consuming step, no item will ever be produced +or adapted. Instead, the consumer drives the iterator, by attempting to +consume item after item from the previous step, which in turn consumes +its previous step and so on. + + Iterator Trait + +Before we look at different ways to use producers, adapters, and +consumers, we look at the general form an iterator has, which is defined +by the Iterator trait in the standard library: + + trait Iterator { + type Item; + fn next(&mut self) -> Option; + } + +This tells us that each iterator has a unique Item type, specifying +which kind of items it produces. next is the function that is called to +get another item from the iterator. As it is an Option, we can either +have Some(Item) or None. In the latter case, the iterator is considered +as consumed or depleted, i.e. it is not yielding more items. + + A Minimal Pipeline + +The most common and straightforward producer is an inclusive range that +is implemented with (1..=n) (if you leave out the = it becomes +excluding, leaving n out). 
A common way to consume it is by using a for +loop that is designed for this use case: + + let r = (0..=5); + for element in r { + println!("{}", element); + } + +The for loop is shorthand for directly accessing the iterator’s next +method like this: + + let r = (0..=5); + let mut iterator = (r).into_iter(); + while let Some(element) = iterator.next() { + println!("{}", element); + } + + Producers + +Let’s have a look at how we can produce an iterator in the first place. +A general form is the std::iter::from_fn function, where the closure we +pass to the function produces one item after the other: + + fn fib_iter(n: usize) -> impl Iterator { + let mut state = (1,1); + std::iter::from_fn(move || { + let current = state.0; + state = (state.1, state.0 + state.1); + Some(current) + }).take(n) + } + + fn main() { + for i in fib_iter(5) { + println!("{}", i); + } + } + +That is quite a lot in one take, so let’s walk through it. First, we +encapsulate the Fibonacci iterator into a function. The function returns +impl Iterator, an existential type. You can think of this as +it returns something that is an iterator and produces u32s. The compiler +figures out which type it has exactly (actually the Take type). + +Now let’s have a look at the function body. We start with a state that +captures the current pair of Fibonacci numbers (we always need the +current and the last to compute the next). With move we move the state +into the closure (more on closures in the next section; for now this is +just a function with state). The closure itself is then straightforward +if you know how to compute Fibonacci. We take the current number, +produce the next pair of numbers and return it. We have to wrap the +value in Some(), as the closure must return an Option. If we were +returning None in one step, the iteration would end. + +Wait a minute… so we never return None and the iterator never ends? 
This +is correct, we produced an infinite iterator here (which makes sense as +the Fibonacci sequence is infinite too). In the next step, we use the +.take(n) adapter to reduce the sequence to the first n elements. + +Rust also provides us with common iterators: + + let once = std::iter::once(42); + for item in once { + println!("{}", item); + } + + let repeat = std::iter::repeat(5).take(5); + for item in repeat { + println!("{}", item); + } + +So wrapping a single value in a 1-element iterator or repeating it +infinitely work right away. + +Another way to create a sequence (those that only depend on one last +item) is the std::iter::successors method. Here is how we generate +powers of two: + + let pow_of_2 = std::iter::successors(Some(1u8), |n| n.checked_mul(2)); + for item in pow_of_2 { + println!("{}", item); + } + +Note that we do not have to take this apparently infinite iterator. The +reason is that checked_mul returns None when the type (u8 in this case) +would overflow. + +Finally, the Result and Option types are also producers for iterators. +For Option, we consider the Some variant as a 1-element iterator, while +the None variant is an empty iterator. For Result, we have adapter +methods that work on either the success type or the error type, so we +can write different code for the different cases. + + Adapters + +You already saw the take adapter for taking a number of elements from +the iterator. 
This is often used together with the skip() adapter that
leaves some elements out before we take some:
index of the element in the iterator: + + for (i, item) in (5..10).enumerate() { + println!("{}th: {}", i, item); + } + + Consumers + +Eventually, when we have produced and adapted our iterators, we need to +consume them. You already saw for, but note that there are actually +three variants of it: + +- for element in &collection { ... }: items are taken as shared + references +- for element in &mut collection { ... }: items are taken as mutable + references +- for element in collection { ... }: items are moved out of the + collection (which gets invalidated afterwards) + +Often, we are also interested in accumulating the collection using +count, sum, or product: + + fn triangle(n: u64) -> u64 { + (1..=n).sum() + } + + fn factorial(n: u64) -> u64 { + (1..=n).product() + } + + fn main() { + let n = 5; + println!("Triangle {}: {}", n, triangle(n)); + println!("Factorial {}: {}", n, factorial(n)); + } + +We can also identify the largest or smallest element: + + println!("Max: {:?}", [-7, 5, 0, 28, -2].iter().max()); + println!("Min: {:?}", [-7, 5, 0, 28, -2].iter().min()); + +Another common use case is fold, where we accumulate the elements using +a custom initial value and accumulation function: + + let a = [1, 2, 3, 4, 5]; + println!("Sum: {}", a.iter().fold(0, |n, i| n + i)); + println!("Product: {}", a.iter().fold(1, |n, i| n * i)); + +Finally, we get to the most powerful consumer function: collect. With +collect, we can turn an iterator into a collection. Above, you already +saw how we collected the characters into a Vec. We can also collect into +HashMaps: + + use std::collections::HashMap; + + let comics = ["Peanuts", "Calvin and Hobbes"]; + let start_dates = [1950, 1985]; + let start_dates = comics + .iter() + .zip(start_dates.iter()) + .collect::>(); + println!("{:?}", start_dates); + +As collect can work by converting into different collections, you often +either need to annotate the let declaration with a type or use the +turbofish ::<> operator. 
The _ is used to run type inference, as the +Rust compiler can figure out that we use &str keys and u32 values from +the rest of the code. + +When working with Result<_>, collect is also handy as it can turn an +iterator of results into a result of a collection or the first error +that occurred: + + fn open_file(&self, path: String) -> Result { /* ... */ } + fn to_hashmap(self, paths: Vec) -> Result, IoError> { + paths.into_iter() // Iterator + .map(|path| open_file(path)) // Iterator> + .collect() // Result, IoError> + } + + Custom Iterator + +Before we close this section, we want to implement a custom iterator by +hand. Following the idea of the std::iter::once iterator, we create the +extremely helpful Twice iterator: + + struct Twice { + count: u32, + element: u32, + } + + + fn twice(element: u32) -> Twice { + Twice { + count: 0, + element, + } + } + + impl Iterator for Twice { + type Item = u32; + fn next(&mut self) -> Option { + if self.count >= 2 { + None + } else { + self.count += 1; + Some(self.element) + } + } + } + + fn main() { + let t = twice(5); + let c = t.collect::>(); + println!("{:?}", c); + assert_eq!(c, vec![5,5]); + } + + U04: Putting Data Together… and Apart + +Now that you know the fundamentals of Rust, we learn how we can use +parts of the Rust standard library and language to build more advanced +programs that process data, i.e. compute in memory (as opposed to +interacting with the network or operating system). This includes: + +- Structures as well as Enumerations to put related data together, + including behaviour (with methods). +- Deconstructing this related data again using Patterns. +- Leveraging Iterators that allow you to work with Collections of + data. +- Closures that act as callable inputs to functions or to be stored + inside structures. +- Finally, Strings deserve a special mention as a collection for + characters, including the intricacies of human writing systems. 
+ + Patterns + +While Rust offers structs and enums to group together data, it also +provides means to destructure / decompose the same: patterns. + + This section is intentionally kept brief and you should read the + excellent 6th and 18th chapter of the Rust book if you have any doubts + or want a more in-depth introduction to patterns. + +Using a match statement, we can for instance implement useful methods on +the HttpStatus enumeration: + + impl HttpStatus { + fn message(self) -> &'static str { + match self { + Self::Ok => "200: Ok", + Self::NotModified => "304: Not Modified", + Self::NotFound => "404: Not Found", + ... + } + } + } + +This is also the case for patterns that contain data: + + enum List { + Empty, + NonEmpty(Box>), + } + + impl List { + fn head(self) -> Option { + match self { + List::Empty => None, + List::NonEmpty(node) => { + Some(node.element) + } + } + } + } + +Let’s have a look at how this matching is done by executing this piece +of code: + + let mut list = List::Empty; + list.add(5); + list.add(7); + assert_eq!(list.head(), Some(5)); + +When we run head(), self is passed into the match statement +pattern-by-pattern from top to bottom: + + value: List::NonEmpty(ListNode { element: 5, next: ... }) + | + X + | + pattern: List::Empty + +Hence, the first pattern is not matched and we continue with the next: + + value: List::NonEmpty(ListNode { element: 5, next: ... }) + | | + OK | + | V + pattern: List::NonEmpty(node) + +This matches with node = ListNode { element: 5, next: ... } and the +method returns Some(5). + + Pattern Types + +In Rust, patterns are very powerful and they can match on a lot of +different things: + +- Literals (e.g. 1 or "foo") + +- Ranges (e.g. 0..=42) + +- Wildcard, i.e. anything (_) + +- Variables, i.e. the value that matches is assigned to a local + variable (name, mut count) + +- Enum variants (as seen above) + +- Tuples (e.g. (key, value)) + +In the following, we give a couple of examples. 
+ + Literal and Variable Matching + +Here is for instance a modified conversion method of the http_status +conversion method: + + fn http_status_from_u32(n: u32) -> Result { + match n { + 200 => Ok(HttpStatus::Ok), + 304 => Ok(HttpStatus::NotModified), + 404 => Ok(HttpStatus::NotFound), + code => Err(ParseError(format!("Invalid code {}", code))), + } + } + +Here, any code that is not matched by the initial literals is assigned +to code and used to create the Err variant of the Result return type. + + Struct Matching + +Consider the List type we defined in the last section. Using struct +matching, we can implement the add method: + + impl List { + fn add(&mut self, value: T) { + match *self { + List::Empty => { + *self = List::NonEmpty(Box::new(ListNode { + element: value, + next: List::Empty, + })) + } + List::NonEmpty(ref mut node) => { + node.next.add(value); + } + } + } + } + +Using ref mut, we borrow node mutably, so that we can add the value to +it (or recurse again to eventually add it to the last element). + + Matching Multiple Options + +Furthermore, we can combine multiple matches into one, e.g., for another +version of FizzBuzz: + + fn fizzbuzz(n: u32) -> String { + match n % 15 { + 0 => format!("FizzBuzz"), + 3 | 6 | 9 | 12 => format!("Fizz"), + 5 | 10 => format!("Buzz"), + n => format!("{}", n), + } + } + +The | acts as an or so any of the options lead to a match of the +respective arm. + + Dependable Patterns + +With pattern matching, multiple things can go wrong. If you know switch +statements from other languages, you know that in most cases, you have +to put a break; at the end of a case: + + case 3: + case 6: // <- 3 and 6 are used together + result = "Fizz"; + break; + case 10: + result = "Buzz"; + case 0: + result = "FizzBuzz"; + break; + +This code contains an error. Namely, case 10: leads to +result = "FizzBuzz" as break is missing. 
In Rust, this cannot happen and +any match arm is clearly mapped to a single expression and +multi-matchings are done with |. + +Another aspect are two properties match statements can have: they can be +exhaustive and/or overlapping. + +The first property, exhaustiveness is checked by the compiler. You can +validate this by running the following example: + + enum Variants { + FirstHandeled, + Second + } + impl Variants { + fn foo(self) -> String { + match self { + Self::FirstHandeled => format!("foo"), + } + } + } + +As you can see, the Rust compiler rejects this code with an error. + +For the second property overlap, there is also a check: + + fn foo(n: u32) -> String { + match n { + 0..=9 => "Below 10".to_string(), + 0..=19 => "Below 20".to_string(), + n => format!("{} is nothing special", n), + } + } + + fn main() { + println!("{}", foo(42)); + } + +Note that the code here in the book does not present you with warnings. +Here is what you get when you copy the code into a file (e.g., +overlap.rs) and run it with cargo clippy: + + ❯ cargo clippy + warning: some ranges overlap + --> src/overlap.rs:3:9 + | + 3 | 0..=9 => "Below 10".to_string(), + | ^^^^^ + | + = note: `#[warn(clippy::match_overlapping_arm)]` on by default + note: overlaps with this + --> src/overlap.rs:4:9 + | + 4 | 0..=19 => "Below 20".to_string(), + | ^^^^^^ + = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#match_overlapping_arm + +A special case of overlapping is unreachable, where a pattern cannot be +reached because a previous pattern was already covering all cases. Here, +the compiler (not clippy) warns in a way similar to other forms of +unreachable code. + +In summary, Rust ensures that your patterns are exhaustive and warns you +if you made them overlapping by accident. 
+ + S04: Sample Solution + +Lists + + #[derive(PartialEq, Debug)] + enum List { + Empty, + NonEmpty(Box>), + } + + #[derive(PartialEq, Debug)] + struct ListNode { + element: T, + next: List, + } + + impl List + where + T: Copy, + { + fn add(&mut self, value: T) { + match *self { + List::Empty => { + *self = List::NonEmpty(Box::new(ListNode { + element: value, + next: List::Empty, + })) + } + List::NonEmpty(ref mut node) => { + node.next.add(value); + } + } + } + + fn length(self) -> usize { + match self { + List::Empty => 0, + List::NonEmpty(node) => 1 + node.next.length(), + } + } + + fn head(self) -> Option { + match self { + List::Empty => None, + List::NonEmpty(node) => Some(node.element), + } + } + + fn tail(self) -> List { + match self { + List::Empty => List::Empty, + List::NonEmpty(node) => node.next, + } + } + + fn get(&self, index: usize) -> Option { + match index { + 0 => match self { + List::Empty => None, + List::NonEmpty(node) => Some(node.element), + }, + _ => match self { + List::Empty => None, + List::NonEmpty(node) => node.next.get(index - 1), + }, + } + } + } + + fn main() { + let mut list = List::Empty; + list.add(5); + list.add(7); + assert_eq!(list.head(), Some(5)); + } + + #[cfg(test)] + mod tests { + use super::*; + + #[test] + fn test_head() { + let mut list = List::Empty; + list.add(5); + list.add(7); + assert_eq!(list.head(), Some(5)); + } + + #[test] + fn test_tail() { + let mut list = List::Empty; + list.add(5); + list.add(7); + + let mut list_2 = List::Empty; + list_2.add(7); + assert_eq!(list.tail(), list_2); + } + + #[test] + fn test_length() { + let mut list = List::Empty; + list.add(5); + list.add(7); + assert_eq!(list.length(), 2); + } + + #[test] + fn test_get() { + let mut list = List::Empty; + list.add(5); + list.add(7); + assert_eq!(list.get(0), Some(5)); + assert_eq!(list.get(1), Some(7)); + assert_eq!(list.get(2), None); + } + } + +Shape Library + + use std::f64::consts::PI; + + enum Shape { + Rectangle { width: u32, height: 
u32 }, + Square { side_length: u32 }, + Circle { radius: u32 }, + } + + impl Shape { + fn area(self) -> f64 { + match self { + Shape::Rectangle { width, height } => (width * height).into(), + Shape::Square { side_length } => side_length.pow(2).into(), + Shape::Circle { radius } => PI * (radius.pow(2) as f64), + } + } + + fn circumference(self) -> f64 { + match self { + Shape::Rectangle { width, height } => (2 * width + 2 * height).into(), + Shape::Square { side_length } => (4 * side_length).into(), + Shape::Circle { radius } => 2.0 * PI * (radius as f64), + } + } + } + + fn main() {} + + #[cfg(test)] + mod tests { + use super::*; + + #[test] + fn test_area_rectangle() { + let rect = Shape::Rectangle { + width: 4, + height: 3, + }; + assert_eq!(rect.area(), 12.0); + } + + #[test] + fn test_area_square() { + let square = Shape::Square { side_length: 5 }; + assert_eq!(square.area(), 25.0); + } + + #[test] + fn test_area_circle() { + let circle = Shape::Circle { radius: 3 }; + assert_eq!(circle.area(), PI * 9.0); + } + + #[test] + fn test_circumference_rectangle() { + let rect = Shape::Rectangle { + width: 4, + height: 3, + }; + assert_eq!(rect.circumference(), 14.0); + } + + #[test] + fn test_circumference_square() { + let square = Shape::Square { side_length: 5 }; + assert_eq!(square.circumference(), 20.0); + } + + #[test] + fn test_circumference_circle() { + let circle = Shape::Circle { radius: 3 }; + assert_eq!(circle.circumference(), PI * 6.0); + } + } + +Iterative FizzBuzz + + fn fizz_iter(n: usize) -> impl Iterator { + let mut state = 0_usize; + std::iter::from_fn(move || { + let msg = match state % 15 { + 0 => format!("FizzBuzz"), + 3 | 6 | 9 | 12 => format!("Fizz"), + 5 | 10 => format!("Buzz"), + state => format!("{}", state), + }; + state += 1; + Some(msg) + }) + .take(n) + } + + fn main() { + for i in fizz_iter(11) { + println!("{}", i); + } + } + +Word Count + + use std::collections::HashMap; + use std::env; + use std::fs; + + fn main() { + let args: Vec = 
env::args().collect(); + let filename = &args[1]; + let file_content = fs::read_to_string(filename).expect("Something went wrong reading the file"); + + let mut words = HashMap::new(); + for word in file_content + .lines() + .flat_map(|line| line.split_whitespace()) + .collect::>() + { + let counter = words.entry(word).or_insert(0); + *counter += 1; + } + println!("{:#?}", words) + } + +Closure Types + +- closure0: Function Pointer fn(u16) -> u16, implements FnOnce, FnMut, + Fn +- closure1: No pointer, implements Fn, FnMut, FnOnce +- closure2: No pointer, implements FnOnce +- closure3: No pointer, implements FnMut, FnOnce + + Strings + + Strings are complicated! + +When working with collections of characters, we encounter all the +different issues that we have with written human language (e.g. what is +a character symbol, how many are there of it, do we read from +left-to-right or the other way round, …). + + This section is intentionally kept brief and you should read the + excellent chapter 8.2 of the Rust book if you have any doubts or want + a more in-depth introduction to strings. + + Unicode first + +In contrast to other, older languages, Rust has been able to leverage +Unicode quite from the start (others needed major updates to enable +Unicode in all places). The topic itself is so complicated that there +are dedicated books to this, so we only provide a short overview here. + +One of the first and still existing standardized character encoding +approaches is the American Standard Code for Information Interchange +(ASCII). ASCII uses seven bits, giving meaning to the values 0x00 to +0x7f. ISO/IEC 8859-1 is the Western European superset of ASCII which +uses 8 bits (0x00 to 0xff) to also encode characters such as ö, ç or ø. +In Unicode, this is called the Latin-1 code block. In Rust, the String +and str types use the UTF-8 encoding form, where each character is +encoded in a sequence of one to four bytes. Thereby, 0x1f980 becomes 🦀. 
+ + char + +Internally, a String is a collection of bytes. Depending on the Unicode +code point, one to four bytes form a char. chars can be checked for +various properties (e.g is_numeric(), is_whitespace(), …), be converted +to_digit(radix) or char::from_digit(num, radix) using different bases. +Using to_lowercase and to_uppercase the casing can be changed. Finally, +with as u32 or from_u32 we can convert characters to integers (and +back). + + String and str + +The types String and str are guaranteed to only hold valid UTF-8 +characters. They can be created and modified as follows: + + let s = String::new(); + let s = "Hey Ferris".to_string(); + println!("{}", s); + let s = String::from_utf8(vec![0xF0, 0x9F, 0xA6, 0x80]); // 🦀 + println!("{:#?}", s); + let mut s : String = vec!["Hey", "Ferris"].into_iter().collect(); + println!("{}", s); + s.push_str("!"); + println!("{}", s); + +We can search for patterns and even replace parts: + + let string = "Hello Ferris. How are you doing?"; + let index = string.find("are"); + println!("{:#?}", index); + println!("{}", string.replace("Ferris", "Corro")); + +When processing text, a common task is to split by lines, or special +characters/whitespace: + + let file_content = "Id,Name\n42,Ferris\n49,Corro"; + for element in file_content + .lines() + .flat_map(|line| line.split(",") + .collect::>()) { + println!("{}", element); + } + + Formatting + +A common use case for string processing is also to format text in +various ways. The Rust standard library comes with a formatting +language, which you already encountered in U00. The language is the same +across all instances that use a formatting string, e.g. println!() as +you have seen before, but also format!(), which creates a String +in-place. The format parameters have the form {which:how}, which are +both optional — in many cases, we use {} to use the n-th argument. With +which, it is possible to select parameters by name or index. 
With how, +we can control the formatting itself. Depending on the type of the +argument, we have different options at our disposal. Here are several +examples in addition to those shown previously: + + println!("{:+}", 108); // forced sign + println!("{:10}", 108); // minimum field width + println!("{:010}", 108); // minimum field width, leading zeros + println!("{:02x}", 108); // hexadecimal + println!("{:02x?}", [108, 11, 42]); + println!("{:12.2}", 1234.5678); // float formatting + + println!("{:10}", "Ferris"); // minimal field width + println!("{:.5}", "Hello Ferris"); // text length limit + println!("{:>20}", "Hello Ferris"); // alignment + println!("{:=^20}", "Ferris"); // padding + center + + let data = std::rc::Rc::new("Ferris".to_string()); + println!("{:p}", data); // pointer + + Structures and Methods + +When we start to create larger programs, we tend to have values that +“belong” together. For instance, the vessel DSys is building has an +engine that has current parameters such as current operating temperature +and rotations per minute. Ideally, these bits of information are stored +and used together. This is what can be done with structures (or struct +for short) in Rust. Operating on these structures is done with +operations, so both data and behaviour are grouped together; increasing +the maintainability of the code, which is a dependability/quality +property of the code. + + This section is intentionally kept brief and you should read the + excellent 5th chapter of the Rust book if you have any doubts or want + a more in-depth introduction to structures and methods. + +In Rust, we distinguish three types of structures: + +- Named-Field +- Tuple-Like +- Unit-Like + + Named-Field Structures + +First, let’s look at how one can declare a struct: + + struct Engine { + temperature: f64, + rotations_per_minute: u64 + } + +The structure is composed of two fields with distinct names. 
Note that +the struct name is in CamelCase and the field names are in snake_case — +a convention common in Rust. + +Within the same module, a struct can be used as follows: + + let mut engine = Engine { + temperature: 87.5, + rotations_per_minute: 47_000, + }; + println!("Temperature: {}", engine.temperature); + engine.rotations_per_minute += 1000; + +So fields are accessed with .name. When creating a struct based on local +variables, there is a shorthand when variable and field name are the +same: + + let temperature = measure(); + // ... + let engine = Engine { + temperature, + rotations_per_minute: 47_000, + } + +By default, fields are private in Rust. When we access a struct defined +in a different module, there are two options: + +1. the field is declared public and allows for direct access +2. the field is private and provides appropriate get/set or other + manipulation methods + + pub struct EngineDirect { + pub temperature: f64, // <- allowing direct access + rotations_per_minute: u64 + } + + pub struct EngineCapsulated { + temperature: f64, + rotations_per_minute: u64 + } + + impl EngineCapsulated { + fn temperature(&self) -> f64 { + self.temperature + } + } + +Note that the second option is preferred in almost all cases, as it +allows clean capsulation and even enables to provide fields whose public +API is read-only or write-only. + + Behaviour using impl + +In the last example, you already saw a method temperature in action. +Using impl blocks, we can define functions that are either associated +with the type (associated functions) or operate on instances of the type +(methods). + +A typical example for associated functions are constructors, typically +named new: + + impl EngineCapsulated { + fn new(temperature: f64) -> Self { + Self { + temperature, + rotations_per_minute: 0, + } + } + } + +Associated functions do not have a first-parameter self. If such a +parameter is present, we have a method. 
As with other variables, we can +have self in three variants: + +- self, the instance is moved into the function, i.e. the function + must take care of it from now +- &self, the instance is borrowed immutably (typically done for + getters) +- &mut self, the instance is borrowed mutable (typically done for + setters) + +If you already programmed in a different language, the way Rust provides +structs and methods might surprise you. In the light of dependability, +this approach has major benefits: + +1. data and behaviour are separated (struct definition, impl block) — + improving the readability and avoiding that local fields are + overlooked +2. self is explicit, making it clear which functions are associated or + methods +3. the variant of self makes it clear whether the function consumes the + instance (move), reads (&) or writes (&mut). + + Tuple-Like Structures + +In some cases, we do not have dedicated names for fields, but have a +natural mapping to indexes of a tuple. Here is how one can define such a +structure for navigation, a two-element point: + + struct Waypoint(i64, i64); + +The usage works as follows: + + let origin = Waypoint(0,0); + let target = Waypoint(47,11); + println!("x: {}, y: {}", target.0, target.1); + +Again, elements can be made public to be directly accessed from outside +the current module: + + struct Waypoint(pub i64, pub i64); + +Tuple-like structs are especially useful for so-called newtypes; +wrappers around existing types to make them more expressive or usable. + +One use case is annotation, e.g. to create unit-safe interfaces: + + struct Nauticmiles(f64); + + fn forward(distance: Nauticmiles) -> (); + +In this case, the forward method must receive a Nauticmiles struct and +not an f64. Thereby the caller is forced to wrap the number; making the +intent clear and avoiding that an f64 representing imperial miles or +kilometers is passed in accidentally. This is also what the “unit of +measure” (uom) crate provides. 
+ +Another use case is to change the API of a specific type: + + struct AppendOnlyLog(Vec); + + impl AppendOnlyLog { + fn append(&mut self, log: String) -> () { + self.0.push(log); + } + } + +Here, all methods of the inner type are hidden and only the methods of +the impl block are provided. In contrast to using the Vec directly, a +user can not remove elements from the log. + + Unit-Like Structures + +While the use case for the previous two struct types has been clear, the +use case for unit-like structures is a bit surprising. In some +situations, you need to have a structure that does not contain data: + + struct Highlander; + +As the name of this specific struct implies, there can ever only be one +of it, i.e. if you create it two times, they are still considered the +same (actually Rust does not allocate anything and only operates on the +type). Now how is this useful? When we work with traits and build state +machines in U10, this comes in handy. + + Deriving Common Traits + +Defining structs is straightforward, though using them can be a bit +wieldy. For instance, during development, you might want to print the +state of a structure to the console. This is provided by the Debug +trait, which you can implement by hand. As debug output is a rather +clear task, Rust comes with a set of derivable traits where the +implementation is done automatically. This is achieved as follows: + + #[derive(Debug)] // <- does the magic + struct Engine { + temperature: f64, + rotations_per_minute: u64 + } + + fn main() { + let engine = Engine { + temperature: 74.11, + rotations_per_minute: 84_000, + }; + println!("{:#?}", engine); + } + +Later you learn more about these traits and how derivation works (you +can even create your own derivable traits). + + Summary + + What did you learn? + +- The various ways how to structure data and variants in a Rust + program — and allowing to associate behaviour with it. +- Patterns that allow you to differentiate cases and destructure data. 
+- How to produce, adapt, and consume iterators. +- What closures are, how they can be used, as well as how their types + are determined and what this means for their capabilities. +- How the most common collections in the standard library work. +- How string and text handling work in the standard library. + + Where can you learn more? + +- Rust Book: + - Ch. 05 + - Ch. 06 + - Ch. 08 + - Ch. 13 + - Ch. 18 +- Programming Rust: Ch. 09, 10, 14, 15, 16, 17 +- Rust in Action: Ch. 02.10, 03 +- cheats.rs: + - Data Structures + - Functions & Behaviour + - Pattern Matching + - Iterators + - Strings & Chars + + W04: Work Sheet + +Rustlings + +Do the Rustlings exercises structs, enums, vecs, hashmaps and strings. + +Lists + +Using the List and ListNode structures defined in this unit. We have a +slightly update version here, i.e. including derive macros and generic +bounds (you will understand the extra syntax later): + + #[derive(Debug, PartialEq)] + enum List { + Empty, + NonEmpty(Box>), + } + + #[derive(Debug, PartialEq)] + struct ListNode { + element: T, + next: List, + } + +Develop the following methods: * fn length(self) -> usize, counting the +number of elements, * fn tail(self) -> List, return a list of all but +the first element. * fn get(&self, index: usize) -> Option, returning +the index-th element if there is one. + +Geometry + +Write a geometry library including elementary tests (ideally, develop it +test-first). The library should provide the following: + +- A shape enumeration, having the supported shapes as variants. +- Support the following shapes (as structs): Rectangle, Square, + Circle. +- Support the following methods: area, circumference. +- Elementary tests, one per (shape, method) combination. + +Iterators, Collections & Strings + +- Write an iterator-based FizzBuzz solution. + +- Implement a word count program. Input: Path to a file with words. + Output: HashMap with keys (word) and values (count). 
+ +Closure Types + +For each of the following closures, give which traits they implement. +Also indicate if a closure is a function pointer. + + let closure0 = |i : u16| i + 5; + + let v = vec![5, 7, 8, 19]; + let closure1 = |j : u16| j * v.iter().sum::(); + + let v = vec![9, 8, 7]; + let closure2 = move |k: u16| { + println!("Vec: {:#?}, k: {}", v, k); + v + }; + + let mut v = vec!['R', 'u', 's']; + let mut closure3 = |c| v.push(c); + + println!("{}", closure0(5)); + println!("{}", closure1(2)); + closure2(3); + println!("{:#?}", closure3('t')); + println!("{:#?}", v); + +Storyline + +1. Look at the mess. +2. Apply Tidiness Tool 1: Functions. + - Discuss about Operation vs. Integration. + - Put functions under test. +3. Apply Tidiness Tool 2: Modules. +4. Apply Tidiness Tool 3: Objects. + - Extract the Line object. + - Show software cell in code. +5. Apply Tidiness Tool 4: Crate. + - Split binary and library crate. +6. Apply Tidiness Tool 5: Workspace. +7. Revisit Crate Structure. + + U05: Tidy Code + +Today is a special day, as DSys invited Ferris Kondō (a well-known +influencer and coach), who talks about: + +- Minimalism + +- Order + +She is here to help us tidy up our code, introducing a range of tidiness +tools! 
+ + Ordnung ist das halbe Leben + + The Messy Code and its Origin + +Before we are getting started to learn about order, we look at an +example of where order is not present — a showcase where things are +rather messy: + + fn main() { + let args = &std::env::args().into_iter().collect::>()[1..]; + + let (path, length) = match args.len() { + 2 => { + let path = args.get(0).unwrap(); + let length = args.get(1).unwrap(); + let length = length + .parse() + .unwrap_or_else(|_| panic!("Couldn not parse {} to number.", length)); + (path, length) + } + _ => panic!("Must be called with 2 parameters: PATH LENGTH."), + }; + + let words: Vec = std::fs::read_to_string(path) + .unwrap_or_else(|_| panic!("Could not read from file {}.", path)) + .split_whitespace() + .map(|w| w.to_string()) + .flat_map(|s| { + s.as_bytes() + .chunks(length) + .map(|w| String::from_utf8(w.into()).unwrap()) + .collect::>() + }) + .collect(); + + let mut lines = vec![]; + let mut line: Vec = vec![]; + for word in words { + if line.iter().map(|w| w.len()).sum::() + line.len() * 1 + word.len() <= length { + line.push(word); + } else { + lines.push(line); + line = vec![word]; + } + } + lines.push(line); + + let formatted = lines + .into_iter() + .map(|l| format!("{:^length$}", l.join(" ").to_string(), length = length)) + .collect::>() + .join("\n"); + + println!("{}", formatted) + } + +Originally, this code was written to fulfil the following requirements: + + Read from a text file and format so that the length of each line is + bound by a maximum value. + +The idea is that this tool can be used at the command-line like this: + + break german-tale.txt 25 + +taking this input + + Vor einem großen Walde wohnte ein armer Holzhacker mit seiner + Frau und seinen + zwei + Kindern; das Bübchen hieß + + Hänsel und das Mädchen + Gretel. 
+ +and producing this output + + Vor einem großen Walde + wohnte ein armer + Holzhacker mit seiner + Frau und seinen zwei + Kindern; das Bübchen + hieß Hänsel und das + Mädchen Gretel. + +Apart from the idea, there are also a couple of additional requirements +that clarify how certain situations should be handled: + +- (Extraneous) whitespace of the source file is not maintained. +- Punctuation is considered to be part of the word. +- If word is longer than maximum line length, chunk it. + + Why is this code messy? + +First of all, this code is messy as reading the code is already hard. +Second, understanding the code is hard for several reasons: + +- 40 lines are quite a long scopevariables / side-effects can happen + easily, so tracking them can be tough +- concerns are mixede.g. line 38 is responsible for formatting, lines + 2-13 for argument parsing +- abstraction layers are mixedcustom logic, API calls, … +- requirements are not clearly visiblee.g. a word that is larger than + the line length should be put on a separate line and cut in chunks. + +Eventually, testing the code as well as changing it without breaking +anything is hard. + + We want more order, minimalism, cleanness and hygiene. + + Why is messy code a problem? + +Source: Andreas Schmidt + + Visible signs of disorder encourage further disorder. cf. Broken + Window Theory by Wilson and Kelling. + + Principles of Order + +- Don’t Repeat Yourself (DRY)Who needs twenty can openers? + +- Single Responsibility Principle (SRP)Using a knife to open a can + might not be ideal. + +- Integration-Operation Separation Principle (IOSP)Anyone in your + household is either an operator (you opening a tin) or an integrator + (your pet telling you to open the can). 
+ +… there are more, but these are already going to help you make your code +more understandable, testable, and changeable.or to stay in the +metaphor: cleaner, more ordered, and more hygenic + + Stratified Design + +Source + +This approach has originally been described in the context of Lisp +(Abelson et al. 1987). A Stratum is one of a series of layers, levels, +or gradations in an ordered system. The core metaphor here is that low +stratums serve as a basis for higher stratums. For our software, +functional dependencies should follow the abstraction gradient +(i.e. high stratums depend on lower ones). + +Here is an example program, showing which functions call which other +functionality: + +main.rs: + + fn main() { + let application = Application::new(); + application.run(); + } + + impl Application { + fn run(self) -> JoinHandle<()> { + let config = Config::new() + thread::spawn(move || { + // do something with `config` + }) + } + } + +lib.rs: + + #[derive(serde::Serialize, serde::Deserialize)] + struct Config { + // .... + } + + impl Config { + fn new() -> Self { + let content = std::fs::read_string("config").unwrap(); + let config : Config = serde_yaml::from_str(&content).unwrap(); + config + } + } + +This shows the abstraction gradient of this application (A --> B = “A +depends on B”): + + High +--------------- + Abstraction application.run() | Binary + | | | Crate + | +-----------------------------------+ +-----------+ + | | | | Library | + | V | | Crate | + | Config::new() | | | + | | | | | + | +----+ | | | + | | | | +-------+ | + | | V | | serde | | + | | serde_yaml::from_str() | | + | | + | | | | yaml | | + | | | +-------+---+--- + V V V | std + Low std::fs::read_string("conf.yaml") thread::spawn() | (Rust) + Abstraction +--------------- + +In line with stratified design, higher levels should depend on lower +levels and this is the case here. 
What should not happen is that Config +knows about the application it is used to configure (as the Application +is at a higher level than Config). If we adopt this design approach, we +also avoid Seas of Objects as mentioned before. + + S05: Sample Solution + +- Order Principles: discussed in class. + +- Rust Order Tools: Rustlings. + +- Refactor to Order: discussed in class. + + Summary + + On Software Architecture + +What we have been talking about in this section is software design and +software architecture. DSys highly recommends Making Architecture Matter +and other videos by Martin Fowler: + + What did you learn? + +- Why messy code is bad! +- A number of Principles of Order, e.g. the IOSP. +- Tidiness Tools (in Rust) such as + - Functions + - Modules + - Objects + - Crates + - Workspaces + - Repos +- Rust Module & Object Systems +- Software Cells +- Software Architecture Matters + + Where can you learn more? + +- Rust-Book: Ch. 07, 17 +- Programming Rust: Ch. 08 +- Rust in Motion: Module 1 +- Rust for Rustaceans: Ch. 04, 06, 14 +- cheats.rs: Organizing Code, Project Anatomy +- Software Flow-Design (in Deutsch) + + Let’s Tidy Up + +In the following video, we use the code showed before and refactor it to +provide order: + +At Rust-Saar, a similar presentation was made. There, we applied even +more refactorings (to make the code clean) but did not introduce crates. +At the end, we arrived at the following code. + + Final Confession + + The presented code was first designed carefully and then order was + destroyed.Following the Software Flow-Design approach by Ralf + Westphal. + +However, the approach showed here can also be applied to code that was +not carefully designed upfront. But as you can imagine, things get +complicated quite quickly, so ideally you try to be a good boy/girl +scout. 
+ + Tidiness Tools + +In order to turn our messy code into code with order (or start right +away with clean code), we introduce you to the various tools you can +use: + +- Tool #1: Functions + +- Tool #2: Modules + +- Tool #3 Objects + +- Tool #4: Crates & Packages + +- Tool #5: Workspaces + +- Tool #6: Repos + + Tool #1: Functions + +A freestanding function like + + pub(crate) fn split_words(content: &str) -> Vec { + content.split_whitespace().map(|w| w.to_string()).collect() + } + +- encapsulates purposehints on the purpose are given by name, + signature, and visibility + +- can be unit tested effectively* + +- has a scope that + + - defines visibility (hides variable names, …) + + - implements Resource acquisition is initialization (RAII) + +- can be an integrating or operation function + + * or at least better than one large main function; if 1) your types are +hard to construct or 2) your function works on resources, you might +still have a hard time + + Operation vs. Integration + +Operation + +- Logic +- Operators (+, - , /) +- API calls to external functions + +Operation Examples + +- if x == 5 { return 0; } +- x.push_str('foo') +- fs::read_to_string("file.txt") + + fn read_file(path: &str) -> String { + // could be more complex, + // e.g. with error handling + std::fs::read_to_string(path).unwrap() + } + +Integration + +- API calls to internal functions + +Integration Examples + +- any call to a function in your crate + + fn main() { + let (path, size) = tui::parse_args(); + let content = read_file(&path); + // ... 
+ let task = Task::from_str(content); + let report = analyze(task, size); + tui::output(&report); + } + + Tool #2: Modules + +The following is a module that + + mod tui { + pub(crate) fn parse_args() -> (String, usize) { + let args = &std::env::args().into_iter().collect::>()[1..]; + match args.len() { + 2 => { + let path = args.get(0).unwrap(); + let length = args.get(1).unwrap(); + let length = length + .parse() + .unwrap_or_else(|_| panic!("Couldn not parse {} to number.", length)); + (path.into(), length) + } + _ => panic!("Must be called with 2 parameters: PATH LENGTH."), + } + } + + pub(crate) fn output(formatted: &str) { + println!("{}", formatted) + } + } + +- encapsulates purpose (on higher stratum than functions) + +- hides information and functionality + + Rust’s Rules of Visibility + +- Rust’s modules build up a tree, crate is the current crate’s root + element. + +- Per default, Rust items (modules, functions, …) are private.only + visible within the current module and below + +- Visibility can be changed to: + + - pub: public (can be seen from everywhere) + - pub(crate): public within this crate + - pub(super): public in parent module + - pub(in path): public in module in path (path must be subpath of + item’s path) + +- Items can be brought into the current namespace by useing them. + +- With pub use (or any other visibility modifier), an item can be + re-exported. + + Rule: Importing from above is ok, from below needs permission. + +Modules in Separate Files + +The keyword mod can be used to structure a file: + + // lib.rs + mod tui { + fn output(text: &str) { ... } + ... + } + + mod domainlogic { ... } + +However, it is more common that separate files are used: + + // lib.rs + mod tui; + mod domainlogic; + + // tui.rs or tui/mod.rs + fn output(text: &str) { + ... 
+ } + +This results in a project structure like this: + + src/ + - tui/ + - mod.rs // <---- either this + - format.rs + - lib.rs + - main.rs + - tui.rs // <---- or this + +Having multiple files has (at least) the following benefits: + +- Lessens the probability of Git merge conflicts. +- Smaller files are typically more accessible. + + Preludes + + Preludes can be seen as a pattern to make using multiple types more + convenient. - Rust Docs + +In every Rust module, the compiler inserts: + + use std::prelude::v1::*; + +For many crates, there are also preludes you can import by yourself: + + use chrono::prelude::*; + +Note that preludes can be harmful for dependability: + +- they can introduce naming conflicts, if multiple crates use types + with the same name +- they can occlude where types are coming from, making code harder to + understand +- they are hard to update, as code where they are used is often + tightly coupled to what they contain + + Tool #3 Objects + +Data-only objects + + enum Justification { + Left, + Right, + Center, + } + + pub(crate) struct Line { + words: Vec, + maximum_length: usize, + } + + type Words = Vec; + +- provide variants (enum) + +- group related information in memory (enum, struct) + +- provide better-to-use names (type) + +- support #[derive(..)] (e.g. Debug, Eq, …) + + Non-Anemic Data-Classes + +- Sometimes, people advise making classes method-free, aka they only + carry data. + +- Martin Fowler and Eric Evans called this the Anemic Domain + Model.anemic = too few red blood cells; lack of energy + +- When we work in object-oriented languages, our domain models should + be rich, i.e. structs should have appropriate methods. + +- In many cases, dot syntax (method-style) makes your code easier to + grasp. + +Rust’s Methods + +Defining our data and behaviour: + + pub(crate) struct Line { + words: Vec, + maximum_length: usize, + } + + impl Line { + // ... 
+ } + +Adding an associated function: + + // in impl block + pub(crate) fn new(maximum_length: usize) -> Self { + Self { + words: vec![], + maximum_length, + } + } + +Methods start with self or &self or &mut self: + + // in impl block + pub(crate) fn try_push(&mut self, word: String) -> Option { + let current_length: usize = self.words.iter().map(|w| w.len()).sum(); + let current_length_with_separator = current_length + (self.words.len()) * SEPARATOR_LENGTH; + if current_length_with_separator + SEPARATOR_LENGTH + word.len() <= self.maximum_length { + self.words.push(word); + None + } else { + Some(word) + } + } + + Extension Traits + +Challenge + +- roxmltree is used to work with XML structures. + +- attribute(name) returns an Option, but in our context, a None would + be a ParsingError. + + let name = xmlnode.attribute("name")?; // <- ? is impossible as attribute() returns Option + +Solution + + use path::to::GetAttribute; + + let name = xmlnode.try_get_attribute("name")?; + + pub trait GetAttribute { + fn try_get_attribute(&self, attribute: &str) -> Result<&str, ParsingError>; + } + + impl GetAttribute for roxmltree::Node<'_, '_> { + fn try_get_attribute(&self, attribute: &str) -> Result<&str, ParsingError> { + self.attribute(attribute) + .ok_or_else(|| ParsingError::MissingXMLAttribute(attribute.to_string())) + } + } + + The Software Cell + +- Functional Code is (usually) free of: + + - mutable data + - state + - side effects + - resource access + +- Functional Code is great for testability. + +- Imperative Code that barely contains logic often needs no test.What + do you get from testing if println!() really works? + + +-----------------------------+ + | | + | Imperative Shell | + | (e.g. access DB) | + | | + | +---------------------+ | + | | | | + | | Functional Core | | + | | (e.g. 
compute | | + | | order total) | | + | | | | + | +---------------------+ | + +-----------------------------+ + + Tip: Try to keep your domain logic free of imperative code and + dependencies on resources (sockets, database, but also time, …). + +For more details, consider this Twitter client example. + + Tool #4: Crates & Packages + +Crates + +- are composed of modules, with a crate root being the top-level + module +- lib.rs is the default top-level file for library crates +- main.rs or bin/*.rs are the default top-level files for binary + crates + +Packages + +- improve separation and support collaboration +- have a version (could also be just a Git commit hash) +- contains zero or one library crate and arbitrary many binary crates +- can be put on crates.io + + Tool #5: Workspaces + +Workspaces can be used for grouping together multiple parallel packages +(e.g. in a single repo). Therefore, we must put [workspace] in the +top-level Cargo.toml like this: + + [workspace] + members = [ + "fancy-rs", + "fancy-rs-cli-util", + "cli", + ] + +As a result, Cargo.lock, compilation settings, and output directories +(target) are now shared for all packages in the workspace. + + More details can be found in the Cargo Book. + + Tool #6: Repos + +- Allow you to organize your project’s history (commits) and variants + (branches). + +- Supporting tools (e.g. GitLab) allow to manage the project + surroundings (issues, wiki, website, continuous integration, …). + +- Normally, each package on crates.io has a dedicated repo (often on + GitHub) to facilitate collaboration. + + How to size your repo is a popular topic of discussion: Mono- or + Multi-Repo? + + W05: Work Sheet + +Order Principles + +- Think about the last time you had to review someone else’s code (if + you haven’t yet or can’t remember, ask a fellow student to show you + some recent code). Describe how well you could comprehend the code + and describe which principles the code adhered or didn’t adhere to. 
+ Come up with ideas on how the code can be changed to have more + order. + +- Reconsider your fizzbuzz code you wrote in U02 and make it adhere to + the IOSP principle. + +Rust Order Tools + +- Do the Rustlings exercises modules. + +Refactor to Order + +Consider the following binary Rust crate with its Cargo.toml: + + [package] + name = "greeter" + version = "0.1.0" + authors = ["Ferris Kondō"] + edition = "2018" + + [dependencies] + csv = "1.1" + +and main.rs: + + fn main() { + println!("Name:"); + let mut name = String::new(); + std::io::stdin() + .read_line(&mut name) + .expect("Failed to read line"); + let name : &str = name.trim().into(); + + const GUEST_FILE: &str = "guests.csv"; + + let file = std::fs::OpenOptions::new() + .create(true) + .append(true) + .open(GUEST_FILE) + .expect("Could not work with file."); + + csv::Writer::from_writer(file) + .write_record(&[name]) + .expect("Could not write."); + + let file = std::fs::OpenOptions::new() + .create(true) + .read(true) + .write(true) + .open(GUEST_FILE) + .expect("Could not work with file"); + + let visits = csv::Reader::from_reader(file) + .records() + .into_iter() + .filter_map(|result| { + let record = result.expect("Couldn't not read entry"); + if let Some(r) = record.get(0) { + if r == name { + return Some(1); + } + } + None + }) + .sum(); + + let greeting = match visits { + 1 => format!("Hello, {}!", name), + 2 => format!("Welcome back, {}!", name), + 25 => format!( + "Hello my good friend, {}! Congrats! You are now a platinum guest!", + name + ), + _ => format!("Hello my good friend, {}!", name), + }; + + println!("{}", greeting); + } + +Your task is now to refactor this into something that has more order, is +cleaner and hence more comprehensible and maintainable. Proceed as +follows: + +1. Bring the system under test to ensure you are not breaking anything. + Do so by + + 1. identifying the domain logic in the program, + 2. extracting it into a function, and + 3. 
writing regression tests against it that capture what the system + is currently doing. + +2. Use your first tidiness tool and introduce functions, where you feel + like blocks of code belong together. At least your main() function + should become a pure integration function. + +3. Use your second tidiness tool and introduce modules to group + functionality together (e.g. by concern). + +4. Use your third tidiness tool and introduce objects (they can share a + module): + + - VisitEntry: The entry can be constructed from a multi-line + string (fn from_str(name: &str) -> VisitEntry), can be turned + into a greeting (fn to_greeting(&self) -> String) and has public + getters for its fields. + - VisitDatabase: The database can be created by specifying a path + (fn new(path: &str) -> VisitDatabase) and supports two + functions: fn register_visit(&mut self, name: &str) -> () and + fn retrieve_visits(&self, name: &str) -> u32. + +5. Use your fourth tidiness tool and split the functionality into + crates. There are some functions that deal with logic you need for a + command-line interface application. These should remain in the + binary crate. Extract the remaining functions into a parallel + library crate so that other user interfaces (e.g. a web GUI) can be + used with the same logic. The binary crate afterwards uses the + public API of greeter. + +6. Use your fifth tidiness tool to split the crates into several + folders of a workspace. After 5. you have two crates in one folder: + a binary and a library. Change the structure into a Rust workspace, + where you have two members: greeter (the CLI) and greetings (the + library). + +7. Use your sixth tidiness tool to turn your workspace into a Git + repository. Add a README.md explaining the usage and a sensible + .gitignore. Push the results to a GitLab repository. + +Storyline + +1. Look at the initial code. +2. Extract a function. +3. Deal with two error types. +4. Introduce ?. +5. Introduce custom error type. +6. 
Add thiserror. +7. Add color_eyre. + + U06: How to Err + +After having developed algorithms and data structures to compute things, +the senior engineers want to introduce you to the code at DSys that +involves interacting with the operating system or other systems. First, +you learn about what can go wrong (in Rust and other languages) and what +different handling strategies there are. With these basic differences in +mind, we first look at std support for errors and later at third-party +crates to work with errors. + + S06: Sample Solution + +- Rustlings: discussed in class. + +- Refactor: discussed in class. + +std Error Handling + + Don’t panic! …unless something happened that must never ever happen + +panic!() is your Emergency Stop and allows you to handle programming +mistakes. + + enum Color { + Orange, + Boring + } + + fn parse(color: &str) -> Color { + match color { + "Orange" => Color::Orange, + "Boring" => Color::Boring, + _ => unimplemented!("All colors but orange are boring") + } + } + +In this example, any non-orange color is considered boring and if a +different string is passed to parse the program panics (maybe this is a +bit exaggerated behaviour by Ferris). + + When should you panic? + +If you answer any of the following with yes, then panic!(): + +- Is continuing with the program incorrect? + +- Did you attempt to access memory that you must not?either because + it’s not yours or uninitialized… + +- Is there no way that your caller could recover from the current + situation?e.g. caller asked you to do something that is knowingly + unimplemented!() + +- Would you need to change the code to fix it? + +- Is this failure really absolutely unexpected? + +Are you writing a library? If yes, panicking is generally discouraged. + + Panic first, change later! (aka “Fail fast”)except if you write + safety-critical software where stopping is not a safe state! 

Nice Panicking Macros

- unreachable!: impossible location — at least this is the programmer’s
 assumption

- todo! / unimplemented!: not yet implemented

- assert!: check preconditions, tests

A Matter of Expectations

Expect Results

 enum Result<T, E> {
 Ok(T),
 Err(E)
 }

Success is expected and Failure the exception

Example: Parsing Numbers

 let number : Result = guess.parse();

Check your Options

 enum Option<T> {
 Some(T),
 None
 }

Both cases are expected

Example: Vector Access

 let head : Option = list.get(0);

What to do with Results & Options?

 Success (Ok(T) / Some(T))

- unwrap — recoverable to unrecoverable panic!
- expect("..") — preferred over unwrap
- unwrap_or_else(|| Default {}) — closure generates default value
- unwrap_or_default() — if T implements Default
- is_ok, is_some — mostly used in tests

 Failure (Err(E) / None)

- unwrap_err — panic if Ok; common in tests
- expect_err("..") — analogous; common in tests
- is_err, is_none — mostly used in tests

General Handling

- match option { ... } — for any non-boilerplate handling
- if let Some(..) = opt { ... } — might produce confusing code

 Conversions

- result.ok() — Result -> Option
- opt.ok_or(err_value : E) — Option -> Result

Return Results

Return a Result:

 fn get_guess() -> Result {
 let mut guess = String::new();
 io::stdin()
 .read_line(&mut guess)
 .expect("Failed to read line");
 guess.trim().parse()
 }

Alternatively, you can return an opaque error:

 fn get_guess() -> Result> {
 let mut guess = String::new();
 match io::stdin().read_line(&mut guess) {
 Ok(_) => {}
 Err(e) => return Err(Box::new(e)),
 }
 match guess.trim().parse() {
 Ok(r) => Ok(r),
 Err(e) => return Err(Box::new(e)),
 }
 }

What ?

 fn get_guess() -> Result> {
 let mut guess = String::new();
 io::stdin().read_line(&mut guess)?;

 Ok(guess.trim().parse()?)
 }

- Leverages the From trait. In our case: automatically boxes into
 std::error::Errors. 
+ +- Older code used try!(..) which does the same.No longer recommended + as it is more verbose and less “chainable” + +Mapping Errors + +- Imagine Result and Option as lists with either 0 or 1 element. + +- map and map_err allow to transform one of the variants, while + keeping the other. + +- Example: Transformation into custom errors (e.g. in a library). + + let threshold : f64 = threshold.parse().map_err(|_| { + MarvinRsError::ParsingError(format!("Could not parse threshold: {}", threshold)) + })?; + +Use Your Results for Great Good + +We lied to you a little bit before. As in C, Rust allows you to +accidentally ignore an error, if the function returns Result<(), E> +(i.e. no result is returned that you would consume). However, Result in +Rust is #[must_use], so by default rustc warns you in this case: + + Compiling readfile v0.1.0 (file:///.../readfile) + warning: unused `std::result::Result` which must be used + --> src/main.rs:8:5 + | + 8 | file.read_to_string(&mut content); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + = note: #[warn(unused_must_use)] on by default + +And you can do even better with this at the top-level module in all your +crates: + + #![deny(unused_results)] + + Now every unused result hinders successful compilation! + +Custom Errors + +If you are writing a fancy lib crate, here is how you can implement your +custom error: + + #[derive(Debug)] + enum CustomError { + Io, + Parsing, + } + + impl Error for CustomError {} + + impl Display for CustomError { + ... + } + + impl From for CustomError { + fn from(_: std::io::Error) -> Self { + Self::Io {} + } + } + +This is lots of work… and we see later how to save effort here. + +Rust’s error handling is cool + +As usual with Rust, it forces you to be explicit and say what you want! +For instance, you are forced to clearly separate panics from recoverable +errors. 
In consequence, your programs fail fast, loud, and very close to +the fault: + +- loud: you cannot easily ignore error (compare C) +- fast: a panic immediately halts your program +- close: usually, there is no need to search long for root cause + +The expect(...) function is a good way to document the programmer’s +assumption. Furthermore, function signatures make failure possibility +explicit. Finally, the compiler enforces error handling, as there is no +way to access the inner value of Result without handling. + +But Rust error handling is also tedious, as… + +- the compiler is telling all possible ways in which your program can + faileven impossible ones that it simply can’t check statically + +- Rust focusses on the sad path through your program instead of the + happy pathis rustc a relative of Marvin? + + Prototyping Tip: Use expect a lot or cheat your way to success with + unwrap. + + Summary + + What did you learn? + +- std Error Handling + - panic! for things that should never ever happenand in which case + crashing is safe + - Result for things that should work + - Option for things that could work +- 3rd party error handling + - anyhow if you don’t care too much and talk to a user + - thiserror if you care and talk to other software components + + Where can you learn more? + +- Rust Book: Ch. 09 +- Programming Rust: Ch. 7 +- Rust in Action: Ch. 8.5 +- Rust for Rustaceans: Ch. 5 +- Embedded Software Development for Safety-Critical Systems: Ch. 8, 9 +- Nick Cameron’s Error Docs + +Third-Party Error Handling Crates + + WARNING: Ecosystem under heavy construction work + +Consider for example the following online resources: + +- Aug 20th 2020: RustConf, Jane Lusby on Error Handling +- Sep 18th 2020: Announcing the Error Handling WG + +However, we recommend two crates that help you handle and report errors. 

thiserror

Use if you care about the exact error, because

- you write a library that you provide to others
- your code is communicating with other pieces of software, that might
 be able to recover

 [dependencies]
 thiserror = "1.0.20"

 use thiserror::Error;

 #[non_exhaustive]
 #[derive(Error, Debug)]
 enum CustomError {
 #[error("IO")]
 Io(#[from] std::io::Error),
 #[error("Parsing : {0:#?}")]
 Parsing(#[from] std::num::ParseIntError),
 }

anyhow

Use if you don’t care too much about the exact error, because

- you present it to a user and not another piece of software
- you have custom error handling / reporting mechanism

This is forked by eyre to include error reporting via backtraces.
color-eyre encapsulates eyre and improves the visual representation.

 [dependencies]
 color-eyre = "0.5"

 use color_eyre::eyre::Result;

 fn main() -> Result<()> {
 color_eyre::install()?;

 // ...
 Ok(())
 }

Other crates you might encounter

- quick-error + error-chain: The old guard (may be encountered in
 older code)

- failure: Precursor to thiserror.

- fehler: Pitching #[throws] syntax and implicit Ok-wrapping.

- snafu: Similar to thiserror.

 W06: Work Sheet

- Do the Rustlings exercises error_handling.

- Consider the “Refactor to Order” task on W05. There are several
 instances of errors being expect-ed. Your task is to

 - introduce an error enumeration using thiserror,
 - change the main and refactored functions to return Results. For
 the main function, use the color_eyre Result as you only report
 errors; other functions should have your custom error type as
 the error variant of Result,
 - replace all calls to expect with appropriate calls to ?; use
 map_err if you need to convert a std error to your custom error
 type, and
 - validate the created implementation by intentionally introducing
 faults that lead to errors. 
+ + Let’s Work with Errors + +In the following video, we use the Guessing Game from Rust Book Chapter +2 as a basis and introduce more elaborate error handling and reporting: + +At the end of the video, Andreas forgot to add + + color_eyre::install()? + +to the beginning of main. If you do so, the output is also colorful: + +[Color Eyre Output] + + What Can Go Wrong? + +Before you get started, you think about what can go wrong in larger +software systems and come up with the following answers: + +- Programming Faultse.g., bugs, errors in specification, … + +- System Errorse.g., can’t open file + +- User Errorse.g., provide wrong input… (intentionally?) + +Remember our considerations about faults, errors, and failures in U01. +The major focus of this unit is going to be on + +- fault prevention (some concepts we learn to avoid that we introduce + faults) and + +- fault tolerance (both by handling or reporting errors that are + caused by a fault) + +to create more dependable systems. + +When an error occurs, how can this be handled? + +- Stop the program immediately +- Attempt to recover from the situation by… +- Repeating +- Doing something else +- Resorting to a well-known default +- Pass it up the responsibility chain +- Notify the user + +C Way to “Exception Handling” + +Before we look into how error / exception handling is done in Rust, we +have a look at how the C language handles this: + + struct sockaddr_in address; + int sockfd = socket(AF_INET, SOCK_STREAM, 0); + // everything alright? + address.sin_family = AF_INET; + address.sin_addr.s_addr = INADDR_ANY; + address.sin_port = htons( PORT ); + bind(sockfd, (struct sockaddr *) &address, sizeof(address)); + // still? + +In this snippet, there are multiple places where we can fail, e.g. + +- socket (might fail due to missing permissions, lack of unused file + descriptors, …) and +- bind (might fail due to invalid configuration). 
+ +The code does not show any kind of error handling and this is in fact +the case: no handling is done and in the erroneous case, the program +continues… doing potentially harmful things (e.g. binding to a negative +sockfd). + +Rust Approach with Result + +In Rust, the approach is quite different. Here, you see a similar +example (network handling): + + fn handle_client(stream: TcpStream) { ... } + + fn main() -> std::io::Result<()> { + let listener_r : Result = TcpListener::bind("127.0.0.1:80"); + let listener : TcpListener = match listener_r { + Ok(l) => l, + Err(_) => panic!("Failed to bind"); + }; + // let listener = TcpListener::bind("127.0.0.1:80"); + for stream in listener.incoming() { + // defined on TcpListener ^ not on Result + handle_client(stream?); + } + Ok(()) + } + +Rust makes sure that + +- you are able to properly implement these different error causes and + error handling mechanisms. +- you do it properly — by enforcing error handling. + +thereby making you create more reliable software. + +Null Handling + +A somewhat similar case to the error handling is handling of the NULL +value you already learned about in U03 — Tony Hoare’s billion-dollar +mistake. + +In many older languages (or code written in old versions of them), +handling is done like in this Java example: + + public class MainClass { + static String hundredth_function(DbEntry entry) { + return entry.name; + } + + public static void main(String[] args) { + // ... + DbEntry entry = db.get_entry(); + first_function(entry); + } + } + +which might lead to + + Exception in thread "main" java.lang.NullPointerException: Cannot read field "name" because "" + is null + at MainClass.hundredth_function(MainClass.java:6) + at MainClass.main(MainClass.java:11) + +In this scenario, you can enjoy tracing back the error to the point +where it became null but shouldn’t have. The issue here is that null has +the same static type as an instance of the type used. 
Hence, checking +for null must be done manually (and also causes runtime costs). + +In Rust (and nowadays in modern C#, Kotlin, …) we have the Option type: + + fn hundredth_function(entry: Entry) -> String { + entry.name.clone() + } + + // ... + + fn main() { + let entry : Option = db.get_entry(...); + first_function(entry); + } + +Here, to debug a None value, you only have to check where Option’s are +passed around. Furthermore, checks for None are enforced before you are +allowed to access the inside of the Option. + +Note that some of the older languages nowadays have support for a +Option-like language construct. However, they do not apply this as +thoroughly as Rust, as legacy code was created without this approach and +is still around. + + Application Programming Interfaces (APIs) + +When you develop software, it always provides a means to interface with +it. While applications provide, for instance, graphical or terminal user +interfaces, a software library or framework provides an Application +Programming Interface. What is important about the latter is that +applications or other libraries again build on top of the library, hence +depend on the API. When you are the author of that library, users care +about the way your API is designed and maintained. + +API Properties + +Rust for Rustaceans introduces four properties an ideal API should have: +unsurprising, flexible, obvious, and constrained. As always with +properties, they cannot be maximized at the same time, so it is your +task to find a good balance. + + Unsurprising + +There are situations in life where surprises might be appropriate or +even appreciated[10]. When developing dependable software, that is +certainly not the case. Surprises come in many forms, but at the core, +they are expectations that are not met. 
For instance, a functionality +having a surprising name (e.g., frobnicate on a list to add an element) +or functionality not being provided as expected (e.g., as in other +established solutions). + +This brings us to the principle of least surprise / law of least +astonishment, stating that an interface should work in a way it is +expected by an as-large-as-possible group of users. For our dependable +Rust code, this means that we: + +- Follow naming practices: the standard library as well as popular + third-party crates have their own taxonomy to name behaviour they + are providing, e.g., iter() methods to produce an iterator of + elements. If you provide a way to iterate over your data structure + that method should for sure use iter() as a name. If you do so, make + sure the behaviour is really consistent with the way how others + implement iter(), because it might also be surprising if you re-use + a name for slightly different functionality. Finally, if you work in + a certain application domain, it is also good advice to use terms + from this domain as consistently as possible. +- Implement common traits: the standard library as well as popular + third-party crates (e.g., serde) provide traits that might be + interesting for the data structures in your API. This is especially + important because users of your API cannot retroactively add it to + the types you are defining (you can only implement traits on types + from the crate you are developing). Hence, if any of the standard + traits (e.g., Debug, Clone, or Default) make sense for your + implementation, add them. In many cases, you might also want to + allow for equality checks (PartialEq and Eq), ordering (PartialOrd + and Ord), or hashing (Hash). + + Flexible + +Additionally, APIs should be flexible so that users have the option to +use them in as many contexts as possible. This includes avoiding +unnecessary restrictions that usually come in the form of function +parameter types. 
An example restriction would be to only implement a +function for a String parameter and not for &str or other Rust string +types. Function return types and values are what our API promises — and +should be only limited to those that it can keep. + +A set of examples are the following function signatures that implement +different contracts (have different restrictions and promises): + + fn frobnicate1(s: String) -> String + fn frobnicate2(s: &str) -> Cow<'_, str> + fn frobnicate3(s: impl AsRef) -> impl AsRef + +All have in common that they take and return string types. For the +first, the caller must own the String and move it into the function, +which in turn returns another owned String. Making this function +allocation-free is not possible in a backwards-compatible way. For the +second, the caller is not required to own the string, but only needs a +reference (if they own it, they must convert it to &str). Returning a +Cow (copy-on-write) means it could be a reference or owned variant. +Changing this later is also not backwards-compatible. For the third, we +have very low restrictions as it only specifies that something that can +be converted to a string reference is passed in and returned by the +function. + +Note that there is no better or worse API, but it depends on what you +want to achieve today and how you expect this API to change in the +future. Deciding whether parameters must be owned or borrowed is one of +the most common API decisions you have to make. + + Obvious + +An obvious API makes it as easy as possible for users to understand the +interface and as hard as possible for them to use it incorrectly. This +can be achieved by two means: + +First, by elaborate documentation. This includes special sections on +panics (i.e., where the API could be used inappropriately, stopping +everything), errors (i.e., where inappropriate usages can be handled), +and safety aspects (i.e., invariants that must be upheld when working +with unsafe interfaces). 
Ideally, the documentation also contains +end-to-end examples, showcasing how to use the API. + +Second, the type system helps to encode how the API should be used. +Having dedicated types, using traits for shared functionality, etc. help +to make the interface obvious, self-documenting (no additional text is +needed), and misuse-resistant (type mismatches are caused by +inappropriate usage of types). One example for the latter is semantic +typing that you have already seen in U04, where we used enums to +properly name boolean variants or newtype structs. + + Constrained + +Finally, it is a common truth that at some point in time, every piece of +your API (everything that is public) will be used by someone and changes +to these elements become backwards-incompatible. + +For our dependable Rust code this means we should: + +- Be careful with public fields. If all fields of a struct are public, + the struct can be created using the StructName { ... } syntax. If we + later want to add or remove a field from the struct, this breaks all + usages. Instead, it is advised to either a) do not use public fields + at all or b) declare #[non_exhaustive] on the struct, to prohibit + the use of said construction mechanism. +- When re-exporting types from other libraries, the newtype pattern + should be applied and methods should be provided on the newtype. + Thereby, we promise less and changes to the inner type can be hidden + from the outside. + + (Semantic) Versioning + +Though there are plenty of ways to identify versions of software, the +Semantic Versioning (SemVer) is one of the most common approaches to +this. These version numbers most of the time consist of three parts: +MAJOR.MINOR.PATCH (e.g., 3.1.4). Sometimes, additional labels are added +to indicate pre-release versions or build metadata (e.g., 3.1.4-alpha or +3.1.4-b68177). SemVer forces you to increment the: + +1. MAJOR version when you make incompatible API changes (aka breaking + changes), +2. 
MINOR version when you add functionality in a backwards-compatible
 manner, and
3. PATCH version when you make backwards-compatible bug fixes.

Using conventional commits we covered before, we add a BREAKING CHANGE:
footer to the respective commit message (e.g., like this). Afterwards,
an increment in the MAJOR version is required.

A special case of semantic versioning is Calendar Versioning (CalVer).
Many projects out there use a date-based version (e.g. using the release
year as the major version). CalVer is an attempt to standardize date-
(or better calendar-)based version schemes. A popular example is the
Ubuntu Linux operating system that uses this scheme: . Ubuntu is
released twice a year (in April and October), so that this year’s
releases would be 22.04 and 22.10.

 cargo-semver-checks

Cargo packages are built with the SemVer approach in mind. Hence, when
you are providing a library crate with an API, you should ensure that
your package versioning policy follows SemVer. The Cargo Book has a
chapter on SemVer Compatibility, outlining how modifications of your API
should be reflected in the version. This is in plain English, and to be
honest, it is very easy to modify your code and forget about its impact
on the API. Therefore, the community has created cargo-semver-checks to
automate the process—allowing CI release checks as well. Eventually, it
is planned that this plugin becomes part of cargo itself.

Assume you have the following lib.rs:

 pub fn get_blacklist() -> Vec<&'static str> {
 vec![
 "8.8.8.8"
 ]
 }

published using the following Cargo.toml

 [package]
 name = "foss-rs"
 version = "1.0.0"
 edition = "2021"

Following the general trend to avoid exclusionary language, we want to
provide a denylist in the future. After changing the function name, we
run cargo semver-checks check-release --baseline-rev f7e8a5 (using a Git
revision as an example). 
This yields + + Cloning f7e8a5 + Parsing foss-rs v0.1.0 (current) + Parsing foss-rs v0.1.0 (baseline) + Checking foss-rs v0.1.0 -> v0.1.0 (no change) + Completed [ 0.063s] 22 checks; 21 passed, 1 failed, 0 unnecessary + + --- failure function_missing: pub fn removed or renamed --- + + Description: + A publicly-visible function cannot be imported by its prior path. A `pub use` may have been removed, or the function itself may have been renamed or removed entirely. + ref: https://doc.rust-lang.org/cargo/reference/semver.html#item-remove + impl: https://github.com/obi1kenobi/cargo-semver-check/tree/v0.14.0/src/queries/function_missing.ron + + Failed in: + function foss_rs::get_blacklist, previously in file src/lib.rs:5 + Final [ 0.064s] semver requires new major version: 1 major and 0 minor checks failed + +Only after changing the version to 2.0.0, the check passes successfully. + + Documentation + +Let’s start from your own experience… have you ever attempted to use a +third-party library for a programming project of yours? Was it +documented at all? If yes, how good, extensive, and up-to-date was the +documentation? + +As you hopefully realized, documentation is important for developing +software. And as software engineers, in many cases, it is our task to +write the docs. The good news is, however, that in many situations, +documentation can easily be written alongside code, so that the +development workflow does not need to change. + +But what is documentation exactly? + + When we refer to “documentation,” we’re talking about every + supplemental text that an engineer needs to write to do their job: not + only standalone documents, but code comments as well. - Software + Engineering at Google + +With documentation, we answer, for instance, the following questions +from the “SWE at Google” book: + +- Why were the design decisions made? +- Why did we implement code in this manner? 
+- Why did you implement this code in this manner, if you’re looking at + your own code two years later? + +Despite being able to answer these questions and keep the software +maintainable, documentation is often seen as a burden not paying +immediate returns. We at DSys want to make it clear to you that we do +not believe in this mindset, but rather value good documentation. By the +way, this is also the case for the larger Rust ecosystem, where most +crates come at least with a minimal set of helpful documentation and +many come with extensive API documentation and handbook-style usage +references. Here are a couple of incentives for documentation: + +- Writing the docs for an API helps to make it consistent and + sensible. When you struggle documenting it, most likely it is not + yet fit for use by others. +- Writing the docs helps when maintaining the code and getting into + the mindset you had when you wrote it. +- Writing the docs improves the look of the code with respect to + professionalism. If you were to pick between two third-party + libraries with similar functionality, you would for sure pick the + one with the better docs first. +- Writing the docs reduces the number of questions you get. When + explaining things multiple times, the time would have better been + spent on writing a good doc once. + +But what is good documentation? Here are three attributes that good +documentation fulfils: + +- Complete - everything is documented +- Accurate - every detail is documented +- Clear - everything documented is straightforward to understand + +Typically, you don’t find all three at the same time, as they tend to +contradict each other (e.g. high accuracy impedes clarity, while +completeness reduces clarity). Hence, it makes sense to think about +which of the three the document should achieve for its purpose and stick +to that. + +Documentation appears in different formats: + +- Reference documentation (e.g. 
code comments) +- Design documents +- Tutorials +- Conceptual documentation +- Landing pages + +For the remainder of this section, we focus on how to do code comments +in Rust and talk about one approach to provide landing pages or +conceptual docs with GitLab. + +Code comments usually come in one of two forms: 1) API comments or 2) +implementation comments. The former are directed at users of the API, +while the latter are directed at implementers. Hence, they serve +different purposes and cater to different audiences. + + Rust Documentation + +In Rust, you can access documentation like this: + +- rustup doclocal, offline documentation of Rust + +- cargo doclocal, offline documentation of current crate + +Documentation is fully searchable and elements are color-coded: + +- Primitive Type +- Type +- Struct +- Function +- Enum +- Trait +- Macro +- Module + + Writing Documentation in .rs Files + +Now, what can you document with comments in your Rust code files? + +- Files - storing related functionality. +- Data structures - storing related data. +- Functions - implementing functionality. + +All these language elements mentioned above can be annotated with +documentation. You can use //! for documentation from within an element +(e.g., a module) and /// for what follows (e.g., a function). + +In lib.rs, you can for example do the following: + + //! `fcapp` - The Fancy CLI App <--- Docs for the lib (module) + + /// Generates a random number between 1 and 100 <--- Docs for `random` (function) + pub fn random() -> usize { + ... + } + +In the Rust ecosystem, crates are hosted on crates.io and the +documentation is uploaded to docs.rs. When you publish your crate, make +sure that both locations are used to create minimal surprise for +potential users of your code. + + GitLab Pages + +Now assume for a moment that you either host the documents for an +internal project (so docs.rs is no option) or you want to create a +static webpage for your code repository. 
GitLab has you covered by +GitLab Pages, allowing to build and serve the webpage. You need to +create the following job in the .gitlab-ci.yml: + + # the 'pages' job will deploy and build your site to the 'public' path + pages: + stage: deploy + script: + - cargo doc --lib --no-deps + - cp -R ./target/doc public + artifacts: + paths: + - public + expire_in: 1 week + only: + - main + +Note that any file put into /public is served later by Gitlab. + + Handbooks with mdbook + +The RTIC framework, for instance, uses a handbook as its landing page, +also providing tutorials and conceptual documentation. The authors use +mdbook, the tool that is also behind the Rust book and this coursebook. +Here is how to configure it using a book.toml: + + [book] + authors = ["Ferris"] + language = "en" + multilingual = false + src = "docs" + title = "ferris-rs" + + [build] + build-dir = "public" + +You can leverage GitLab pages in a similar way, by making sure mdbook +exports to the public folder. + + Changelogs with git-cliff + +Finally, we want to touch on another form of document: changelogs. When +software systems evolve over time and have a certain userbase, it is +common to document (at least breaking) changes. Source control such as +Git enables that these data points are created easily — a commit with a +succinct message can convey the meaning. You can even go and apply +conventional commits, a popular form of structuring your commit +messages. + +When you do so, git-cliff[11] helps you to build a changelog. Here is an +example cliff.toml file where you get sections per version of your repo +and subsections per type of change. 
+ + [changelog] + header = """ + # Changelog\n + """ + body = """ + {% if version %}\ + ## [{{ version | replace(from="v", to="") }}] - {{ timestamp | date(format="%Y-%m-%d") }} + {% else %}\ + ## [unreleased] + {% endif %}\ + {% for group, commits in commits | group_by(attribute="group") %} + ### {{ group | upper_first }} + {% for commit in commits %} + - {{ commit.message | upper_first }}\ + {% endfor %} + {% endfor %}\n + """ + trim = true + footer = "" + + [git] + conventional_commits = true + commit_parsers = [ + { message = "^bump*", group = "Version Updates"}, + { message = "^chore*", group = "Miscellaneous Tasks"}, + { message = "^ci*", group = "Continuous Integration"}, + { message = "^deps*", group = "Dependencies"}, + { message = "^feat*", group = "Features"}, + { message = "^fix*", group = "Bug Fixes"}, + { message = "^doc*", group = "Documentation"}, + { message = "^perf*", group = "Performance"}, + { message = "^refactor*", group = "Refactor"}, + { message = "^style*", group = "Styling"}, + { message = "^test*", group = "Testing"}, + ] + filter_commits = false + tag_pattern = "v[0-9]*" + +With the following command, you can generate the CHANGELOG.md for your +project: + + git cliff --output CHANGELOG.md + + U07: Usable Software + +Alright, it’s now been quite some time since you started at DSys and one +of the projects requires you to build a new software library from +scratch. As it is clear that this library will be used by other parties +as well, you have to take special care to make it usable and +maintainable (an aspect of dependability we covered in U01). In this +unit, we discuss in which ways we can improve this dimension: + +- We talk about how writing documentation for your software is + essential and makes your software more usable. This not only + includes code comments, but also other pieces of information and + tools to generate & host this information. +- We have a look at APIs — which should be carefully designed and + maintained. 

- We introduce supply chains & provenance as important topics in the
 sharing of software for dependable systems.

 Supply Chains & Provenance

 The authors of this book are no lawyers. This section is attempting to
 make software supply chains more clearly defined and this includes
 copyright and license information. As such, use the presented tools to
 improve your software metainformation. To make sure everything you
 reuse / publish is legal, however, consult your favourite lawyer.

Supply chains describe how organizations, people, processes, etc.
contribute to supplying a product or service. When we talk about
Software Supply Chains, we are often interested in how the software is
composed out of parts. Each part has a Provenance, i.e. details on where
it comes from and under which conditions it has been developed. Similar
to a bill-of-material (BOM) in industrial manufacturing, Software Bills
of Material are getting increasingly relevant. While this is currently
strongly used in the US due to the Biden Executive Order from May 2021
making this mandatory for delivering software to federal organizations,
we can expect that similar regulations will emerge in Europe.

Even though SBOMs themselves do not make the system more dependable in
itself, they help in making their development more dependable, as we get
transparency and traceability of the composition of software. This is
particularly true with respect to the security dimension of
dependability: knowing about a vulnerability in a specific software
version allows to trace it to software that depends on it. A common
issue today is that a) building software from scratch (and in-house) is
more and more infeasible due to the increasing complexity of systems,
and b) leveraging third-party software brings a large body of
functionality in that must be scrutinized. Hence, we must accept the
fact that sharing of software must become more dependable, i.e. 
the
+correct, security-preserving and legal usage of third-party
+software must become more feasible.
+
+A central information standard in this area is the Software Package Data
+Exchange (SPDX). Besides licensing information (and a list of common
+licenses), the SPDX specification allows you to annotate files, store
+checksums, and more. Other standards such as CycloneDX or SWID exist, but
+we focus here on SPDX.
+
+In the following, we assume that DSys wants to release the foss-rs crate
+as Free Open Source (FOSS), making sure it is properly licensed and this
+license is also clearly communicated.
+
+REUSE Compliance Framework
+
+
+
+The purpose of REUSE is to clearly state the copyright and license of
+any asset in your project. Thereto, it offers ways to annotate any file
+with copyright via SPDX-FileCopyrightText information and license via
+SPDX-License-Identifier. There are three ways:
+
+- Comments, if the considered file format is textual and allows for
+  comments. In Rust files, for instance, we can have
+  // SPDX-FileCopyrightText: 2022 Ferris at the beginning of the file.
+- .license files if either a) the file format does not support text
+  comments or b) you do not want to store it there. In this case, a
+  file with the same name plus a .license suffix can be stored there
+  and includes the SPDX-FileCopyrightText: 2022 Ferris header without
+  comment markings.
+- dep5 is intended for large directories, where adding copyright to
+  all files is not doable. This approach supports file glob patterns,
+  e.g. *.rs to apply the information to all Rust source code files.
+
+REUSE also provides a linter for checking compliance, the reuse-tool.
+reuse uses your VCS (Version Control System), which means that it also
+respects, for instance, .gitignore, and scans all files for appropriate
+information. The easiest way to run it is using a Docker container:
+
+    docker run --rm --volume $(pwd):/data fsfe/reuse lint
+
+Initially, our project does not comply.
We can change this by adding +headers to the individual files: + + reuse addheader --copyright "Ferris" --license="MIT" src/lib.rs + +After that, the text files look like this: + + // SPDX-FileCopyrightText: 2021 Ferris + // + // SPDX-License-Identifier: MIT + + ... + +When we did this to all files, we can + + reuse download --all + +to make sure all the license are downloaded as text and stored in +LICENSES. + +Finally with + + reuse lint + +we can confirm that our system is compliant: + + # SUMMARY + + * Bad licenses: + * Deprecated licenses: + * Licenses without file extension: + * Missing licenses: + * Unused licenses: + * Used licenses: CC0-1.0, MIT + * Read errors: 0 + * Files with copyright information: 4 / 4 + * Files with license information: 4 / 4 + + Congratulations! Your project is compliant with version 3.0 of the REUSE Specification :-) + +Now if we want to make sure that any contribution to our repository is +REUSE compliant, we can add a CI job like this: + + reuse: + image: + name: fsfe/reuse:latest + entrypoint: [""] + script: + - reuse lint + +We can also produce a SPDX SBOM: + + SPDXVersion: SPDX-2.1 + DataLicense: CC0-1.0 + SPDXID: SPDXRef-DOCUMENT + DocumentName: data + DocumentNamespace: http://spdx.org/spdxdocs/spdx-v2.1-3600566a-fa94-47b5-8efa-9059fc4e2d26 + Creator: Person: Anonymous () + Creator: Organization: Anonymous () + Creator: Tool: reuse-0.13.0 + Created: 2021-12-03T15:53:52Z + CreatorComment: This document was created automatically using available reuse information consistent with REUSE. 
+ Relationship: SPDXRef-DOCUMENT describes SPDXRef-8540736e946d41cc9583084c3e2d52b9 + Relationship: SPDXRef-DOCUMENT describes SPDXRef-20c74af6a1a744e3937396ceb3650119 + Relationship: SPDXRef-DOCUMENT describes SPDXRef-b4bd5775f2f58809bef6b0e1ccf3ecdb + Relationship: SPDXRef-DOCUMENT describes SPDXRef-91b555fff6242005192e133969e3a18a + + FileName: ./.gitignore + SPDXID: SPDXRef-8540736e946d41cc9583084c3e2d52b9 + FileChecksum: SHA1: 43ca72cab972d025aeaa11d014427c9160f4031f + LicenseConcluded: NOASSERTION + LicenseInfoInFile: CC0-1.0 + FileCopyrightText: SPDX-FileCopyrightText: 2021 Ferris + + FileName: ./Cargo.lock + SPDXID: SPDXRef-20c74af6a1a744e3937396ceb3650119 + FileChecksum: SHA1: ff0851f26122894e84fdd71281fde25b4b780bd5 + LicenseConcluded: NOASSERTION + LicenseInfoInFile: MIT + FileCopyrightText: SPDX-FileCopyrightText: 2021 Ferris + + FileName: ./Cargo.toml + SPDXID: SPDXRef-b4bd5775f2f58809bef6b0e1ccf3ecdb + FileChecksum: SHA1: aacee43aeb79bf0ce04c6254afdae22f9a909143 + LicenseConcluded: NOASSERTION + LicenseInfoInFile: MIT + FileCopyrightText: SPDX-FileCopyrightText: 2021 Ferris + + FileName: ./src/lib.rs + SPDXID: SPDXRef-91b555fff6242005192e133969e3a18a + FileChecksum: SHA1: f6e43e37ec5671f8f1b9995a0491dacf8d5dd1b0 + LicenseConcluded: NOASSERTION + LicenseInfoInFile: MIT + FileCopyrightText: SPDX-FileCopyrightText: 2021 Ferris + +ClearlyDefined + + + +clearlydefined.io is an online service that automatically harvests and +allows curation of project information, with respect to the following +properties: + +- Described: where is the source hosted, where can I file bugs, when + was which version released? +- Licensed: what licenses have been declared, what do they imply, + etc.? 
+- Secure: have there been vulnerabilities discovered with respect to a + specific project version?this is mostly under development + +In essence, ClearlyDefined provides a database for many potential +sources (Git Repos, GitHub, PyPI or crates.io packages, …) and serves +the respective information. In Rust projects, we know all dependencies +of our software due to the Cargo.lock file. The cargo-clearlydefined +utility leverages this and queries all dependencies (specific versions) +for the associated information. The following command produces the table +below: + +cargo clearlydefined --approve-osi --exclude=foss-rs --link -o markdown > cd.md + + -------------------------------------------------------------------------------------------------- + Name Version Declared License Score + license + ------------ --------- ------------ --------- ---------------------------------------------------- + autocfg 1.1.0 Apache-2.0 ✅ [88] + OR MIT + + num-traits 0.2.15 MIT OR ✅ [53] + Apache-2.0 + + typenum 1.15.0 MIT OR ✅ [88] + Apache-2.0 + + uom 0.33.0 Apache-2.0 ✅ [87] + OR MIT + -------------------------------------------------------------------------------------------------- + +Discussion + +- --exclude=foss-rs: We exclude the crate itself (we are in the + process of publishing it, so we won’t get a high enough score right + away). +- --approve-osi: We also specify that we want to approve OSI-approved + licenses. +- Finally, the ClearlyLicensed score is taken into account. A typical + threshold value is 75 (e.g. by the Eclipse Foundation), which means + it is sufficiently defined with respect to licensing + information (metric specification). With REUSE, we also get high + ClearlyLicensed scores as they check if all files have a + discoverable license. + +tern + +In the last decade, (Docker) containers have become a common exchange +format for software (in addition to binaries or virtual machines). 
+ +Their README comes with a explanation of how to analyze Docker +containers using tern itself in a Docker container as well. After setup, +you can do this: + + docker run --rm ternd report -i debian:buster + +which returns: + + This report was generated by the Tern Project + Version: 2.10.1 + + Docker image: debian:buster: + Layer 1: + info: Layer created by commands: /bin/sh -c #(nop) ADD file:1fb366429a5df94c7ba642735d6aa77e201f90e0843de03721a6ad19f80ee4e0 in / + info: Found 'Debian GNU/Linux 10 (buster)' in /etc/os-release. + info: Retrieved package metadata using dpkg default method. + + File licenses found in Layer: None + Packages found in Layer: + +------------------------+-------------------------+-----------------------------------------------+------------+ + | Package | Version | License(s) | Pkg Format | + +------------------------+-------------------------+-----------------------------------------------+------------+ + | adduser | 3.118 | | deb | + | apt | 1.8.2.3 | GPLv2+ | deb | + | base-files | 10.3+deb10u13 | | deb | + | base-passwd | 3.5.46 | GPL-2, PD | deb | + | bash | 5.0-4 | | deb | + | bsdutils | 1:2.33.1-0.1 | BSD-4-clause, LGPL-3+, MIT, LGPL-2+, | deb | + | | | LGPL-2.1+, public-domain, GPL-3+, GPL-2+, | | + | | | BSD-2-clause, BSD-3-clause, LGPL, GPL-2 | | + | coreutils | 8.30-3 | | deb | + | dash | 0.5.10.2-5 | | deb | + | debconf | 1.5.71+deb10u1 | BSD-2-clause | deb | + | debian-archive-keyring | 2019.1+deb10u1 | | deb | + | debianutils | 4.8.6.1 | | deb | + | diffutils | 1:3.7-3 | | deb | + | dpkg | 1.19.8 | public-domain-md5, GPL-2+, BSD-2-clause, | deb | + | | | public-domain-s-s-d, GPL-2 | | + | e2fsprogs | 1.44.5-1+deb10u3 | | deb | + | fdisk | 2.33.1-0.1 | BSD-4-clause, LGPL-3+, MIT, LGPL-2+, | deb | + | | | LGPL-2.1+, public-domain, GPL-3+, GPL-2+, | | + | | | BSD-2-clause, BSD-3-clause, LGPL, GPL-2 | | + | findutils | 4.6.0+git+20190209-2 | | deb | + | gcc-8-base | 8.3.0-6 | | deb | + | gpgv | 2.2.12-1+deb10u2 | LGPL-3+, 
LGPL-2.1+, permissive, RFC- | deb | + | | | Reference, CC0-1.0, GPL-3+, BSD-3-clause, | | + | | | TinySCHEME, Expat, GPL-3+ or BSD-3-clause | | + | grep | 3.3-1 | GPL-3+ | deb | + | gzip | 1.9-3+deb10u1 | | deb | + | hostname | 3.21 | | deb | + | init-system-helpers | 1.56+nmu1 | BSD-3-clause, GPL-2+ | deb | + | iproute2 | 4.20.0-2+deb10u1 | GPL-2 | deb | + | iputils-ping | 3:20180629-2+deb10u2 | | deb | + | libacl1 | 2.2.53-4 | LGPL-2+, GPL-2+ | deb | + | libapt-pkg5.0 | 1.8.2.3 | GPLv2+ | deb | + | libattr1 | 1:2.4.48-4 | LGPL-2+, GPL-2+ | deb | + | libaudit-common | 1:2.8.4-3 | LGPL-2.1, GPL-2 | deb | + | libaudit1 | 1:2.8.4-3 | LGPL-2.1, GPL-2 | deb | + | libblkid1 | 2.33.1-0.1 | BSD-4-clause, LGPL-3+, MIT, LGPL-2+, | deb | + | | | LGPL-2.1+, public-domain, GPL-3+, GPL-2+, | | + | | | BSD-2-clause, BSD-3-clause, LGPL, GPL-2 | | + | libbz2-1.0 | 1.0.6-9.2~deb10u2 | GPL-2, BSD-variant | deb | + | libc-bin | 2.28-10+deb10u1 | | deb | + | libc6 | 2.28-10+deb10u1 | | deb | + | libcap-ng0 | 0.7.9-2 | | deb | + | libcap2 | 1:2.25-2 | BSD-3-clause or GPL-2, GPL-2+, BSD-3-clause, | deb | + | | | BSD-3-clause or GPL-2+, GPL-2 | | + | libcap2-bin | 1:2.25-2 | BSD-3-clause or GPL-2, GPL-2+, BSD-3-clause, | deb | + | | | BSD-3-clause or GPL-2+, GPL-2 | | + | libcom-err2 | 1.44.5-1+deb10u3 | | deb | + | libdb5.3 | 5.3.28+dfsg1-0.5 | | deb | + | libdebconfclient0 | 0.249 | | deb | + | libelf1 | 0.176-1.1 | | deb | + | libext2fs2 | 1.44.5-1+deb10u3 | | deb | + | libfdisk1 | 2.33.1-0.1 | BSD-4-clause, LGPL-3+, MIT, LGPL-2+, | deb | + | | | LGPL-2.1+, public-domain, GPL-3+, GPL-2+, | | + | | | BSD-2-clause, BSD-3-clause, LGPL, GPL-2 | | + | libffi6 | 3.2.1-9 | | deb | + | libgcc1 | 1:8.3.0-6 | | deb | + | libgcrypt20 | 1.8.4-5+deb10u1 | | deb | + | libgmp10 | 2:6.1.2+dfsg-4+deb10u1 | | deb | + | libgnutls30 | 3.6.7-4+deb10u9 | LGPLv3+_or_GPLv2+, GPLv3+, Public domain. 
| deb | + | libgpg-error0 | 1.35-1 | LGPL-2.1+, g10-permissive, GPL-3+, | deb | + | | | BSD-3-clause, LGPL-2.1+ or BSD-3-clause | | + | libhogweed4 | 3.4.1-1+deb10u1 | other, LGPL-2+, LGPL-2.1+, GPL-2+ with | deb | + | | | Autoconf exception, public-domain, GPL-2+, | | + | | | GAP, GPL-2 | | + | libidn2-0 | 2.0.5-1+deb10u1 | LGPL-3+ or GPL-2+, LGPL-3+, GPL-3+, GPL-2+, | deb | + | | | Unicode | | + | liblz4-1 | 1.8.3-1+deb10u1 | BSD-2-clause, GPL-2, GPL-2+ | deb | + | liblzma5 | 5.2.4-1+deb10u1 | GPL-2, Autoconf, config-h, none, LGPL-2.1+, | deb | + | | | PD-debian, GPL-2+, PD, noderivs, probably-PD, | | + | | | permissive-fsf, permissive-nowarranty | | + | libmnl0 | 1.0.4-2 | LGPL-2.1, GPL-2+ | deb | + | libmount1 | 2.33.1-0.1 | BSD-4-clause, LGPL-3+, MIT, LGPL-2+, | deb | + | | | LGPL-2.1+, public-domain, GPL-3+, GPL-2+, | | + | | | BSD-2-clause, BSD-3-clause, LGPL, GPL-2 | | + | libncursesw6 | 6.1+20181013-2+deb10u2 | | deb | + | libnettle6 | 3.4.1-1+deb10u1 | other, LGPL-2+, LGPL-2.1+, GPL-2+ with | deb | + | | | Autoconf exception, public-domain, GPL-2+, | | + | | | GAP, GPL-2 | | + | libp11-kit0 | 0.23.15-2+deb10u1 | ISC, BSD-3-Clause, ISC+IBM, permissive-like- | deb | + | | | automake-output, same-as-rest-of-p11kit | | + | libpam-modules | 1.3.1-5 | | deb | + | libpam-modules-bin | 1.3.1-5 | | deb | + | libpam-runtime | 1.3.1-5 | | deb | + | libpam0g | 1.3.1-5 | | deb | + | libpcre3 | 2:8.39-12 | | deb | + | libseccomp2 | 2.3.3-4 | LGPL-2.1 | deb | + | libselinux1 | 2.8-1+b1 | | deb | + | libsemanage-common | 2.8-2 | | deb | + | libsemanage1 | 2.8-2 | | deb | + | libsepol1 | 2.8-1 | | deb | + | libsmartcols1 | 2.33.1-0.1 | BSD-4-clause, LGPL-3+, MIT, LGPL-2+, | deb | + | | | LGPL-2.1+, public-domain, GPL-3+, GPL-2+, | | + | | | BSD-2-clause, BSD-3-clause, LGPL, GPL-2 | | + | libss2 | 1.44.5-1+deb10u3 | | deb | + | libstdc++6 | 8.3.0-6 | | deb | + | libsystemd0 | 241-7~deb10u8 | LGPL-2.1+, CC0-1.0, public-domain, GPL-2+, | deb | + | | | Expat, GPL-2 | | + | 
libtasn1-6 | 4.13-3 | | deb | + | libtinfo6 | 6.1+20181013-2+deb10u2 | | deb | + | libudev1 | 241-7~deb10u8 | LGPL-2.1+, CC0-1.0, public-domain, GPL-2+, | deb | + | | | Expat, GPL-2 | | + | libunistring2 | 0.9.10-1 | GFDL-1.2+, LGPL-3+, MIT, GPL-3+, GPL-2+, | deb | + | | | GPL-3+ or GFDL-1.2+, LGPL-3+ or GPL-2+, | | + | | | FreeSoftware, GPL-2+ with distribution | | + | | | exception | | + | libuuid1 | 2.33.1-0.1 | BSD-4-clause, LGPL-3+, MIT, LGPL-2+, | deb | + | | | LGPL-2.1+, public-domain, GPL-3+, GPL-2+, | | + | | | BSD-2-clause, BSD-3-clause, LGPL, GPL-2 | | + | libxtables12 | 1.8.2-4 | custom, GPL-2, Artistic-2, GPL-2+ | deb | + | libzstd1 | 1.3.8+dfsg-3+deb10u2 | zlib, GPL-2+, BSD-3-clause, Expat, GPL-2, | deb | + | | | BSD-3-clause and GPL-2 | | + | login | 1:4.5-1.1 | | deb | + | mawk | 1.3.3-17+b3 | | deb | + | mount | 2.33.1-0.1 | BSD-4-clause, LGPL-3+, MIT, LGPL-2+, | deb | + | | | LGPL-2.1+, public-domain, GPL-3+, GPL-2+, | | + | | | BSD-2-clause, BSD-3-clause, LGPL, GPL-2 | | + | ncurses-base | 6.1+20181013-2+deb10u2 | | deb | + | ncurses-bin | 6.1+20181013-2+deb10u2 | | deb | + | passwd | 1:4.5-1.1 | | deb | + | perl-base | 5.28.1-6+deb10u1 | GPL-1+ or Artistic or Artistic-dist, GPL-1+ | deb | + | | | or Artistic, BSD-3-clause, SDBM-PUBLIC- | | + | | | DOMAIN, Artistic or GPL-1+ or Artistic-dist, | | + | | | GPL-1+ or Artistic, and Expat, HSIEH-BSD, | | + | | | BSD-3-clause-with-weird-numbering, ZLIB, | | + | | | BSD-3-clause-GENERIC, REGCOMP, and GPL-1+ or | | + | | | Artistic, GPL-1+ or Artistic, and | | + | | | BSD-4-clause-POWERDOG, GPL-3+-WITH-BISON- | | + | | | EXCEPTION, HSIEH-DERIVATIVE, RRA-KEEP-THIS- | | + | | | NOTICE, TEXT-TABS, GPL-1+ or Artistic, and | | + | | | BSD-3-clause-GENERIC, LGPL-2.1, Artistic-2, | | + | | | Unicode, BSD-4-clause-POWERDOG, GPL-1+, DONT- | | + | | | CHANGE-THE-GPL, CC0-1.0, GPL-1+ or Artistic, | | + | | | and Unicode, BZIP, REGCOMP, GPL-2+ or | | + | | | Artistic, GPL-2+, S2P, Artistic-dist, Expat, | | + | | | 
Artistic, Expat or GPL-1+ or Artistic | | + | sed | 4.7-1 | | deb | + | sysvinit-utils | 2.93-8 | GPL-2+ | deb | + | tar | 1.30+dfsg-6 | | deb | + | tzdata | 2021a-0+deb10u7 | | deb | + | util-linux | 2.33.1-0.1 | BSD-4-clause, LGPL-3+, MIT, LGPL-2+, | deb | + | | | LGPL-2.1+, public-domain, GPL-3+, GPL-2+, | | + | | | BSD-2-clause, BSD-3-clause, LGPL, GPL-2 | | + | zlib1g | 1:1.2.11.dfsg-1+deb10u2 | Zlib | deb | + +------------------------+-------------------------+-----------------------------------------------+------------+ + ======================================================================================= + + ########################################### + # Summary of licenses found in Container: # + ########################################### + Public domain., LGPL, Artistic or GPL-1+ or Artistic-dist, ZLIB, LGPL-3+, public-domain-s-s-d, permissive-fsf, GPLv3+, config-h, BSD-variant, BSD-3-clause or GPL-2, public-domain, GPL-1+ or Artistic, and BSD-3-clause-GENERIC, TinySCHEME, BSD-3-clause and GPL-2, CC0-1.0, GPL-1+ or Artistic, and Unicode, BZIP, PD, noderivs, GFDL-1.2+, BSD-4-clause, g10-permissive, LGPL-3+ or GPL-2+, GPL-3+ or BSD-3-clause, GPL-1+ or Artistic, and BSD-4-clause-POWERDOG, HSIEH-DERIVATIVE, RRA-KEEP-THIS-NOTICE, GPL-2+ with distribution exception, MIT, BSD-4-clause-POWERDOG, LGPL-2.1+ or BSD-3-clause, zlib, other, REGCOMP, GAP, Expat, public-domain-md5, GPL-1+ or Artistic, BSD-3-clause, permissive-like-automake-output, BSD-3-clause-with-weird-numbering, probably-PD, Zlib, none, REGCOMP, and GPL-1+ or Artistic, FreeSoftware, ISC+IBM, BSD-3-Clause, GPL-2+ with Autoconf exception, TEXT-TABS, GPL-3+ or GFDL-1.2+, LGPL-2.1, Unicode, GPL-1+, GPL-2+, S2P, SDBM-PUBLIC-DOMAIN, Artistic, GPL-2, PD-debian, LGPL-2.1+, GPL-1+ or Artistic or Artistic-dist, permissive, Expat or GPL-1+ or Artistic, HSIEH-BSD, GPL-1+ or Artistic, and Expat, BSD-3-clause-GENERIC, RFC-Reference, GPLv2+, Autoconf, LGPL-2+, GPL-3+, custom, BSD-2-clause, Artistic-2, 
permissive-nowarranty, DONT-CHANGE-THE-GPL, LGPLv3+_or_GPLv2+, ISC, GPL-2+ or Artistic, Artistic-dist, BSD-3-clause or GPL-2+, same-as-rest-of-p11kit, GPL-3+-WITH-BISON-EXCEPTION + + S07: Sample Solution + +- Continuous Documentation: discussed in class. + +- Landing Page: discussed in class. + +- Changelog: discussed in class. + +- APIs: 2.0.0 + + Summary + + What did you learn? + +- Why documentation is essential for dependable and, in particular, + maintainable software. +- How to leverage Rust and other tools to generate and publish + documentation for various purposes. +- What properties an API should have and how your implementation + choices have an impact on these. +- How you can make your project REUSE-able and ClearlyDefined — + providing software bills of material. + + Where can you learn more? + +- Documentation: + - cargo-doc + - GitLab Pages + - Software Engineering at Google: Ch. 10 + - cheats.rs: Documentation + - Commit Virtual 2021: Use Gitlab to Deliver “Docs-as-Code” + Technical Documentation +- APIs: + - Rust for Rustaceans: Ch. 
04 + - Rust API Guidelines + - Semantic Versioning (SemVer) + - Semantic Versioning Compatibility + - “Type-Driven API Design in Rust” by Will Crichton +- Software Bills of Material: + - Why the World Needs a Software Bill of Materials Now + - What is a Software Bill of Material + - Understanding SBOM Standards + - SBOMs Supporting Safety Critical Software + + W07: Work Sheet + +Continuous Documentation + +Re-use the FizzBuzz project created in U02 and extend it by: + +- rudimentary documentation for the fizzbuzz function and the + library’s main module +- a CI job that produces the documentation +- GitLab pages to host the documentation (/fizzbuzz) + +Landing Page and Handbook + +Again, re-use the FizzBuzz project and extend it by: + +- rudimentary mdbook configuration, allowing you to write + supplementary text for it +- write a page that explains how FizzBuzz works and how one can setup + your code (git clone, cargo install) +- use GitLab CI and pages to generate and host this as the landing + page (/) + +Changelog + +Use git-cliff to generate a CHANGELOG.md for FizzBuzz. If you picked +proper messages in the respective unit, you should get proper commit +groups for the [Unreleased] version of FizzBuzz. + +APIs and Versions + +Assume version 1.3.1 of your crate has the following code: + + pub struct Engine { + pub temperature: f64, + pub rotations: u64, + } + +Now you add pub kind: EngineKind, with pub enum EngineKind to tell +electric from combustion engines apart. What should the new version of +your crate be? + +Coding + +General Coding Process + +Now that we know how error control generally works to improve +reliability of a system, we look at the process of coding information in +detail. This process looks like this: + +{{#include img/CodingProcess.svg }} + +We have the following variables: + +- \(i I\): Information (Data) + - \(I\) is the information alphabet. + - Example: a set of symbols like { START, STOP, RESUME, EXIT }. 
+- \(r \in R^+\): Received Data
+  - \(R\) is the channel alphabet, i.e. each word represents a
+    receivable message.
+  - \(r\) is a non-empty word from \(R\)
+  - Example: binary numbers \(R = \{0,1\}\).
+- \(c \in C\): Coded Data
+  - \(C\) is the code word alphabet. Code words can be received, but
+    not everything that can be received is a code word, i.e. \(C
+    \subseteq R^+\).
+  - The encoding function is \(encode : I \to C\).
+  - The correction function is \(correct : R^+ \to C\). Note that only
+    for perfect codes, this mapping is total.
+  - The decoding function is \(decode : C \to I\), i.e. the inverse of
+    \(encode\).
+- \(f \in R^+\): Error
+  - Added by noise etc.
+- \(s\): Syndrome
+  - Used for error detection and correction.
+- \(o\): Error Locator
+  - Derived from \(s\) to get \(f\).
+
+Definitions
+
+First, we have to define terms we have used loosely in the previous
+sections in a clearer way:
+
+- Information: The actual data we want to transmit.
+- Code: The mapping between information words and code words.
+- Redundancy: Parts of the original information cleverly…
+  - (re-) arranged,
+  - combined, or
+  - otherwise mathematically transformed and
+  - transmitted.
+
+Block Codes
+
+While there are various ways to do coding, we only concentrate on block
+codes in this unit.
+
+A block code transforms \(k\) information symbols to \(n\) code symbols,
+so the code rate is \(k/n\) (with \(r=(n-k)\) redundancy symbols).
+
+We define the Hamming Distance \(d\) as the difference between two code
+words. The minimal distance between any two code words (\(d_{min}\))
+gives the distance of the code. This distance gives us the capability of
+the code. A code with \(d_{min}\) can:
+
+- Detect \(e\) errors, if \(d_{min} \geq e+1\)
+
+- Correct \(e\) errors, if \(d_{min} \geq 2e+1\)
+
+Hamming Code
+
+One example of a block code is a Hamming Code (HC). The HC operates on
+symbols that are single bits. We denote it with \(HC(n,k)\).
HCs make use
+of so-called parity bits which are:
+
+- 0 for even number of bits set to 1 in block.
+- 1 else.
+
+These \(r\) parity bits are at positions \(2^x\) (i.e., 1, 2, 4, 8, …)
+
+The syndrome \(s\) is used to recalculate parity including parity bits.
+The syndrome both checks for an error and locates it:
+
+- \(s=0\): no error.
+- \(s \neq 0\): syndrome value is the location of the error.
+
+  More information about the code generation algorithm can be found
+  here.
+
+Hamming Code | Example HC(7,4)
+
+In this example our alphabets are:
+
+- \(\Sigma = \{0, 1\}\), note that in this case \(+\) and \(-\) become XOR.
+- \(I = \Sigma^4\)
+- \(r \in \Sigma^7\)
+
+In the following, we give a worked example for a Hamming Code.
+
+ Transmitter (\(x = [x_1 … x_n]\))
+
+Encode 4 bits \(i = 1001 = c_3 c_5 c_6 c_7\).
+
+Parities:
+
+\(p_1 = (c_3 + c_5 + c_7) = 0 = c_1\)
+
+\(p_2 = (c_3 + c_6 + c_7) = 0 = c_2\)
+
+\(p_3 = (c_5 + c_6 + c_7) = 1 = c_4\)
+
+Result:
+
+\(c = [ 0 0 1 1 0 0 1 ]\)
+
+\(f = [ 0 0 0 0 0 1 0 ]\)
+
+ Receiver
+
+\(r = [ 0 0 1 1 0 1 1 ]\) (wrong!)
+
+Syndromes:
+
+\(s_1 = (p_1 + c_3 + c_5 + c_7) = 0\)
+
+\(s_2 = (p_2 + c_3 + c_6 + c_7) = 1\)
+
+\(s_3 = (p_3 + c_5 + c_6 + c_7) = 1\)
+
+\(s = [ 0 1 1 ] \neq 0\), hence an error occurred
+
+Location: \(s_1 \cdot 2^0 + s_2 \cdot 2^1 + s_3 \cdot 2^2 = 6 = o \Rightarrow f\)
+
+\(r' = r - f = [ 0 0 1 1 0 0 1 ]\)
+
+Advanced Codes
+
+A Hamming code is a rather simple coding approach. There is a vast
+amount of literature on other coding schemes, for example:
+
+BCH Codes, where we add multiple Hamming Codes together to get more
+correction capabilities.
+
+Reed-Solomon Codes, which work on bytes rather than single bits. This
+code is able to correct full bytes, independent of how many bit errors
+happened within it. This is ideal for computer systems, with 8-bit
+symbols (byte).
+
+With Code Concatenation multiple codes are used inside each other. Doing
+this efficiently is a complex topic on its own.
+
+Bursts and how to get rid of them
+
+Remember, bursts are multiple consecutive errors.
Assume the following: + +The information we want to send is: \([ 1 0 0 1 1 0 0 1]\), which yields +the following code: \([ 0 0 1 1 0 0 1  0 0 1 1 0 0 1]\) (HC(7,4)). +Assume the channel causes two errors in different ways: + +- a) \([ 0 0 1 1 0 1 1  0 1 1 1 0 0 1]\) + +- b) \([ 0 0 1 1 1 1 1  0 0 1 1 0 0 1]\) + +For each option, think about whether you can correct the errors or not? + +For a), we can correct as there is 1 error per block. For b), we cannot +correct as 2 errors are in the 1st block, which exceeds the correction +capabilities of HC(7,4). Now you might wonder if we can do something +about the second case, where we have enough correction capabilities but +the errors are distributed over blocks in an unfortunate way. + +Interleaving + +As you might have guessed, there is such an approach and it is called +interleaving. The basic idea is to scramble bit positions and spread +adjacent symbols apart. This helps with burst errors, but it is also +time-consuming, as data symbols have to be aggregated at transmitter and +receiver before sending or delivering. + +An interleaver is parameterized by picking numbers for columns \(C\) and +rows \(R\). After interleaving, the new distance between originally +adjacent symbols (within block) becomes \(R\). In between blocks, the +distance is different. 
+ +At the interleaver, we fill row-wise and read column-wise: + +\(i = [ 0, 1, 2, 3, 4, 5, 6, 7]\) + +0 +1 +2 +3 +4 +5 +6 +7 +\(c = [0, 4, 1, 5, 2, 6, 3, 7]\) + +At the deinterleaver, we fill column-wise, read row-wise: + +\(c = [0, 4, 1, 5, 2, 6, 3, 7]\) + +0 +1 +2 +3 +4 +5 +6 +7 +\(i = [ 0, 1, 2, 3, 4, 5, 6, 7]\) + +Interleaving Example + +Information: \([ 1 0 0 1 1 0 0 1 ]\) + +Code: \([ 0 0 1 1 0 0 1  0 0 1 1 0 0 1]\) (HC(7,4)) + +Transmitting (0 = padding) + +Modify code before sending: [ 0 0 1 1 0 0 1   0 0 1 1 0 0 1   0 0 ] + +0 +0 +1 +1 +0 +0 +1 +0 +0 +1 +1 +0 +0 +1 +0 +0 +[ 0 0 0 0 0 0 1   1 1 1 1 0 1 0   0 0 ] + +Channel + +Error occurs: [ 0 0 0 0 0 1 0   1 1 1 1 0 1 0   0 0 ] + +Receiving + +0 +0 +1 +1 +0 +1 +1 +0 +0 +0 +1 +0 +0 +1 +0 +0 +0 0 1 1 0 1 1    0 0 0 1 0 0 1    0 0 + +Now, the two bit error burst becomes correctable! + +Coding Project + +In P02, you must implement an FEC coding scheme by hand. Note that a +Hamming Code will suffice to pass the project. At the same time, +interleaving and Reed-Solomon codes can improve your performance and are +worth learning / applying — so don’t hesitate to try them out. + + Error Control + +As we discussed previously, faults cannot be completely avoided. In a +similar tone, a communication channel or computation is never 100% +guaranteed to be correct. Therefore, it is necessary to (a) know which +errors can happen, (b) how to detect them, and (c) how to prevent or +handle them. + + Error Types + +First, we look at what types of errors can happen in communication +between two systems: + +{{#include img/bit_error_packet_erasure.svg }} + +- Bit Errors are caused by physical problems (noise, etc.). + +- Packet Erasures are caused by + + - physical problems (e.g. shadowing in wireless media) or + - logical problems (e.g. buffers are filled and newly arriving + packets must be dropped). 
+ +- Delayed Packets caused by + + - differing paths across a network, + - network congestion, or + - insufficient priority compared to other network traffic. + + Error Distributions + +Now that we know about the different types of errors, it is also +essential to look at how likely errors are — in particular how they are +distributed. + +{{#include img/sporadic_burst_error.svg }} + +Informally speaking, we talk about sporadic errors that happen once in a +while and only affect single or small-scale units of data. Burst errors +are instead multiple consecutive errors that indicate some error +correlation. This can be due to, e.g., a scratch in a CD (multiple bits +affected) or an intermittent link failure (multiple packets affected). + + General Error Control + +In accordance with the Shannon model of communication, the function of +error control is split between the transmitter and the receiver. The +transmitter has the task of providing redundancy, i.e. repeating some of +the information or coding the information into a different form to be +transmitted. The receiver has multiple tasks: + +- First, it has to detect if there was an error. If this is the case, + it has two options: + - Hand over the received data to a correction task, or + - Discard the data with the erroneous data. This can transform + single bit errors into packet erasures. +- Second, if correction is attempted, the receiver locates the error. + Using this information, it searches for the closest valid code + symbol to the received non-valid symbol. Closest in this context + means that this symbol has the highest likelihood, assuming random + noise on the channel. 
+ +For the correction, there are two common approaches on how the receiver +can get access to redundant information in order to correct the error: + +- Proactive, also known as forward error coding (FEC) +- Reactive, also known as automated repeat request (ARQ) + + Proactive + +In the proactive approach, the transmitter anticipates that some +information is lost on the channel. Therefore, it transmits more data +(i.e. data + redundancy) to increase the likelihood of enough data +arriving at the receiver to allow for decoding the original information. + +There are multiple schemes to add this redundancy: + +- Redundancy Packet: Send additional packets used to recover erasures. + +- Robust Packet: Send packets with included redundancy to recover bit + flips. + +- Piggy-Back: Include digest of packet n+1 in packet n to conceal + erasures. + +This approach has the benefit that correction can be attempted without +waiting for additional redundancy to arrive (as it is sent immediately +by the transmitter). The drawbacks are: + +- Data rate increases statically.Independent of actual errors. +- Picking redundancy amount is tricky.Too much: Waste capacity. Too + little: Fail regularly. +- De- and Encoding takes time.Coding process to generate redudancy. + Aggregating application data to allow efficient block coding. + + Reactive + +A different approach is the reactive approach that is, for example, +employed in the Transmission Control Protocol (TCP). This approach is +especially efficient when transmission is fast, as: + +- time consumption for correction is transmission time + repetition + timeout length, and +- if no errors occur, there is no added overhead by ARQ (it is a + reactive scheme). + +The approach is problematic when transmission takes long, as: + +- spending a second operation time can exceed limits. +- retransmission timers may expire too early and redo transmission + without need. 
+ +Nobody is Perfect + +Finally, even when we put these error-control mechanisms in place, it is +still possible that we are not successful in recovering the +transmitters’s information due to two possible reasons: + + Decoding Failure + +A decoding failure happens when more than one symbol is closest to the +received one. Mathematically speaking, this means that the code’s +equation system cannot be solved and the correction cannot decide for +one of the options. Using perfect codes avoids this completely. + + Decoding Error + +In contrast, a decoding error is when a symbol is changed on the channel +to an extent that a different code symbol appears closer. Hence, the +correction happens but yields information that is different from the one +transmitted. In this case, one cannot blame the coding system for +providing a wrong result — rather one must change the coding system to +provide higher correction capabilities. + + Detection vs. Correction + +The rule of thumb to favour detection over correction or vice-versa is: + +- Detection is better on reliable media.The common case is successful + transmission. Only retransmit in the rare cases (save data rate). +- Correction is better on unreliable media.The common case is + unsuccessful transmission. Always transmit more (save latency for + retransmissions). + +You can find examples for this in communication protocols: + +- Error detection is used in, e.g., Ethernet (802.3) or CAN bus +- Error correction is used in, e.g., WLAN (802.11), LTE, UMTS + + A Quantom of Information Theory + +Communication Systems + +In information theory, communication systems are typically described +according to a general model developed by Claude Elwood Shannon: + +{{#include img/Shannon_communication_system.min.svg}} + +Source: Wikipedia + +Bits + +A bit is the basic unit of information in computing and digital +communications. The word is a portmanteau of binary digit. A bit can +only have two values: 0 or 1. 
This can be compared with a light bulb
that can be either on or off. In information theory, you also find the
unit 1 Sh (Shannon). Bit is often used for data and Shannon for
information.

Information and Entropy

When talking about the information content of some message, we use
information (measured in bits) to describe it. If we talk about the
information involved in a random process, we often use the term entropy
(expected information). The term has its origin in thermodynamics and
describes the disorder in a system. Thanks to the second law of
thermodynamics, ultimate chaos is inevitable!

In information theory, Shannon described that, in principle, the
receiver attempts to infer which message has been sent. The receiver is
uncertain about this (before receiving as well as after), but
anticipates certain information. The entropy then describes a) how
uncertain she is before the reception, b) how uncertain she is after the
reception and hence c) how much uncertainty was removed by the reception
(i.e. a - b). Information (and in turn entropy) also depends on the
number and likelihood of different options (e.g. sides of a die or a die
showing a certain number). This set of options is called \(\Omega\) and has
\(N=|\Omega|\) elements. In this case, \(\log_2(N)\) gives the number of bits
required to identify these options by a unique binary number (and is
optimal, if they are uniformly distributed). A single option \(x\) has
the information \(-\log_2(p_x)\). Intuitively, we have the following
relationships:

- Likely Option \(\Rightarrow\) Low Information. e.g. white pixel of a document
  scan

- Unlikely Option \(\Rightarrow\) High Information. e.g. traffic light is yellow
  light

- More Options \(\Rightarrow\) Higher Information. e.g. traffic light vs. 7-segm.
  display

The (discrete!) entropy of the process \(H(X)\) can be quantified as the
expected information content of \(X\) and is measured in bits like this:
\(-\sum_{x \in X} p_x \log_2(p_x)\).

Case Study: Inefficiency of Textual Protocols

Let’s look at a textual protocol involving a command field, which can be
one of the following:

- Retrieve (GET),
- Create (ADD),
- Modify (MOD), and
- Delete (DEL)

How many bits are used for the textual and binary solution?

- For textual, we have 3 characters for each command and one ASCII
  char needs 7 bits (often even 1 byte, but let’s be fair). Hence, the
  result is: \(3 \cdot 7\,\text{bit} = 21\,\text{bit}\)

- For binary, the 4 different commands (0,1,2,3) mean that we have 4
  values that require two bits each (00, 01, 10, 11). Hence, the
  result is \(2\,\text{bit}\)

Now you might ask, why textual protocols are used at all? The answer is
that efficiency is not your only parameter! Compression can bring
efficiency without requiring explicit mapping from information to binary
sequences.

 U08: Working Reliably with Codes

In U07, we told you that you have to write a software library… but what
should it do? Here we are with the following challenge: DSys products
communicate with each other using various communication means. Some of
them are even wireless, which is known to be not as reliable as cables
(reliability being one dependability dimension). As the, what network
engineers call, “lower layers” are built out of off-the-shelf WLAN
components, you can only change your communication protocol.

To prepare you for this task, we start with a little bit of information
theory, continue with the concept of error control to increase
reliability, and deal with actual codes that allow you to detect and
correct bit errors.

 S08: Sample Solution

Information Theory

- Message (the information content), signal (the encoded information).
  Transmitter encodes (turns information symbols into code symbols),
  adds redundancy, transforms symbol into a transmissible form
  (e.g. 
electromagnetic waves), … + +- -1/6 * log2(1/6) = 0.43082708345 + +- Encoding Die Throw: + + - ASCII: 1 byte = 7 or 8 bits; + - Binary: 3 bits (0 .. 7 -> 1 .. 6) + - 3 / 8 = 37.5% (reduced to) + +- Traffic Light: + + - Four phases: red active in two, yellow active in two, green + active in one + - Probabilities: Red 1/2, Yellow 1/2, Green 1/4 + - Information: 1, 1, 2, + - Entropies: 0.5, 0.5, 0.5 => Total Entropy 1.5 + - US Traffic: + - 1/3, 1/3, 1/3 -> 0.5283 + - Total Entropy 1.5849 + - US traffic lights are more “surprising” hence more dangerous + if you ask me + +Error Control + +- Proactive should be used if error likelyhood and recovery time are + high. + +- Some single bit errors might not be correctable, leading to a + discard of the larger unit of information. + +- They can’t. + +- Failure: cannot correct, Error: can correct, but do not arrive at + true value + +Coding + +- HC(7,4) + - Data: [0101] + - Encoded: [0100101] + - Error: [0000010] + - Received: [0100111] + - Syndrome: [011] -> 6 -> Error [0000010] + - Corrected: [0100101] +- Interleaver (P = Padding): [0, 3, 1, 4, 2, 5, 6, P, 7, P, P, P] + + Summary + + What did you learn? + +- Information theory is relevant for building reliable communicating + systems. +- Errors come in various types and distributions and you should know + about them to tune your error control approach. +- Coding schemes generate redundancy based on data — their detection + and correction capabilities differ, so they must be chosen wisely. + + Where can you learn more? + +- Information theory and Coding theory are good starting points. +- Error Correction Code (ECC) Memory is another application of coding + that is more computation- than communication-centered. + + W08: Work Sheet + +Information Theory + +- Explain the difference between the information source’s message and + the signal. Describe what different things can happen in the + transmitter. + +- You have a 6-sided fair die. What is the entropy of throwing a 6? 

- You encode the result of a die throw in ASCII text (1, 2, …, 6) and
  binary. How many bits does the binary encoding need? Encoding binary
  reduces the used bits to how many percent of the textual encoding?

  Traffic Light

- Assume you have a German traffic light such as in the animation on
  the right. Assume for now that the different light phases are of
  equal duration (as in the animation). Calculate both information and
  entropy of seeing each individual light (red, yellow, green) being
  “on”. Does a US traffic light have higher or lower entropy than the
  German ones (assuming equal duration)?

Error Control

- Explain in which cases you should prefer proactive over reactive
  error control.

- Explain how single bit errors can turn into packet erasures.

- Explain how an overly delayed packet can be told apart from a lost
  packet.

- Explain the difference between a decoding failure and a decoding
  error.

Coding

- Assume you use a \(HC(7,4)\) and the following bit sequences have
  this form \([x_1, … x_n]\). Encode the 4 bits \([0101]\). When you
  transmit, the following error happens on the channel \([0000010]\).
  Compute the syndrome and show how it detects and locates the error.

- Assume you have the data sequence \([0, 1, 2, 3, 4, 5, 6, 7]\) and
  you feed it into a 2 x 3 interleaver. Compute the resulting data
  sequence after the interleaver.

 Binary Trees

(Source: Programming Rust)

In this section, we cover binary trees, i.e. trees where elements have
0–2 children. Children can be left or right of the parent. Furthermore,
a binary search tree has the property that elements left of a parent are
<= the parent element and right of the parent are >.

Declaration

Here is how we declare types for binary trees:

    enum BinaryTree<T> {
        Empty,
        NonEmpty(Box<TreeNode<T>>),
    }

    struct TreeNode<T> {
        element: T,
        left: BinaryTree<T>,
        right: BinaryTree<T>,
    }

Note that the NonEmpty variant carries a Box<TreeNode<T>>.
Why is this the case?
Assume we would use the following:

    enum BinaryTree<T> {
        Empty,
        NonEmpty(TreeNode<T>),
    }

What can go wrong?

In fact, Rust complains because it cannot figure out the memory size of
BinaryTree as we now made it infinite. Why?

Enums are sized according to the largest type they contain. So
BinaryTree<T> has the size of TreeNode<T> plus the space to store that
it is the NonEmpty variant. Now how big is TreeNode<T>? The node
contains up to two BinaryTree<T>s which again could, in the worst case,
contain a TreeNode<T>. So we create a recursive dependency. With Box, we
introduce a pointer with a fixed size that points to a heap-allocated
value and its size. This means that the BinaryTree<T> enum only carries
the size of a pointer.

Population

Now with the data structure at hand, let’s implement our first
algorithm, namely a way to fill (or populate) the tree:

    impl<T: Ord> BinaryTree<T> {
        fn insert(&mut self, value: T) {
            match self {
                BinaryTree::Empty => {
                    *self = BinaryTree::NonEmpty(Box::new(TreeNode {
                        element: value,
                        left: BinaryTree::Empty,
                        right: BinaryTree::Empty,
                    }))
                },
                BinaryTree::NonEmpty(ref mut node) => {
                    if value <= node.element {
                        node.left.insert(value);
                    } else {
                        node.right.insert(value);
                    }
                }
            }
        }
    }

Here we see how two concepts play nicely together when working with tree
data structures: match expressions and recursion.

First, we split the handling of two different cases: a) empty node and
b) non-empty node. If empty, we start with a newly created node. If
non-empty, we recurse with adding to either left or right, depending on
the value to be inserted. Thereby, we ensure the order-property of the
tree is maintained.

Width

Now, it’s time to compute things while working our way through the tree.

The width gives the number of leaf elements the tree contains:

    fn width(&self) -> u32 {
        match self {
            Self::Empty => 0,
            Self::NonEmpty(t) => u32::max(1, t.left.width() + t.right.width()),
        }
    }

Hence, an empty tree has no leaves. A non-empty tree either has a width
of 1 (it is a leaf) or the combined width of its left and right
children. Take a piece of paper and validate that all four cases (leaf,
non-leaf with left child, with right child, with two children) yield the
correct answer.

Projecting

Finally, a common use case for trees is to traverse them in a particular
order, e.g. to compute a projection (i.e. enumerate the elements in said
order). Here is how to compute a preorder (root, left sub-tree, right
sub-tree):

    fn project_preorder<'a>(&'a self) -> PreOrderProjection<'a, T> {
        PreOrderProjection { stack: vec![self] }
    }

    struct PreOrderProjection<'a, T> {
        stack: Vec<&'a BinaryTree<T>>,
    }

    impl<'a, T> Iterator for PreOrderProjection<'a, T>
    where
        T: Copy,
    {
        type Item = T;

        fn next(&mut self) -> Option<Self::Item> {
            let root = self.stack.pop();
            match root {
                None => None,
                Some(t) => match t {
                    BinaryTree::Empty => None,
                    BinaryTree::NonEmpty(t) => {
                        if let BinaryTree::NonEmpty(_r) = &t.right {
                            self.stack.push(&t.right);
                        }
                        if let BinaryTree::NonEmpty(_l) = &t.left {
                            self.stack.push(&t.left);
                        }
                        Some(t.element)
                    }
                },
            }
        }
    }

Note that we implement a custom struct that works as a projection of the
tree. It implements Iterator so that consuming code can call it as it
would call other iterators.

 Fault Trees

When analyzing systems for safety and reliability, fault trees have
shown great performance and are in broad use since their invention (in
1961 by Bell Laboratories).
Standards such as + +- IEC 61508-3 (electrical/electronic/programmable systems), +- ISO 26262-10 (automotive), +- EN 50126-2 (rail) and +- ISO 14971 (medical) + +recommend the use of fault tree analysis to check for the safety of +systems. + + Fault Trees (FT) and Algorithms + +Fault trees serve multiple purposes. Fault trees… * trace back +influences to a given hazard or failure, * help to find all +influences, * graphically explain causal chains leading to the hazard, * +can be used to find event combinations that are sufficient to cause +hazard, orqualitative analysis: systematic investigation for +combinations * can be used to calculate hazard probability from +influence probabilitiesquantitative analysis: systematic investigation +for likelihoods + +Originally, fault trees were only boolean trees, but over time various +different forms evolved. In this section, we focus on Boolean and +Extended Boolean Fault Trees. Additional forms and analysis techniques +are left for you to explore. + + Boolean Fault Trees + +The concept underlying a boolean fault tree is straightforward. The tree +is built up of nodes and edges, where nodes are basic events (the leaves +of the tree) that can happen or logic gates that combine multiple basic +or intermediate events (the non-leaves of the tree). When evaluating the +tree, events can be present (true) or non-present (false) and edges +propagate this information upward. Typically, at least the Or and And +gate are supported that combine the truth value of their lower events +into a new one (using the respective boolean operation). Eventually, the +root is the top-level event in question and evaluation of the tree leads +to either true or false for this top-level event. The tree itself has +failure logic, i.e. the top-level becoming true means it failed, as is +the case for other events. So, an event failing means that the value +changes from false to true. 
This is in contrast to a success tree, where +true means something is successful or present. In essence, fault trees +are equivalent with negation-free boolean formulas (only And/Or are +supported). + +When it comes to (graphical) notation, you find lots of different ways +to specify the same set of core elements of a fault tree. In particular, +gate symbols are often borrowed from circuit design (where the common +symbols differ from US to EU, for instance) and often it is only their +shape that indicates their function. In some cases you also get +operators (e.g. &) in the symbol itself, but this is not always the +case. In the following, we use & for And and >=1 for Or. + +Assume the following fault tree that captures how it could happen that +you were late at the uni (hypothetically — we know this never happens to +you!): + + Late at + the uni + | + +-----+ + | >=1 | + +-----+ + | | + +--+ | + | | + +-------+ | + | & | | + +-------+ | + | | | + O O O + ^ ^ ^ + Alarm not | Train + set | late + | + Slept + too long + +Note that Fault Trees have their use, even if no analysis is carried +out. Constructing the FT already helps in understanding the system, +revealing problems, and building awareness on safety and reliability. In +this example, you already see that if the train is late, having slept +too long is not relevant anymore. Thereby, we already carried out a +qualitative analysis, i.e. checking if the top-event is reachable, +depending on the basic events. + +This leads us to the definition of two special sets: + +- Cut Set: set of basic events which causes the top event in + conjunction +- Path Set: set of basic events that (by being false) inhibit the + top-event from occurring + +If you have a careful look, you see that these sets are usually bigger +than they need to be to fulfill their definition (e.g. a cut set +contains an event that does not need to be true for the top-event to +become true, e.g., because it is or-ed with another event that is true). 

Hence, there are also:

- Minimal Cut Set (MCS): smallest set of events that, if failing, lead
  to top-level fail
- Minimal Path Set (MPS): path set where removing any basic event
  means it no longer is a path set

In fault tree analysis, one is usually concerned with MCS of order 1 or
2, as well as MCS with probability > 0.01 (which require quantitative
analysis that we learn later in this section). That means you focus on
single points of failure or small combinations that appear with
significant probability.

In the example above, we have the following:

- minimal cut sets: [Alarm not set, Slept too long] and [Train late].
- minimal path sets: [Train late, Alarm not set] and [Train late,
  Slept too long].

 Extended Boolean Fault Tree

If quantitative analysis is planned, we have to use extended boolean
FTs. The diagram we showed above is a good basis for such a tree; we
only have to decorate it with failure probabilities. This is depicted
below, where the failure probabilities of basic events induce the
probabilities further upwards in the tree (as per the rules quoted
below):

          Late at
          the uni
           0.154
             |
          +-----+
          | >=1 |
          +-----+
           |   |
       +---+   |
  0.06 |       |
   +-------+   |
   |   &   |   |
   +-------+   |
     |   |     |
     O   O     O
    0.2 0.3   0.1
     ^   ^     ^
  Alarm  |   Train
   not   |   late
   set   |
       Slept
      too long

In general, for quantitative evaluation basic events should be chosen
to: a) have clear semantics, b) be self-contained and independent, c)
have a probability value assigned to them.

When asked to compute the probability of the top-level element failing,
we traverse the tree bottom-up and apply the following rules for the
gates:

- And Gate: \[P_{out} = \prod_{i=1}^{n} P_i\]
- Or Gate: \[P_{out} = 1 - \prod_{i=1}^{n} (1 - P_i)\]

Note that the above echoes the De Morgan law \[\neg(A \wedge B) = (\neg A \vee \neg B)\] since
\(1-P\) can be thought of as being the probability of \(\neg X\) if \(P\) is the
probability of \(X\).
+ + Efficient Minimal Cut Sets Computation + +As minimal cut sets are so important for analysis (e.g., finding single +points of failure), an efficient computation is essential, especially +for large trees of complex systems. For this purpose, we can use the +following algorithm to compute the set of minimal cut sets: + +- Traverse the tree recursively. +- At an OR gate, generate one entry per input: \([(i_1), …, (i_n)]\). +- At an AND gate, generate one entry with all inputs: \([(i_1, …, + i_n)]\). +- Drop duplicates during the process. + + Fault Trees in Rust + +The following shows how a fault tree is defined in one of the projects +you will work on: + + #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] + pub enum Tree { + BasicEvent(Event), + IntermediateEvent(String, Box), + Gate(Gate), + } + +Similar to how we defined binary trees here, we have variants that +contain Tree — making the data structure recursive. + + Gates + +Gates store sub-trees and the gate-function itself: + + #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] + pub enum Gate { + Or(Vec), + And(Vec), + } + + Events + +Events store a name as well as a probability: + + #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] + pub struct Event(String, uom::si::rational64::Ratio); + + Cut Sets + +A cut set, as defined above, is a set of events for which the tree +evaluates to true if we set the respective events to true and traverse +the tree upwards. 
Here is a way to test if a certain set of events is a
cut set:

    impl Tree {
        fn cut_set(&self, events: &BTreeSet<Event>) -> bool {
            match self {
                Tree::BasicEvent(event) => events.contains(event),
                Tree::IntermediateEvent(_, subtree) => subtree.cut_set(events),
                Tree::Gate(gate) => match gate {
                    Gate::Or(subtrees) => subtrees.iter().any(|subtree| subtree.cut_set(&events)),
                    Gate::And(subtrees) => subtrees.iter().all(|subtree| subtree.cut_set(&events)),
                },
            }
        }
    }

You might notice how well the fault tree structure and logic maps to the
algorithm’s match statement (empty, gate, event) and subtree iteration
(or → any, and → all).

The following computes the set of minimum cut sets in a naive fashion:

    fn naive_minimal_cut_sets(&self) -> BTreeSet<BTreeSet<Event>> {
        let mut last_set = self.cut_sets();
        let mut current_set = self.cut_sets();
        loop {
            let mut drop_set = BTreeSet::new();
            for subset in &current_set {
                let s = BTreeSet::from_iter(vec![subset.clone()]);
                let others = current_set.difference(&s).cloned().collect::<BTreeSet<_>>();
                for other in others.into_iter() {
                    if subset.is_subset(&other) {
                        drop_set.insert(other);
                    }
                }
            }
            current_set = current_set.difference(&drop_set).cloned().collect();
            if current_set.len() < last_set.len() {
                last_set = current_set.clone();
                continue;
            } else {
                break;
            }
        }
        current_set
    }

The rationale is the following: We start with all cut sets (including
those that are not minimal). In every iteration of the loop, we attempt
to make this collection smaller. As soon as we no longer succeed, we
break. Removal itself works by comparing all sets with each other and if
one is the subset of another set, we drop the other set (as it is not
minimal).

Above, we showed one of many algorithms; there are others that compute
the MCS in a way that has a smaller computational complexity.
+ + U09: Out in the Woods + +Source: Andreas Schmidt + +This has been a tough journey with DSys so far: You learned a lot, but +sitting in front of a computer all the time was quite stressful. It’s +about time to go outdoors and enjoy nature (if you are binge-learning +this course and have been sitting in front of the PC the whole day, go +outside ASAP). + +Now that you are back from taking a close look at the trees outside, we +learn how to implement binary trees in Rust and afterwards we discuss +how fault trees can be used for dependability analysis. + + S09: Sample Solution + +Trees + +- depth() + + fn depth(&self) -> u32 { + match self { + Self::Empty => 0, + Self::NonEmpty(t) => 1 + u32::max(t.left.depth(), t.right.depth()), + } + } + +- leaves() + + fn leaves(&self) -> Vec<&T> { + match self { + Self::Empty => { + vec![] + } + Self::NonEmpty(tree) => { + let TreeNode { + element, + left, + right, + } = &**tree; + match (left, right) { + (Self::Empty, Self::Empty) => vec![element], + (left, right) => { + let mut leaves = left.leaves(); + leaves.extend(right.leaves()); + leaves + } + } + } + } + } + +- project_inorder() + + fn project_inorder(&self) -> Vec<&T> { + match self { + Self::Empty => vec![], + Self::NonEmpty(t) => { + let mut l = t.left.project_inorder(); + l.push(&t.element); + l.append(&mut t.right.project_inorder()); + l + } + } + } + +- project_postorder() + + fn project_postorder(&self) -> Vec<&T> { + match self { + Self::Empty => vec![], + Self::NonEmpty(t) => { + let mut l = t.left.project_postorder(); + l.append(&mut t.right.project_postorder()); + l.push(&t.element); + l + } + } + } + +- find() + + fn find(&self, f: fn(&T) -> bool) -> Option<&T> { + match self { + Self::Empty => None, + Self::NonEmpty(t) => { + if f(&t.element) == true { + Some(&t.element) + } else { + if let Some(v) = t.left.find(f) { + return Some(v); + } + if let Some(v) = t.right.find(f) { + return Some(v); + } + None + } + } + } + } + +- balanced() + + fn 
balanced(&self) -> bool { + match self { + Self::Empty => true, + Self::NonEmpty(t) => { + t.left.balanced() + && t.right.balanced() + && ((t.left.depth() as i64) - (t.right.depth() as i64)).abs() <= 1 + } + } + } + +- balance() + + fn balance(self) -> Self { + let array: Vec = self.project_inorder().into_iter().cloned().collect(); + Self::from_sorted(&array) + } + + fn from_sorted(slice: &[T]) -> Self { + if slice.len() == 0 { + Self::Empty + } else { + let mid_index = slice.len() / 2; + let mid = &slice[mid_index]; + let left = Self::from_sorted(&slice[0..mid_index]); + let right = Self::from_sorted(&slice[mid_index + 1..slice.len()]); + Self::NonEmpty(Box::new(TreeNode { + element: mid.clone(), + left, + right, + })) + } + } + +- map(): + + fn map(self, f: fn(T) -> U) -> BinaryTree { + match self { + Self::Empty => BinaryTree::Empty, + Self::NonEmpty(t) => { + let element = f(t.element); + let left = t.left.map(f); + let right = t.right.map(f); + BinaryTree::NonEmpty(Box::new(TreeNode { + element, + left, + right, + })) + } + } + } + +- fold(): + + fn fold(&self, acc: A, f: fn(A, &T) -> A) -> A { + match self { + Self::Empty => acc, + Self::NonEmpty(t) => { + let acc = t.left.fold(acc, f); + let acc = f(acc, &t.element); + t.right.fold(acc, f) + } + } + } + +Fault Trees + +- MCS: [[V], [S1,S2], [S1,S3], [S2,S3]] + +- MPS: [[V,S1,S2],[V,S1,S3],[V,S2,S3]] + +- Top-Level Probability: + + - S1 & S2 (and others): 0.01 + - || over &: 0.0297 + - Top-Level ||: 0.0394 + + Summary + + What did you learn? + +- How tree data structures and algorithms are implemented in Rust. +- How fault trees are used to do dependability analysis. +- How fault trees and some of their algorithms can be implemented in + Rust. + + Where can you learn more? + +- Embedded Software Development for Safety-Critical Systems: Ch. 
12 +- Fault Tree Analysis: + - on Wikipedia + - Survey by Enno Ruijters and Mariëlle Stoelinga (University of + Twente) + - Overview Article by Sohag Kabir (University of Hull) + + W09: Work Sheet + +Tree Algorithms + +One of the projects you get assigned to work on makes use of tree data +structures. To prepare you, this work sheet focuses on implementing +tree-based algorithms. + +Here is an example_tree for which we show outputs for every method to be +implemented: + + 4 + / \ + 3 6 + / / \ + 2 5 7 + / + 1 + +Informational Algorithms + +- Add a method depth that computes the depth or height of the tree: + fn depth(&self) -> u32 + + assert_eq!(example_tree.depth(), 4); + +- Add a method leaves that returns a vector with all the leaf + elements: fn leaves(&self) -> Vec<&T> + + assert_eq!(example_tree.leaves(), vec![&1, &5, &7]); + +Projecting + +- Add a method project_inorder that returns the tree elements + in-order: fn project_inorder(&self) -> Vec<&T> + + assert_eq!(example_tree.project_inorder(), vec![&1, &2, &3, &4, &5, &6, &7]); + +- Add a method project_postorder that returns the tree elements + post-order: fn project_postorder(&self) -> Vec<&T> + + assert_eq!(example_tree.project_postorder(), vec![&1, &2, &3, &5, &7, &6, &4]); + +Finding + +- Add a method find that returns the first element where a predicate f + returns true: fn find(&self, f: fn(&T) -> bool) -> Option<&T> + + assert_eq!(example_tree.find(|&e| e >= 5), Some(&6)); + +Balancing + +- Add a method balanced that returns whether a tree is balanced or not + (height difference between leaves max. 
1): + fn balanced(&self) -> bool + + assert_eq!(example_tree.balanced(), false); + +- Add a method balance that turns a tree into a balanced version: + fn balance(self) -> Self + + 4 + / \ + / \ + 2 6 + / \ / \ + 1 3 5 7 + +Map & Fold + +- Add a method map that turns each element of the tree into something + different: + fn map(self, f: fn(T) -> U) -> BinaryTree + + assert_eq!(example_tree.map(|e| e * 2).project_inorder(), vec![&2, &4, &6, &8, &10, &12, &14]); + +- Add a method fold that traverses a tree inorder and folds the values + to an accumulator: fn fold(&self, acc: A, f: fn(A, &T) -> A) -> A + + assert_eq!(example_tree.fold(0, |a,e| a + e), 28); + +Fault Tree Analysis + +We consider a triple-modular redundancy scheme with a voter (V) and +three systems (S1, S2, S3). The fault tree of this system looks as in +the following diagram. + + System + failed + | + +-----+ + | >=1 | + +-----+ + | | + | +--+ + | | + | +-------+ + | | >=1 | + | +-------+ + | | | +----------------+ + | | +---------+ | + | | | | + | +-------+ +-------+ +-------+ + | | & | | & | | & | + | +-------+ +-------+ +-------+ + | | | | | | | + O O O O O O O + ^ ^ ^ ^ ^ ^ ^ + V | S2 | S3 | S3 + failed | failed | failed | failed + S1 S1 S2 + failed failed failed + +Your task is now to: + +- Compute the minimal cut sets. +- Compute the minimal path sets. +- Compute the top-level failure probability using the gate formulas + and \(P_V = 0.01\) and \(P_{S1} = P_{S2} = P_{S3} = 0.1\). + + Generics and Traits + +You have already encountered generic types and traits and now is the +time to take a closer look at these two fundamental features of Rust. +Both allow you to write code that is able to operate on many different +and not just a single type. + + This section is intentionally kept brief and you should read the + excellent 10th chapter of the Rust book if you have any doubts or want + a more in-depth introduction to generics and traits. 

 Generics

Generic Structs and Enums

First, we look at a generic type: Point<T>, a 2-dimensional point that
can be defined for different scales:

    struct Point<T> {
        x: T,
        y: T,
    }

    fn main() {
        let integer = Point { x: 5, y: 10 };
        let float = Point { x: 1.0, y: 4.0 };
        // let not_possible = Point { x: 1.0, y: 4 };
    }

Note how the type itself is independent of the type that is used for the
two dimensions. We can use, e.g., integers or floats to specify them. It
is also possible to use complex numbers (or something awkward such as
strings) as instantiations of T, as long as both are the same.

Two common examples for generic enums are the Result<T, E> and Option<T>
types, that are defined like this:

    enum Option<T> {
        Some(T),
        None,
    }

    enum Result<T, E> {
        Ok(T),
        Err(E),
    }

In both cases, the variants can contain arbitrary types (or none for
None), that can, e.g., be extracted via pattern matching.

Generic Functions

Another typical use case for generics are functions that are capable of
working on any type. For instance, consider the following function:

    fn largest<T>(list: &[T]) -> T {
        let mut largest = list[0];

        for &item in list {
            if item > largest {
                largest = item;
            }
        }

        largest
    }

First, we realize that it is generic in T. To achieve this, all code
inside the body must be independent of which T we have and what is
supported on T. If we look through it line by line, we see that T itself
is once assigned (which is supported by any type) and compared against
another instance of T. The latter aspect is the reason why you get a
compiler error when executing this code: nobody guaranteed that you can
do T > T. We can achieve this by adding a trait bound, i.e., limiting on
which types our function is defined.
For supporting >, T must implement
the std::cmp::PartialOrd trait and we change the function signature to:

    fn largest<T: std::cmp::PartialOrd>(list: &[T]) -> T

Monomorphization

Finally, a note on performance: Supporting generics means that the
different types (remember this implements polymorphism) must be handled
differently at machine-level, despite their common definition. One way
to do this is using virtual function calls, where we have a distinction
at run-time which type is present and which code is executed. However,
Rust uses a different approach, where the generic code is monomorphized,
i.e., for each used type, a distinct implementation is generated,
optimized, and referenced at the call-site. This increases compile time
but reduces run time, making Rust generics faster than generics in some
other languages.

 Traits

Using Traits

As you have seen already, traits encapsulate a certain feature or
property a type has or supports. It can be seen as a capability:
something a type can do. In U04, you saw the std::iter::Iterator trait
which is implemented for types that can produce a sequence of values.

When we want to use traits, we must make sure that the trait itself is
in the current scope. Some of them are already, because they are part of
std’s prelude, but others must be used.

For example, the following code is only valid with the first line:

    use std::io::Write;

    let mut buf: Vec<u8> = vec![];
    buf.write_all(b"hello");

Vec<u8> implements Write, but for Write::write_all to be accessible, it
must be in scope. This is to avoid naming conflicts, as types can
implement multiple traits with, potentially, identical function names.

In these cases, you use fully qualified method calls:

    Write::write_all(&mut buf, b"hello");
    OtherWriteTrait::write_all(&mut buf, b"hello");

Implementing Traits

You can define your own traits like this:

    /// A trait for things that can be moved around
    trait Moveable {
        fn move(&mut self, distance: Point);
        fn rotate(&mut self, angle: Angle);
    }

Implementing it can be done like:

    impl Moveable for Container {
        fn move(&mut self, distance: Point) {
            self.origin += distance;
        }

        fn rotate(&mut self, angle: Angle) {
            // ...
        }
    }

Note that while you can write your own traits and implementations, it is
also possible to implement third-party traits for your own types (as you
see in a minute).

Traits can also be used to implement Default Methods. For instance,
consider a Sink writer (i.e. it implements Write) that simply discards
the data (you can think of this as > /dev/null on Linux):

    pub struct Sink;

    use std::io::{Write, Result};

    impl Write for Sink {
        fn write(&mut self, buf: &[u8]) -> Result<usize> {
            Ok(buf.len()) // claim the full data has been written
        }

        fn flush(&mut self) -> Result<()> {
            Ok(())
        }
    }

As you see, we only specified the write and flush methods. If something
implements Write, it also supports the write_all method you have seen
before. This is done via a default implementation in the Write trait:

    trait Write {
        fn write(&mut self, buf: &[u8]) -> Result<usize>;
        fn flush(&mut self) -> Result<()>;
        fn write_all(&mut self, buf: &[u8]) -> Result<()> {
            let mut bytes_written = 0;
            while bytes_written < buf.len() {
                bytes_written += self.write(&buf[bytes_written..])?;
            }
            Ok(())
        }
    }

Utility Traits

We conclude this section by having a closer look at a couple of utility
traits that are part of the standard library… and can be used to work
with fantasy creatures.

With Drop, we can implement a custom destruction method for a type:

    struct Pokémon {
        name: String,
        // ...
+ } + + impl Pokémon { + fn new(name: String) -> Self { + println!("A wild {} appears!", name); + Self { + name, + // ... + } + } + } + + impl Drop for Pokémon { + fn drop(&mut self) { + println!("{} disappears!", self.name); + } + } + + fn main() { + println!("Game start."); + { + let pikachu = Pokémon::new("Pikachu".into()); + } // pikachu is dropped at the scope end + println!("Game end."); + } + +With Default, we can define default values: + + enum Pokéball { + Empty, + Filled(Pokémon), + } + + impl Default for Pokéball { + fn default() -> Self { + Pokéball::Empty + } + } + + fn main() { + let ball : Pokéball = Default::default(); + } + +Finally, there are the From / Into and TryFrom / TryInto trait pairs +used to do conversions: + + impl From for Pokéball { + fn from(pokémon: Pokémon) -> Self { + println!("{} was captured.", pokémon.name); + Self::Filled(pokémon) + } + } + + fn main() { + let pikachu = Pokémon::new("Pikachu".into()); + let ball: Pokéball = pikachu.into(); + // or + // let ball = Pokéball::from(pikachu); + } + +Note that by convention (and default implementation), if you implement +From, you get the inverse Into for free. The Try variants are fallible, +i.e they return Result. + + Macros + +In your computing career, you might have heard the term macros before +(which is short for macro instruction, i.e. a long/large instruction). +Abstractly, it is defined as a rule or pattern that specifies how a +certain input should be mapped to a replacement output (cf. Wikipedia). +Now this sounds like any function, and indeed a macro is a function. The +difference, though, is that macros usually produce inputs to a computer +program (e.g. characters, keystrokes, or syntax trees) — automating the +process of using the program. Spreadsheet or photography applications +often provide this to turn a sequence of arbitrary process steps into a +single instruction. For us, we mostly care about macros that take code +and produce (usually more) code. 
The expansion happens during +compilation, which means that compilation metadata is also available. + +In this section, we will learn about different types of macros. You are +already familiar with the println! macro for printing a formatted +string. + + Reasons for Macros + +Before we get into the details of how to use Rust macros, let’s revisit +the various use cases: + +- Avoid Boilerplate Code +- Domain Specific Languages +- Conditional Compilation +- Inlining + + Avoid Boilerplate Code + +First of all, as macros simply automate the process of code-production, +they can be used to simplify the generation of repetitive code. This +means, whenever you realize that you are writing the same kind of code +over and over (with the only exception that you might be doing it for +different types or other slight variations), macros can help. Note that, +in the case of your variable being a type, generics should be an easier +solution. Leveraging macros in these situations increases the +maintainability with respect to: + +- readability, i.e. developers first understand the macro (or infer it + from the name) and then the usages, +- changeability, i.e. changes can be done once and are applied + everywhere. + + The best code is no code at all. Every line of code you willingly + bring into the world is code that has to be debugged, code that has to + be read and understood, code that has to be supported. - Jeff Atwood + +A straightforward example is the println!() macro that allows us to pass +a format string and a variable list of arguments. We can use +cargo-expand to show how all macros in our code are expanded. 
This piece +of code: + + fn main() { + println!("Macro magic {}!", "rulz!"); + } + +is expanded into: + + #![feature(prelude_import)] + #[prelude_import] + use std::prelude::rust_2021::*; + #[macro_use] + extern crate std; + fn main() { + { + ::std::io::_print(::core::fmt::Arguments::new_v1(&["Macro magic ", + "!\n"], &[::core::fmt::ArgumentV1::new_display(&"rulz!")])); + }; + } + +While there are many new pieces of code added, look at the usage of +_print function. Imagine, you had to write this code every time you +wanted to print something as simple as the string above. Additionally, +println!() supports a variable number of arguments (which normal +functions do not) so the macro helps here too by turning arguments into +lists of elements. + + Domain-Specific Languages (DSL) + +Another common use case for macros are languages that are +domain-specific. This can mean various things, one example could already +be the table-based tests, we wrote in U02. A tester only needs to +understand the test-specification language and needs to have no clue +about Rust. + +While Python is not really domain-specific (it is a general purpose +language), the following example showcases how a DSL would be used. The +inline-python crate provides the python!{...} macro that allows a +developer to write Python code in Rust. This includes that data can be +shared between the two. Here is an example where we assume we have an +existing algorithm in Python and want to use it one-to-one in Rust: + + use inline_python::{Context,python}; + let c: Context = python! { + def fib(n): + if n == 0 or n == 1: + return 1 + else: + return fib(n-1) + fib(n-2) + res = fib(7) + }; + + assert_eq!(c.get::("res"), 21); + + Conditional Compilation + +When you develop larger software projects, you face the challenge that +some parts of your code are necessary in some situations but not all. An +example could be debugging code or platform-specific +code (e.g. Windows-specific behaviour). 
The manual solutions to this are +to comment in/out code on demand or introduce global boolean variables +to enable/disable functionality. Global variables have the drawback that +the code itself is still compiled into the binary, i.e. you are “paying +in binary size” for code that is never used. Comments overcome this +issue, but adding/removing comments is tedious (and does not integrate +well with version control). Furthermore, as both things are done +manually, they impose a risk for dependability (as both comments and +global variables can be overlooked). + +The most elegant and dependable solution is to use conditional +compilation. This means that, at the time of compilation, various +conditions get evaluated and depending on the result, parts of the code +are still used or not. Let’s take the ntohs function as an example, +which converts a network u16 to a host u16, respecting endianness. What +does this mean? While people have agreed that multi-byte numbers on the +network are sent as big-endian (most significant byte first), most +desktop systems are little-endian (most significant byte last). Hence, +our function should take the CPU endianness into account, which is +available as the target_endian variable. + + #[cfg(target_endian = "big")] + fn ntohs(input: u16) -> u16 { + input + } + + #[cfg(target_endian = "little")] + fn ntohs(input: u16) -> u16 { + input.swap_bytes() + } + + fn main() { + println!("{:X}", ntohs(0xA010)); + } + +Here, on a big-endian system, ntohs is an identity function (which might +be optimized away by a clever compiler). On the little-endian system, +however, the bytes must be swapped. + + In fact, the attributes here are built-in attributes and not + (attribute-)macros, so the compiler itself knows how to interpret + them. However, other more complex forms of conditional compilation can + be realized using macros. 
+
+  Inlining
+
+If you are writing performant software in a modular way, you often end
+up with functions that are called frequently (often referred to as hot
+functions). Entering and exiting a function does not come for
+free (variables must be copied, stacks prepared, etc.). A solution to
+this is to remove the function and inline its functionality, where it is
+needed. This has multiple drawbacks: a) readability is lost, as the
+function with a name is replaced with a (complex) expression, b)
+maintainability is lost, as changing the function means changing every
+occurrence. For these reasons, inlining should not be done manually, but
+rather using compiler-support. In C/C++, people often use macros for
+this or the inline keyword. The former will always do the replacement,
+while the latter leaves it to the compiler’s implementation.
+
+Similar to C/C++, we can use macros to inline functionality in Rust.
+However, the more common approach is to use attributes to specify
+whether a function is inline or not. There are four cases:
+
+- No attribute. If we do not specify anything, the compiler might
+  decide to inline it (depending on optimization level, function size
+  etc.). These functions are never inlined across crates.
+- #[inline] suggests the function to be inlined, also across crates.
+- #[inline(always)] strongly suggests the function to be inlined,
+  but the compiler might still decide not to (in exceptional cases).
+- #[inline(never)] strongly suggests the function should not be
+  inlined.
+
+  Note that, again, these are not attribute macros but built-in
+  attributes. However, we mention them here because a) other languages
+  use macros for inlining and b) attribute syntax is used, which makes
+  them look similar.
+
+  On Programming Syntax
+
+As mentioned before, macros take an input and produce an output (Rust
+code). 
In general, the grammar of a programming language defines how a +string (x = 5) is turned into a sequence of tokens +([Variable(x), Operator(=), Literal(5)]). These tokens are the building +blocks for the syntax of a language. + +In compiler terms, we go from a raw string via the process of lexing to +a token stream. The following line of code + + let value = 40 + 2; + //1 2 3 4 5 67 <- index + +is transformed into this stream of tokens: + + TokenStream [ + 1 Ident { sym: let }, + 2 Ident { sym: value }, + 3 Punct { char: '=', spacing: Alone }, + 4 Literal { lit: 40 }, + 5 Punct { char: '+', spacing: Alone }, + 6 Literal { lit: 2 }, + 7 Punct { char: ';', spacing: Alone } + ] + +A tokenstream can then be transformed into a Rust syntax fragment, +e.g. a statement in the case above or an expression (based on string +5 * 5). + + Macros in Rust + +Having talked about the pros and cons of macros, let’s see how to use +them in Rust. First, we must distinguish between two types: + +- Declarative Macros: + - declared using the macro_rules!() macro + - leverage special mini-language to declare macros (match & + replace) + - limited in functionality +- Procedural Macros: + - declared in a dedicated proc-macro crate + - take raw TokenStreams as both input and output + - offer maximum functionality + +Declarative Macros + +The first, and easier, class of macros are the declarative ones. They +can be defined using the macro_rules!() macro in any crate. They act in +a copy and paste manner, i.e. they have transformation rules that are +simply applied. The input to a declarative macro is a syntax +fragment (e.g. a expression, identifier, …) which is used to generate +code according to a template. Finally, a macro must be defined before +the invocation, limiting the places where it can be introduced. + +The general structure of a declarative macro is as follows: + + macro_rules! macro_name { + (matcher1) => { transcriber1 } + // ... 
+ (matcherN) => { transcriberN } + } + +The macro_name can be picked mostly freely and will be used to invoke +the macro. Afterwards, there is a set of matcher-transcriber pairs, +which can be thought of as patterns in pattern matching. + +Matchers try to match the given syntax fragment to its own regex. The +syntax fragments are also captured in metavariables, allowing access to +them. The following illustrates that + + ($var:ident, $val:expr) + +would match + + some_variable_name, 42 + 17 * 3 + +Matching sequences is also possible with $(). can be + +- *: any number of repetitions +- +: any number, but at least one +- ?: optional fragment, zero or one occurence + +An example would be this: + + $($key:expr => $value:expr),+ + +would match + +1 => 2 + 3, 4 => 5 * 6 + +These concepts are in play in the vec! macro (with invocations in +comments): + + macro_rules! vec { + () => { ... }; // vec![] + ($elem : expr ; $n : expr) => { ... }; // vec![1; 100] + ($($x: expr),+ $(,)?) => { ... }; // vec![1,2,3] or vec![1, 2, 3] + } + +Note that invocation of macros can be done with (), [], or {}. All of +them are equivalent. However, there are common conventions (e.g. [] for +collections, {} for larger blocks, and () for single-lines). + +A transcriber then declares how the captured metavariables are +transformed into code. This can make use of metavariables as mentioned +before. Here is an example of a macro that creates a vector of numbers +in [min, max) (exclusive end): + + macro_rules! ranged_vec { + ($min:expr, $max:expr) => { + ($min..$max).collect::>() + }; + } + +Procedural Macros + +This form of macro comes in three distinct types: + +- Function-Like Macros: custom!(...) +- Derive Macros: #[derive(CustomDerive)] +- Attribute Macros: #[CustomAttribute] + +In contrast to declarative macros, procedural macros must be defined in +a proc-macro library. They are also compiled differently and tend to +lead to an increase in compile-time compared to non-macro code. 
The +Cargo.toml must look like this: + + [package] + name = "dsys-macros" # arbitrary name + version = "0.1.0" + edition = "2021" + + [lib] + proc-macro = true + +Each proc macro is then a function in this library: + + #[proc_macro] + pub fn dsys(input: TokenStream) -> TokenStream { + let output = TokenStream::new(); + // ... do the actual work ... + output + } + +Function-like and attribute macros replace their input completely +(though parts of the input can be maintained within the transformation +function). Derive macros instead do not replace, but rather extend what +they are applied to, like this: + + #[derive(CustomDerive)] // <--- this + struct CustomStruct { + // ... + } + + // generates for example this: + impl CustomStruct { + // ... + } + +In contrast to declarative macros, the input token streams are taken +as-is (no matching applied) and the output token stream must also be +composed manually (as opposed to the transcriber syntax). In practice, +developers use the syn crate for parsing inputs and the quote crate for +producing outputs. syn can parse arbitrary Rust code into an Abstract +Syntax Tree (AST). Afterwards, one would analyze the AST and produce +tokens accordingly. For this use-case, quote!(...) helps as Rust code +passed in as ... is a TokenStream and can be treated as data. There is +also parse_quote!(...) which returns a parsed syn element instead of a +TokenStream. + + Macros in Action + + Function-Like Macro + +These macros are the most basic form as they can accept any input and +produce any output. The TokenStream resulting from the function is +injected inplace. This is often necessary for complex tasks, for +instance when computation over input must be done. Wherever possible, +declarative macros should be used instead of function-like procedural +macros (as they are simpler). Popular examples are for instance the +json! in serde. + +Here, we build our own macro timeit! 
that takes an arbitrary expression,
+measures how long it computes, and prints this to the console:
+
+    // lib.rs
+    use proc_macro::TokenStream;
+    use quote::quote;
+
+    #[proc_macro]
+    pub fn timeit(input: TokenStream) -> TokenStream {
+        let input_code = input.to_string();
+        let input: proc_macro2::TokenStream = input.into();
+        quote!({
+            let start = std::time::Instant::now();
+            let result = #input;
+            println!("`{}` took {:?}", #input_code, start.elapsed());
+            result
+        })
+        .into()
+    }
+
+Later, we use it like this:
+
+    // main.rs
+
+    use macros::timeit;
+
+    fn main() {
+        let f = timeit!(5 * 5);
+        println!("Result: {}", f);
+    }
+
+  Derive Macro
+
+These macros can only be applied to struct or enum declarations and
+cannot stand freely. Furthermore, they cannot alter the input stream,
+but rather add functionality to the input declaration. The most common
+use case is the automated implementation of traits and associated
+functionality (as seen in a previous section of this unit).
+
+Assume that we want to build a Description trait that can be
+automatically derived for types, including additional attributes. The
+usage looks like this:
+
+    // main.rs
+
+    #[derive(Description)]
+    pub enum Mode {
+        #[description("System completely disabled.")]
+        Off = 0,
+        #[description("System in limited recovery mode.")]
+        Recovery = 5,
+        #[description("System fully operational.")]
+        On = 9,
+    }
+
+    fn main() {
+        println!("{}", Mode::Recovery.description());
+        // Should print "[5] System in limited recovery mode."
+ } + +The implementation of the macro looks like this (click the “show hidden +button”, as this book misinterprets the quote syntax): + + // lib.rs + + use quote::quote; + + #[proc_macro_derive(Description, attributes(description))] + pub fn derive_description(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let input = parse_macro_input!(input as DeriveInput); + if let syn::Data::Enum(data) = input.data { + let arms: Vec<_> = data + .variants + .into_iter() + .map(enum_variant_to_match_arm) + .collect(); + + let ty = input.ident; + quote! { + impl Description for #ty { + fn description(&self) -> &str { + match self { + #(#arms),* + } + } + } + } + .into() + } else { + panic!("Description can only be derived on enums."); + } + } + + fn enum_variant_to_match_arm(variant: syn::Variant) -> proc_macro2::TokenStream { + let attribute_ident: proc_macro2::Ident = quote::format_ident!("description"); + + let description = variant + .attrs + .iter() + .find(|attr| { + attr.path + .get_ident() + .map(|ident| ident == &attribute_ident) + .is_some() + }) + .expect( + "When deriving Description, each variant must have one #[description(...)] attribute.", + ); + + let tokens = description.tokens.clone().into_iter().collect::>(); + if tokens.len() == 1 { + if let proc_macro2::TokenTree::Group(g) = &tokens[0] { + let description: syn::LitStr = syn::parse2(g.stream()) + .expect("#[description(...)] argument should be a literal string."); + + let discriminant = if let Some((_, discriminant)) = variant.discriminant { + discriminant.to_token_stream().to_string() + } else { + "?".to_string() + }; + let result = format!("[{}] {}", discriminant, description.value()); + let variant_ident = variant.ident; + quote! { + Self::#variant_ident => #result + } + } else { + panic!("#[description(...)] argument must be wrapped in ().") + } + } else { + panic!("#[description(...)] should have exactly one argument."); + } + } + +The macro first checks if it is applied to an enum. 
If so, each enum +variant is transformed into a match arm to be later added to the +impl Description block that implements the trait. Within +enum_variant_to_match_arm, we validate that the variant has an attribute +and the attribute has the following form: +#[description("A literal string")]. Eventually, the variant identifier +and the literal string are used to compose the description text. + + Attribute Macros + +Finally, attribute macros also work on items (e.g. struct, enum, or +function) but replace instead of extend. This can be seen from their +signature in the following example: + + #[proc_macro_attribute] + pub fn amend(attr: TokenStream, item: TokenStream) -> TokenStream { + println!("attr: \"{}\"", attr.to_string()); + println!("item: \"{}\"", item.to_string()); + item + } + +The attribute as well as the item itself are passed to the +transformation function. Inside the attribute, we can use expressions of +arbitrary complexity. This can be seen here: + + #[amend(baz => bar)] + fn foo() {} + // out: attr: "baz => bar" + // out: item: "fn foo() {}" + +The use cases for this are various: + +- Framework annotations, e.g. declare a function as a backend route in + rocket.rs. +- Transparent middleware, e.g. injecting tracing functionality. +- Type transformation, e.g. alter the input struct. +- Test generation, e.g. generate same test for different cases / + configurations. + +A helpful crate in this case is darling, which let’s us declare a struct +into which the arguments of the attribute are parsed automatically. 
The +following is similar to the timeit function-like macros, but this time +as an attribute that can be added to functions (as opposed to +expressions for timeit): + + use macros::timed; + + #[timed(fmt = "{} elapsed")] + fn the_answer() -> usize { + 42 + } + + fn main() { + let a = the_answer(); // will print "100ns elapsed" or similar + println!("The answer is {}", a); + } + +The macro is implemeted as follows: + + use darling::FromMeta; + + #[derive(Debug, FromMeta)] + struct MacroArgs { + fmt: String, + } + + #[proc_macro_attribute] + pub fn timed(args: TokenStream, input: TokenStream) -> TokenStream { + let attr_args = parse_macro_input!(args as syn::AttributeArgs); + let input = parse_macro_input!(input as syn::ItemFn); + + let args = match MacroArgs::from_list(&attr_args) { + Ok(v) => v, + Err(e) => { + return TokenStream::from(e.write_errors()); + } + }; + + let fmt = args.fmt.replace("{}", "{0:#?}"); + + let block = input.block; + let block = parse_quote! { + { + let start = std::time::Instant::now(); + let result = #block; + println!(#fmt, start.elapsed()); + result + } + }; + + syn::ItemFn { block, ..input }.to_token_stream().into() + } + +First, a darling::FromMeta struct is defined, which is then parsed and +used to make the resulting code argument-dependent. In particular, the +format string of println! is based on the argument. In this use case, +you also see how we can use the struct copy operation +({ changed, ..original }) to modify syn structures. Concretely, we parse +an ItemFn, modify its block (by wrapping it), and return a tokenized +version again. + + Hygiene + +In the context of macros, you often read about hygiene (no worries, no +showers involved): Before we define hygiene, let’s have a look at an +unhygienic C example: + + #include + + #define TIMES_TWO(X) X + X + + int main() { + int x = TIMES_TWO(3) * 2; + printf("%d", x); + return 0; + } + +Given the name of the macro, the developer probably intended this to be +self-contained, i.e. 
the input number is doubled. However, the example
+use produces 9 instead of 12, as the macro is a 1:1 replacement and
+operator precedence rules are applied afterwards. A common fix is to put
+brackets around these kinds of macros to overcome this (round brackets
+for values; curly brackets for scopes in some C variants).
+
+Another example is a macro that uses identifiers:
+
+    #include <stdio.h>
+    #define MODIFY_X(VALUE) x = VALUE;
+
+    int main() {
+        int x = 5;
+        MODIFY_X(42)
+        printf("%d", x);
+        return 0;
+    }
+
+Here, by accident or not, x is used in both the macro itself and the
+destination scope. Again, you can see that the 1:1 replacements could
+lead to unforeseen and hard to debug effects on their environment.
+
+In consequence, we call a macro hygienic if it is neither affected by
+its surroundings, nor does it affect them. Without further
+limitation, this sounds like macros can either (a) be hygienic and
+useless (no effect) or (b) have an effect and be dirty. In fact, we have
+to clarify surroundings more: Obviously, macros add functionality
+(e.g. by introducing new items such as functions, structures, statements,
+etc.). This functionality sometimes includes items with identifiers
+(functions, variables, structs, etc.). If a macro uses an identifier
+that is already present in the scope in which it is executed, it is not
+clear how ambiguities are resolved. Here, hygiene comes into play:
+
+- For module-level items (e.g. structs, functions), the compiler
+  simply complains about the reused identifier (forcing the developer
+  to act).
+- For function-level local variables, each macro invocation creates
+  its own scope/context.
+- For expressions (as in the C example above), the macro returns an
+  expression that stands for itself and is not syntactically merged
+  with the destination code.
+
+In the following we have two pieces of code:
+
+    macro_rules! 
keep_unchanged { + ($x:expr) => { + value = $x; + } + } + + let mut value = 1; + keep_unchanged!(2); + assert_eq!(value, 1); + +The compiler complains that value is not found in the scope (showing +that the macro expansion has its own scope). In the second code example, +we pass an identifier of the environment to the macro, allowing the +macro to modify it: + + macro_rules! modify { + ($var:ident, $val:expr) => { + $var = $val; + }; + } + + let mut value = 0; + modify!(value, 42); + assert_eq!(value, 42); + +Finally, what is about identifiers used in the macro such as Instant? +When refering to items (types, functions, …) the lookup happens as would +any other lookup at the call site of the macro. This means that if +Instant was not brought into scope by use, the compilation fails. +Furthermore, it can happen that others than the intended item get used +due to having the same name (shadowing). As a consequence, the +recommendation is to use fully qualified module paths to items in a +macro (e.g. std::time::Instant). + + Reasons Against Macros + +Now that we have covered use cases and implementations of macros, you +are probably excited to use them (all over the place). But before you do +so, let’s think a second about what drawbacks they have: + +First of all, macros increase the complexity of your code, as + +- procedural macros introduce an additional crate, +- declarative macros use the special language for matchers and + transcribers, and +- non-trivial macros should be written using syn and quote, which you + must learn first. + +The usages of macros tend to look simple, but can be responsible for +quite some complex code. If used wisely, this is good (as on the +usage-side, complexity is reduced). If not, a macro-solution can be less +maintainable and reliable as writing the code manually. + +Second, macros can be hard to maintain, as developers must understand +the transformation logic for all the use cases. 
While this would be the
+same for any function, macros tend not to have such a clear and obvious
+API.
+
+This also causes bugs in macros to be harder to find and fix than normal
+code (though cargo-expand can help here).
+
+Macros also make it easy to implement unidiomatic behaviour, i.e. you
+can use them to write code that no longer feels like Rust — making it
+potentially hard to understand for others.
+
+Especially new programmers also tend to overuse macros, as it seems like
+a powerful tool. Indeed, it is, but it should only be used with care
+and where appropriate.
+
+As macros are so powerful, there are also bad ways to use them. One
+anecdote can be found on StackOverflow. Here, we see that the C/C++
+macro system uses the so-called preprocessor, i.e. before compilation
+the macros are one-to-one text replacements, agnostic of language
+syntax. In fact, the macro is used to fix the broken syntax. In Rust,
+this is not possible, as we always work on token trees or token streams
+and not pure text. However, you get the idea that not every use of
+macros is really sensible.
+
+  U10: Metaprogramming
+
+We introduce generics and traits as a means to easily and correctly
+reuse code (or write code that adapts to the use case). We make use of
+them to build both run-time and compile-time state machines. Finally, we
+also cover macros, another way to write code that writes code
+(i.e. metaprogramming).
+ + S10: Sample Solution + +IntoIterator + + struct ListIterator(List); + + impl Iterator for ListIterator { + type Item = T; + + fn next(&mut self) -> Option { + if let Some(x) = self.0.head() { + self.0 = self.0.clone().tail(); + Some(x) + } else { + None + } + } + } + + impl IntoIterator for List { + type Item = T; + + type IntoIter = ListIterator; + + fn into_iter(self) -> Self::IntoIter { + ListIterator(self) + } + } + + #[test] + fn test_into_iter() { + let mut list = List::Empty; + list.add(5u8); + list.add(7u8); + for i in list { + println!("{}", i); + } + } + +FromIterator + + impl FromIterator for List { + fn from_iter>(iter: A) -> Self { + let mut list = List::Empty; + for i in iter { + list.add(i); + } + list + } + } + + + #[test] + fn test_from_iter() { + let numbers = std::iter::repeat(5).take(5); + let list = List::from_iter(numbers); + assert_eq!(list.length(), 5); + } + +Run-Time State Machines + + #[derive(Clone)] + pub struct DFA + where + S: Clone + PartialEq, + { + start: S, + accept: Vec, + transition: fn(S, I) -> S, + } + + impl DFA + where + S: Clone + PartialEq, + { + pub fn new(start: S, accept: Vec, transition: fn(S, I) -> S) -> Self { + Self { + start, + accept, + transition, + } + } + + pub fn run(&self, mut input: Vec) -> bool { + let mut state = self.start.clone(); + input.reverse(); + while let Some(symbol) = input.pop() { + state = (self.transition)(state, symbol); + } + self.accept.contains(&state) + } + } + + #[derive(Clone, Copy, PartialEq)] + enum State { + Ready, + AwaitMoney { cents: u32 }, + Error, + } + + enum Input { + SelectBeverage, + Insert1Euro, + Insert50Cent, + Insert20Cent, + Insert10Cent, + } + + fn transition(state: State, symbol: Input) -> State { + let new_state = match (state, symbol) { + (State::Ready, Input::SelectBeverage) => { + println!("You selected Ferriskola! 
An excellent choice :-)"); + State::AwaitMoney { cents: 0 } + } + (state, Input::SelectBeverage) => { + println!("Cannot select a beverage in this state."); + state + } + (State::AwaitMoney { cents }, Input::Insert1Euro) => { + State::AwaitMoney { cents: cents + 100 } + } + (State::AwaitMoney { cents }, Input::Insert50Cent) => { + State::AwaitMoney { cents: cents + 50 } + } + (State::AwaitMoney { cents }, Input::Insert20Cent) => { + State::AwaitMoney { cents: cents + 20 } + } + (State::AwaitMoney { cents }, Input::Insert10Cent) => { + State::AwaitMoney { cents: cents + 10 } + } + (State::Ready, _) => { + println!("Pick a beverage first before putting in money."); + State::Ready + } + (State::Error, _) => { + println!("The system is in error state. Please ask the operators to fix it."); + State::Error + } + }; + + if let State::AwaitMoney { cents } = new_state { + if cents >= 280 { + println!("Enjoy your Ferriskola. Here are {}c back", cents - 280); + State::Ready + } else { + println!("{}c more to go", 280 - cents); + new_state + } + } else { + new_state + } + } + + fn main() { + let dfa = DFA::new(State::Ready, vec![State::Ready], transition); + assert!(dfa.run(vec![ + Input::SelectBeverage, + Input::Insert1Euro, + Input::Insert1Euro, + Input::Insert10Cent, + Input::Insert1Euro + ])); + } + +Compile-Time State Machine + + use std::marker::PhantomData; + + struct MiniPlumber; + struct NormalPlumber; + struct FirePlumber; + + struct Plumber { + data: PhantomData, + } + + struct Shroom; + struct FireFlower; + + impl Plumber { + fn hit(self) { + println!("Game Over"); + panic!(); + } + + fn consume_shroom(self, _item: Shroom) -> Plumber { + println!("Yippie!"); + Plumber:: { + data: Default::default(), + } + } + + fn consume_fireflower(self, _item: FireFlower) -> Plumber { + println!("Whapp whapp whapp!"); + Plumber:: { + data: Default::default(), + } + } + } + + impl Plumber { + fn new() -> Plumber { + println!("Flitze-Go!"); + Plumber:: { + data: Default::default(), 
+ } + } + + fn hit(self) -> Plumber { + println!("Aua!"); + Plumber:: { + data: Default::default(), + } + } + + fn consume_fireflower(self, _item: FireFlower) -> Plumber { + println!("Whapp whapp whapp!"); + Plumber:: { + data: Default::default(), + } + } + } + + impl Plumber { + fn hit(self) -> Plumber { + println!("Aua!"); + Plumber:: { + data: Default::default(), + } + } + } + + fn main() { + let plumber = Plumber::new(); + let plumber = plumber.hit(); + let plumber = plumber.consume_fireflower(FireFlower); + let plumber = plumber.hit(); + let plumber = plumber.consume_fireflower(FireFlower); + let plumber = plumber.hit(); + let plumber = plumber.hit(); + let plumber = plumber.consume_shroom(Shroom); + let plumber = plumber.hit(); + let plumber = plumber.hit(); + } + +Macros + + # Cargo.toml + + [package] + name = "macros" + version = "0.1.0" + edition = "2021" + + [lib] + proc-macro = true + + [dependencies] + proc-macro2 = "1.0.32" + + [dependencies.syn] + version = "1.0.102" + features = [ + "full", + ] + + // lib.rs + use syn::parse_quote; + + #[proc_macro_attribute] + pub fn repeat(_: TokenStream, input: TokenStream) -> TokenStream { + let input: syn::ItemFn = syn::parse2(input.into()).unwrap(); + + let ty = if let syn::ReturnType::Type(_, ty) = input.sig.output { + ty + } else { + parse_quote!(()) + }; + + let output: syn::ReturnType = parse_quote! { -> impl Iterator }; + + let sig = syn::Signature { + output, + ..input.sig + }; + + let block = input.block; + let block = parse_quote! { + { + let result = #block; + std::iter::repeat(result) + } + }; + + syn::ItemFn { + sig, + block, + ..input + } + .to_token_stream() + .into() + } + + State Machines + +With our knowledge on generics, we start looking into state machines — a +common tool to both model and implement dependable systems. + +First, we start with run-time state machines, allowing you to model or +execute them in your code. 
Later, we look at compile-time state +machines, allowing you to enforce that the code you write complies with +the state machine (e.g. a certain operation must be executed first, +before another is available). + + Run-time + +Here is a general definition for a deterministic finite automaton (DFA): + + #[derive(Clone)] + pub struct DFA + where + S: Clone + PartialEq, + { + start: S, + accept: Vec, + transition: fn(S, I) -> S, + } + + impl DFA + where + S: Clone + PartialEq, + { + pub fn new(start: S, accept: Vec, transition: fn(S, I) -> S) -> Self { + Self { + start, + accept, + transition, + } + } + + pub fn run(&self, mut input: Vec) -> bool { + let mut state = self.start.clone(); + input.reverse(); + while let Some(symbol) = input.pop() { + state = (self.transition)(state, symbol); + } + self.accept.contains(&state) + } + } + +Note that: + +- The automaton is generic in S (the states) and I (the inputs). +- The definition enforces that our state type S is used for the single + start state, the accept states as well as an input and output of the + transition. +- The run method executes our DFA with an input vector, returning if + we end in an accept state when the input is consumed. + +Here is the example usage for a DFA that checks if there is an even +count of zeros: + + #[derive(Clone, Copy, PartialEq)] + enum State { + Even, + Odd, + Error, + } + + fn main() { + let dfa = DFA::new(State::Even, vec![State::Even], |state, symbol| { + match (state, symbol) { + (State::Even, 0) => State::Odd, + (State::Odd, 0) => State::Even, + (state, 1) => state, + _ => State::Error, + } + }); + assert!(dfa.run(vec![])); + assert!(!dfa.run(vec![0, 1])); + assert!(!dfa.run(vec![0, 1, 1])); + assert!(dfa.run(vec![0, 1, 1, 0])); + assert!(dfa.run(vec![0, 0])); + } + +Note that: The input space is i32, even though we only allow 0 and 1. +Hence passing vec![-5] is valid code and leads to the DFA entering the +error state. 
A workaround would be to define a separate input enum with +two variants (Zero, One). + +An issue with this is that invalid transitions are detected at runtime +only. Handling this means that we typically go to the error state. +However, such a transition could be due to an implementation bug, +i.e. the error state should never have been entered but rather this +transition should not be valid. With this, we come to the topic of +compile-time state machines. + + Compile-time State Machines + +For the coming section, we use the following state machine for a certain +device: + + +----------+ +--------+ +--------+ + | +----+> +----+> | + | Inactive | | Active | | Paused | + | <+----+ <+----+ | + +---+----^-+ +--------+ +----+-+-+ + | | | | + | +-------------------------+ | + | | + +---+------+ | + | V | | + | Exit <+-------------------------+ + | | + +----------+ + +We encode it as follows: + + #[derive(Debug)] + struct StateMachine { + shared_data_value: usize, + state: S, + } + +We define the states as follows, including state-dependent data if there +is any: + + #[derive(Debug)] + struct Inactive; + + #[derive(Debug)] + struct Active { + value: usize, + } + + #[derive(Debug)] + struct Paused { + frozen_value: usize, + } + + #[derive(Debug)] + struct Exit; + +We can define methods on state machines in any state S like this: + + impl StateMachine { + fn state(&mut self) -> &mut S { + &mut self.state + } + } + +We can also define methods only for machines in certain states. 
For +instance, only Inactive machines can be created with new and Active +state machines can have an increment() method: + + impl StateMachine { + fn new(val: usize) -> Self { + Self { + shared_data_value: val, + state: Inactive, + } + } + } + + impl StateMachine { + fn increment(&mut self) { + self.state.value += 1; + } + } + +Further, we can define valid transitions and their logic using the From +traits: + + impl From> for StateMachine { + fn from(val: StateMachine) -> StateMachine { + println!("Start"); + StateMachine { + shared_data_value: val.shared_data_value, + state: Active { value: 0 }, + } + } + } + + impl From> for StateMachine { + fn from(_: StateMachine) -> StateMachine { + println!("Disable"); + StateMachine { + shared_data_value: 0, + state: Exit, + } + } + } + + impl From> for StateMachine { + fn from(mut val: StateMachine) -> StateMachine { + println!("Pause"); + StateMachine { + shared_data_value: val.shared_data_value, + state: Paused { + frozen_value: val.state().value, + }, + } + } + } + + impl From> for StateMachine { + fn from(mut val: StateMachine) -> StateMachine { + println!("End with {}", val.state().value); + StateMachine { + shared_data_value: val.shared_data_value, + state: Inactive, + } + } + } + + impl From> for StateMachine { + fn from(mut val: StateMachine) -> StateMachine { + println!("Resume"); + StateMachine { + shared_data_value: val.shared_data_value, + state: Active { + value: val.state().frozen_value, + }, + } + } + } + + impl From> for StateMachine { + fn from(mut val: StateMachine) -> StateMachine { + println!("Stop with {}", val.state().frozen_value); + StateMachine { + shared_data_value: val.shared_data_value, + state: Inactive, + } + } + } + +or using custom functions: + + impl StateMachine { + fn pause(mut self) -> StateMachine { + println!("Exit with {}", self.state().frozen_value); + StateMachine { + shared_data_value: self.state().frozen_value, + state: Exit, + } + } + } + +In the following code, you see this in 
action. Note the commented out +lines that cause a compile-time error if commented in: + + fn main() { + let sm = StateMachine::new(5); + println!("{:?}", &sm); + // let sm: StateMachine = StateMachine::new(5); <-- does not work + let mut sm: StateMachine = sm.into(); + println!("{:?}", &sm); + for _ in 0..5 { + sm.increment(); + } + sm.shared_data_value = 7; + println!("Modified"); + println!("{:?}", &sm); + let sm: StateMachine = sm.into(); + println!("{:?}", &sm); + // sm.increment(); <-- does not work + let mut sm: StateMachine = sm.into(); + sm.increment(); + println!("{:?}", &sm); + let sm: StateMachine = sm.into(); + println!("{:?}", &sm); + // let sm: StateMachine = sm.into(); <-- does not work + let sm: StateMachine = sm.pause(); + println!("{:?}", &sm); + } + +This approach is also known as typestate pattern, about which you can +read more in the RustEmbedded Book. + + Summary + + What did you learn? + +- How to write generic code in Rust and make use of traits. +- How to implement both run-time and compile-time state machines. +- How (and when) to use macros. + + Where can you learn more? + +- Generics: + - Rust Book: Ch. 10 + - Programming Rust: Ch. 11, 13 + - Rust for Rustaceans: Ch. 03 + - cheats.rs: Generics & Constraints +- State Machines + - Typestate Programming in the Embedded Rust Book. + - Hoverbear’s State Machine Pattern + - Novatec GmbH’s Case for the Typestate Pattern + - Yoshua Wuyts on Future of Type States in State Machines III: + Type States +- Macros: + - Rust Book: Ch. 19.5 + - Rust for Rustaceans: Ch. 07 + - Rust Reference: Macros + - Rust by Example: Macros + - The Little Book of Rust Macros + - Fathomable Rust Macros + - David Tolnay’s Procedural Macros Workshop + - Nine Rules for Creating Procedural Macros + + W10: Work Sheet + +Generics & Traits + +- Do the Rustlings exercises generics and traits. + +- Revisit the List from U04. Add support for the FromIterator and + IntoIterator traits. 
+ +Run-Time State Machines + +Develop a run-time state machine that implements a beverage dispenser. +Reuse the DFA definitions provided in the unit. The specification is as +follows: + +- The automaton starts in the Ready state, waiting for an order. +- Upon the input SelectBeverage, it enters the AwaitMoney state. +- In this state, Insert1EUR, Insert50Cent, Insert20Cent, and + Insert10Cent inputs can happen. +- As soon as the price for the beverage (2,80EUR) has been reached, + the automaton + - prints to stdout: “Beverage dispensed”; optionally including + “Returning X.XX EUR” if too much money has been inserted. + - re-enters the Ready state. + +Compile-Time State Machines + +Develop a compile-time state machine for an Italian plumber: + + fire flower + +-----------------------------------------------------+ + | V + +--------------+ shroom +----------------+ fire flower +--------------+ + | Mini Plumber | ----------->| Normal Plumber |-------------->| Fire Plumber | + +--------------+ +----------------+ +--------------+ + ^ hit | ^ ^ hit | + +-------------------+ | +--------------------------+ + | + +The following code snippets should be present in your solution (with ??? +replaced appropriately) and you shall not use the From-style transitions +but custom ones: + + struct Plumber { + ??? + } + + struct Shroom; + struct FireFlower; + + fn hit(self) -> ???; + + fn consume_shroom(self, item: Shroom) -> ???; + +Macro Warm-Up + +Work through the macrokata. + +Custom Macro + +Develop an attribute macro #[repeat], which you can apply on any +function: + + #[repeat] + pub fn foo(bar: usize) -> usize { + // ... + } + +The macro changes the return type to impl Iterator and +wraps the return value in std::iter::repeat(). Apart from that, the +function must stay unchanged, i.e. visibility, parameters, etc. stay the +same. 
+ +The following program should compile afterwards (macros is your +proc-macro crate): + + pub mod math { + use macros::repeat; + + #[repeat] + pub fn the_answer() -> usize { + 42 + } + } + + fn main() { + let answers = math::the_answer().take(5).collect::>(); + println!("5 Answers: {:#?}", answers); + } + +and output + + 5 Answers: [ + 42, + 42, + 42, + 42, + 42, + ] + +Make use of quote and syn (the latter with feature full enabled). + + Async Programming + +Now that we have seen how to work in parallel on data and use mechanisms +to synchronize threads (locks or channels), we investigate another +approach to write concurrent programs: Cooperative Multitasking. While +multithreading and working on parallel data are to maximize the usage of +your computer’s resources (shortening computation time by increasing +throughput), cooperative multitasking is often about minimizing the +usage of resources (shortening computation time by cleverly using wait +times). Note that one is multi-threading and the other multi-tasking +(also see the terms introduced before). Frequently, you encounter the +terms compute-intensive (e.g. predict weather) and I/O-intensive tasks +(e.g. serve 10k chat users) in this context. If compute is your +bottleneck, multithreading is the first thing to try out; if I/O is it, +it is multitasking instead. + +Assume you want to download all sections of this coursebook. Ignoring +that you can print them to a single page via the Print button, we assume +you do a HTTP request to each of the pages. Even if you don’t know much +about computer networking, you probably believe that for each of these +requests, we tell the operating system to: * Open a TCP socket. * +Trigger the TCP socket to connect to the hod.cs.uni-saarland.de +server. * Issue a HTTP request to GET /units/U11.md (and other pages). * +Read the response and return it to the caller. + +This involves both system calls as well as packet transmits/receives, +which take non-negligible time. 
However, your software cannot progress
+ +First, let’s have a look at std::future::Future: + + trait Future { + type Output; + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll; + } + + enum Poll { + Ready(T), + Pending, + } + +We see that a Future has an output and can be polled for it. Upon poll, +it can either return Ready if the output is there or Pending if it needs +more time. This abstraction means that we have to regularly poll a +future to make progress. Also note that futures in Rust are lazy +(similar to iterators). If nobody polls them, they do not run. + +You are already familiar with std::fs::read_to_string, which has the +following signature: + + fn read_to_string>(path: P) -> Result + +An asynchronous equivalent would look like this: + + fn read_to_string>(path: P) -> impl Future> + +This is a common pattern you find in async code: The function parameters +stay the same, but the return value is wrapped in an impl Future. + +As this is used so frequently (and in many cases involves additional +lifetime considerations that we do not show here), we can use the async +keyword to conveniently turn a sync function into an async one, which +returns a Future: + + async fn read_to_string>(path: P) -> Result + +Finally, .await can be used to consume a future. Even though it looks +like accessing a field, it is special syntax that is translated by the +compiler into code that awaits the result and returns the final value. + + Executors + +Now that we have everything to create and chain futures together, we +need a way to actually execute them. While other programming languages +have a built-in global event loop into which tasks are put and where +they are executed, Rust leaves this to the application developer to +start an executor (or runtime) of their choice (also allowing to have +multiple executors at the same time). 
+ +The most prominent executors are: + +- async-std - focus on compatibility with std +- tokio - focus on network applications +- embassy (EMBedded ASYnc) - focus on embedded applications + + + +In the following, we use async-std, as it is easier to use than tokio +and closely mimics the std, by using the same types wherever possible. +We add it like this to our project: + + [dependencies] + async-std = {version = "1.10.0", features = ["attributes", "unstable"] } + +The easiest way to execute an async function is to use the block_on +primitive. We leverage async_std::fs::read_to_string(), which is similar +in functionality to the std equivalent, except that it is async. + +Coming from the std equivalent, we try this: + + fn main() { + let s = async_std::fs::read_to_string("ferris.txt").unwrap(); + println!("{}", s); + } + +Following the suggestion of the compiler, we add .await like this: + + fn main() { + let s = async_std::fs::read_to_string("ferris.txt").await.unwrap(); + println!("{}", s); + } + +Again, the compiler complains, but this time about .await not being +allowed outside of async functions or blocks. So let us add a block: + + fn main() { + let s = async { + async_std::fs::read_to_string("ferris.txt").await.unwrap() + }; + println!("{}", s); + } + +We would hope for s to be a String, but it is not yet. The async block +returns a Future. We could repeat our .await, but +obviously we start a cycle. Instead, we leverage the block_on primitive, +which blocks on the future and consumes it: + + fn main() { + let s = async_std::task::block_on(async { + async_std::fs::read_to_string("ferris.txt").await.unwrap() + }); + println!("{}", s); + } + +Or simpler: + + fn main() { + let s = async_std::task::block_on( + async_std::fs::read_to_string("ferris.txt") + }).unwrap(); + println!("{}", s); + } + +Now we are back to a synchronous mode of operation, but we gained +something in terms of program organization. 
Note that block_on, much +like any other blocking operation, should never be used in an async +function. block_on is an efficient primitive, as it goes to sleep +(instead of busy-waiting). + +When we are dealing with larger async-only programs (i.e. with a single +runtime), we can simplify the above code to: + + #[async_std::main] + async fn main() { + let s = async_std::fs::read_to_string("ferris.txt").await.unwrap(); + println!("{}", s); + } + +Essentially, the main() function becomes the function on which block_on +is applied, causing the program to run until completion (if ever). + +With our program nicely organized like this, let’s try to actually +become concurrent and do multiple things (potentially) at the same time. +Therefore, we use the async_std::task::spawn_local method that adds a +future to the thread-local executor to be then polled eventually, when +the block_on is used. Before we start, we add the async-log crate, so +that we can see the interleaving of events on the command-line: + + [dependencies] + async-log = "2.0.0" + log = "0.4.14" + femme = "1.2.0" + +Now, we will use simple HTTP requests, which we execute via the +following function: + + use async_std::io::prelude::*; + use async_std::net; + + async fn request(host: &str, port: u16, path: &str) -> std::io::Result { + let mut socket = net::TcpStream::connect((host, port)).await?; + + let request = format!("GET {} HTTP/1.1\r\nHost: {}\r\n\r\n", path, host); + socket.write_all(request.as_bytes()).await?; + socket.shutdown(net::Shutdown::Write)?; + info!("{} Request to {} sent", host); + + let mut response = String::new(); + socket.read_to_string(&mut response).await?; + info!("Response from {} received", host); + + Ok(response) + } + +From the main function, we now do several HTTP requests concurrently and +we also setup logging: + + fn setup_logger() { + let logger = femme::pretty::Logger::new(); + + async_log::Logger::wrap(logger, || 12) + .start(log::LevelFilter::Info) + .unwrap(); + } + + 
#[async_std::main] + async fn main() { + setup_logger(); + + let hosts = vec!["google.com", "depend.cs.uni-saarland.de", "rustacean.net"]; + + let mut handles = vec![]; + for host in hosts { + handles.push(task::spawn_local(request(host, 80, "/"))); + } + info!("All tasks spawned!"); + + let mut results = vec![]; + for handle in handles { + results.push(handle.await); + } + dbg!(results); + } + +The async-std executor also supports a thread pool, which means that we +can use several threads in parallel to poll futures and attempt to make +progress. With spawn_local, we added the task to the same thread we are +working on now. There is also spawn, which adds it to the global +executor, allowing other threads to access this. Normally, you will to +spawn and let the executor figure out for you, which thread should poll +the future. Note that this implies that data is shared between threads, +which we see by comparing the signatures of the two functions: + + pub fn spawn_local(future: F) -> JoinHandle where + F: Future + 'static, + T: 'static {} + + pub fn spawn(future: F) -> JoinHandle where + F: Future + Send + 'static, + T: Send + 'static {} + +Notice that the Future we pass to spawn must be Send, allowing it to be +passed between threads. + + Async Iterators + +Note: the async_std::stream::Stream type is probably going to be +replaced by the AsyncIterator in std via RFC2996. + +async-std provides the Stream trait, which is very similar to Iterator +but supports async: + + trait Stream { + type Item; + + fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll>; + } + +We do not go into the details of Pin, so consider Pin<&mut Self> to be +the same as &mut Self for now, knowing that this is a simplification. + +Analogously to U04: Iterators, a Stream is produced, adapted, and +consumed. The major difference is that execution can “pause” at more +locations than for the sync equivalent, i.e. at .await points. 
+ +Here is how we can turn an iterator into a Stream (produce), map each +element to a request (adapt), and collect the results in a vector +(consume): + + use async_std::stream::StreamExt; + + let hosts = vec!["google.com", "depend.cs.uni-saarland.de", "rustacean.net"]; + let hosts = async_std::stream::from_iter(hosts); + let mut requests = hosts.map(|host| request(host, 80, "/")); + let mut results = vec![]; + while let Some(item) = requests.next().await { + results.push(item.await); + } + + dbg!(results); + +In P02, these Streams will come in handy. + + Parting Words + +Before we let you explore the world of async Rust on your own, you +should know that the state of the ecosystem is a bit challenging. This +situation is due to the “late” standardization of async and .await in +the std lib (Rust edition 2015 did not include this, only in November +2019 this got stabilized). At the time of stabilization, the ecosystem +had already evolved and in particularly split into multiple, +non-compatible solutions. Nowadays, you can use multiple executors (like +tokio and async-std) together, which was not the case before. The good +news for the future (and future) is that the Async Foundations Working +Group attempts to change the state of the ecosystem and develops a +shared vision of how async programming should work in Rust in the long +run. + + Fearless Concurrency + +Rust has, as mentioned before, several concepts in place that make +dealing with concurrent code indeed fearless. As you have seen in U03, +the Rust ownership model makes data races impossible — though, you can +still have race conditions as well as Heisenbugs. However, there are a +set of technical tools Rust and its ecosystem offer that allow you to +implement concurrent software in a dependable way. 
Therefore, we show +you how to approach this using two paradigms: + +- Message Passing Concurrency using Channels + +- Shared Memory Concurrency with Mutual Exclusion constructs and + Atomics + + This section is intentionally kept brief and you should read the + excellent 16th chapter of the Rust book if you have any doubts or want + a more in-depth introduction to concurrency in Rust. + + Message Passing + +In our first concurrency approach, threads communicate with each other +via messages that are sent through channels: + + +----------+ +----------+ + | Thread 1 | | Thread 2 | + | | +---------+ | | + | S |--->| Channel |--->| R | + +----------+ +---------+ +----------+ + +A unidirectional channel: + +- sits between two threads +- has a sending (S) and a receiving side (R) +- forwards messages of a certain type +- has a capacity of messages it can store (a message that was sent but + not yet received) + +This is a so-called single-producer single-consumer (SPSC) channel that +links two threads. The Rust standard library contains std::sync::mpsc, +which is a multi-producer single-consumer channel. Instead of this, we +are going to show how the third-party structure crossbeam::channel can +be used, as it is in all aspects superior to the std variant (except for +the fact that you need one more crate). Note that the crossbeam channel +is a multi-producer, multi-consumer channel, but for our use case, this +does not matter. + +Let’s create our example system, which has the following components: + +- A logger thread that waits for worker threads to produce data to be + logged and logs “nothing happened” if there was no message for a + certain time. +- Two worker threads that take different amounts of time to produce + data. + +Here is the code. 
The Cargo.toml: + + [package] + name = "messagepassing" + version = "0.1.0" + edition = "2018" + + [dependencies] + crossbeam-channel = "0.5.1" + rand = "0.8.4" + +and main.rs: + + use crossbeam_channel::{select, unbounded}; + use rand::prelude::*; + use std::{ + thread, + time::{Duration, Instant}, + }; + + fn worker_thread(sender: crossbeam_channel::Sender) { + let mut rng = thread_rng(); + loop { + let number = rng.gen_range(1..=8); + thread::sleep(Duration::from_secs(number.into())); + sender.send(number).unwrap(); + } + } + + fn main() { + let (s1, r1) = unbounded(); + let (s2, r2) = unbounded(); + + thread::spawn(move || worker_thread(s1)); + thread::spawn(move || worker_thread(s2)); + + let start = Instant::now(); + println!("Sec - Message"); + loop { + let msg = select! { + recv(r1) -> msg => format!("R1: {}", msg.unwrap()), + recv(r2) -> msg => format!("R2: {}", msg.unwrap()), + default(Duration::from_secs(3)) => format!("nothing happened"), + }; + println!("{:03} - {}", start.elapsed().as_secs(), msg); + } + } + +Here is the output of a sample run: + + Sec - Message + 003 - nothing happened + 005 - R2: 5 + 005 - R1: 5 + 008 - R1: 3 + 011 - nothing happened + 012 - R2: 7 + 013 - R1: 5 + 016 - nothing happened + 016 - R1: 3 + 019 - nothing happened + 020 - R2: 8 + 023 - nothing happened + 024 - R1: 8 + 025 - R2: 5 + 027 - R1: 3 + 030 - R1: 3 + 031 - R1: 1 + +Let’s go through this piece by piece: + +- at the beginning of main, we create two unbounded channels. The + function returns both a sending as well as a receiving end, which we + can pass around. +- when we spawn the treads, we move the sending ends into the thread. +- the worker_thread continuously produces numbers in the range from 1 + to 8 (inclusive), sleeps for these many seconds and sends the number + to the channel afterwards. +- the rest of the main function deals with simultaneously receiving + from both channels and having a timeout of 3 seconds. 
Whenever at + least one of the arms can be taken (a message on r1 and/or r2 and/or + the timeout) the select! call non-deterministically takes one of the + available arms. + +Hopefully, you can appreciate how clean this solution is. We do not need +to care about individual parts of memory, our data is safely shared +(sent!) between threads and can be easily accessed. + + Shared Memory + +Here is a similar solution for the program we developed using channels +before: + + use rand::prelude::*; + use std::{ + sync::{Arc, Condvar, Mutex}, + thread, + time::{Duration, Instant}, + }; + + fn worker_thread(reference: Arc<(Mutex>, Condvar)>, index: usize) { + let mut rng = thread_rng(); + loop { + let number = rng.gen_range(1..=8); + thread::sleep(Duration::from_secs(number.into())); + let mut buffer = reference.0.lock().unwrap(); + buffer.push((index, number)); + reference.1.notify_all(); + } + } + + fn main() { + let shared_buffer = Arc::new((Mutex::new(vec![]), Condvar::new())); + let sb1 = shared_buffer.clone(); + let sb2 = shared_buffer.clone(); + + thread::spawn(move || worker_thread(sb1, 1)); + thread::spawn(move || worker_thread(sb2, 2)); + + let start = Instant::now(); + println!("Sec - Message"); + let mut guard = shared_buffer.0.lock().unwrap(); + loop { + let mut new_guard = shared_buffer + .1 + .wait_timeout(guard, Duration::from_secs(3)) + .unwrap(); + let msg = if new_guard.0.len() > 0 { + let e = new_guard.0.pop().unwrap(); + format!("R{}: {}", e.0, e.1) + } else { + format!("nothing happened") + }; + guard = new_guard.0; + println!("{:03} - {}", start.elapsed().as_secs(), msg); + } + } + +The following changes have been made: + +- We use an Arc> instead of a channel. Arc stands for + atomic-reference counting, i.e. a thread-safe reference counting + type. The Arc allows us to move copies of it to the other threads + when we spawn them. Mutex stands for mutual exclusion and is used + to, at run-time, ensure only one thread can hold a lock at the same + time. 
Whoever holds the lock can access the inside using the guard + variable after .lock() returned. +- We introduce a Condvar (conditional variable) to be able to signal + between threads that data is available. worker_thread notifies all + other threads that wait_timeout on the condvar. This is the + replacement for the select call with timeout we had before. +- Note that after wait_timeout we hold a new guard that must be used + in the following iteration. + + Communicating Processes + +While we show you both approaches to concurrency, you should be aware +of: + + Don’t communicate by sharing memory; share memory by communicating. - + Rob Pike + +That means that generally, message passing should be prefered over +shared memory, as it leads to solutions that are easier to implement and +reason about. This StackOverflow answer explains very well why this is +the case. + + Marker Traits + +Finally, we briefly want to mention two important traits: + +- If something implements Send, it can be safely moved from one thread + to another. +- If something implements Sync, it can be safely used by more than a + single thread. + +Both of them are marker traits, i.e. they do not carry implementations +but instead signal to the Rust compiler how they can be used. This means +that you can also annotate your types with this. However, doing so +yourself is strongly discouraged. The reason is that Rust automatically +marks a structure, where all elements have either of these traits, with +this trait as well. If this is not the case, there is in many cases a +reason that an element is not Sync or Send and pretending that it is, +can cause serious problems. In consequence, you only use this marker +trait if you implement unsafe code (which we cover in U13), where you +are the only one who knows that the structure is Send and/or Sync. 
+ + Concurrent Introduction + +While we are very used to the fact that, in the real world, things +happen at the same time (you read THIS word and a tree is planted +somewhere) or overlap in durations (you checking Facebook during a +lecture… though this never happens), talking about these concepts with +respect to computation is certainly not easy. Most likely, most of the +software you have written so far has been concerned with executing +things in a sequential manner (instruction after instruction). So let’s +enter the realm of concurrency: + + In computer science, concurrency is the property of program, + algorithm, or problem decomposition into order-independent or + partially-ordered units. - Wikipedia + +Now this sounds a bit complicated so let’s decompose it and play this +through with an example: Assume you are at the train station and you +just left your train to head for the connecting one. While you run (task +1), you check the boards for the platform your next train is leaving +(task 2). Let’s, for a second, ignore the fact that running without +knowing the destination might be worse than waiting and checking first. +These tasks are executed concurrently, but there is only one you so it +is not done in parallel (you might have to slow down or stand to check +the board). If you were on your phone with your partner and ask them to +check the platform, we would be in a concurrent and parallel scenario, +as now you can focus on running while someone else checks for the +destination simultaneously. + +Mapping this to our definition, we see that the “algorithm to get to the +connecting train” is decomposed into two units: figuring out the +destination platform and running for it. This is partially ordered, as +you can only reliably run for a destination if you know where it is. If +more than one executing party is involved, units can, but must not be, +executed in parallel. 
+ +Benefits and Drawbacks of Concurrency + +Now that we know that things don’t get easier with concurrency, the +question is why we do it in the first place. There are two major +reasons: + +- First, if we have concurrency with parallelism, we have the chance + to increase our throughput (completed tasks per time) or decrease + latency (completion time of task). Assume you have a task that can + be parallelized, e.g., train attendants checking for tickets only in + their “section” of the train. The tickets checked per time increases + (throughput) and the average time between a traveller entering the + train and getting checked for their ticket decreases as well + (completion time). Note that this is not always the case: in + so-called pipelined systems, the throughput increases, even though + the end-to-end completion time (item arrives and is completely + processed) does not change. +- Second, if we are able to write programs with concurrency in mind, + we can deal better with systems that are either distributed + (messages between systems take non-negligible time and each system + can compute independently) or interfacing with the real-world (where + actions take time until a reaction follows). + +Now you might be convinced that, despite concurrency being hard to talk +about, it is often a desirable concept. But there are also drawbacks of +concurrent computing systems: + +- Concurrent code can exhibit race conditions, if the result of the + computation depends on the exact timing and/or the order of executed + code. A special form of this are data races when the result only + depends on the order (not the timing) in which concurrent threads + are executed. We discussed this already in U03. +- Another situation into which concurrent tasks can get is a deadlock. + When we try to synchronize tasks by using locks (which we cover in + the next section), i.e. 
when a system locks a resource before it + works on it, it can happen that two tasks wait on each other + indefinitely. +- Concurrent code often contains Heisenbugs (in contrast to + Bohrbugs)[12], i.e. undesired behaviour that is hard to be traced + down — where debugging is hard as the debugging process & + instrumentation itself tends to make the issue disappear as long as + it is attached. + +In essence, when we strive for high performance using parallelism or +want to develop concurrent, distributed systems, we have to find ways to +compensate for the drawbacks — a topic that we cover in the next +sections. + + Terms, Terms, Terms + +Before we get started, we have to introduce a couple of system +programming terms, i.e. concepts coming from the operating systems +community. We follow this excellent glossary (unfortunately, English +terms are mentioned but only German explanations provided), if possible. +What we need in the following, which we translated and simplified: + + Definition: A Process is a program in execution (program is the + description of what should be done). + + Definition: A Thread is a strand of actions (e.g. call a function, + compute a value) with its own runtime context (i.e. state like + variables etc.). + + Definition: A Task is something to be done in a process. This can be + implemented by calling a subroutine, or can have its own thread. + + Definition: An Event is a set of circumstances that happen during a + process and are observed. + + Definition: A Routine is a smaller program or part of a program with a + well-defined, commonly required functionality. + + Definition: A Coroutine is executed together (lat. con) other + routines, all being on the same conceptual level (in contrast to + subroutine). + +You do not have to learn them by heart, but make sure that you +understand the difference so that the following sections make more +sense. 
+ + U11: Dependable Concurrent Operation + +We are getting close to the end of your junior program at DSys, which +means there is a final set of trainings given by coaches — for instance +Ferris Heisenberg, who is with us today. He is here to introduce +concurrency (including Heisenbugs), show why concurrency in Rust is +fearless, how to program asynchronous code, and demo how working with +parallel data is a breeze in Rust. + + Parallel Data Processing with Rayon + +So far, we have talked about parallelism in the form that two or more +threads work on the same data or do independent tasks. + +A different form of concurrency is so-called data-parallelism, where you +exploit that data can be partitioned into equal units and worked upon +independently. A simple form, that even works in hardware, are +single-instruction multiple-data (SIMD) instructions certain CPUs or +GPUs provide. Instead of 8 multiplications of 8 values, you put them in +place and run an 8-value wide multiplication instruction. + +At a higher level, we see this with iterators of items to which certain +modifications should be applied (remember adapters from U04). We +distinguish between mappers and reducers: + +- A Mapper transforms each item into something else (fn(T) -> U). A + perfect example is the map function, but also the filter function + that “removes” elements. +- A Reducer transforms a sequence of items into something else + (fn(Iter) -> U). A perfect example are fold methods and special + cases such as sum or product. + +Further, we distinguish between non-blockers and blockers: + +- Blockers can only produce their output when they have completely + consumed their input. An example is the fold method. +- Non-Blockers can produce outputs stepwise, without requiring the + whole input to be consumed. An example is the map method. + +Depending on the chain of adapters we built up, and whether they +block/don’t block or map/reduce, we get potential ways for parallelising +things. 
From these definitions, it should be clear that:
+
+- A step after a blocker cannot happen in parallel to the blocker. The
+  successor can only start as soon as the blocker is done.
+- A step after a non-blocker can happen in parallel to the non-blocker
+  step, but on different items (i.e. we get a pipelined system).
+- A mapper can be parallelised by applying the map function to
+  distinct parts of the iterator.
+- A reducer can be parallelised, if the operation is associative and
+  commutative (e.g. sum). In this case, the input is put into batches
+  that are evaluated in parallel.
+
+  Benchmarking Tools
+
+For the following use case, we leverage different benchmarking tools
+that are also helpful in other situations.
+
+hyperfine
+
+hyperfine is a command-line benchmarking tool that can work with
+anything, not just Rust binaries. You can use it like this:
+
+    hyperfine [OPTIONS] 
+
+The following options are useful and often leveraged by performance
+evaluations:
+
+- --warmup  run command multiple times before benchmarking to
+  fill caches
+- --prepare  run before the command to measure
+
+btm
+
+btm is a CLI task manager that shows how your CPUs are used:
+
+[btm demo]
+
+  A Computation Example using Data Parallelism
+
+The task is to compute the sum of the successors of prime numbers
+lower/equal to n. While this is not particularly useful, it allows us to
+use a filter, a map and a fold/reduce adaptor. 
We leverage the following checking +function: + + pub fn is_prime(n: &u32) -> bool { + let root = (*n as f64).sqrt().floor() as u32; + (2..=root).all(|i| *n % i != 0) + } + +First, we write a sequential solution: + + use paralleldata::is_prime; + + fn main() { + let n: u32 = 300_000; + let res = (1..n) + .into_iter() + .filter(is_prime) + .map(|i: u32| i + 1) + .fold(0, |a, b| a + b); + println!("res: {}", res); + } + +Afterwards, we add rayon for a parallel solution: + + use paralleldata::is_prime; + use rayon::prelude::*; + + fn main() { + let n: u32 = 300_000; + let res = (1..n) + .into_par_iter() + .filter(is_prime) + .map(|i: u32| i + 1) + .reduce(|| 0, |a, b| a + b); + println!("res: {}", res); + } + +We implemented the following changes: + +- use rayon::prelude::*; imports rayon and its traits that allow + turning regular iterators into parallel ones and provides the map, + fold, … adapters. +- .into_iter() became .into_par_iter() turning it into a parallel + iterator rayon provides. +- .fold(...) now takes a closure as the first parameter, as it is + executed multiple times (parallel fold groups values and produces a + fold for each group). +- .fold(...) alone was no longer sufficient, as it produces single + values for each group. Now we do a reduce instead, which produces a + single value. 
+ +Benchmarking Results + +We run the benchmarking using the following commands: + + cargo build --bin sequential --release + cargo build --bin parallel --release + + hyperfine target/release/sequential target/release/parallel + +Here are the results: + + Benchmark 1: target/release/sequential + Time (mean ± σ): 287.9 ms ± 36.9 ms [User: 280.6 ms, System: 3.5 ms] + Range (min … max): 246.6 ms … 361.4 ms 11 runs + + Benchmark 2: target/release/parallel + Time (mean ± σ): 112.7 ms ± 27.6 ms [User: 401.8 ms, System: 16.7 ms] + Range (min … max): 83.6 ms … 183.6 ms 20 runs + + Summary + 'target/release/parallel' ran + 2.55 ± 0.71 times faster than 'target/release/sequential' + +Watching them live in btm + +[Btm Result] + +The left part with the multiple spikes shows the sequential solution +running. The right part with the significant purple spike shows the +parallel solution. It becomes clear that by using all cores, the +parallel solution is done faster. + +Why is Rayon useful? + +- Rayon guarantees that there are no data races introduced. +- Rayon figures out ways to parallelize steps that can be + parallelized. +- Rayon internally uses a join primitive that only executes + concurrently when cores are idle (implementing potential parallelism + in contrast to guaranteed parallelism that might cause too much + overhead). + + S11: Sample Solution + +Applied Concurrency in Rust + +- Rustlings: Discuss in class. 
+
+
+10-Incrementer
+
+Shared Memory
+
+    use std::{
+        sync::{Arc, Mutex},
+        thread,
+    };
+
+    fn increment(location: Arc<Mutex<i32>>) {
+        for _ in 0..10 {
+            let mut l = location.lock().unwrap();
+            *l = *l + 1;
+        }
+    }
+
+    fn main() {
+        let counter = Arc::new(Mutex::new(42));
+        let t1 = thread::spawn({
+            let counter = counter.clone();
+            move || increment(counter)
+        });
+        let t2 = thread::spawn({
+            let counter = counter.clone();
+            move || increment(counter)
+        });
+        t1.join().unwrap();
+        t2.join().unwrap();
+        println!("{}", counter.lock().unwrap());
+    }
+
+Message Passing
+
+    // Cargo.toml
+    // ...
+    [dependencies]
+    crossbeam-channel = "0.5.1"
+
+    use std::thread;
+
+    use crossbeam_channel::{Receiver, Sender};
+
+    fn increment(input: Receiver<i32>, output: Sender<i32>) {
+        for _ in 0..10 {
+            output.send(input.recv().unwrap() + 1).unwrap();
+        }
+    }
+
+    fn main() {
+        let mut counter = 42;
+        let (s, r) = crossbeam_channel::unbounded();
+        let (s2, r2) = crossbeam_channel::unbounded();
+
+        let t1 = thread::spawn({
+            let r = r.clone();
+            let s = s2.clone();
+            move || increment(r, s)
+        });
+        let t2 = thread::spawn(move || increment(r, s2));
+
+        s.send(counter).unwrap();
+        for result in r2 {
+            counter = result;
+            match s.send(result) {
+                Ok(_) => continue,
+                Err(_) => break,
+            }
+        }
+
+        t1.join().unwrap();
+        t2.join().unwrap();
+        println!("{}", counter);
+    }
+
+Rayon in Action
+
+- TODO
+
+Async in Action
+
+    // ... 
+
+    [dependencies]
+    async-std = { version = "1.10.0", features = ["attributes"] }
+    rayon = "1.5.1"
+    surf = "2.3.2"
+    url = "2.2.2"
+
+    // main.rs
+    fn create_url_vector() -> Result<Vec<url::Url>, url::ParseError> {
+        let urls = vec![
+            "https://rustacean.net/assets/rustacean-orig-noshadow.png",
+            "https://rustacean.net/assets/rustacean-orig-noshadow.svg",
+            "https://rustacean.net/assets/rustacean-flat-noshadow.png",
+            "https://rustacean.net/assets/rustacean-flat-noshadow.svg",
+            "https://rustacean.net/assets/cuddlyferris.png",
+            "https://rustacean.net/assets/cuddlyferris.svg",
+            "https://rustacean.net/assets/rustacean-flat-happy.png",
+            "https://rustacean.net/assets/rustacean-flat-happy.svg",
+            "https://rustacean.net/assets/rustacean-flat-gesture.png",
+            "https://rustacean.net/assets/rustacean-flat-gesture.svg",
+            "https://rustacean.net/assets/corro.svg",
+            "https://rustacean.net/more-crabby-things/droidstacean-flat-happy_green.png",
+        ];
+        urls.into_iter().map(url::Url::parse).collect()
+    }
+
+    async fn download_file(url: &url::Url) -> Result<(), Box<dyn std::error::Error>> {
+        let mut res = surf::get(&url).await?;
+        let body = res.body_bytes().await?;
+
+        let segments = url.path_segments().expect("url has no path");
+        let mut path = std::env::current_dir()?;
+        path.push("target");
+        async_std::fs::write(path.join(segments.last().unwrap()), &body).await?;
+        Ok(())
+    }
+
+    #[async_std::main]
+    async fn main() -> Result<(), Box<dyn std::error::Error>> {
+        let urls = create_url_vector()?;
+
+        let tasks = urls
+            .into_iter()
+            .map(|url| async move {
+                if let Err(error) = download_file(&url).await {
+                    eprintln!("Error downloading `{url}`: {error}!")
+                }
+            })
+            .map(async_std::task::spawn)
+            .collect::<Vec<_>>();
+
+        for task in tasks {
+            task.await
+        }
+
+        Ok(())
+    }
+
+  Summary
+
+  What did you learn?
+
+- What the difference between concurrency and parallelism is, as well
+  as their benefits and drawbacks.
+- How to do both message passing as well as shared memory concurrency
+  in Rust. 
+- How rayon allows you to easily exploit data parallelism when working + with iterators. +- How to program asynchronous code, enabling resource-efficient + software that deals well with many I/O tasks. + + Where can you learn more? + +- Concurrency + - Rust Book: Ch. 16 20 + - Programming Rust: Ch. 19, 20 + - Rust for Rustaceans: Ch. 09, 11 + - The Embedded Rust Book: Concurrency + - OneSignal: Thread Safety + - 7 Concurrency Models in 7 Weeks +- Parallel Data / rayon + - Blog Post + - RustBelt Talk +- Async Programming + - Programming Rust: Ch. 20 + - Rust in Action: Ch. 10 + - Rust for Rustaceans: Ch. 08 + - Async IO Fundamentals + - A Pratical Guide to Async in Rust + - async-rs Stop Token + - Async Read and Write Traits + + W11: Work Sheet + +Applied Concurrency in Rust + +- Do the Rustlings exercises threads. + +- Remember the 10-incrementer we have mentioned in U03. Your task is + now to take this code (which we show below) and turn it into a + concurrent solution (i.e. it keeps spawning two threads that do the + stepwise 10-increment) and produces the correct output (62). Do so + once using message passing and once using shared memory concurrency. + + use std::thread; + + fn increment(mut counter: Counter) { + for _ in 0..10 { + counter.count += 1; + } + } + + #[derive(Debug)] + struct Counter { + count: u32, + } + + fn main() { + let mut counter = Counter { count: 42 }; + let t1 = thread::spawn(|| increment(counter)); + let t2 = thread::spawn(|| increment(counter)); + t1.join().unwrap(); + t2.join().unwrap(); + println!("{:#?}", counter); + } + +Rayon in Action + +In U04 you had to implement a word count program using iterators. For +this task, take this solution and turn it into a concurrency-enabled +solution using Rayon. Benchmark the sequential and the parallel solution +and compare the performance. + +Async in Action + +At the beginning of Async Programming, we described the “download all +sections of this book concurrently” use case for async. 
Your task is now +to do exactly that: given a vector of URIs, try to download all of them +in parallel and write them to a folder on disk. Do not attempt to +download the book sections, because they are behind HTTP basic auth, +which complicates things. In the sample solution, we download all the +~~crap~~ crabs images. Instead of doing HTTP requests by hand, leverage +the surf crate that works nicely with async-std. Benchmark your solution +while you develop it. + + Hardware Dependability + +System dependability can come in various forms: + +- how often are reboots allowed? +- are crashes allowed and how often? +- what is the acceptable failure rate? + + Every system can fail. So, you need to decide what your acceptable + failure rate is. - Better Embedded System Software + +Dependability targets must exist so that systems can be designed for +this target. + +The two most common issues for hardware of embedded systems are +reliability and availability. Notably, software fails in different ways +than hardware and the math we cover here cannot easily be transferred to +software components. + +Typical faults that happen in hardware are that gates do not properly +compute their output or bits get corrupted in memory. + + Reliability + +For the remainder of this section, we define reliability as the +probability of a system to work continuously for X hours after having +been turned on. Naturally, longer uptimes induce larger probabilities of +failure. Purely mechanical components often have a high probability of +failure right after they have been produced; which is often called +burn-in phase. + + Reliability is a measure of a system’s ability to work completely + failure-free for a certain length of time, or mission. 
- Better
+  Embedded System Software
+
+Under some natural assumptions, including that of mutual independence of
+failures, we can consider the probability of hardware failures as being
+determined by the failure rate \(\lambda\) and time \(t\) in a negative
+exponential probability distribution:
+
+\[ R(t) = e^{-\lambda t}\]
+
+The exponential function leads to the fact that reliability drops the
+longer the mission becomes.
+
+A typical measure is Mean Time To Failure (\(MTTF\)), the average length
+of failure-free operation after initialization, which under the above
+assumptions corresponds to the inverse of the failure rate: \(MTTF=1/\lambda\).
+Higher MTTF values are indicators of higher reliability.
+
+Note that if the MTTF is 1000 hours (\(\lambda = 0.001 / h\)), the system is not
+guaranteed to work for 1000 hours — it could very well fail sooner or
+later. Instead, the reliability of a 1000-hour mission is:
+
+\[ R(1000) = e^{-0.001 \cdot 1000} = e^{-1} = 0.3679 \]
+
+Spelled out, this means that operating for 1000 hours with such a
+component, you would expect in 63% of the cases that the component has
+failed by that time. 
Here is a table of mission times given a certain
+MTTF and target reliability:
+
+  ------------------------------------------------------------------------------
+  MTTF (hrs)   Mission time at 99%   Mission time at 99.9%   Mission time at 99.99%
+               reliability           reliability             reliability
+  ------------ -------------------- --------------------- ----------------------
+  10           6 minutes             36 seconds              3.6 seconds
+
+  100          1 hour                6 minutes               36 seconds
+
+  1000         10 hours              1 hour                  6 minutes
+
+  10,000       4.2 days              10 hours                1 hour
+
+  100,000      6 weeks               4.2 days                10 hours
+
+  1,000,000    60 weeks              6 weeks                 4.2 days
+
+  10,000,000   11.5 years            60 weeks                6 weeks
+  ------------------------------------------------------------------------------
+
+Depending on how we connect components, the reliability can change:
+
+- Serial connection: if one fails, the entire chain fails:
+  \[R(t)_{serial} = \prod_{i}R(t)_i\]
+- Parallel connection: if one fails, the other can take over:
+  \[R(t)_{parallel} = 1 - \prod_{i}(1-R(t)_i)\]
+
+We can deduce that parallel connection improves reliability, while
+serial reduces it. Here is a table of the number of (redundant) parallel
+components and the chances that they fail on 11-hour long missions:
+
+  ------------------------------------------------------------------------
+  # Components R(11) at 50,000 MTTF       Mean Number of Missions Before
+                                          Failure
+  ------------ -------------------------- --------------------------------
+  1            \(0.99978\)                \(4,546\)
+
+  2            \(0.999 999 952\)          \(20,665,853\)
+
+  3            \(0.999 999 999 999 999    \(1.326 \cdot 10^{17}\)
+               998\)
+  ------------------------------------------------------------------------
+
+Note that all this assumes that failures are independent, which is
+something engineers have to put in a lot of effort to ensure.
+
+  Availability
+
+For repairable systems/components, a different view on dependability is
+to look at the (long-run) availability:
+
+  Availability is the fraction of time the system is operational. 
+
+
+That number depends on the MTTF as well as the mean time needed to
+repair the system upon failure:
+
+\[ A = \frac{MTTF}{MTTF + MTTR} \]
+
+Note that the availability is independent of the mission time.
+
+Increasing availability is usually done via redundancy (a single failure
+does not cause unavailability) or fast recovery (repair time gets
+small). Approaches to do the latter can be standby-systems, fast resets,
+or watchdog timers or periodic resets.
+
+  Markov Analysis
+
+Markov models are stochastic processes describing system behaviour
+stochastically using concepts similar to state machines. Central to
+these systems is the Markov assumption: The probability of the next
+state depends on the current state only (i.e. is independent of previous
+states). Therefore, a Markov model is said to be memory-less as prior
+state occupancies are not influencing the future behaviour.
+
+There are different models:
+
+- Discrete-time Markov chains are the most basic ones, where state changes
+  are described by probabilistic experiments over successor states. For instance,
+  with states being elements of \(\{head, tail\}^{+}\), the possible sequences of outcomes of tosses of a fair coin describe a discrete-time Markov chain.
+
+- More relevant for our purposes here are continuous-time Markov
+  chains. They evolve in continuous time (the reals), not in discrete
+  time (the integers). Here, the memory-less property is also in the
+  time domain, which means it does not matter when the system has been
+  where (including how long the system has been in the current state),
+  only the current state determines the future behaviour. It can be
+  shown that state occupancy times in such models must be
+  exponentially distributed. The proof is beautiful and hence
+  recommended (alternative: join the next ``Quantitative Model
+  Checking’’ lecture).
+
+Here is an example continuous time Markov chain, where rates (of
+exponential distributions) label edges. 
+ + + +-----------+ μ + | A: intact |<--------+ + +-----------+ | + | | + λ | +-----+-----+ + +-------->| B: failed | + +-----------+ + +Application in Dependability Analysis + +Markov models are widely applied in dependability analysis. For +instance, we can do the following: + +- First, we create a system model with empirically measured mean times + between failure and repair times. +- Based on this, we build a Markov model, where component states form + nodes, while failure and repair times are used on the edges between + states. +- From this model, a set of equations can be derived and used to + compute the percentage of time spent in a state, as well as visit + frequency and visit durations for states (as well as the precise + information in what state the system is at what time with what + probability). +- Finally, this information can be used to tell working system states + apart from failed system states. +- Using these two states, we can compute the availability of the + entire system and many other measures of interest, like mission + survivability. + + Use Case: Remote-Controlled Robot + +We reconsider our robotic application and now want to analyse it using +Markov chains: For this, we consider the system as the composition of +components that can be in working or failed state. We denote that +component X is working with X and !X if it is failed. 
With our 3 +components, we have \(2^3\) distinct states: + + +---------+ + | (P,T,S) | + +---------+ + | | | + +---------+ | +----------+ + | | | + V V V + +-----------+ +-----------+ +-----------+ + | (!P,T,S) | | (P,!T,S) | | (P,T,!S) | + +-----------+ +-----------+ +-----------+ + | | | | | | + | +-----------+----+--------+---+ | + | | | | | | + | +------+ | +-----+ | | + V V V V V V + +-----------+ +-----------+ +-----------+ + | (!P,!T,S) | | (P,!T,!S) | | (!P,T,!S) | + +-----------+ +-----------+ +-----------+ + | | | + +---------+ | +-----------+ + V V V + +------------+ + | (!P,!T,!S) | + +------------+ + +In another step, we mark states that are failed states (where the fault +tree evaluates to true) as well as working states (marked with ==, where +the FT evaluates to false). We again consider the failure rates as +specified in the previous section. We also annotate the edges with the +rates: + + +=========+ + | (P,T,S) | + +=========+ + λ_P | | | λ_S + +---------+ | +----------+ + | | λ_T | + V V V + +-----------+ +===========+ +===========+ + | (!P,T,S) | | (P,!T,S) | | (P,T,!S) | + +-----------+ +===========+ +===========+ + | λ_T | λ_S | λ_P | λ_S | λ_T | λ_P + | +------+------+------+---+ | + | | +-+ | | | + | +------+ | +-----+ | | + V V V V V V + +-----------+ +-----------+ +-----------+ + | (!P,!T,S) | | (P,!T,!S) | | (!P,T,!S) | + +-----------+ +-----------+ +-----------+ + | λ_S | λ_P | λ_T + +---------+ | +-----------+ + V V V + +------------+ + | (!P,!T,!S) | + +------------+ + +Assume that we now want to compute the probability of the overall system +failure, i.e. the probability of being in any of the states not +surrounded with ==. 
To do this, we can simplify the chain (factually +being an exploitation of bisimulation on Markov chains) as follows by +collapsing failed states into one (numbers in state give the index used +for latter analysis): + + +===========+ + | (P,T,S) 0 | + +===========+ + λ_P | | | λ_S + +---------------+ | +----------+ + | | λ_T | + | V V + | +============+ +============+ + | | (P,!T,S) 1 | | (P,T,!S) 2 | + | +============+ +============+ + | | λ_P | λ_S | λ_T | λ_P + | | | | | + | | +-+ | | + | +------+ | +-----+ | + V V V V V + +-----------------------------------------+ + | Failed 3 | + +-----------------------------------------+ + +Multiple outgoing edges can be combined as in the following diagram (as +a result of the fact that the minimum of exponential distributions is +exponentially distributed with the sum of the rates): + + +===========+ + | (P,T,S) 0 | + +===========+ + λ_P | | | λ_S + +---------------+ | +----------+ + | | λ_T | + | V V + | +============+ +============+ + | | (P,!T,S) 1 | | (P,T,!S) 2 | + | +============+ +============+ + | | λ_P + λ_S | λ_P + λ_T + | | | + | | | + | | | + V V V + +-----------------------------------------+ + | Failed 3 | + +-----------------------------------------+ + +Final Analysis + +If we apply the formulas from the previous section on the failure rates +we get \[\] The last line is obtained by evaluating \(Q_0(t)\) with time +\(t\) set to 8760 hours (1 year). The result obtained should agree with +the one of the direct analysis in the previous section, which was a lot +simpler, but needed to fix \(t\) prior to the analysis, and which is +generally unable to cover repairable systems. + + U12: Dependability Theory + +This time, it is Ferris McHardHat who is going to teach you about the +reliability of hardware, quantitative fault tree analysis, and Markov +analysis. 
As he is more of a theoretical person, he mostly wears the
+hard hat for style and not for safety reasons — but that does not mean
+you should pay less attention to him!
+
+  Quantitative Fault Tree Analysis
+
+In U09 we already discussed how fault trees can be used to analyze a
+system for events that can cause failures. While we looked at algorithms
+to find a minimal set of these failures, we have not considered how each
+event contributes to the overall reliability of the system. In this
+section, we also introduce importance measures to:
+
+- identify basic events that should be improved, maintained, or
+  controlled
+- identify basic events that contribute significantly to the top-event
+  probability — which means that high-quality failure data should be
+  obtained.
+
+In practice, the values computed by the importance measures differ by
+orders of magnitude. Hence, it is often sufficient to look at the rough
+estimates and you do not need precise results.
+
+  Use Case: Remote-Controlled Robot
+
+Let’s consider a remote-controlled robotic system composed of the
+following components with respective failure rates:
+
+- A power supply \(P\), without which the system stops working. \(\lambda_P =
+  12.03 \cdot 10^{-6} / h\)
+- A communication module, without which the mission is no longer under
+  control, i.e. failed, that is composed of two redundant links:
+  - A terrestrial link \(T\). \(\lambda_T = 25.47 \cdot 10^{-6} / h\)
+  - A satellite link \(S\). \(\lambda_S = 40.72 \cdot 10^{-6} / h\)
+
+Our mission is designed to last for \(t = 8760 h\) (1 year) and we now
+want to know how likely a mission failure is, given these values. If
+this is insufficient, we would need to improve the reliability of
+components. 
+
+
+Based on these details, we can come up with both (a) the structure of
+the fault tree and (b) the failure probabilities of all events \(1 - p_i =
+1-e^{-\lambda_i t}\) (check the numbers, note that we rounded gracefully):
+
+                        System
+                        failed
+                         0.154
+                           |
+                        +-----+
+                        | >=1 |
+                        +-----+
+                         |   |
+                  +------+   |
+             0.06 |          |
+              +-------+      |
+              |   &   |      |
+              +-------+      |
+               |     |       |
+               O     O       O
+              0.2   0.3     0.1
+               ^     ^       ^
+     Terrestrial     |    Power Supply
+     Link failed     |    failed
+                     |
+                 Satellite
+                 Link failed
+
+  Top-Event Probabilities
+
+First, let us introduce a bit of notation:
+
+- Our system is composed of \(n\) components.
+- The fault tree induces a structure function \(\phi(.)\), a negation-free
+  Boolean formula.
+- Let \(x_i\) indicate that the \(i\)th component is in working state
+  (1), respectively failed state (0). \(\vec{x} = (x_1, x_2, …, x_n)\) is the
+  vector of states of all components.
+- \(\phi(\vec{x})\) represents the system state which is defined as \(\phi(\vec{x}) =
+  1~(0)\) if the system is working (failed). \(\bar{\phi}(.)\) is the negation
+  of \(\phi(.)\) meaning that the system fails once the top-level
+  event turns true.
+- Let \(p_i\) specify the reliability of component \(i\), i.e. \(P(x_i
+  = 1) = p_i = 1 - P(x_i = 0)\). \(\vec{p}\) is the vector of reliabilities of
+  all components.
+- \(R(\vec{p})\) is the system reliability with component reliability vector
+  \(\vec{p}\).
+
+We introduce the top-level failure probability \(Q_0\), that is computed
+based on minimal cut sets. The minimal cut sets are: [Terrestrial Link
+failed, Satellite Link failed] and [Power Supply failed].
+
+If basic events are independent, \(Q_0\) gives the top-level failure
+probability exactly. If they are not, \(Q_0\) is a conservative upper
+bound, as failure combinations are counted multiple times. 
+
+
+First, let’s compute \(q_i\) of each cut set using the failure
+probabilities from the diagram \((1 - p_j)\):
+
+\[q_i = \prod_{j \in MCS_i} (1 - p_j) \]
+
+For our cut sets, this means:
+
+\(q_1 = 0.2 \cdot 0.3 = 0.06\) [Terrestrial Link failed, Satellite Link Failed]
+
+\(q_2 = 0.1\) [Power Supply failed]
+
+The top-level failure probability is then computed as
+
+\[Q_0 = 1 - \prod_{i \in MCSes} (1 - q_i)\]
+
+For our case:
+
+\[Q_0 = 1 - (1 - 0.06) \cdot (1 - 0.1) = 1 - 0.94 \cdot 0.9 = 1 - 0.846 = 0.154\]
+
+  Birnbaum Importance
+
+When it comes to importance measures, the Birnbaum Importance \(I_B\)
+indicates how important a component (represented by the failure event)
+is for correct functioning of the whole system. The value is computed
+for an event \(i\) as follows:
+
+\[I_B(i) = Q_0(\vec{p} \mid p_i = 1) - Q_0(\vec{p} \mid p_i = 0)\]
+
+Hence, you re-compute the \(Q_0\) values but replace the event
+probabilities with 1 and 0 respectively. Intuitively speaking, the
+importance quantifies how much the top-level failure probability changes
+if the component is either perfectly unreliable (1) or reliable (0).
+
+Let’s compute this for the event Terrestrial Link failed:
+
+\[I_B(T) = 0.37 - 0.1 = 0.27\]
+
+We get the following results for the other events:
+
+\[I_B(S) = 0.28 - 0.1 = 0.18, \qquad I_B(P) = 1 - 0.06 = 0.94\]
+
+  Improvement Potential Importance
+
+Another importance measure is the Improvement Potential Importance,
+which gives for each component how much the overall system reliability
+would increase if the component were perfect. 
The value is computed for
+an event \(i\) as follows:
+
+\[I_{IP}(i) = R(\vec{p} \mid p_i = 0) - R(\vec{p})\]
+
+Let us reformulate this in terms of unreliability to use our \(Q_0\)
+function:
+
+\[I_{IP}(i) = Q_0(\vec{p}) - Q_0(\vec{p} \mid p_i = 0)\]
+
+Let’s compute this for the event Terrestrial Link failed:
+
+\[I_{IP}(T) = 0.154 - 0.1 = 0.054\]
+
+We get the following results for the other events:
+
+\[I_{IP}(S) = 0.154 - 0.1 = 0.054, \qquad I_{IP}(P) = 0.154 - 0.06 = 0.094\]
+
+  S12: Sample Solution
+
+HW Reliability
+
+- \(R(t) = e^{-t/MTTF} = e^{-0.5} = 0.606\)
+
+- \(t = -2160h \cdot \ln(0.9999) \approx 2160h \cdot 10^{-4} = 0.216h = 12.96 min\)
+
+- \(MTTF = -t/\ln(R(t)) = -72h/\ln(0.999) \approx 72000h\)
+
+- \(N = -t _S ln(1 - R(t))= (1-0.999)= 81\)
+
+HW Availability
+
+- \(MTTR \approx 5 years \cdot (1 - 0.999) = 0.005 year = 43.8 h\)
+
+- \(A = \frac{10 years}{10 years + 1 day} = 0.999726\), i.e. three nines
+
+- \(MTTF = 15min \cdot \frac{0.99}{1 - 0.99} = 1485min = 24.75h\)
+
+QTA
+
+Q0: 0.0394 -> 1 - 0.0394 = 0.9606
+
+  Summary
+
+  What did you learn?
+
+- How hardware dependability can be computed in terms of reliability
+  and availability.
+- How to execute quantitative fault tree analysis to compute
+  system-level reliability as well as importance of basic events for
+  reliability.
+- How to execute Markov analysis on systems to check for the amount of
+  time the system spends in the failed state.
+
+  Where can you learn more?
+
+- Better Embedded System Software: Ch. 26 or Course Unit
+- Embedded Software Development for Safety-Critical Systems: Ch. 11
+  Markov Chains
+- Markov Chains in Detail
+
+  W12: Work Sheet
+
+Hardware Reliability
+
+- Given \(MTTF = 2 years = 17520 h\) and \(t = 1 year = 8760 h\), what
+  is \(R(t)\)?
+
+- Given \(MTTF = 3 months = 90 \cdot 24h = 2160 h\) and \(R(t) = 0.9999\)
+  goal, how long is \(t\)?
+
+- Given \(R(t) = 0.999\) goal and \(t = 72h\), what is the \(MTTF\)?
+
+- You have a component with failure rate \(\lambda = 0.001 / h\) and a target
+  mission time of \(t = 8760 h\). How many instances of the component do you
+  need in parallel to achieve at least a reliability of 0.9999 at
+  \(t\)?
+
+Hardware Availability
+
+In dependability jargon, “X 9s” refers to an availability of X-many 9s —
+e.g. Two-9s = 0.99. 
+ +- Given availability target of three 9s and \(MTTF\) of 5 years what + is the repair time? + +- Give \(MTTF\) of 10 years and repair time of 1 day, how many 9s of + availability do you get? + +- Given availability target of two 9s and repair time of 15 minutes, + how large should the \(MTTF\) be? + +Quantitative Fault Tree Analysis + +Consider the fault tree from W09 with failure probabilities \(P_V = +0.01\) and \(P_{S1} = P_{S2} = P_{S3} = 0.1\): + + System + failed + | + +-----+ + | >=1 | + +-----+ + | | + | +--+ + | | + | +-------+ + | | >=1 | + | +-------+ + | | | +----------------+ + | | +---------+ | + | | | | + | +-------+ +-------+ +-------+ + | | & | | & | | & | + | +-------+ +-------+ +-------+ + | | | | | | | + O O O O O O O + ^ ^ ^ ^ ^ ^ ^ + V | S2 | S3 | S3 + failed | failed | failed | failed + S1 S1 S2 + failed failed failed + +Your task is now to: + +- Compute the top-level failure reliability using \(Q_0\). +- Compute the Birnbaum Importance for all basic events. +- Compute the Improvement Potential Importance for all basic events. + + Assembly + +As you probably know already, CPUs do not execute Rust code, but rather +work with bits and bytes that encode the CPUs instruction. This code is +called assembly code. In contrast to high-level, safe Rust, assembly can +be broken in many ways. The goal of this section is that you value the +guarantees Rust gives you more and only go to lower layers if really +necessary — or if you want to play around with the compiler itself. + +When we write Rust, we write in a high-level language and pass it to the +compiler. The translation happens through multiple layers, e.g. the +following ones for the LLVM compiler: + +- Rust High-Level Code +- Rust’s Mid-Level Intermediate Representation (MIR) +- LLVM Intermediate Representation +- Target-specific Assembly Code + +We can inspect the compilation process using cargo-asm, which brings +both the cargo asm as well as cargo llvm-ir subcommands to our system. 
+ +Let’s Assemble + +We start with the following code that sums all the numbers in a range: + + pub fn sum(range: std::ops::RangeInclusive) -> u8 { + let mut sum = 0; + for i in range { + sum += i; + } + sum + } + +We do cargo llvm-ir asm::sum and see how the %sum variable is +initialized, updated, and how the loop is converted: + + define i8 @asm::sum(i24 %0) unnamed_addr #0 personality i32 (i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context" + start: + %iter.sroa.0.0.extract.trunc = trunc i24 %0 to i8 + %iter.sroa.5.0.extract.shift = lshr i24 %0, 8 + %iter.sroa.5.0.extract.trunc = trunc i24 %iter.sroa.5.0.extract.shift to i8 + %_2.not.i.i.i16 = icmp ugt i24 %0, 65535 + %.not.i.i.i17 = icmp ugt i8 %iter.sroa.0.0.extract.trunc, %iter.sroa.5.0.extract.trunc + %.0.i.i.i18 = select i1 %_2.not.i.i.i16, i1 true, i1 %.not.i.i.i17 + br i1 %.0.i.i.i18, label %bb6, label %bb3.i.i + + bb3.i.i: ; preds = %start, %bb3.i.i + %sum.020 = phi i8 [ %3, %bb3.i.i ], [ 0, %start ] + %iter.sroa.0.019 = phi i8 [ %spec.select15, %bb3.i.i ], [ %iter.sroa.0.0.extract.trunc, %start ] + %1 = icmp ult i8 %iter.sroa.0.019, %iter.sroa.5.0.extract.trunc + %not. 
= xor i1 %1, true + %2 = zext i1 %1 to i8 + %spec.select15 = add nuw i8 %iter.sroa.0.019, %2 + %3 = add i8 %sum.020, %iter.sroa.0.019 + %.not.i.i.i = icmp ugt i8 %spec.select15, %iter.sroa.5.0.extract.trunc + %.0.i.i.i = select i1 %not., i1 true, i1 %.not.i.i.i + br i1 %.0.i.i.i, label %bb6, label %bb3.i.i + + bb6: ; preds = %bb3.i.i, %start + %sum.0.lcssa = phi i8 [ 0, %start ], [ %3, %bb3.i.i ] + ret i8 %sum.0.lcssa + } + +We do cargo asm our_crate::sum --rust and see a similar structure (the +initialization and loop): + + pub fn sum(range: std::ops::RangeInclusive) -> u8 { + mov ecx, edi + and ecx, 16777215 + xor eax, eax + cmp ecx, 65535 + ja .LBB0_5 + mov ecx, edi + shr ecx, 8 + cmp dil, cl + ja .LBB0_5 + xor eax, eax + .LBB0_3: + mov edx, edi + cmp dil, cl + adc dil, 0 + sum += i; + add al, dl + cmp dl, cl + jae .LBB0_5 + cmp dil, cl + jbe .LBB0_3 + .LBB0_5: + } + ret + +Now let’s have a look at a slightly different piece of code. The change +is that we now use an exclusive Range: + + pub fn sum(range: std::ops::Range) -> u8 { + let mut sum = 0; + for i in range { + sum += i; + } + sum + } + +Now, our LLVM IR code looks quite different: + + define i8 @asm::sum(i8 %range.0, i8 %range.1) unnamed_addr #0 personality i32 (i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context" + start: + %0 = icmp ult i8 %range.0, %range.1 + br i1 %0, label %bb4.preheader, label %bb6 + + bb4.preheader: ; preds = %start + %1 = xor i8 %range.0, -1 + %2 = add i8 %1, %range.1 + %3 = add nuw i8 %range.0, 1 + %4 = mul i8 %2, %3 + %5 = zext i8 %2 to i9 + %6 = add i8 %range.1, -2 + %7 = sub i8 %6, %range.0 + %8 = zext i8 %7 to i9 + %9 = mul i9 %5, %8 + %10 = lshr i9 %9, 1 + %11 = trunc i9 %10 to i8 + %12 = add i8 %4, %range.0 + %13 = add i8 %12, %11 + br label %bb6 + + bb6: ; preds = %bb4.preheader, %start + %sum.0.lcssa = phi i8 [ 0, %start ], [ %13, %bb4.preheader ] + ret i8 %sum.0.lcssa + } + +What you notice is that there is no loop anymore. 
Instead, Rust was able
to detect that what we are doing is adding up the elements of a range
i..j (which is different from adding up arbitrary elements from a list).
In consequence, it converted this construct into an optimized version of
the well-known formulas for computing the triangular number:

\[T_n = \frac{n(n+1)}{2}\]

and the natural sum of a range: sum(i..j) = \(\sum_{k=i}^{j-1} k = T_{j-1} - T_{i-1}
\)

In assembly, the result looks like this:

    pub fn sum(range: std::ops::Range<u8>) -> u8 {
        cmp dil, sil
        jae .LBB0_1
        mov ecx, edi
        not cl
        add cl, sil
        lea edx, [rdi, +, 1]
        mov eax, ecx
        mul dl
        movzx ecx, cl
        sub sil, dil
        add sil, -2
        movzx edx, sil
        imul edx, ecx
        shr edx
        add al, dil
        add al, dl
    }
    ret
    .LBB0_1:
    xor eax, eax
    ret

In summary, you should realize how efficient the Rust compiler is and
what is done for you already. This should also motivate you to abstain
from premature optimization, and only look at these things if you have
enough demand to further optimize the code you are generating.

 Foreign Function Interface

Rust, by being a language without a runtime environment, is very well
suited to interface with other programming languages and ecosystems.
This is where foreign function interfaces (FFI) come into play.
Naturally, there are several use cases for this:

- You have existing code in a language that is well-tested and
  established and you do not want to touch or rewrite it. Still, you
  want to use the code in Rust and for this, you would wrap the
  existing code in an FFI and safely call it from Rust.
- You have existing code in a language and you want to gradually
  rewrite it in Rust. In this case, you can start by writing pieces of
  code in Rust, expose them via FFI, and integrate them into the
  existing system — replacing existing functionality. 
+- You have code written in a language that is not suited for high + performance (e.g., Python) and want to remove performance + bottlenecks by rewriting critical functions in Rust. + +In the following, we take a closer look at the last use case. + +An FFI Fibonacci + +To show you FFI in action, we show how using Rust for a core function +and calling it from Python can improve performance. Again use the +recursive Fibonacci method as an example — hoping that it is clear to +you that this is a toy example and the obvious path to improve the +performance by using the closed form to compute the result. + + Pure Python + +First, to establish a baseline, we implement and benchmark the function +in pure Python: + + #!/usr/bin/env python3 + + def fib(n): + if n == 0 or n == 1: + return 1 + else: + return fib(n-1) + fib(n-2) + + + print(fib(34)) + +Running it with hyperfine yields the following results: + + > hyperfine "python3 src/pure.py" + Benchmark #1: python3 src/pure.py + Time (mean ± σ): 1.099 s ± 0.046 s [User: 1.097 s, System: 0.002 s] + Range (min … max): 1.042 s … 1.192 s 10 runs + + Rust-Powered Python + +We know that Python, being an interpreted language, is not fast in doing +a) function calls (in our case the recursive call) as well as b) basic +mathematical operations (+ in our case). The reason behind this is that +due to the dynamic typing of Python, it cannot make the same assumptions +as other languages do. For our addition operation, Python is going to +first check what the two operands are, how + is implemented and then +apply it — despite the fact that only integers are used for which the +addition is a single machine instruction. + +A typical approach for these performance-critical code parts of Python +programs is to call to code from other programming languages or use, for +instance, Cython. In our case, we rewrite the core logic in Rust and +make it accessible from Python. 
+ +The PyO3 Way + +While the previous version works, for many use cases it is more suitable +to use this approach for FFI. Our Cargo.toml looks like this: + + [package] + name = "fibonacci" + version = "0.1.0" + edition = "2018" + + [lib] + name = "fibonacci" + crate-type = ["cdylib"] + + [dependencies.pyo3] + version = "0.15.1" + features = ["extension-module"] + +The upper part is as before and we further leverage the pyo3, a crate +for bridging the foreign-function interface between Rust and Python +(both ways). The lib.rs with the fib function looks very similar to the +Python version: + + use pyo3::prelude::*; + + #[pyfunction] + fn fib(n: u32) -> u32 { + if n == 0 || n == 1 { + 1 + } else { + fib(n - 1) + fib(n - 2) + } + } + + #[pymodule] + fn fibonacci(_py: Python, m: &PyModule) -> PyResult<()> { + m.add_function(wrap_pyfunction!(fib, m)?)?; + Ok(()) + } + +What is different from normal Rust functions are the attributes. +#[pyfunction] automatically wraps a function to be Python compatible. +#[pymodule] creates a Python module that can be imported. Note that the +name of the module function fibonacci must be identical with the +lib.name in the Cargo.toml. Thanks to Rust being statically typed, this +code can be compiled down to highly efficient machine code. 
+ +On the Python side of things, we declare the following in ffi.py: + + #!/usr/bin/env python3 + + import fibonacci + + print(fibonacci.fib(34)) + +Before we can trigger pyo3-rs magic, we need to do the following to +setup a virtual environment and add maturin (a CLI tool to build and +publish PyO3 crates): + + python -m venv .env # creates a virtual environment + source .env/bin/activate # activates it + pip install maturin + +Now, we can build our Python extension module with + + maturin develop --release + +Finally, we compare the two approaches with hyperfine: + + > hyperfine "python src/ffi.py" "python src/pure.py" + Benchmark 1: python src/pure.py + Time (mean ± σ): 1.197 s ± 0.218 s [User: 1.195 s, System: 0.002 s] + Range (min … max): 1.070 s … 1.801 s 10 runs + + Benchmark 2: python src/ffi.py + Time (mean ± σ): 19.2 ms ± 0.8 ms [User: 17.7 ms, System: 1.4 ms] + Range (min … max): 17.9 ms … 21.2 ms 158 runs + + Summary + 'python src/ffi.py' ran + 62.23 ± 11.66 times faster than 'python src/pure.py' + +In summary, we reduce the computation time more than a factor of 60 with +our solution. For more complex functions, it is well possible to achieve +even higher gains. + + U13: unsafe(ty) last + +You almost made it! In this final training, Corro the Unsafe Rusturchin +is going to teach about unsafe Code, Debugging Tools for unsafe, Foreign +Function Interfaces, and a little bit of Assembly. 
+ + S13: Example solution + +FFI with PyO3 + +lib.rs: + + use num::complex::Complex; + use pyo3::prelude::*; + + fn mandelbrot_at_point(cx: f64, cy: f64, max_iters: usize) -> usize { + let mut z = Complex { re: 0.0, im: 0.0 }; + let c = Complex::new(cx, cy); + + for i in 0..=max_iters { + if z.norm() > 2.0 { + return i; + } + z = z * z + c; + } + max_iters + } + + #[pyfunction] + fn calculate_mandelbrot( + max_iters: usize, + x_min: f64, + x_max: f64, + y_min: f64, + y_max: f64, + width: usize, + height: usize, + ) -> Vec> { + let mut rows: Vec<_> = Vec::with_capacity(width); + for img_y in 0..height { + let mut row: Vec = Vec::with_capacity(height); + for img_x in 0..width { + let x_percent = (img_x as f64 / width as f64); + let y_percent = (img_y as f64 / height as f64); + let cx = x_min + (x_max - x_min) * x_percent; + let cy = y_min + (y_max - y_min) * y_percent; + let escaped_at = mandelbrot_at_point(cx, cy, max_iters); + row.push(escaped_at); + } + rows.push(row); + } + rows + } + + #[pymodule] + fn mandelbrot(_py: Python, m: &PyModule) -> PyResult<()> { + m.add_function(wrap_pyfunction!(calculate_mandelbrot, m)?)?; + Ok(()) + } + +ffi.py: + + import io + from mandelbrot import calculate_mandelbrot + + def render_mandelbrot(vals): + for row in vals: + line = io.StringIO() + for column in row: + if column in range(0,2): + line.write(' ') + elif column in range(3,5): + line.write('.') + elif column in range(6,10): + line.write('•') + elif column in range(11, 30): + line.write('*') + elif column in range(31, 100): + line.write('+') + elif column in range(101, 200): + line.write('x') + elif column in range(201, 400): + line.write('$') + elif column in range(401, 700): + line.write('#') + else: + line.write('%') + print(line.getvalue()) + + if __name__ == "__main__": + mandelbrot = calculate_mandelbrot(1000, -2.0, 1.0, -1.0, 1.0, 100, 24) + + render_mandelbrot(mandelbrot) + +Assembling numbers + +Discuss in class. + + Summary + + What did you learn? 
+ +- What privileges and duties come whenever you start using unsafe in + your project. +- What rare practical use cases are there for you to employ unsafe. + Look first, if someone else already did the work for you. +- How Rust can interface with other languages using FFI, for instance, + to replace performance-critical functions with efficient Rust + implementations. + + Where can you learn more? + +- unsafe + - Rust Book: Ch. 19.1 + - Programming Rust: Ch. 22 + - Nomicon + - Unsafe Coding Guidelines + - Unsafe is not what you think it means + - Understand Unsafe Rust + - Rust for Rustaceans: Ch. 10 +- Foreign Function Interfaces: + - Programming Rust: Ch. 23 + - Rust for Rustaceans: Ch. 12 + - Rust FFI Omnibus +- The Challenge of Using C in Safety-Critical Applications + + Tools + +Before we let you work with unsafe code, we want to show you two tools +that allow you to debug your software for undefined behaviour + + cargo-careful + +The first tool is cargo-careful, which lets you run Rust code with extra +care. You install it like this: + + cargo install cargo-careful + +and run it like this: + + cargo +nightly careful run + +run can be replaced with test to run your test suite instead of your +binary. careful changes the build process in that it builds your code +with a standard library in which all debug assertions are enabled. +Hence, your execution is really slow but way more assumptions are +checked while running. There are also some nightly-only flags that add +run-time checks against undefined behaviour. + +The following shows some undefined behaviour we introduced in an unsafe +block: + + fn main() { + let arr = [1, 2, 3, 4]; + let slice = &arr[..2]; + let value = unsafe { slice.get_unchecked(2) }; + println!("The value is {}!", value); + } + +If the do cargo run, value will become 3 but we violated memory rules by +indexing slice out of bounds. 
If we run it using cargo careful, the +get_unchecked precondition is evaluated and an index violation is +detected. + +This is a simple (if not obvious) example, but you can imagine how +larger projects using various pieces of unsafe code could create less +obvious undefined behaviour. + + miri + +The second tool is miri, an interpreter for Rust’s mid-level +intermediate representation (MIR). This is not a course on compilers, +hence it should be enough for you to know that MIR is a simpler +representation of Rust code (i.e. syntax is desugared). With miri, you +can run binaries and tests of cargo projects to check for certain +classes of undefined behaviour, as we show below. If you are authoring +unsafe code, you should leverage miri to double-check if you do not +expose miri-detectable classes of undefined behaviour. + +You can add miri like this: + + rustup +nightly component add miri + +and run it like this: + + cargo +nightly miri run + +The following examples have been kindly provided by Ralf Jung, the +author of miri and graduate of MPI-SWS. They represent cases where we do +not fulfill our duties mentioned before. You can try them out by copying +the code to a new crate and run the above command. Note that if you run +them with cargo run they might still produce some behaviour. However, +when multiple pieces of unsafe code work together, strange things can +happen. + +Invalid Memory Access + +Here, we attempt to dereference null: + + #![allow(unused)] + + fn main() { + unsafe { + let val = *(0 as *const u8); + } + } + +Note that for this piece, cargo run already presents us with a warning +(as apparently some parts of Miri already work with cargo alone). + +For the following, this is not the case. 
+ + fn main() { + unsafe { + let x = 0u8; + let ptr = &x as *const u8; + ptr.offset(1); // okay, one-past-the-end + ptr.wrapping_offset(2); // okay, wrapping_offset may go OOB + ptr.offset(2); // UB + } + } + +Here, we create a pointer to a memory region (2 bytes into ptr) that +does not belong to what we allocated (1 byte for u8). + +Type Invariants + +We mentioned before that the memory region of a bool should contain +either the value 0 or 1. In the following, this is not the case: + + fn main() { + unsafe { + let x: bool = std::mem::transmute(2u8); + println!("{}", x); + } + } + +Similarly, enum memory should only ever contain values that are +associated with a valid enum variant. Here, we disobey this rule: + + #[derive(Debug)] + enum Enum { + A, + B, + C, + } + fn main() { + unsafe { + let x: Enum = std::mem::transmute(3u8); + println!("{:?}", x); + } + } + + unsafe + +So far in this course, we have only used safe Rust code, which means +that the code we wrote (and successfully compiled) so far could not +contain certain forms of bugs. In particular, this is concerned with +so-called undefined behaviour. + + Undefined behaviour describes the situation, where it is no longer + clear what you as the programmer intended and it is left free to the + compiler to pick a behaviour. + +This is particularly bad, as arbitrarily bad things can happen. Let’s +build a crashing piece of code: + + fn main() { + let mut a: usize = 0; + let ptr = &mut a as *mut usize; + unsafe { + *ptr.offset(-3) = 0x7ffff72f484c; + } + } + +In the program, we take a raw pointer to the first stack variable a. In +the unsafe block, we do pointer arithmetic, leaving our original memory +area (a) and use the return address of main. In overwriting this value, +we make our program no longer well-behaved. So we have misused the +capabilities provided by unsafe. Fortunately, the operating system +provides memory separation, so we get a segmentation fault and only our +application crashes. 
On a embedded system (without OS), we could have +easily caused more trouble. + +In summary, with the use of the unsafe keyword, we are entering the +realm of unsafe Rust where two things happen: + +- First, you get more power as you can now write code that does not + need to conform with the compiler’s rules. You can think of unsafe + as a way to swear to the compiler “you don’t need to check this, I + know what I am doing”. +- Second, you get more responsibility, as it is now your fault if the + resulting code contains issues. + +Metaphorically speaking, safe Rust is like a prison in where you’re not +allowed to bring shovels. Even more, it is a language — so it erases the +concept of shovels & digging from the inhabitants. In consequence, they +cannot even think about the concept of a shovel. With unsafe, thinking +about this concept is allowed again — including all the, potentially +devastating, consequences. + +Before we get started, let’s clarify the use cases of unsafe a bit more. +It is important that, after you read and understood this section, you +don’t feel like you should now spread unsafe blocks all over your code +because it makes things easier. If you are writing high-level, +application-layer programs, it is extremely unlikely that you need to +use unsafe — it is even discouraged. If you want to enforce this policy +in your crate, use the #![forbid(unsafe_code)] attribute in your +top-level module, so that unsafe code cannot sneak in easily (assuming +you have contributors that might not be aware of unsafe consequences). +So when is unsafe really needed? + +- If you write low-level software that deals with IO, registers, or + other hardware directly. Please note that in many cases, someone + already wrote that low-level code for you and provided a library + with safe abstractions. +- If you want to write efficient data structures whose structure and + algorithms do not comply with the Rust ownership rules. 
Again, it is + highly likely that someone already wrote a crate for that. + +With this in mind, let’s remove the safety net and get unsafe. + +Unsafe Privileges and Duties + +Privileges + +When you mark a block of code or a function as unsafe, you get access to +the following operations: + +- dereference pointers +- call other unsafe functions +- call functions from other languages (via foreign function interface) +- mutably access global variables (with 'static lifetime) + +Note that while Rust no longer avoids these potentially harmful +operations, the compiler still checks for (a) types, (b) lifetimes, and +(c) bounds on data structures. + +Duties + +Now, with unsafe in place, it is your duty to uphold the Rust rules for +well-behaved programs (source: Programming Rust): + +- The program must not read uninitialized memory. +- The program must not create invalid primitive values: + - References, boxes, or fn pointers that are null + - bool values that are not either 0 or 1 + - enum values with invalid discriminant values + - char values that are not valid, non-surrogate Unicode code + points + - str values that are not well-formed UTF-8 + - Fat pointers with invalid vtables/slice lengths + - Any value of type ! +- The rules for references must be followed: + - No reference may outlive its referent + - Shared access is only read-only access + - Mutable access is exclusive access +- The program must not dereference null, incorrectly aligned or + dangling pointers +- The program must not use a pointer to access memory outside the + allocation with which the pointer is associated +- The program must be free of data races +- The program must not unwind across a call made from another + language, via the foreign function interface +- The program must comply with the contracts of standard library + functions + +Rust assumes that any unsafe code never violates any of these rules. 
If +this is the case, Rust can guarantee that the composition of several +safe Rust components is also safe. + +It is important to note that checking for the above rules does not only +require you to look at the unsafe block but also its surroundings. Bugs +before the unsafe block can break contracts, which only turns into +undefined behaviour inside the block. Also, it is possible that the +consequences of contract breaking only happen after the unsafe block. + +In essence, to be a good Rustacean, you should + +- only use unsafe where needed, in blocks of code that are as small as + possible. As they must undergo review, this helps both yourself as + well as your reviewers. +- explicitly state contracts, by adding a # Safety section to each + unsafe function you write. +- uphold all contracts mentioned above. + +Using cargo-geiger + +If you care about the usage of unsafe in your project and its +dependencies, you can use cargo-geiger to check all of them. It returns +the following results: + +- 🔒 = No unsafe usage found, declares #![forbid(unsafe_code)] +- ❓ = No unsafe usage found, missing #![forbid(unsafe_code)] +- ☢️ = unsafe usage found + +Ideally, most of your dependencies have the lock symbol. Note that it +does not mean you should eliminate all unsafe code as much as possible. +Instead, the idea is to minimize the usage of unnecessary unsafe code as +much as possible. So in case you have the choice between two, +functionally equivalent libraries, pick the safer one. + +Unsafe in Action + +Efficient ASCIIString + +This example shows how you can write efficient code, when you are well +aware that certain contracts are upheld, while the compiler is not aware +of this. 
    mod ascii {
        #[derive(Debug, Eq, PartialEq)]
        pub struct Ascii(
            Vec<u8>
        );

        impl Ascii {
            pub fn from_bytes(bytes: Vec<u8>) -> Result<Ascii, NotAsciiError> {
                if bytes.iter().any(|&byte| !byte.is_ascii()) {
                    return Err(NotAsciiError(bytes));
                }
                Ok(Ascii(bytes))
            }
        }

        #[derive(Debug, Eq, PartialEq)]
        pub struct NotAsciiError(pub Vec<u8>);

        impl From<Ascii> for String {
            fn from(ascii: Ascii) -> String {
                unsafe { String::from_utf8_unchecked(ascii.0) }
            }
        }
    }

The type Ascii operates as follows: When the type is created based on a
vector of bytes, they are all checked if they are valid ASCII
characters. In this case, the vector is moved to be the inner type of
Ascii. As the from_bytes function is the only one to create Ascii
instances, the contract is upheld that the vector only contains valid
ASCII bytes. Now when we want to convert Ascii into a String this helps.
Internally, a String is a vector of bytes that have been checked if they
are valid UTF8 characters. As any ASCII character is a valid UTF8
character, we can in principle reuse the Ascii vector for the String. We
can do so by using the unsafe function from_utf8_unchecked, whose safety
contract is that the inputted bytes are all valid. We checked so before,
making the transformation a simple move of the vector from the Ascii to
the String type. If we had used the safe from_utf8(), this would have
been less efficient as it checks if these are all valid. 
+ +Here is the safe Ascii type in use: + + use ascii:Ascii; + + let bytes: Vec = b"ASCII string example".to_vec(); + + let ascii : Ascii = Ascii::from_bytes(bytes) // no allocation or copy, only scan + .unwrap(); + + let string = String::from(ascii); // Zero-cost: no allocation, copy, or scan + + assert_eq!(string, "ASCII string example"); + + W13: Work Sheet + +FFI with PyO3 + +Start from the following Python code which prints the Mandelbrot set and +rewrite the core performance-critical functions in Rust: + + import io + + def calculate_mandelbrot(max_iters, x_min, x_max, y_min, y_max, width, height): + rows = [] + for img_y in range(height): + row = [] + for img_x in range(width): + x_percent = img_x / width + y_percent = img_y / height + cx = x_min + (x_max - x_min) * x_percent + cy = y_min + (y_max - y_min) * y_percent + escaped_at = mandelbrot_at_point(cx, cy, max_iters) + row.append(escaped_at) + rows.append(row) + + return rows + + + def mandelbrot_at_point(cx, cy, max_iters): + z = complex(0.0, 0.0) + c = complex(cx, cy) + + for i in range(max_iters+1): + if abs(z) > 2.0: + return i + z = (z * z) + c + return max_iters + + def render_mandelbrot(vals): + for row in vals: + line = io.StringIO() + for column in row: + if column in range(0,2): + line.write(' ') + elif column in range(3,5): + line.write('.') + elif column in range(6,10): + line.write('•') + elif column in range(11, 30): + line.write('*') + elif column in range(31, 100): + line.write('+') + elif column in range(101, 200): + line.write('x') + elif column in range(201, 400): + line.write('$') + elif column in range(401, 700): + line.write('#') + else: + line.write('%') + print(line.getvalue()) + + if __name__ == "__main__": + mandelbrot = calculate_mandelbrot(1000, -2.0, 1.0, -1.0, 1.0, 100, 24) + + render_mandelbrot(mandelbrot) + +Save this as pure.py. Now start with a ffi.py (a copy of pure.py) and a +lib.rs for using pyo3 to bridge the two. 
In a first step, move +mandelbrot_at_point from Python to Rust. Afterwards, also move +calculate_mandelbrotset to Rust. You are allowed to use +num::complex::Complex (from the third-party num crate). + +Finally, run hyperfine "python src/ffi.py" "python src/pure.py" to see +how the performance improves. + +Assembling numbers + +Consider the following function that sums a slice of numbers (in +contrast to working on ranges as in the earlier section): + + pub fn sum_numbers(numbers: &[u8]) -> u8 { + let mut sum = 0; + for num in numbers { + sum += num; + } + sum + } + +Your task is now to: + +- Have a close look at the LLVM-IR and assembly and annotate which + parts of the code implement which higher level function. +- Rewrite the function by using an Iterator and an appropriate + consumer function. What happens to the IR and assembly? + + U14: Energy-Aware Systems + +[Timo Hönig] + +Timo Hönig, © RUB, Marquard + +Finally, DSys invited Timo Hönig (RUB) as the last coach to give a +lecture on the design and implementation of energy-aware computing +systems. From the perspective of the practical design of operating +systems and system software, the lecture will discuss methods and +approaches to improve non-functional system properties such as +performance and dependability - in particular under the consideration of +the systems’ energy demand. + +[1] Niels Bohr, winner of a Nobel Prize in physics, contributed the Bohr +model the atom, which is a rather stable and tangible model. Werner +Heisenberg, another Nobel Prize in physics winner, described the +“uncertainty principle”, where things change or disappear if you try to +measure them. + +[2] Niels Bohr, winner of a Nobel Prize in physics, contributed the Bohr +model the atom, which is a rather stable and tangible model. Werner +Heisenberg, another Nobel Prize in physics winner, described the +“uncertainty principle”, where things change or disappear if you try to +measure them. + +[3] The meaning of life. 
+ +[4] Niels Bohr, winner of a Nobel Prize in physics, contributed the Bohr +model the atom, which is a rather stable and tangible model. Werner +Heisenberg, another Nobel Prize in physics winner, described the +“uncertainty principle”, where things change or disappear if you try to +measure them. + +[5] Niels Bohr, winner of a Nobel Prize in physics, contributed the Bohr +model the atom, which is a rather stable and tangible model. Werner +Heisenberg, another Nobel Prize in physics winner, described the +“uncertainty principle”, where things change or disappear if you try to +measure them. + +[6] The meaning of life. + +[7] Tony Hoare invented it and calls it his billion dollar mistake. + +[8] Niels Bohr, winner of a Nobel Prize in physics, contributed the Bohr +model the atom, which is a rather stable and tangible model. Werner +Heisenberg, another Nobel Prize in physics winner, described the +“uncertainty principle”, where things change or disappear if you try to +measure them. + +[9] Niels Bohr, winner of a Nobel Prize in physics, contributed the Bohr +model the atom, which is a rather stable and tangible model. Werner +Heisenberg, another Nobel Prize in physics winner, described the +“uncertainty principle”, where things change or disappear if you try to +measure them. + +[10] Niels Bohr, winner of a Nobel Prize in physics, contributed the +Bohr model the atom, which is a rather stable and tangible model. Werner +Heisenberg, another Nobel Prize in physics winner, described the +“uncertainty principle”, where things change or disappear if you try to +measure them. + +[11] Niels Bohr, winner of a Nobel Prize in physics, contributed the +Bohr model the atom, which is a rather stable and tangible model. Werner +Heisenberg, another Nobel Prize in physics winner, described the +“uncertainty principle”, where things change or disappear if you try to +measure them. 
+ +[12] Niels Bohr, winner of a Nobel Prize in physics, contributed the +Bohr model the atom, which is a rather stable and tangible model. Werner +Heisenberg, another Nobel Prize in physics winner, described the +“uncertainty principle”, where things change or disappear if you try to +measure them. diff --git a/hp/hp.py b/hp/hp.py index 8055dc8..23225b9 100755 --- a/hp/hp.py +++ b/hp/hp.py @@ -1,4 +1,10 @@ #!/bin/python3 +import sys + +if len(sys.argv) < 2: + print(sys.argv) + exit("Please provide a filename as argument") +filename = sys.argv[-1] def colorize(word): return f"\u001b[0;33m{word}\u001b[0m" @@ -16,7 +22,7 @@ import re chapter_pattern = re.compile("^CHAPTER") page_pattern = re.compile("^\d+$") -with open("hp.txt") as f: +with open(filename) as f: lines = f.readlines() print("Stripping excess data")