Well, at this point, The Rust Programming Language demonstrates how to write a command-line program named minigrep. Following the textbook, I decided to rewrite the small utility that I mentioned in https://ryza.moe/2019/08/rewrite-the-styled-code-in-html-generated-by-apple-to-wordpress-compatible-html/.
What I have learnt so far is enough for me to write a utility that is, at least, workable. And if you're an expert in Rust, you'll find the following code ugly and perhaps not even Rust-ish.
However, building on the previous 9 posts of this series, for those who have just begun to learn Rust like me, the code shown below won't be a giant jump. Nevertheless, there is definitely plenty of room to improve it. Any suggestions or questions are welcome (⁎⁍̴̛ᴗ⁍̴̛⁎)
Furthermore, I googled a lot while writing the code, so I have also attached the corresponding links in the comments.
![](/wp-content/uploads/2019/10/rust-9-rewrite.webp)
Of course, this tiny (and perhaps even ugly) project is on my GitHub: codetowp-rust.
Here is our Cargo.toml first.
```toml
[package]
name = "codetowp"
version = "0.1.0"
authors = ["RyzaOikawa <[data deleted]>"]
edition = "2018"

[dependencies]
clap = "2.33.0"
lazy_static = "1.4.0"
regex = "1"
```
And src/main.rs follows~
extern crate clap; #[macro_use] extern crate lazy_static; use clap::{Arg, App}; use regex::Regex; use std::collections::HashMap; use std::error::Error; use std::fs::File; use std::io::{self, Write, BufReader, BufRead}; use std::path::Path; // using `lazy_static` to ensure that all regex is compiled exactly once // https://docs.rs/regex/1.3.1/regex/#example-avoid-compiling-the-same-regex-in-a-loop lazy_static! { static ref CLASSRE: Regex = Regex::new("(.*)?class=(.*)").unwrap(); static ref STYLERE: Regex = Regex::new("^[\\s]*(\\w+)\\.([sp\\d]+) .*[ {]color: (#[a-f0-9]+)").unwrap(); static ref APPLECONVERTEDSPACERE: Regex = Regex::new("(?:.*?)(<span class=\"Apple-converted-space\">([\\s]+)</span>)").unwrap(); static ref COLORALLZEROSPACERE: Regex = Regex::new("(?:.*?)(<span style=\"color: #000000\">([\\s]+)</span>)").unwrap(); } fn rewrite_code(file: &Path, output: &Path) -> io::Result<()> { // open input source file // https://stackoverflow.com/a/45882510 let file_reader = match File::open(&file) { // The `description` method of `io::Error` returns a string that // describes the error Err(why) => panic!("Couldn't open {}: {}", file.display(), why.description()), Ok(file) => BufReader::new(file), }; // create output file let mut output = match File::create(&output) { Ok(file) => file, Err(why) => panic!("Couldn't create output {}: {}", file.display(), why.description()), }; // save styles // e.g, // {"p1" : "#000233", "s1" : "#233333"} let mut styles: HashMap<String, String> = HashMap::new(); // number of classes of ``<p></p>`` let mut p_class_num = 0; // number of classes of ``<span></span>`` let mut span_class_num = 0; // replacement strings // e.g, // { // "<p class=\"p1\"" : "<p style=\"color: #000233\"", // "class=\"s1\" : "style=\"color: #233333\"" // } let mut replacement: HashMap<String, String> = HashMap::new(); // 0: we haven't encountered <style>...</style> // 1: Handling // 2: the <style>...</style> has been processed let mut have_encountered_style = 0; 
// read line by line from input file for line in file_reader.lines() { // unwrap `line` and make it `&str` // https://stackoverflow.com/a/23977218 let current_line = &line?[..]; // if the <style>...</style> has NOT been processed if have_encountered_style != 2 { // try to match styles inside <style>...</style> // https://docs.rs/regex/1.3.1/regex/#example-find-a-date if STYLERE.is_match(current_line) { have_encountered_style = 1; // if we have this line matched // https://docs.rs/regex/1.3.1/regex/#example-iterating-over-capture-groups for cap in STYLERE.captures_iter(current_line) { // save styles {"p1" : "#000233", "s1" : "#233333"} styles.insert(cap[2].to_string(), cap[3].to_string()); // update the number // match `&str` in Rust // https://stackoverflow.com/a/32790546 match &cap[1] { "p" => p_class_num += 1, "span" => span_class_num += 1, _ => () }; } } else { if styles.len() != 0 { // remember that the <style>...</style> has been processed have_encountered_style = 2; for p_index in 1..1 + p_class_num { let origin = format!("p class=\"p{}\"", p_index); if let Some(p_colour) = styles.get(&format!("p{}", p_index)) { let replace = format!("span style=\"color: {}\"", p_colour); replacement.insert(origin, replace); } } replacement.insert("</p>".to_string(), "</span>".to_string()); for span_index in 1..1 + span_class_num { let origin = format!("class=\"s{}\"", span_index); if let Some(span_colour) = styles.get(&format!("s{}", span_index)) { let replace = format!("style=\"color: {}\"", span_colour); replacement.insert(origin, replace); } } } } } else { if current_line.len() <= 7 { continue; } let mut modified: String = current_line.to_string(); for (origin, replace) in &replacement { modified = modified.replace(origin, replace); } if modified.starts_with("<span style=\"color: #000000\">") { modified = modified.replace("<span style=\"color: #000000\">", ""); let modified_len = modified.len(); modified = modified[..modified_len - 7].to_string(); } // replace all `\x0a` 
with ` ` modified = modified.replace(&" ".to_string(), &" ".to_string()); let replace_inline = |regex_index: u8, modified: String| -> String { let mut more_replacement: HashMap<String, String> = HashMap::new(); // `lazy_static!` generates struct // thus the workaround is to match index // ugly but works match regex_index { 1 => { for cap in APPLECONVERTEDSPACERE.captures_iter(&modified) { more_replacement.insert(cap[1].to_string(), cap[2].to_string()); } }, 2 => { for cap in COLORALLZEROSPACERE.captures_iter(&modified) { more_replacement.insert(cap[1].to_string(), cap[2].to_string()); } }, _ => () }; // do more replacement let mut result: String = modified; for (origin, replace) in &more_replacement { result = result.replace(origin, replace); } result }; modified = replace_inline(1, modified); modified = replace_inline(2, modified); // replace `<br>` with `` modified = modified.replace(&"<br>".to_string(), &"".to_string()); // add the line feed character // https://stackoverflow.com/questions/37889337/how-to-concatenate-a-char-onto-a-string-in-rust modified.push('\n'); // write `String` to file // https://stackoverflow.com/a/31193386 output.write_all(modified.as_bytes()).expect("Couldn't write to output file!"); } } Ok(()) } fn parseargs() -> (String, String) { // https://github.com/clap-rs/clap#quick-example let matches = App::new("codetowp") .version("1.0") .author("Ryza<[data deleted]>") .about("Convert Apple generated HTML output of coloured code to WordPress compatible HTML code") .arg(Arg::with_name("file") .short("f") .long("file") .value_name("FILE") .help("Input file path") .required(true) ) .arg(Arg::with_name("output") .short("o") .long("output") .value_name("FILE") .help("Output file path") .required(true) ) .get_matches(); // directly use `unwrap()` because they were set to be required let file = matches.value_of("file").unwrap().to_string(); let output = matches.value_of("output").unwrap().to_string(); (file, output) } fn main() -> io::Result<()> { 
let (file, output) = parseargs(); // use `Path` // https://doc.rust-lang.org/rust-by-example/std_misc/file/open.html rewrite_code(Path::new(&file), Path::new(&output)) }