从零开始的 Rust 学习笔记(9)

Well, at this point, The Rust Programming Language demonstrates how to write a command line program named minigrep. Following the textbook, I decided to rewrite the small utility that I mentioned in https://ryza.moe/2019/08/rewrite-the-styled-code-in-html-generated-by-apple-to-wordpress-compatible-html/.

The things learnt so far are enough for me to write an, at least, workable utility. And if you're an expert in Rust, you'll find the following code ugly and perhaps not even Rust-ish.

However, based on the previous 9 posts of this series, for those who are just beginning to learn Rust like me, the code shown below won't be a giant leap. Nevertheless, there is definitely plenty of room to improve it. Any suggestions or questions are welcome(⁎⁍̴̛ᴗ⁍̴̛⁎)

Furthermore, I googled a lot while writing the code, so I have also put the corresponding links in the comments.

Of course, this tiny (and perhaps even ugly) project is on my GitHub: codetowp-rust

Here goes our Cargo.toml first.

[package]
name = "codetowp"
version = "0.1.0"
authors = ["RyzaOikawa <[data deleted]>"]
edition = "2018"

[dependencies]
clap = "2.33.0"
lazy_static = "1.4.0"
regex = "1"

And src/main.rs follows~

extern crate clap;

#[macro_use]
extern crate lazy_static;

use clap::{Arg, App};
use regex::Regex;
use std::collections::HashMap;
use std::error::Error;
use std::fs::File;
use std::io::{self, Write, BufReader, BufRead};
use std::path::Path;

// using `lazy_static` to ensure that all regex is compiled exactly once
// https://docs.rs/regex/1.3.1/regex/#example-avoid-compiling-the-same-regex-in-a-loop
lazy_static! {
    // Matches a line containing `class=`, capturing the text before it (group 1)
    // and after it (group 2).
    // NOTE(review): no use of this pattern is visible in this file — confirm
    // before relying on or removing it.
    static ref CLASSRE: Regex = Regex::new("(.*)?class=(.*)").unwrap();
    // Matches a CSS rule line of the form `<tag>.<class> ...color: #<hex>`,
    // optionally indented:
    //   group 1: the word before the dot (e.g. `p`, `span`)
    //   group 2: the class name, made only of `s`, `p` and digits (e.g. `p1`, `s3`)
    //   group 3: the colour, `#` followed by lowercase hex digits
    // `color:` must be preceded by a space or `{`.
    static ref STYLERE: Regex = Regex::new("^[\\s]*(\\w+)\\.([sp\\d]+) .*[ {]color: (#[a-f0-9]+)").unwrap();
    // Matches a `<span class="Apple-converted-space">` element whose content is
    // only whitespace:
    //   group 1: the whole element, opening tag through `</span>`
    //   group 2: the whitespace inside it
    // The leading `(?:.*?)` lazily skips whatever precedes the element.
    static ref APPLECONVERTEDSPACERE: Regex = Regex::new("(?:.*?)(<span class=\"Apple-converted-space\">([\\s]+)</span>)").unwrap();
    // Same shape as above, for a `<span style="color: #000000">` element that
    // contains only whitespace:
    //   group 1: the whole element
    //   group 2: the whitespace inside it
    static ref COLORALLZEROSPACERE: Regex = Regex::new("(?:.*?)(<span style=\"color: #000000\">([\\s]+)</span>)").unwrap();
}

fn rewrite_code(file: &Path, output: &Path) -> io::Result<()> {
    // open input source file
    // https://stackoverflow.com/a/45882510
    let file_reader = match File::open(&file) {
        // The `description` method of `io::Error` returns a string that
        // describes the error
        Err(why) => panic!("Couldn't open {}: {}", file.display(), why.description()),
        Ok(file) => BufReader::new(file),
    };
    
    // create output file
    let mut output = match File::create(&output) {
        Ok(file) => file,
        Err(why) => panic!("Couldn't create output {}: {}", file.display(), why.description()),
    };
    
    // save styles
    // e.g,
    // {"p1" : "#000233", "s1" : "#233333"}
    let mut styles: HashMap<String, String> = HashMap::new();
    
    // number of classes of ``<p></p>``
    let mut p_class_num = 0;
    
    // number of classes of ``<span></span>``
    let mut span_class_num = 0;
    
    // replacement strings
    // e.g,
    // {
    //    "<p class=\"p1\"" : "<p style=\"color: #000233\"",
    //    "class=\"s1\" : "style=\"color: #233333\""
    // }
    let mut replacement: HashMap<String, String> = HashMap::new();

    // 0: we haven't encountered <style>...</style>
    // 1: Handling
    // 2: the <style>...</style> has been processed
    let mut have_encountered_style = 0;
    
    // read line by line from input file
    for line in file_reader.lines() {
        // unwrap `line` and make it `&str`
        // https://stackoverflow.com/a/23977218
        let current_line = &line?[..];
        
        // if the <style>...</style> has NOT been processed
        if have_encountered_style != 2 {
            // try to match styles inside <style>...</style>
            // https://docs.rs/regex/1.3.1/regex/#example-find-a-date
            if STYLERE.is_match(current_line) {
                have_encountered_style = 1;
                
                // if we have this line matched
                // https://docs.rs/regex/1.3.1/regex/#example-iterating-over-capture-groups
                for cap in STYLERE.captures_iter(current_line) {
                    // save styles {"p1" : "#000233", "s1" : "#233333"}
                    styles.insert(cap[2].to_string(), cap[3].to_string());

                    // update the number
                    // match `&str` in Rust
                    // https://stackoverflow.com/a/32790546
                    match &cap[1] {
                        "p" => p_class_num += 1,
                        "span" => span_class_num += 1,
                        _ => ()
                    };
                }
            } else {
                if styles.len() != 0 {                    
                    // remember that the <style>...</style> has been processed
                    have_encountered_style = 2;
                    
                    for p_index in 1..1 + p_class_num {
                        let origin = format!("p class=\"p{}\"", p_index);
                        if let Some(p_colour) = styles.get(&format!("p{}", p_index)) {
                            let replace = format!("span style=\"color: {}\"", p_colour);
                            replacement.insert(origin, replace);
                        }
                    }
                    replacement.insert("</p>".to_string(), "</span>".to_string());
                    
                    for span_index in 1..1 + span_class_num {
                        let origin = format!("class=\"s{}\"", span_index);
                        if let Some(span_colour) = styles.get(&format!("s{}", span_index)) {
                            let replace = format!("style=\"color: {}\"", span_colour);
                            replacement.insert(origin, replace);
                        }
                    }
                }
            }
        } else {
            if current_line.len() <= 7 {
                continue;
            }
            
            let mut modified: String = current_line.to_string();
            for (origin, replace) in &replacement {
                modified = modified.replace(origin, replace);
            }

            if modified.starts_with("<span style=\"color: #000000\">") {
                modified = modified.replace("<span style=\"color: #000000\">", "");
                let modified_len = modified.len();
                modified = modified[..modified_len - 7].to_string();
            }
            
            // replace all `\x0a` with ` `
            modified = modified.replace(&" ".to_string(), &" ".to_string());
            
            let replace_inline = |regex_index: u8, modified: String| -> String {
                let mut more_replacement: HashMap<String, String> = HashMap::new();
                
                // `lazy_static!` generates struct
                // thus the workaround is to match index
                // ugly but works
                match regex_index {
                    1 => {
                        for cap in APPLECONVERTEDSPACERE.captures_iter(&modified) {
                            more_replacement.insert(cap[1].to_string(), cap[2].to_string());
                        }
                    },
                    2 => {
                        for cap in COLORALLZEROSPACERE.captures_iter(&modified) {
                            more_replacement.insert(cap[1].to_string(), cap[2].to_string());
                        }
                    },
                    _ => ()
                };
                
                // do more replacement
                let mut result: String = modified;
                for (origin, replace) in &more_replacement {
                    result = result.replace(origin, replace);
                }
                result
            };
            
            modified = replace_inline(1, modified);
            modified = replace_inline(2, modified);

            // replace `<br>` with ``
            modified = modified.replace(&"<br>".to_string(), &"".to_string());

            // add the line feed character
            // https://stackoverflow.com/questions/37889337/how-to-concatenate-a-char-onto-a-string-in-rust
            modified.push('\n');
            
            // write `String` to file
            // https://stackoverflow.com/a/31193386
            output.write_all(modified.as_bytes()).expect("Couldn't write to output file!");
        }
    }
    
    Ok(())
}

/// Parses the command line and returns `(input path, output path)`.
///
/// Both `-f/--file` and `-o/--output` are declared as required, so clap
/// rejects the invocation itself when either one is missing.
fn parseargs() -> (String, String) {
    // https://github.com/clap-rs/clap#quick-example
    let input_arg = Arg::with_name("file")
        .short("f")
        .long("file")
        .value_name("FILE")
        .help("Input file path")
        .required(true);

    let output_arg = Arg::with_name("output")
        .short("o")
        .long("output")
        .value_name("FILE")
        .help("Output file path")
        .required(true);

    let parsed = App::new("codetowp")
        .version("1.0")
        .author("Ryza<[data deleted]>")
        .about("Convert Apple generated HTML output of coloured code to WordPress compatible HTML code")
        .arg(input_arg)
        .arg(output_arg)
        .get_matches();

    // `unwrap()` cannot fail here: both arguments are marked as required.
    let required_value = |name: &str| -> String { String::from(parsed.value_of(name).unwrap()) };

    (required_value("file"), required_value("output"))
}

/// Entry point: reads the two paths from the command line and runs the
/// conversion, returning whatever I/O result `rewrite_code` produces.
fn main() -> io::Result<()> {
    let (input_arg, output_arg) = parseargs();

    // Borrow the argument strings as `Path`s for the file APIs.
    // https://doc.rust-lang.org/rust-by-example/std_misc/file/open.html
    let source = Path::new(&input_arg);
    let destination = Path::new(&output_arg);

    rewrite_code(source, destination)
}

Leave a Reply

Your email address will not be published. Required fields are marked *

5 × 2 =