Merge pull request #2897 from topecongiro/issue-2896

Ignore comment in wrap_str
This commit is contained in:
Nick Cameron 2018-08-06 19:36:03 +12:00 committed by GitHub
commit e4d560b914
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 358 additions and 1 deletion

View File

@ -1139,6 +1139,21 @@ pub fn recover_comment_removed(
}
}
/// Returns `code` with all comment lines removed, keeping only lines whose
/// kind is normal code or the inside of a string literal.
///
/// `LineClasses` yields whole lines, so a `'\n'` is appended after every kept
/// line; the trailing newline is then popped when the input itself did not
/// end with one, so the result's final-newline behaviour matches the input's.
pub fn filter_normal_code(code: &str) -> String {
    let mut buffer = String::with_capacity(code.len());
    LineClasses::new(code).for_each(|(kind, line)| match kind {
        FullCodeCharKind::Normal | FullCodeCharKind::InString => {
            buffer.push_str(&line);
            buffer.push('\n');
        }
        _ => (),
    });
    // Clippy `single_char_pattern`: a `char` pattern avoids the substring
    // search machinery for a one-byte needle.
    if !code.ends_with('\n') && buffer.ends_with('\n') {
        buffer.pop();
    }
    buffer
}
/// Return true if the two strings of code have the same payload of comments.
/// The payload of comments is everything in the string except:
/// - actual code (not comments)
@ -1392,4 +1407,21 @@ mod test {
let s = format!(" r#\"\n test\n \"#");
assert_eq!(remove_trailing_white_spaces(&s), s);
}
#[test]
fn test_filter_normal_code() {
    // A comment-free snippet must pass through `filter_normal_code` unchanged.
    let plain = r#"
fn main() {
println!("hello, world");
}
"#;
    assert_eq!(plain, filter_normal_code(plain));
    // The same snippet with a line comment inserted: filtering strips the
    // comment line, yielding exactly the comment-free snippet above.
    let commented = r#"
fn main() {
// hello, world
println!("hello, world");
}
"#;
    assert_eq!(plain, filter_normal_code(commented));
}
}

View File

@ -18,6 +18,7 @@ use syntax::ast::{
use syntax::codemap::{BytePos, Span, NO_EXPANSION};
use syntax::ptr;
use comment::filter_normal_code;
use rewrite::RewriteContext;
use shape::Shape;
@ -350,7 +351,7 @@ macro_rules! skip_out_of_file_lines_range_visitor {
// Wraps String in an Option. Returns Some when the string adheres to the
// Rewrite constraints defined for the Rewrite trait and None otherwise.
pub fn wrap_str(s: String, max_width: usize, shape: Shape) -> Option<String> {
if is_valid_str(&s, max_width, shape) {
if is_valid_str(&filter_normal_code(&s), max_width, shape) {
Some(s)
} else {
None

161
tests/source/issue-2896.rs Normal file
View File

@ -0,0 +1,161 @@
extern crate rand;
extern crate timely;
extern crate differential_dataflow;
use rand::{Rng, SeedableRng, StdRng};
use timely::dataflow::operators::*;
use differential_dataflow::AsCollection;
use differential_dataflow::operators::*;
use differential_dataflow::input::InputSession;
// mod loglikelihoodratio;
// NOTE(review): rustfmt test *input* fixture (tests/source/issue-2896.rs per
// the diff header above). Its rough formatting is intentional — it is the
// unformatted input whose expected rustfmt output lives in tests/target/.
// Do not reformat this code by hand.
fn main() {
// define a new timely dataflow computation.
timely::execute_from_args(std::env::args().skip(6), move |worker| {
// capture parameters of the experiment.
let users: usize = std::env::args().nth(1).unwrap().parse().unwrap();
let items: usize = std::env::args().nth(2).unwrap().parse().unwrap();
let scale: usize = std::env::args().nth(3).unwrap().parse().unwrap();
let batch: usize = std::env::args().nth(4).unwrap().parse().unwrap();
let noisy: bool = std::env::args().nth(5).unwrap() == "noisy";
let index = worker.index();
let peers = worker.peers();
let (input, probe) = worker.dataflow(|scope| {
// input of (user, item) collection.
let (input, occurrences) = scope.new_input();
let occurrences = occurrences.as_collection();
//TODO adjust code to only work with upper triangular half of cooccurrence matrix
/* Compute the cooccurrence matrix C = A'A from the binary interaction matrix A. */
let cooccurrences =
occurrences
.join_map(&occurrences, |_user, &item_a, &item_b| (item_a, item_b))
.filter(|&(item_a, item_b)| item_a != item_b)
.count();
/* compute the rowsums of C indicating how often we encounter individual items. */
let row_sums =
occurrences
.map(|(_user, item)| item)
.count();
// row_sums.inspect(|record| println!("[row_sums] {:?}", record));
/* Join the cooccurrence pairs with the corresponding row sums. */
let mut cooccurrences_with_row_sums = cooccurrences
.map(|((item_a, item_b), num_cooccurrences)| (item_a, (item_b, num_cooccurrences)))
.join_map(&row_sums, |&item_a, &(item_b, num_cooccurrences), &row_sum_a| {
assert!(row_sum_a > 0);
(item_b, (item_a, num_cooccurrences, row_sum_a))
})
.join_map(&row_sums, |&item_b, &(item_a, num_cooccurrences, row_sum_a), &row_sum_b| {
assert!(row_sum_a > 0);
assert!(row_sum_b > 0);
(item_a, (item_b, num_cooccurrences, row_sum_a, row_sum_b))
});
// cooccurrences_with_row_sums
// .inspect(|record| println!("[cooccurrences_with_row_sums] {:?}", record));
// //TODO compute top-k "similar items" per item
// /* Compute LLR scores for each item pair. */
// let llr_scores = cooccurrences_with_row_sums.map(
// |(item_a, (item_b, num_cooccurrences, row_sum_a, row_sum_b))| {
// println!(
// "[llr_scores] item_a={} item_b={}, num_cooccurrences={} row_sum_a={} row_sum_b={}",
// item_a, item_b, num_cooccurrences, row_sum_a, row_sum_b);
// let k11: isize = num_cooccurrences;
// let k12: isize = row_sum_a as isize - k11;
// let k21: isize = row_sum_b as isize - k11;
// let k22: isize = 10000 - k12 - k21 + k11;
// let llr_score = loglikelihoodratio::log_likelihood_ratio(k11, k12, k21, k22);
// ((item_a, item_b), llr_score)
// });
if noisy {
cooccurrences_with_row_sums =
cooccurrences_with_row_sums
.inspect(|x| println!("change: {:?}", x));
}
let probe =
cooccurrences_with_row_sums
.probe();
/*
// produce the (item, item) collection
let cooccurrences = occurrences
.join_map(&occurrences, |_user, &item_a, &item_b| (item_a, item_b));
// count the occurrences of each item.
let counts = cooccurrences
.map(|(item_a,_)| item_a)
.count();
// produce ((item1, item2), count1, count2, count12) tuples
let cooccurrences_with_counts = cooccurrences
.join_map(&counts, |&item_a, &item_b, &count_item_a| (item_b, (item_a, count_item_a)))
.join_map(&counts, |&item_b, &(item_a, count_item_a), &count_item_b| {
((item_a, item_b), count_item_a, count_item_b)
});
let probe = cooccurrences_with_counts
.inspect(|x| println!("change: {:?}", x))
.probe();
*/
(input, probe)
});
let seed: &[_] = &[1, 2, 3, index];
let mut rng1: StdRng = SeedableRng::from_seed(seed); // rng for edge additions
let mut rng2: StdRng = SeedableRng::from_seed(seed); // rng for edge deletions
let mut input = InputSession::from(input);
for count in 0 .. scale {
if count % peers == index {
let user = rng1.gen_range(0, users);
let item = rng1.gen_range(0, items);
// println!("[INITIAL INPUT] ({}, {})", user, item);
input.insert((user, item));
}
}
// load the initial data up!
while probe.less_than(input.time()) { worker.step(); }
for round in 1 .. {
for element in (round * batch) .. ((round + 1) * batch) {
if element % peers == index {
// advance the input timestamp.
input.advance_to(round * batch);
// insert a new item.
let user = rng1.gen_range(0, users);
let item = rng1.gen_range(0, items);
if noisy { println!("[INPUT: insert] ({}, {})", user, item); }
input.insert((user, item));
// remove an old item.
let user = rng2.gen_range(0, users);
let item = rng2.gen_range(0, items);
if noisy { println!("[INPUT: remove] ({}, {})", user, item); }
input.remove((user, item));
}
}
input.advance_to(round * batch);
input.flush();
while probe.less_than(input.time()) { worker.step(); }
}
}).unwrap();
}

163
tests/target/issue-2896.rs Normal file
View File

@ -0,0 +1,163 @@
extern crate differential_dataflow;
extern crate rand;
extern crate timely;
use rand::{Rng, SeedableRng, StdRng};
use timely::dataflow::operators::*;
use differential_dataflow::input::InputSession;
use differential_dataflow::operators::*;
use differential_dataflow::AsCollection;
// mod loglikelihoodratio;
// NOTE(review): rustfmt test *target* fixture (tests/target/issue-2896.rs per
// the diff header above): the expected output rustfmt must produce for the
// corresponding tests/source input. Its exact formatting is the assertion —
// do not alter whitespace or line breaks by hand.
fn main() {
// define a new timely dataflow computation.
timely::execute_from_args(std::env::args().skip(6), move |worker| {
// capture parameters of the experiment.
let users: usize = std::env::args().nth(1).unwrap().parse().unwrap();
let items: usize = std::env::args().nth(2).unwrap().parse().unwrap();
let scale: usize = std::env::args().nth(3).unwrap().parse().unwrap();
let batch: usize = std::env::args().nth(4).unwrap().parse().unwrap();
let noisy: bool = std::env::args().nth(5).unwrap() == "noisy";
let index = worker.index();
let peers = worker.peers();
let (input, probe) = worker.dataflow(|scope| {
// input of (user, item) collection.
let (input, occurrences) = scope.new_input();
let occurrences = occurrences.as_collection();
//TODO adjust code to only work with upper triangular half of cooccurrence matrix
/* Compute the cooccurrence matrix C = A'A from the binary interaction matrix A. */
let cooccurrences = occurrences
.join_map(&occurrences, |_user, &item_a, &item_b| (item_a, item_b))
.filter(|&(item_a, item_b)| item_a != item_b)
.count();
/* compute the rowsums of C indicating how often we encounter individual items. */
let row_sums = occurrences.map(|(_user, item)| item).count();
// row_sums.inspect(|record| println!("[row_sums] {:?}", record));
/* Join the cooccurrence pairs with the corresponding row sums. */
let mut cooccurrences_with_row_sums = cooccurrences
.map(|((item_a, item_b), num_cooccurrences)| (item_a, (item_b, num_cooccurrences)))
.join_map(
&row_sums,
|&item_a, &(item_b, num_cooccurrences), &row_sum_a| {
assert!(row_sum_a > 0);
(item_b, (item_a, num_cooccurrences, row_sum_a))
},
).join_map(
&row_sums,
|&item_b, &(item_a, num_cooccurrences, row_sum_a), &row_sum_b| {
assert!(row_sum_a > 0);
assert!(row_sum_b > 0);
(item_a, (item_b, num_cooccurrences, row_sum_a, row_sum_b))
},
);
// cooccurrences_with_row_sums
// .inspect(|record| println!("[cooccurrences_with_row_sums] {:?}", record));
// //TODO compute top-k "similar items" per item
// /* Compute LLR scores for each item pair. */
// let llr_scores = cooccurrences_with_row_sums.map(
// |(item_a, (item_b, num_cooccurrences, row_sum_a, row_sum_b))| {
// println!(
// "[llr_scores] item_a={} item_b={}, num_cooccurrences={} row_sum_a={} row_sum_b={}",
// item_a, item_b, num_cooccurrences, row_sum_a, row_sum_b);
// let k11: isize = num_cooccurrences;
// let k12: isize = row_sum_a as isize - k11;
// let k21: isize = row_sum_b as isize - k11;
// let k22: isize = 10000 - k12 - k21 + k11;
// let llr_score = loglikelihoodratio::log_likelihood_ratio(k11, k12, k21, k22);
// ((item_a, item_b), llr_score)
// });
if noisy {
cooccurrences_with_row_sums =
cooccurrences_with_row_sums.inspect(|x| println!("change: {:?}", x));
}
let probe = cooccurrences_with_row_sums.probe();
/*
// produce the (item, item) collection
let cooccurrences = occurrences
.join_map(&occurrences, |_user, &item_a, &item_b| (item_a, item_b));
// count the occurrences of each item.
let counts = cooccurrences
.map(|(item_a,_)| item_a)
.count();
// produce ((item1, item2), count1, count2, count12) tuples
let cooccurrences_with_counts = cooccurrences
.join_map(&counts, |&item_a, &item_b, &count_item_a| (item_b, (item_a, count_item_a)))
.join_map(&counts, |&item_b, &(item_a, count_item_a), &count_item_b| {
((item_a, item_b), count_item_a, count_item_b)
});
let probe = cooccurrences_with_counts
.inspect(|x| println!("change: {:?}", x))
.probe();
*/
(input, probe)
});
let seed: &[_] = &[1, 2, 3, index];
let mut rng1: StdRng = SeedableRng::from_seed(seed); // rng for edge additions
let mut rng2: StdRng = SeedableRng::from_seed(seed); // rng for edge deletions
let mut input = InputSession::from(input);
for count in 0..scale {
if count % peers == index {
let user = rng1.gen_range(0, users);
let item = rng1.gen_range(0, items);
// println!("[INITIAL INPUT] ({}, {})", user, item);
input.insert((user, item));
}
}
// load the initial data up!
while probe.less_than(input.time()) {
worker.step();
}
for round in 1.. {
for element in (round * batch)..((round + 1) * batch) {
if element % peers == index {
// advance the input timestamp.
input.advance_to(round * batch);
// insert a new item.
let user = rng1.gen_range(0, users);
let item = rng1.gen_range(0, items);
if noisy {
println!("[INPUT: insert] ({}, {})", user, item);
}
input.insert((user, item));
// remove an old item.
let user = rng2.gen_range(0, users);
let item = rng2.gen_range(0, items);
if noisy {
println!("[INPUT: remove] ({}, {})", user, item);
}
input.remove((user, item));
}
}
input.advance_to(round * batch);
input.flush();
while probe.less_than(input.time()) {
worker.step();
}
}
}).unwrap();
}