1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
//@ # The CodeMap
//@
//@ A `CodeMap` gives you a central mapping from spans to their location in the
//@ group of files being analysed.
//@
//@ As usual, lets add in a couple imports and module-level documentation.

//! A mapping from arbitrary locations and sections of source code to their
//! contents.

use std::collections::HashMap;
use std::ops::Range;
use std::rc::Rc;
use std::cmp;
use std::cell::RefCell;
use std::sync::atomic::{AtomicUsize, Ordering};
use lex::{Token, TokenKind};

//@ We start off with a `Span`. This is really just a wrapper around an integer,
//@ with the assumption that a span will **always** correspond to something in
//@ the `CodeMap`. This means using a span from one `CodeMap` with another will
//@ result in a panic if you are lucky, or silently give you garbage.

/// A unique identifier pointing to a substring in some file.
///
/// To get back the original string this points to you'll need to look it up
/// in a `CodeMap` or `FileMap`. 
#[derive(Copy, Clone, Debug, PartialEq, Hash, Eq, Serialize, Deserialize)]
pub struct Span(usize);

impl Span {
    /// Returns the special "dummy" span, which matches anything. This should
    /// only be used internally to make testing easier.
    pub(crate) fn dummy() -> Span {
        Span(0)
    }
}

//@ For our purposes, the `CodeMap` will just contain a list of `FileMap`s. These
//@ keep track of their name, contents, and the mapping of spans to locations in
//@ that content.

/// A mapping of `Span`s to the files in which they are located.
#[derive(Debug)]
pub struct CodeMap {
    next_id: Rc<AtomicUsize>,
    files: Vec<Rc<FileMap>>,
}

/// A mapping which keeps track of a file's contents and allows you to cheaply
/// access substrings of the original content.
#[derive(Clone, Debug)]
pub struct FileMap {
    name: String,
    contents: String,
    next_id: Rc<AtomicUsize>,
    items: RefCell<HashMap<Span, Range<usize>>>
}

//@ The codemap has a couple useful methods for adding new files and looking up the
//@ string corresponding to a span.

impl CodeMap {
    /// Create a new, empty `CodeMap`.
    pub fn new() -> CodeMap {
        let next_id = Rc::new(AtomicUsize::new(1));
        let files = Vec::new();
        CodeMap { next_id, files }
    }

    /// Add a new file to the `CodeMap` and get back a reference to it.
    pub fn insert_file<C, F>(&mut self, filename: F, contents: C) -> Rc<FileMap> 
    where F: Into<String>,
          C: Into<String>,
    {
        let filemap = FileMap {
            name: filename.into(),
            contents: contents.into(),
            items: RefCell::new(HashMap::new()),
            next_id: Rc::clone(&self.next_id),
        };
        let fm = Rc::new(filemap);
        self.files.push(Rc::clone(&fm));

        fm
    }

    /// Get the substring that this `Span` corresponds to.
    pub fn lookup(&self, span: Span) -> &str {
        for filemap in &self.files {
            if let Some(substr) = filemap.lookup(span) {
                return substr;
            }
        }

        panic!("Tried to lookup {:?} but it wasn't in any \
            of the FileMaps... This is a bug!", span)
    }

    /// The files that this `CodeMap` contains.
    pub fn files(&self) -> &[Rc<FileMap>] {
        self.files.as_slice()
    }
}

impl Default for CodeMap {
    fn default() -> CodeMap {
        CodeMap::new()
    }
}

//@ You may have noticed that `FileMap` contains a `RefCell<HashMap<_>>`. This is 
//@ because we want to pass around multiple pointers to a file mapping, yet still
//@ be able to add new spans if we want to. It also contains a reference to the
//@ parent `CodeMap`'s counter so when we insert new spans into the `FileMap` 
//@ they'll still get globally unique IDs.

impl FileMap {
    /// Get the name of this `FileMap`.
    pub fn filename(&self) -> &str {
        &self.name
    }

    /// Get the entire content of this file.
    pub fn contents(&self) -> &str {
        &self.contents
    }

    /// Lookup a span in this `FileMap`.
    ///
    /// # Panics
    ///
    /// If the `FileMap`'s `items` hashmap contains a span, but that span 
    /// **doesn't** point to a valid substring this will panic. If you ever
    /// get into a situation like this then things are almost certainly FUBAR.
    pub fn lookup(&self, span: Span) -> Option<&str> {
        let range = match self.range_of(span) {
            Some(r) => r,
            None => return None,
        };

        match self.contents.get(range.clone()) {
            Some(substr) => Some(substr),
            None => panic!("FileMap thinks it contains {:?}, \
                but the range ({:?}) doesn't point to anything valid!", span, range),
        }
    }

    /// Get the range corresponding to this span.
    pub fn range_of(&self, span: Span) -> Option<Range<usize>> {
        self.items.borrow().get(&span).cloned() 
    }
}

//@ Users can freely add new spans to a `FileMap`, to do this we'll take in the 
//@ start and end indices, create a new span ID by incrementing our counter, then
//@ we insert the new span and range into the `items`. In debug builds we'll do 
//@ bounds checks, but it's an assumption that the `start` and `end` indices are
//@ both within bounds, and lie on valid codepoint boundaries.

impl FileMap {
    /// Ask the `FileMap` to give you the span corresponding to the half-open
    /// interval `[start, end)`.
    ///
    /// # Panics
    ///
    /// In debug mode, this will panic if either `start` or `end` are outside
    /// the source code or if they don't lie on a codepoint boundary.
    ///
    /// It is assumed that the `start` and `indices` were originally obtained
    /// from the file's contents.
    pub fn insert_span(&self, start: usize, end: usize) -> Span {
        debug_assert!(self.contents.is_char_boundary(start), 
            "Start doesn't lie on a char boundary");
        debug_assert!(self.contents.is_char_boundary(end), 
            "End doesn't lie on a char boundary");
        debug_assert!(start < self.contents.len(), 
            "Start lies outside the content string");
        debug_assert!(end <= self.contents.len(), 
            "End lies outside the content string");

        let range = start..end;

        if let Some(existing) = self.reverse_lookup(&range) {
            return existing;
        }

        let span_id = self.next_id.fetch_add(1, Ordering::Relaxed);
        let span = Span(span_id);

        self.items.borrow_mut().insert(span, range);
        span
    }

    /// We don't want to go and add duplicate spans unnecessarily so we 
    /// iterate through all existing ranges to see if this one already
    /// exists. 
    fn reverse_lookup(&self, needle: &Range<usize>) -> Option<Span> {
        self.items.borrow()
            .iter()
            .find(|&(_, range)| range == needle)
            .map(|(span, _)| span)
            .cloned()
    }

    /// Merge two spans to get the span which includes both.
    ///
    /// As usual, the constraints from `insert_span()` also apply here. If
    /// you try to enter two spans from different `FileMap`s, it'll panic.
    pub fn merge(&self, first: Span, second: Span) -> Span {
        let range_1 = self.range_of(first).expect("Can only merge spans from the same FileMap");
        let range_2 = self.range_of(second).expect("Can only merge spans from the same FileMap");

        let start = cmp::min(range_1.start, range_2.start);
        let end = cmp::max(range_1.end, range_2.end);

        self.insert_span(start, end)
    }
}

//@ To help after the tokenizing step, lets add a method which will take a bunch
//@ of tokens and register them with a `FileMap`. The same caveats as with 
//@ `insert_span()` will apply here.

impl FileMap {
    /// Register a set of tokenized inputs and turn them into a proper stream
    /// of tokens. Note that all the caveats from `insert_span()` also apply 
    /// here.
    pub fn register_tokens(&self, tokens: Vec<(TokenKind, usize, usize)>) -> Vec<Token> {
        let mut registered = Vec::new();

        for (kind, start, end) in tokens {
            let span = self.insert_span(start, end);
            let token = Token::new(span, kind);
            registered.push(token);
        }

        registered
    }
}

//@ To test that our `CodeMap` and `FileMap` behave as we expect them to, let's
//@ create some dummy "files" and try to create spans in them.

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn insert_a_file_into_a_codemap() {
        let mut map = CodeMap::new();
        let filename = "foo.rs";
        let content = "Hello World!";

        assert_eq!(map.files.len(), 0);
        let fm = map.insert_file(filename, content);

        assert_eq!(fm.filename(), filename);
        assert_eq!(fm.contents(), content);
        assert_eq!(map.files.len(), 1);
    }

    #[test]
    fn get_span_for_substring() {
        let mut map = CodeMap::new();
        let src = "Hello World!";
        let fm = map.insert_file("foo.rs", src);

        let start = 2;
        let end = 5;
        let should_be = &src[start..end];

        let span = fm.insert_span(start, end);
        let got = fm.lookup(span).unwrap();
        assert_eq!(got, should_be);
        assert_eq!(fm.range_of(span).unwrap(), start..end);

        let got_from_codemap = map.lookup(span);
        assert_eq!(got_from_codemap, should_be);
    }

    #[test]
    fn spans_for_different_ranges_are_always_unique() {
        let mut map = CodeMap::new();
        let src = "Hello World!";
        let fm = map.insert_file("foo.rs", src);

        let mut spans = Vec::new();

        for start in 0..src.len() {
            for end in start..src.len() {
                let span = fm.insert_span(start, end);
                assert!(!spans.contains(&span), 
                    "{:?} already contains {:?} ({}..{})", 
                    spans, span, start, end);
                assert!(span != Span::dummy());

                spans.push(span);
            }
        }
    }

    #[test]
    fn spans_for_identical_ranges_are_identical() {
        let mut map = CodeMap::new();
        let src = "Hello World!";
        let fm = map.insert_file("foo.rs", src);

        let start = 0;
        let end = 5;

        let span_1 = fm.insert_span(start, end);
        let span_2 = fm.insert_span(start, end);

        assert_eq!(span_1, span_2);
    }

    #[test]
    fn join_multiple_spans() {
        let mut map = CodeMap::new();
        let src = "Hello World!";
        let fm = map.insert_file("foo.rs", src);

        let span_1 = fm.insert_span(0, 2);
        let span_2 = fm.insert_span(3, 8);

        let joined = fm.merge(span_1, span_2);
        let equivalent_range = fm.range_of(joined).unwrap();

        assert_eq!(equivalent_range.start, 0);
        assert_eq!(equivalent_range.end, 8);
    }
}