(** A sequence encodes both raw sequences from a sequence database, and also * sequential patterns. *) open Printf (** Flag to indicate how to treat this sequence, especially when splitting it further. *) type is_partial = | Partial (** Indicates that the first itemset of the sequence is partial. *) | Complete (** The first itemset in the sequence is complete. *) (** A sequence is a (possibly partial) list of itemsets. *) type t = (Itemset.t list) * is_partial let is_empty sequence = let itemsets, _ = sequence in itemsets = [] (** Count how many times the given item appears in this sequence. *) let rec item_count_in_sequence i = function | [] -> 0 | (itemset,is_partial)::remaining_itemsets -> let count = List.length (List.filter (fun n -> n == i) itemset) in count + (item_count_in_sequence i remaining_itemsets) let rec sequence_of_nums = function | [] -> [] | n::t -> let seq, rest = Util.split_at_n t n in let seq = BatList.sort seq in seq :: (sequence_of_nums rest) (** Construct a sequence by reading directly from a channel. * This currently only knows how to deal with ascii definitions. *) let read_sequence channel = let line = input_line channel in let nums = Str.split (Str.regexp " ") line in let nums = List.map int_of_string nums in let nums = List.tl nums in (* Drop the first one, it's redundant *) let sequence = (sequence_of_nums nums), Complete in sequence let rec find_item_counts' itemsets item_count_hash = match itemsets with | [] -> item_count_hash | itemset::remaining_itemsets -> let item_count_hash = Itemset.find_item_counts itemset item_count_hash in find_item_counts' remaining_itemsets item_count_hash let find_item_counts sequence item_count_hash = let itemsets, is_partial = sequence in find_item_counts' itemsets item_count_hash let rec prune_infrequent_items' itemsets frequent_items = match itemsets with | [] -> [] | itemset::remaining_itemsets -> let itemset = Itemset.prune_infrequent_items itemset frequent_items in if itemset = [] then prune_infrequent_items' remaining_itemsets frequent_items else itemset::(prune_infrequent_items' remaining_itemsets frequent_items) let prune_infrequent_items sequence frequent_items = let itemsets, is_partial = sequence in (* if the first itemset is Complete, then we can start prunnin! *) if is_partial = Complete then (prune_infrequent_items' itemsets frequent_items), Complete (* Otherwise, we must handle the first itemset specially to decide if the result is Complete or Intermediate *) else match itemsets with | [] -> sequence | start_item::remaining_itemsets -> let start_item = Itemset.prune_infrequent_items start_item frequent_items in if start_item = [] then (prune_infrequent_items' remaining_itemsets frequent_items), Complete else (start_item::(prune_infrequent_items' remaining_itemsets frequent_items)), Partial (* let sequence = Sequence.prune_infrequent_items sequence frequent_items in *) let last_item sequence = let seq, is_partial = sequence in let last_itemset = BatList.last seq in let last_item = BatList.last last_itemset in last_item let to_string seq = let s, is_partial = seq in let inner_str = ( Util.join "" ( List.map ( fun i -> "(" ^ ( Util.join " " ( List.map string_of_int i)) ^ ")" ) s)) in "<" ^ (if is_partial = Partial then "_" else "") ^ inner_str ^ ">" let dumpSequence seq = let s, is_partial = seq in printf "<"; if is_partial = Partial then printf "_" else printf ""; print_string ( Util.join "" ( List.map ( fun i -> "(" ^ ( Util.join " " ( List.map string_of_int i)) ^ ")" ) s)); printf ">\n" (** Given an item and some itemsets, provide a list of all the possible splits that we can do. Also indicate if the split left a partial itemset *) let rec partial_split_at' allow_ending itemsets split_item = match itemsets with | [] -> [] | itemset::remaining_itemsets -> let matched, items_after = Itemset.items_after itemset split_item in if matched then begin if items_after = [] then begin if remaining_itemsets = [] then [] (* This is the last item in the last itemset *) else begin if allow_ending then (remaining_itemsets, Complete)::(partial_split_at' false remaining_itemsets split_item) else (partial_split_at' false remaining_itemsets split_item) end end else ((items_after::remaining_itemsets), Partial) ::(partial_split_at' allow_ending remaining_itemsets split_item) end else partial_split_at' allow_ending remaining_itemsets split_item let partial_split_at sequence split_item = let itemsets, is_partial = sequence in partial_split_at' true itemsets split_item (** Given an item and some itemsets, provide a list of all the possible splits that we can do. *) let rec split_at' itemsets split_item = match itemsets with | [] -> [] | itemset::remaining_itemsets -> let matched, items_after = Itemset.items_after itemset split_item in if matched then begin if items_after = [] then begin if remaining_itemsets = [] then [] (* This is the last item in the last itemset *) else (remaining_itemsets, Complete)::(split_at' remaining_itemsets split_item) end else ((items_after::remaining_itemsets), Partial) ::(split_at' remaining_itemsets split_item) end else split_at' remaining_itemsets split_item let split_at sequence split_item = let itemsets, is_partial = sequence in split_at' itemsets split_item let rec get_subsequence' start_item itemsets split_item = match itemsets with | [] -> ([], Complete), true | itemset::remaining_itemsets -> let matched, items_after = Itemset.items_after itemset split_item in if matched then begin if items_after = [] then (remaining_itemsets, Complete), start_item else ((items_after::remaining_itemsets), Partial), start_item end else get_subsequence' true remaining_itemsets split_item let get_subsequence sequence item = let itemsets, is_partial = sequence in let start_item = (if is_partial = Partial then false else true) in get_subsequence' start_item itemsets item (* match itemsets with | [] -> [] | start_item::remaining_itemsets -> if is_partial = Partial then [] else *) (* let partial_matches = if is_partial = Partial then split_at_partial itemsets last_item else []; *)