(* ./build && ./cispan.native *) (** cispan - Sequential Pattern Mining Longer description goes here. *) open Printf open Util open BatStd let rec is_frequent_item min_freq i = function | [] -> false | seq::remaining_sequences -> let c = Sequence.item_count_in_sequence i seq in if c >= min_freq then true else is_frequent_item (min_freq - c) i remaining_sequences (* let dumpSequence s = printf "Dump: %s\n" (dump s) *) let rec explore_item prefix_seq db min_sup lattice item = () (* Calculate seq_i and seq_s using prefix_seq and item *) (* For seq_i and seq_s as seq *) (* Create projected DB for seq and min_sup (ProjDB) *) (* Get list of frequent items in ProjDB *) (* Prune ProjDB *) (* if ProjDB is not empty, incCloSpan seq ProjDB min_sup lattice freq_items *) and incCloSpan db min_sup lattice sequence = let lattice = PrefixTree.insert lattice sequence in lattice (* match frequent_items with | [] -> lattice | item::remaining_items -> let lattice = PrefixTree.insert lattice prefix_seq item in let lattice = explore_item prefix_seq db min_sup lattice item in lattice *) let item_to_sequence item = [[item]], false let cross_module inserts updates min_sup lattice = let frequent_items = Database.find_frequent_items_combined inserts updates min_sup in print frequent_items; let db = Database.prune_and_merge_infrequent_items inserts updates frequent_items in print db; let initial_sequences = List.map item_to_sequence frequent_items in let lattice = List.fold_left (incCloSpan db min_sup) lattice initial_sequences in lattice (* Note: db_s = database projected over sequence *) let cloSpan db_s min_sup lattice sequence = let lattice = PrefixTree.insert lattice sequence in lattice (* let rec closed_mining' db min_sup lattice sequences = match sequences with | [] -> lattice | sequence::remaining_sequences -> let db_s = Database.project db min_sup sequence in let lattice = cloSpan db_s min_sup lattice sequence in closed_mining' db min_sup lattice remaining_sequences *) let initial_lattice db min_support = let lattice = Lattice.initial_lattice db in let frequent_items = Database.find_frequent_items !db min_support in List.iter (fun item -> let db' = Database.project !db min_support ([[item]],Sequence.Complete) in let _ = Lattice.add_child lattice { Lattice.item = item; Lattice.extension = Lattice.S_extension; Lattice.support = Database.size db'; Lattice.projected_db = ref db'; Lattice.children = []; } in () ) frequent_items; lattice let expand_lattice min_support lattice_ref = let lattice = !lattice_ref in (* Lattice.pretty_print lattice; *) let frequent_items = Database.find_frequent_items !(lattice.Lattice.projected_db) min_support in printf "Got frequent items (%d)" (List.length frequent_items); print_newline(); let item_projections = Database.project_over !(lattice.Lattice.projected_db) frequent_items in printf "Got item projectsion"; print_newline(); List.iter (fun ((item,is_start_item),(support,projection)) -> if support >= min_support then begin printf "Adding projected DB for item %d with support %d:\n" item support; print_newline(); (* Database.dump_db projection; *) Lattice.add_child lattice { Lattice.item = item; Lattice.extension = (if is_start_item then Lattice.S_extension else Lattice.I_extension); Lattice.support = support; Lattice.projected_db = ref projection; Lattice.children = []; } end else () ) item_projections; ref lattice let rec expand_children min_support lattice_ref = let lattice = !lattice_ref in if lattice.Lattice.children = [] then () else lattice.Lattice.children <- List.map (expand_lattice min_support) lattice.Lattice.children; lattice.Lattice.projected_db := []; let _ = List.map (expand_children min_support) lattice.Lattice.children in () (** Implement the main closed mining algorithm * * This is lovely *) let closed_mining db min_support = (* let frequent_items = Database.find_frequent_items db min_sup in print frequent_items; let db = Database.prune_infrequent_items' db frequent_items in print db; let initial_sequences = List.map item_to_sequence frequent_items in *) (* let lattice = initial_lattice db min_support in Lattice.pretty_print lattice; *) let lattice = Lattice.initial_lattice db in printf "Doing initial projection...\n"; print_newline(); let lattice = !(expand_lattice min_support (ref lattice)) in printf "Expanding children...\n"; print_newline(); expand_children min_support (ref lattice); (* Now expand each child level *) (* lattice.Lattice.children <- List.map (expand_lattice min_support) lattice.Lattice.children; *) (* let lattice = Lattice.to_closed_sequences lattice in *) lattice (* Main entrypoint * Read parameters, load data, analyze, output results *) let _ = printf "CISPAN+\n"; (* Gc.set { (Gc.get ()) with Gc.verbose = 0x01 }; *) if Array.length Sys.argv != 3 then printf "Usage: cispan \n" else begin let filename = Sys.argv.(1) in let min_support = int_of_string Sys.argv.(2) in printf "Loading DB..."; flush stdout; let db = Database.load_from_ascii filename in printf "done\n"; flush stdout; (* Create the first level of lattice *) (* let lattice = initial_lattice db min_support in *) let lattice = closed_mining db min_support in printf "New lattice:\n"; Lattice.pretty_print lattice; printf "Sequences:\n"; Lattice.print_sequences lattice; (* db := Database.prune_infrequent_items !db min_support; printf "Database:\n"; Database.dump_db !db; printf "\n\n"; let db' = Database.project !db min_support ([[3]],Sequence.Complete) in printf "Un-pruned Projected Database:\n"; Database.dump_db db'; let db'' = Database.prune_infrequent_items db' min_support in printf "Projected Database:\n"; Database.dump_db db''; printf "\n\n"; *) (* The initial lattice contains just a dummy item, and the entire DB is the projection *) (* let lattice_0 = cross_module !db [] min_support empty_lattice in *) (* let lattice_0 = closed_mining !db min_support PrefixTree.empty_lattice in *) (* let lattice_0 = PrefixTree.insert lattice_0 [[1;2];[3;4]] in let lattice_0 = PrefixTree.insert lattice_0 [[1;2];[3;5]] in *) (* dump lattice_0; *) (* PrefixTree.dumpTree lattice_0; *) () end