(* streeeetttchhhhh *)

open Bigarray
open Printf

(* Directory holding the per-movie training files. *)
let training_set_path = "/home/awwaiid/netflix/training_set"

(* File that the in-memory database is marshalled to. *)
let db_snapshot_path = "smallset2.dat"

(* Highest movie id the table can hold; slot 0 of the table is unused. *)
let max_movie_id = 17770

type movie = int
type rating = int
type user = int

(* One stored rating. The pair is (user, rating), which is the order in which
   [load_file] builds the entries; the alias previously listed the components
   the other way round. *)
type user_rating = user * rating

(* In-memory database: index = movie id, value = ratings recorded for that
   movie, most recently read first. *)
let movie_db = ref (Array.make (max_movie_id + 1) [])

(* Earlier experiment with a memory-mapped Bigarray, kept for reference:
   let fd = Unix.openfile "trial.dat" [Unix.O_RDWR] 0
   let arr = Array2.map_file fd int8_unsigned c_layout true 17770 3 *)

(* Number of data files handed to [load_file] so far. *)
let total_files = ref 0

(* [save_db_to_file filename] marshals the whole of [!movie_db] to [filename],
   overwriting it. The channel is closed even if marshalling raises; the
   exception is then re-raised. *)
let save_db_to_file filename =
  let outfile = open_out_bin filename in
  (try Marshal.to_channel outfile !movie_db []
   with e -> close_out_noerr outfile; raise e);
  close_out outfile

(* [load_db_from_file filename] replaces [!movie_db] with the value marshalled
   in [filename]. Marshal performs no type checking, so the file must have been
   produced by [save_db_to_file]. The channel is closed even if unmarshalling
   raises; [!movie_db] is left untouched in that case. *)
let load_db_from_file filename =
  let infile = open_in_bin filename in
  let db =
    try (Marshal.from_channel infile : user_rating list array)
    with e -> close_in_noerr infile; raise e
  in
  close_in infile;
  movie_db := db

(* [parse_rating_line line] splits a line of the form user_id,R... at its first
   comma and returns [(user_id, rating)], where the rating is the single
   character following that comma. Anything after the rating (the date in the
   training files) is ignored.
   Raises [Not_found] if the line has no comma, and [Failure] or
   [Invalid_argument] if a field is missing or not numeric. *)
let parse_rating_line line =
  let first_comma = String.index line ',' in
  let user_id = int_of_string (String.sub line 0 first_comma) in
  let rating = int_of_string (String.sub line (first_comma + 1) 1) in
  (user_id, rating)

(* Reads one movie file from an already opened channel into [!movie_db].
   The first line is the movie id followed by one trailing character, which is
   dropped; every following line is one rating. An empty file is reported on
   stderr and skipped. *)
let load_ratings_from_channel f filename =
  let read_line () = try Some (input_line f) with End_of_file -> None in
  let rec read_ratings movie_id =
    match read_line () with
    | None -> ()
    | Some line ->
      (* To load only a random sample, guard the insertion with
         [Random.int 1000 = 0]. *)
      let entry = parse_rating_line line in
      let db = !movie_db in
      db.(movie_id) <- entry :: db.(movie_id);
      read_ratings movie_id
  in
  match read_line () with
  | None -> eprintf "Warning: %s is empty, skipping\n" filename
  | Some header ->
    let id_str = String.sub header 0 (String.length header - 1) in
    read_ratings (int_of_string id_str)

(* [load_file ?dir filename] loads the ratings of the movie file [filename],
   looked up in [dir] (default [training_set_path]), into [!movie_db], and
   bumps [total_files].
   The end of the file is detected here rather than by letting [End_of_file]
   escape, so an empty file can no longer be mistaken by [load_directory] for
   the end of the directory listing. The channel is closed on every path;
   parse errors are re-raised after closing. *)
let load_file ?(dir = training_set_path) filename =
  incr total_files;
  printf "File number: %d\n" !total_files;
  let f = open_in (Filename.concat dir filename) in
  (try load_ratings_from_channel f filename
   with e -> close_in_noerr f; raise e);
  close_in f

(* [load_directory dir] loads every entry of [dir] except the current and
   parent directory entries through [load_file]. After every 1000 files the
   database is written to [db_snapshot_path], read back and a full major GC is
   run (presumably to compact the heap -- confirm before removing).
   Only the [End_of_file] raised by [Unix.readdir] ends the scan, and the
   directory handle is always closed. *)
let load_directory dir =
  let handle = Unix.opendir dir in
  let next_entry () =
    try Some (Unix.readdir handle) with End_of_file -> None
  in
  let process filename =
    printf "Loading file: %s" filename;
    print_newline ();
    load_file ~dir filename;
    print_newline ();
    if !total_files mod 1000 = 0 then begin
      save_db_to_file db_snapshot_path;
      load_db_from_file db_snapshot_path;
      Gc.full_major ();
      printf "Saved.\n"
    end
  in
  let rec scan () =
    match next_entry () with
    | None -> ()
    | Some ("." | "..") -> scan ()
    | Some filename -> process filename; scan ()
  in
  (try scan () with e -> Unix.closedir handle; raise e);
  Unix.closedir handle

(* Entry point: load the whole training set, then write the final snapshot.
   To try a single file instead: load_file "mv_0015124.txt" *)
let () =
  load_directory training_set_path;
  save_db_to_file db_snapshot_path