#!/usr/bin/perl use strict; use lib '.'; use AI::Genetic; my $debug = 0; my $globalCount = 0; my $best_score = -100000; $| = 1; # The idea here is to create a regex to extract some strings # from some text. You give it some examples, it gives you a # regex. Like magic. # # Basic Algorithm: # 1) use vars qw(@text @strs); $text[0] = "Account #AAAA, for BBBB CCCC"; $strs[0] = ['AAAA','BBBB','CCCC']; $text[1] = "Account #DDDD, for EEEE FFFF"; $strs[1] = ['DDDD', 'EEEE', 'FFFF']; $text[2] = "Account #2342345, for Euwen johnson"; $strs[2] = ['2342345','Euwen','johnson']; $text[3] = "Account #932924, for Bubbhamarie Joe"; $strs[3] = ['932924','Bubbhamarie','Joe']; my $ga = new AI::Genetic( -fitness => \&fitnessTest, -type => 'listvector', -mutation => 0.2, -crossover => 0.2, ); my $ops = ['(',')','.','a'..'z','A'..'Z','\\w','\\s','+','','|','?','*',"\n","#", ('') x 500 ]; $ga->init([ ($ops) x 50 ]); # $ga->inject(100, # [split('',"Account #(....), for (BBBB) (CCCC)"),''x15], # [split('',"Account #(AAAA), for (....) (CCCC)"),''x15], # [split('',"Account #(AAAA), for (BBBB) (....)"),''x15], # [split('',"Account #(....), for (....) (....)"),''x15], # ); #print "Test func fitness: " . fitnessTest( # [split('',"Account #(AAAA), for (BBBB) (CCCC)"),''x15] #) . "\n"; #print "Test func fitness: " . fitnessTest( # [split('',"ok+EA(bR\wuqU)TgYTrvxh+ZBYFgXENyfP+t\wVDfcAqPiHEg||n"),''x15] #) . "\n"; #exit; #$ga->evolve('rouletteSinglePoint', 1000); $ga->evolve('tournamentSinglePoint', 10000); print "Done evolving.\n"; print "Best score = ", $ga->getFittest->score, ".\n"; my (@genes) = $ga->getFittest->genes(); print "Best individual = /" . join('',@genes) . "/\n"; sub fitnessTest { my $i = shift; my $re = join '', @$i; $globalCount++; print $globalCount . " Trying /^$re\$/... \n" if $debug; my $score = 0; for(my $i=0; $i < $#text+1 ; $i++) { my @results; my $got_match; my $local_score = 0; eval { local $SIG{ALRM} = sub { die "alarm\n" }; alarm 2; if($text[$i] =~ /^$re$/x) { $got_match = 1; @results = $text[$i] =~ /^$re$/; } alarm 0; }; if($@) { # Invalid regex, lose 1000 points $local_score += -1000; } else { my @goal = @{$strs[$i]}; # 50 points if they return the correct number of responses # Then minus 10 points for how far off they are my $how_close = abs($#goal - $#results); #$how_close = $how_close > 5 ? 5 : $how_close; print " H:$how_close ($#goal - $#results)" if $debug; $local_score += 500 - (100 * $how_close); # then, for each of their responses we see how many actually match what # we want (who cares about order?) for(my $j = 0 ; $j < scalar @goal ; $j++) { if($goal[$j] eq $results[$j]) { $local_score += 10000; } if(!defined($results[$j])) { $local_score -= 1000; } } } # -1 point for each character in the regex (shorter is better) $local_score += -5 * (length $re); print " Len:" . (-5 * (length $re)) if $debug; # Super-penalty for blank | matches # if($#results == 1 && $results[0] == "1") { # $local_score += -1000; # } print " L:$local_score" if $debug; $score += $local_score; } print " S:$score.\n" if $debug; if($score > $best_score) { $best_score = $score; print $globalCount . " Best /^$re\$/ ($score)\n"; } return $score; }