#!/usr/bin/perl
#
# For every concept in @concepts this script
#   1. builds a list of "close" tags from the female / male tag-weight files,
#   2. re-ranks the concept's Flickr result list with those tags,
#   3. dumps the original Flickr order in the same tab-separated layout, and
#   4. writes an HTML page showing the three rankings side by side.
#
# NOTE(review): the copy of this script that was reviewed had lost every
# angle-bracketed token (filehandle reads and all HTML tags). The filehandle
# reads are restored from the handle opened right before each read. The HTML
# markup emitted by makeHTML is a minimal reconstruction -- confirm it against
# a known-good output page.

use strict;
use warnings;

use Image::Size;

my @concepts = qw(
    airplane airport animal apple architecture art autumn baby ball bar
    barcelona beach bear beauty beer berlin bicycle bird birthday blue boat
    book boy bridge brown building bus butterfly cake california camera
    candle car castle cat cathedral chair child chocolate christmas church
    city clock clouds coffee concert couple cow cross cute dance dinner dog
    doll door dress duck easter england europe eye feet field film fire
    fishing flower food football forest fountain france funny garden germany
    girl graffiti guitar hair halloween hand happy hat heart holiday horse
    hot house ice island italy kiss kitchen lake lamp leaves lighthouse
    london love man mirror model monument moon mountain museum newyork night
    painting paris park party pet pink pool portrait poster pub red river
    road rose sculpture sea selfportrait shoes shopping silhouette smile
    spain spider sport spring square statue storm street summer sunrise
    sunset table tower town train usa waterfall wedding window winter woman
);
#@concepts = ("museum", "hot", "football", "statue", "lighthouse", "cake", "chocolate");
#@concepts = ("museum");

my $ext_disk     = "/media/disk/gender";
my $loco         = "flickr_gender";
my $weights_file = "weights/all_unique";

# Tags that must never be used as "close" tags (space-delimited on both ends
# so that index() only matches whole tags).
my $excluded = " nb noiretblanc hdr bw blackandwhite blanc noir closeup ";

# tag => global score, filled on first use by _global_scores(). Loading is
# lazy so that a run that only regenerates HTML does not need the weights.
my $global_score_of;

foreach my $con (@concepts) {
    my $timo = localtime();
    print "$con $timo\n";

    my $out_con = "$loco/$con";
    if (!-e $out_con) {
        mkdir $out_con or die "cant create $out_con: $!\n";
    }

    my $list_flickr = "flickr_metadata/$con";
    my $out_fem     = "$out_con/female_rank";
    my $out_mal     = "$out_con/male_rank";
    my $close_fem   = "$out_con/close_female";
    my $close_mal   = "$out_con/close_male";

    if (!-e $out_fem) {
        rankGender("$ext_disk/concepts/$con/female_weights_all",
                   $list_flickr, $out_fem, $close_fem);
    }
    if (!-e $out_mal) {
        rankGender("$ext_disk/concepts/$con/male_weights_all",
                   $list_flickr, $out_mal, $close_mal);
    }

    my $orig_res = "$out_con/original_rank";
    if (!-e $orig_res) {
        _write_original_rank($list_flickr, $orig_res);
    }

    # The page is regenerated on every run (the original guarded this with a
    # condition that was always true).
    my $html_file = "$out_con/$con" . '.html';
    makeHTML($orig_res, $out_fem, $out_mal, $html_file, $con);
}

# Returns the text between $prefix (which must end in the opening quote, e.g.
# 'owner="') and the next double quote in $line. Returns '' when the prefix
# or the closing quote is not present.
sub _attr_value {
    my ($line, $prefix) = @_;

    my $at = index($line, $prefix);
    return '' if $at < 0;

    my $start = $at + length $prefix;
    my $end   = index($line, '"', $start);
    return '' if $end < 0;

    return substr($line, $start, $end - $start);
}

# Extracts the attributes this script needs from one metadata line that
# contains "photo id" and returns them as a flat key/value list, together
# with the static image URL built from farm, server, id and secret.
sub _photo_fields {
    my ($line) = @_;

    my %photo = (
        owner  => _attr_value($line, 'owner="'),
        # 'o id="' anchors on the end of "photo id=" so that other
        # attributes ending in id=" are not picked up.
        id     => _attr_value($line, 'o id="'),
        tags   => _attr_value($line, 'tags="'),
        secret => _attr_value($line, 'secret="'),
        server => _attr_value($line, 'server="'),
        farm   => _attr_value($line, 'farm="'),
    );
    $photo{url} = "http://farm$photo{farm}.static.flickr.com/$photo{server}/"
                . $photo{id} . "_$photo{secret}" . ".jpg";

    return %photo;
}

# Writes the photos of $list_flickr to $orig_res in their original order, one
# line per photo: rank, id, owner, the literal "xyz", tags, url (tab
# separated). The input is opened first so that a missing input does not
# leave an empty output file behind.
sub _write_original_rank {
    my ($list_flickr, $orig_res) = @_;

    open my $meta_fh, '<', $list_flickr or die "cant open $list_flickr\n";
    open my $out_fh, '>', $orig_res or die "cant open out $orig_res\n";

    my $init_rank = 0;
    while (my $line = <$meta_fh>) {
        next if $line !~ m/photo id/;
        $init_rank++;
        my %photo = _photo_fields($line);
        print {$out_fh}
            "$init_rank\t$photo{id}\t$photo{owner}\txyz\t$photo{tags}\t$photo{url}\n";
    }

    close $meta_fh;
    close $out_fh or die "cant close $orig_res: $!\n";
    return 1;
}

# Loads $weights_file once and returns a hash reference tag => global score.
# Each line is split on a single space; field 0 is the tag and field 1 the
# score. When a tag occurs more than once the last line wins.
# This replaces the sampled positional search over the whole file, which
# relied on the file being sorted and could not find tags near its end.
sub _global_scores {
    return $global_score_of if defined $global_score_of;

    my %score_of;
    open my $fh, '<', $weights_file or die "cant open $weights_file: $!\n";
    while (my $line = <$fh>) {
        chomp $line;
        my ($tag, $score) = split / /, $line;
        next if !defined $tag;
        $score_of{$tag} = $score;
    }
    close $fh;

    $global_score_of = \%score_of;
    return $global_score_of;
}

# Builds the "close tag" list for one gender.
#   $lst       - weight file; tab separated: local score, second local score,
#                tag. Only the first 200 lines are considered.
#   $close_out - output file; at most 100 lines "score, normalised second
#                local score, tag, global score" (tab separated), best first.
# The score of a tag is log2(local) * (local / global) / 10.
sub _build_close_list {
    my ($lst, $close_out) = @_;

    open my $ls_fh, '<', $lst or die "cant open $lst\n";
    my @weight_lines = <$ls_fh>;
    close $ls_fh;

    my $scores = _global_scores();

    # Never read past the end of a short weight file.
    my $last = $#weight_lines < 199 ? $#weight_lines : 199;

    my @candidates;
    for my $line (@weight_lines[0 .. $last]) {
        chomp(my $row = $line);
        my ($local_score, $local_score2, $cu) = split /\t/, $row;
        next if !defined $cu;
        next if index($excluded, " $cu ") != -1;

        # The scores are stored zero padded.
        $local_score  =~ s/^0*//;
        $local_score2 =~ s/^0*//;

        my $global_score = $scores->{$cu};
        if (!defined $global_score || $global_score <= 0) {
            warn "no global score for '$cu', skipped\n";
            next;
        }
        if ($local_score eq '' || $local_score <= 0) {
            warn "no local score for '$cu', skipped\n";
            next;
        }

        my $norm_local2 = normalizer($local_score2);
        my $fraction = log2($local_score) * ($local_score / $global_score);
        $fraction = $fraction / 10;

        # Scores that print in exponent notation (e-05 .. e-09) are dropped,
        # as before.
        next if index($fraction, 'e-0') != -1;

        push @candidates,
            [ "$fraction", "$fraction\t$norm_local2\t$cu\t$global_score" ];
    }

    # Best score first; compared numerically, ties broken on the full line.
    my @ranked = map { $_->[1] }
        sort { $b->[0] <=> $a->[0] || $b->[1] cmp $a->[1] } @candidates;
    splice(@ranked, 100) if @ranked > 100;

    open my $out_fh, '>', $close_out or die "cant open out $close_out\n";
    print {$out_fh} "$_\n" for @ranked;
    close $out_fh or die "cant close $close_out: $!\n";
    return 1;
}

#routine for ranking with gender
#   $lst       - gender weight file, used to build $close_out when missing
#   $flickr    - Flickr metadata file for the concept
#   $out       - ranking to write; tab separated: "score inv_pos", id, owner,
#                original rank, matched close tags, url
#   $close_out - close tag list (created when it does not exist yet)
# A photo needs at least two tags and at least one close tag to be ranked.
# Each matched close tag adds 1/log2(max(4, position in the close list)); at
# most three close tags are matched per photo, and a close tag is used only
# once between two lines carrying a page="..." attribute. The written list
# keeps only photos that bring at least one close tag not already brought by
# a better-ranked photo.
sub rankGender {
    my ($lst, $flickr, $out, $close_out) = @_;

    if (!-e $close_out) {
        _build_close_list($lst, $close_out);
    }
    else {
        print "exists $close_out\n";
    }

    # Keep one entry per line so that the index equals the list position.
    open my $close_fh, '<', $close_out or die "cant open $close_out\n";
    my @close_tags;
    while (my $line = <$close_fh>) {
        chomp $line;
        my @fields = split /\t/, $line;
        push @close_tags, {
            tag    => $fields[2],
            global => defined $fields[3] ? $fields[3] : 0,
        };
    }
    close $close_fh;

    open my $fl_fh, '<', $flickr or die "cant open fl $flickr\n";

    my @to_rank;
    my %seen_url;
    my @gots;
    my $init_rank = 0;
    my $page      = 1;
    my $counter   = 0;

    while (my $photo_line = <$fl_fh>) {
        if (index($photo_line, 'page="') != -1) {
            $page    = _attr_value($photo_line, 'page="');
            $counter = 0;
            @gots    = ();
        }
        next if $photo_line !~ m/photo id/;

        $init_rank++;
        my %photo = _photo_fields($photo_line);

        $counter++;
        my $pos     = $page * 500 + $counter;
        # Larger for earlier results, so that it sorts like the score.
        my $inv_pos = 99999999999 - $pos;

        #make sure there is at least two tags - flickr recommendation...
        my @cut_tags = split / /, $photo{tags};
        next if @cut_tags < 2;

        my $text  = " $photo{tags} ";
        my $tagos = "";
        my $found = 0;
        my $score = 0;

        for my $mm (0 .. $#close_tags) {
            last if $found >= 3;
            next if $gots[$mm];

            my $tag = $close_tags[$mm]{tag};
            next if !defined $tag || $tag eq '';
            next if index($text, " $tag ") == -1;

            $gots[$mm]++;
            $tagos .= ' ' . $tag;
            $found++;

            if ($close_tags[$mm]{global} > 0) {
                my $rank = $mm < 4 ? 4 : $mm;
                $score += 1 / log2($rank);
            }
        }

        next if length($tagos) == 0;
        next if $seen_url{ $photo{url} }++;

        push @to_rank, [
            "$score",
            $inv_pos,
            "$score $inv_pos\t$photo{id}\t$photo{owner}\t$init_rank\t$tagos\t$photo{url}",
        ];
    }
    close $fl_fh;

    # Highest score first, then earliest original position.
    my @sorted = map { $_->[2] }
        sort {
               $b->[0] <=> $a->[0]
            || $b->[1] <=> $a->[1]
            || $b->[2] cmp $a->[2]
        } @to_rank;

    open my $out_fh, '>', $out or die "cant open out $out\n";
    my %seen_tag;
    for my $line (@sorted) {
        my @fields = split /\t/, $line;
        my $tag_list = defined $fields[4] ? $fields[4] : '';
        $tag_list =~ s/^ *//;
        $tag_list =~ s/ *$//;

        my $new = 0;
        for my $cs (split / /, $tag_list) {
            next if $seen_tag{$cs}++;
            $new++;
        }
        print {$out_fh} "$line\n" if $new > 0;
    }
    close $out_fh or die "cant close $out: $!\n";

    return 1;
}

# Left-pads a value of 1 to 8 characters with zeros to a width of 9. Values
# that are empty or already 9 or more characters long are returned unchanged.
sub normalizer {
    my ($ret) = @_;

    my $len = defined $ret ? length $ret : 0;
    return $ret if $len < 1 || $len > 8;

    return ('0' x (9 - $len)) . $ret;
}

# Reads $rank_file and returns up to $limit chomped lines, skipping a line
# when its tag field (field 4), wrapped in spaces, already occurs in the tag
# fields collected so far. Returns an empty list (with a warning) when the
# file cannot be opened.
sub _distinct_images {
    my ($rank_file, $limit) = @_;

    my $fh;
    if (!open $fh, '<', $rank_file) {
        warn "cant open $rank_file: $!\n";
        return;
    }

    my @images;
    my $already = ' ';
    while (@images < $limit) {
        my $line = <$fh>;
        last if !defined $line;
        chomp $line;

        my @cut_image = split /\t/, $line;
        my $tags = defined $cut_image[4] ? $cut_image[4] : '';
        next if index($already, " $tags ") != -1;

        push @images, $line;
        $already .= " $tags ";
    }
    close $fh;

    return @images;
}

# Prints one result column: a grid of 5 rows x 2 images taken from
# $rank_file, followed by $label. Landscape images get width="160", all
# others (including images whose size cannot be read) height="160".
# NOTE(review): the img tag points at the local file that is measured
# ($img_dir/<id>_<owner>.jpg); the Flickr URL is available in field 5 of the
# ranking line -- confirm which one the page is meant to show.
sub _print_result_column {
    my ($html_fh, $rank_file, $label, $img_dir) = @_;

    my @images = _distinct_images($rank_file, 20);

    print {$html_fh} "<td align=\"center\">\n<table>\n";
    for my $row (0 .. 4) {
        print {$html_fh} "<tr>";
        for my $col (0 .. 1) {
            my $line = $images[ $row * 2 + $col ];
            my @spl_answer = defined $line ? split(/\t/, $line) : ();
            if (@spl_answer < 3) {
                # Fewer results than cells: leave the cell empty.
                print {$html_fh} "<td>&nbsp;</td>";
                next;
            }

            my $title = "$img_dir/$spl_answer[1]" . '_' . $spl_answer[2] . '.jpg';
            my ($width, $height) = imgsize($title);
            my $taille =
                (defined $width && defined $height && $width > $height)
                ? 'width="160" '
                : 'height="160" ';
            print {$html_fh} "<td><img src=\"$title\" $taille/></td>";
        }
        print {$html_fh} "</tr>\n";
    }
    print {$html_fh} "</table>\n$label\n</td>\n";
    return 1;
}

#routine for creating HTML files with results
#   $orig, $fem, $mal - ranking files (original, female, male)
#   $html_file        - page to write
#   $titolo           - concept name, used in the title and the image path
sub makeHTML {
    my ($orig, $fem, $mal, $html_file, $titolo) = @_;

    my $img_dir = "/mnt/user_survey/gender_concepts/flickr_images/$titolo";

    open my $html_fh, '>', $html_file or die "cannot open $html_file\n";

    print {$html_fh} "<html>\n<head>\n";
    print {$html_fh} "<title>Answers for $titolo</title>\n";
    print {$html_fh} "</head>\n<body>\n";
    print {$html_fh} "<center><h2>Query: $titolo</h2></center>\n";
    print {$html_fh} "<table>\n<tr valign=\"top\">\n";

    #print the images for original results
    _print_result_column($html_fh, $orig, 'original', $img_dir);
    print {$html_fh} "<td>&nbsp;</td>\n";

    #print the images for female results
    _print_result_column($html_fh, $fem, 'female', $img_dir);
    print {$html_fh} "<td>&nbsp;</td>\n";

    #print the images for male results
    _print_result_column($html_fh, $mal, 'male', $img_dir);

    print {$html_fh} "</tr>\n</table>\n";
    print {$html_fh} "</body>\n</html>\n";

    close $html_fh or die "cannot close $html_file: $!\n";
    return 1;
}

# Base-2 logarithm of $n. $n must be greater than zero.
sub log2 {
    my $n = shift;
    return (log($n) / log(2));
}