k nucleotide
zo / tasks
program
-- Maps a one-character base to its numeric code: A = 0, C = 1, T = 2.
-- Any other input (including "G") yields 3.
fun encode(base: str) -> int {
  if base == "T" {
    2
  } else if base == "C" {
    1
  } else if base == "A" {
    0
  } else {
    3
  }
}

-- Reads `seq` as a base-4 number, most significant base first, using the
-- digit values produced by `encode`.
fun code_of(seq: str) -> int {
  mut acc: int = 0;
  for pos := 0..seq.len {
    imu digit: int = encode(seq[pos..pos + 1]);
    acc = acc * 4 + digit;
  }
  acc
}

-- Renders `value` as text, left-padded with zeros to at least three
-- characters.
fun pad3(value: int) -> str {
  if value < 10 {
    "00" ++ value.to_str()
  } else if value < 100 {
    "0" ++ value.to_str()
  } else {
    value.to_str()
  }
}

-- Counts the (possibly overlapping) positions at which `pattern` appears
-- in `strand`. Returns 0 when `strand` is shorter than `pattern`.
fun occurrences(strand: str, pattern: str) -> int {
  imu width: int = pattern.len;
  imu windows: int = when strand.len >= width ? strand.len - width + 1 : 0;
  mut hits: int = 0;
  for offset := 0..windows {
    if strand[offset..offset + width] == pattern {
      hits = hits + 1;
    }
  }
  hits
}

-- Prints one line per distinct fragment of length `frame` found in
-- `strand`: the fragment followed by its share of all fragments as a
-- percentage with three decimals. Lines are ordered by descending count;
-- ties are ordered by descending `code_of` value.
fun frequencies(strand: str, frame: int) {
  imu total: int = when strand.len >= frame ? strand.len - frame + 1 : 0;
  mut seqs: []str = [];
  mut counts: []int = [];
  mut codes: []int = [];

  -- Tally every window; the three arrays are kept index-aligned.
  for start := 0..total {
    imu window: str = strand[start..start + frame];
    mut slot: int = -1;
    for k := 0..seqs.len {
      if seqs[k] == window {
        slot = k;
      }
    }
    if slot >= 0 {
      counts[slot] += 1;
    } else {
      seqs.push(window);
      codes.push(code_of(window));
      counts.push(1);
    }
  }

  -- Selection sort: move the best remaining entry into position `i`.
  for i := 0..seqs.len {
    mut top: int = i;
    for j := i + 1..seqs.len {
      if counts[j] > counts[top] {
        top = j;
      } else if counts[j] == counts[top] && codes[j] > codes[top] {
        top = j;
      }
    }
    imu top_seq: str = seqs[top];
    imu top_count: int = counts[top];
    imu top_code: int = codes[top];
    seqs[top] = seqs[i];
    counts[top] = counts[i];
    codes[top] = codes[i];
    seqs[i] = top_seq;
    counts[i] = top_count;
    codes[i] = top_code;
  }

  -- Percentages are computed in thousandths of a percent, rounded to the
  -- nearest unit by adding half the divisor before dividing.
  for rank := 0..seqs.len {
    imu scaled: int = counts[rank] * 100000 + total / 2;
    imu milli: int = scaled / total;
    imu seq: str = seqs[rank];
    imu whole: int = milli / 1000;
    imu frac: str = pad3(milli % 1000);
    showln("{seq} {whole}.{frac}");
  }
}

-- Reads lines until input fails, collects the trimmed lines that follow a
-- line starting with ">THREE" (stopping at the next line starting with ">"),
-- then prints the 1- and 2-base frequency tables and the occurrence counts
-- of a fixed list of patterns.
fun main() {
  mut strand: str = "";
  mut reading: bool = false;
  mut done: bool = false;

  while !done {
    match readln() {
      Result::Fail(_) => {
        done = true;
      }
      Result::Pass(line) => {
        imu text: str = line.trim();
        imu is_target: bool = text.len >= 6 && text[0..6] == ">THREE";
        imu is_header: bool = text.len >= 1 && text[0..1] == ">";
        if is_target {
          reading = true;
        } else if reading {
          if is_header {
            done = true;
          } else {
            strand = strand ++ text;
          }
        }
      }
    }
  }

  frequencies(strand, 1);
  frequencies(strand, 2);

  imu patterns: []str = [
    "GGT",
    "GGTA",
    "GGTATT",
    "GGTATTTTAATT",
    "GGTATTTTAATTTATAGT",
  ];
  for p := 0..patterns.len {
    imu pattern: str = patterns[p];
    imu hits: int = occurrences(strand, pattern);
    showln(hits.to_str() ++ " " ++ pattern);
  }
}
output
T 37.500 G 27.083 A 22.917 C 12.500 TA 14.894 GT 12.766 TT 12.766 AT 12.766 GG 8.511 TG 8.511 GC 6.383 CG 4.255 CA 4.255 AG 4.255 AA 4.255 TC 2.128 CC 2.128 AC 2.128 4 GGT 4 GGTA 2 GGTATT 1 GGTATTTTAATT 1 GGTATTTTAATTTATAGT