← how-to

k-nucleotide

zo / tasks

program
-- Maps a one-letter base to its base-4 digit: "A" is 0, "C" is 1, "T" is 2.
-- Any other input (including "G") yields 3.
fun encode(base: str) -> int {
  -- Position in this table is the digit; anything not listed keeps the default.
  imu ranked: []str = ["A", "C", "T"];
  mut digit: int = 3;

  for pos := 0..ranked.len {
    if ranked[pos] == base {
      digit = pos;
    }
  }

  digit
}

-- Reads seq as a base-4 number, most significant digit first, taking each
-- one-character slice's digit from encode. An empty seq gives 0.
fun code_of(seq: str) -> int {
  mut acc: int = 0;
  mut pos: int = 0;

  while pos < seq.len {
    imu digit: int = encode(seq[pos..pos + 1]);

    -- Shift the digits gathered so far one base-4 place, then append.
    acc = acc * 4 + digit;
    pos += 1;
  }

  acc
}

-- Returns value as decimal text with leading zeros: two zeros are prepended
-- when value is below 10, one when it is below 100, none otherwise.
fun pad3(value: int) -> str {
  imu digits: str = value.to_str();

  -- Each missing place contributes one "0"; a value below 10 misses both.
  imu hundreds_pad: str = when value >= 100 ? "" : "0";
  imu tens_pad: str = when value >= 10 ? "" : "0";

  hundreds_pad ++ tens_pad ++ digits
}

-- Counts the start positions in strand at which pattern appears; overlapping
-- matches are each counted. Returns 0 when strand is shorter than pattern.
fun occurrences(strand: str, pattern: str) -> int {
  imu width: int = pattern.len;
  mut hits: int = 0;
  mut at: int = 0;

  -- Stop once a window of pattern's length no longer fits in strand.
  while strand.len >= at + width {
    if strand[at..at + width] == pattern {
      hits += 1;
    }

    at += 1;
  }

  hits
}

-- Prints one line per distinct substring of length frame found in strand:
-- the substring, a space, and its share of all such substrings as a
-- percentage with three decimals. Lines are ordered by descending count;
-- equal counts are ordered by descending code_of value. Prints nothing when
-- strand is shorter than frame.
fun frequencies(strand: str, frame: int) {
  -- Number of window positions; zero when the strand is shorter than frame.
  imu total: int = when strand.len >= frame ? strand.len - frame + 1 : 0;

  -- Parallel arrays: entry k describes the k-th distinct window seen.
  mut keys: []str = [];
  mut tallies: []int = [];
  mut ranks: []int = [];

  for offset := 0..total {
    imu piece: str = strand[offset..offset + frame];
    mut slot: int = 0;
    mut found: bool = false;

    -- Linear search for an existing entry; slot ends on it when found.
    while !found && slot < keys.len {
      if keys[slot] == piece {
        found = true;
      } else {
        slot += 1;
      }
    }

    if found {
      tallies[slot] += 1;
    } else {
      keys.push(piece);
      tallies.push(1);
      ranks.push(code_of(piece));
    }
  }

  -- Selection sort: move the best remaining entry to position head.
  for head := 0..keys.len {
    mut top: int = head;

    for probe := head + 1..keys.len {
      if tallies[probe] > tallies[top] {
        top = probe;
      } else if tallies[probe] == tallies[top] && ranks[probe] > ranks[top] {
        top = probe;
      }
    }

    -- Swap entries head and top across all three arrays.
    imu top_key: str = keys[top];
    imu top_tally: int = tallies[top];
    imu top_rank: int = ranks[top];

    keys[top] = keys[head];
    tallies[top] = tallies[head];
    ranks[top] = ranks[head];
    keys[head] = top_key;
    tallies[head] = top_tally;
    ranks[head] = top_rank;
  }

  for row := 0..keys.len {
    imu seq: str = keys[row];

    -- Thousandths of a percent; adding total / 2 rounds to the nearest unit.
    imu scaled: int = tallies[row] * 100000 + total / 2;
    imu milli: int = scaled / total;
    imu whole: int = milli / 1000;
    imu frac: str = pad3(milli % 1000);

    showln("{seq} {whole}.{frac}");
  }
}

-- Reads input lines, concatenates the trimmed lines that follow a line
-- starting with ">THREE" up to the next line starting with ">" (or until
-- reading fails), then prints the 1- and 2-letter frequency tables and the
-- occurrence counts of five fixed patterns.
fun main() {
  mut strand: str = "";
  mut in_section: bool = false;
  mut finished: bool = false;

  while !finished {
    match readln() {
      Result::Fail(_) => {
        finished = true;
      }
      Result::Pass(line) => {
        imu text: str = line.trim();
        imu is_target: bool = text.len >= 6 && text[0..6] == ">THREE";
        imu is_header: bool = text.len >= 1 && text[0..1] == ">";

        if is_target {
          in_section = true;
        } else if in_section {
          -- Any other header closes the section; other lines are sequence data.
          if is_header {
            finished = true;
          } else {
            strand = strand ++ text;
          }
        }
      }
    }
  }

  frequencies(strand, 1);
  frequencies(strand, 2);

  imu patterns: []str = [
    "GGT",
    "GGTA",
    "GGTATT",
    "GGTATTTTAATT",
    "GGTATTTTAATTTATAGT",
  ];

  for which := 0..patterns.len {
    imu pattern: str = patterns[which];
    imu hits: int = occurrences(strand, pattern);

    showln(hits.to_str() ++ " " ++ pattern);
  }
}
output
T 37.500
G 27.083
A 22.917
C 12.500
TA 14.894
GT 12.766
TT 12.766
AT 12.766
GG 8.511
TG 8.511
GC 6.383
CG 4.255
CA 4.255
AG 4.255
AA 4.255
TC 2.128
CC 2.128
AC 2.128
4 GGT
4 GGTA
2 GGTATT
1 GGTATTTTAATT
1 GGTATTTTAATTTATAGT

reachout

echo -n 'dGhlQGNvbXBpbG9yZHMuaG91c2U=' | base64 --decode

For humans: faq.

For AI agents: llms.txt (curated index) and llms-full.txt (full docs).

Privacy: No cookies, no ads, no tracking. It's like you were never here.