omega/transform.sh

66 lines
1.7 KiB
Bash
Executable File

#!/bin/bash
#
# transform.sh - expand a word list into a per-character CSV feature table.
#
# Usage: transform.sh [INPUT [OUTPUT]]
#   INPUT   text file, one word per line   (default: ./1000_words.txt)
#   OUTPUT  CSV file, overwritten          (default: ./out.txt)
#
# After a header row, one CSV row is written per character of every input
# line, with these columns:
#   previous_5 .. previous_1  character codes of the five characters that
#                             precede the current one on the same line
#                             (0 where there is no such character)
#   current                   character code of the current character
#   is_start                  1 for the first character of a line, else 0
#   previous_type             class of the preceding character:
#                               1 = one of "aeiouy"
#                               2 = any other a-z letter
#                               0 = anything else / no preceding character
#   word_length               1-based position of the character in its line
# Input is lower-cased (A-Z -> a-z) before any code or class is computed.

FILE_PATH="${1:-./1000_words.txt}"
OUT_FILE_PATH="${2:-./out.txt}"
vowels="aeiouy"

#######################################
# Write the per-character feature table for one word list.
# Globals:
#   vowels (read) - characters classified as previous_type 1
# Arguments:
#   $1 - path of the input word list
#   $2 - path of the CSV file to create or overwrite
# Outputs:
#   The CSV goes to $2; a diagnostic goes to stderr if $1 is unreadable.
# Returns:
#   0 on success, non-zero if the input cannot be read or the output
#   cannot be opened. The output is left untouched when the input is
#   unreadable.
#######################################
transform_words() {
  local in_file=$1 out_file=$2
  local line ch code i is_start prev_type
  local c1 c2 c3 c4 c5

  # Check the input before the output redirection truncates the CSV.
  if [[ ! -r "$in_file" ]]; then
    printf 'transform.sh: cannot read input file: %s\n' "$in_file" >&2
    return 1
  fi

  # The output is opened once for the header and every row.
  {
    printf 'previous_5,previous_4,previous_3,previous_2,previous_1,current,is_start,previous_type,word_length\n'

    # Lower-case the whole file in one tr process; this is the same A-Z -> a-z
    # mapping that would otherwise be applied to each character separately.
    # `read` keeps the default IFS on purpose so blanks around a word are
    # trimmed; `|| [[ -n ... ]]` keeps a final line that lacks a newline.
    tr 'A-Z' 'a-z' < "$in_file" | while read -r line || [[ -n "$line" ]]; do
      # Drop the carriage return of CRLF files so it is not emitted as a row.
      line=${line%$'\r'}

      # History is kept as already-computed codes; 0 means "no character".
      c5=0 c4=0 c3=0 c2=0 c1=0
      prev_type=0

      for (( i = 0; i < ${#line}; i++ )); do
        ch=${line:i:1}
        # A leading quote makes printf yield the character's numeric code.
        printf -v code '%d' "'$ch"
        is_start=$(( i == 0 ))

        printf '%d,%d,%d,%d,%d,%d,%d,%d,%d\n' \
          "$c5" "$c4" "$c3" "$c2" "$c1" "$code" \
          "$is_start" "$prev_type" "$(( i + 1 ))"

        # Shift the history window by one character.
        c5=$c4 c4=$c3 c3=$c2 c2=$c1 c1=$code

        # Classify the current character; it is the "previous" character
        # of the next row.
        if [[ "$ch" =~ ^[a-z]$ ]]; then
          if [[ "$vowels" == *"$ch"* ]]; then
            prev_type=1
          else
            prev_type=2
          fi
        else
          prev_type=0
        fi
      done
    done
  } > "$out_file"
}

# Last command of the script: its status becomes the script's exit status.
transform_words "$FILE_PATH" "$OUT_FILE_PATH"