#!/bin/bash
#
# Turn a word list into a per-character CSV feature table.
#
# For every character of every input line, one CSV row is written with:
#   previous_5..previous_1  character codes of the five preceding characters
#                           in the same line (0 when there is none)
#   current                 character code of the current character
#   is_start                1 for the first character of the line, else 0
#   previous_type           0 = no previous char or not an ASCII letter,
#                           1 = previous char is listed in $vowels,
#                           2 = previous char is any other ASCII letter
#   word_length             1-based position of the current character
# ASCII upper-case letters are folded to lower case before encoding.
#
# Usage: script [input_file [output_file]]
#   Both arguments are optional and default to FILE_PATH / OUT_FILE_PATH.

FILE_PATH="./1000_words.txt"
OUT_FILE_PATH="./out.txt"
vowels="aeiouy"   # note: 'y' is treated as a vowel

#######################################
# Print the CSV rows for a single word.
# Globals:   vowels (read)
# Arguments: $1 - the word (an empty word produces no rows)
# Outputs:   one CSV row per character on stdout
#######################################
encode_word() {
  local word=$1
  local lower_alphabet="abcdefghijklmnopqrstuvwxyz"
  # Codes of the five preceding characters; 0 means "no character yet".
  local code_5=0 code_4=0 code_3=0 code_2=0 code_1=0
  local prev_char=""   # lower-cased previous character, for the vowel lookup
  local i ch code is_start prev_type

  for (( i = 0; i < ${#word}; i++ )); do
    ch=${word:i:1}

    # A leading quote makes printf yield the character's numeric value;
    # -v stores it without forking a subshell.
    printf -v code '%d' "'$ch"

    # Fold ASCII upper case (65-90) to lower case. Done on the code rather
    # than with ${var,,} so the result is locale-independent and the script
    # keeps working on bash 3.2.
    if (( code >= 65 && code <= 90 )); then
      code=$(( code + 32 ))
      ch=${lower_alphabet:$(( code - 97 )):1}
    fi

    is_start=0
    if (( i == 0 )); then
      is_start=1
    fi

    # Classify the previous character; only ASCII a-z (97-122) count as
    # letters, everything else (including "no previous char") is type 0.
    prev_type=0
    if (( code_1 >= 97 && code_1 <= 122 )); then
      if [[ "$vowels" == *"$prev_char"* ]]; then
        prev_type=1
      else
        prev_type=2
      fi
    fi

    printf '%d,%d,%d,%d,%d,%d,%d,%d,%d\n' \
      "$code_5" "$code_4" "$code_3" "$code_2" "$code_1" "$code" \
      "$is_start" "$prev_type" "$(( i + 1 ))"

    # Shift the history window by one character.
    code_5=$code_4
    code_4=$code_3
    code_3=$code_2
    code_2=$code_1
    code_1=$code
    prev_char=$ch
  done
}

#######################################
# Convert a word list file into the CSV feature file.
# Globals:   FILE_PATH, OUT_FILE_PATH (read, used as defaults)
# Arguments: $1 - input word list (optional, default: $FILE_PATH)
#            $2 - output CSV path  (optional, default: $OUT_FILE_PATH)
# Outputs:   overwrites the output file with a header plus one row per
#            input character; diagnostics go to stderr
# Returns:   0 on success; 1 if the input is unreadable (the output file is
#            left untouched in that case); non-zero if the output cannot
#            be opened
#######################################
main() {
  local in_path=${1:-$FILE_PATH}
  local out_path=${2:-$OUT_FILE_PATH}
  local line

  if [[ ! -r "$in_path" ]]; then
    printf 'error: cannot read input file: %s\n' "$in_path" >&2
    return 1
  fi

  # Open the output once for the whole run instead of once per row.
  {
    printf '%s\n' \
      "previous_5,previous_4,previous_3,previous_2,previous_1,current,is_start,previous_type,word_length"

    # IFS is deliberately left at its default so read trims leading and
    # trailing blanks from each line. The `|| [[ -n ... ]]` clause keeps a
    # final line that has no trailing newline.
    while read -r line || [[ -n "$line" ]]; do
      encode_word "$line"
    done < "$in_path"
  } > "$out_path"
}

main "$@"