66 lines
1.7 KiB
Bash
Executable File
66 lines
1.7 KiB
Bash
Executable File
#!/bin/bash
#
# Convert a word list (one word per line) into a CSV of per-character
# features. Every character of every word produces one row:
#
#   previous_5..previous_1  codes of the five preceding characters of the
#                           same word (0 where there is no such character)
#   current                 code of the current character
#   is_start                1 for the first character of a word, else 0
#   previous_type           0 = no previous character or not an ASCII letter,
#                           1 = previous character is a vowel (see $vowels),
#                           2 = previous character is any other ASCII letter
#   word_length             1-based position of the character in the word
#
# ASCII uppercase letters are folded to lowercase before their code is taken;
# all other characters are left as they are.

FILE_PATH="./1000_words.txt"
OUT_FILE_PATH="./out.txt"
vowels="aeiouy"

#######################################
# Print the CSV rows for a single word.
# Globals:   vowels (read)
# Arguments: $1 - the word to encode (may be empty, which prints nothing)
# Outputs:   one CSV row per character on stdout
#######################################
encode_line() {
  local line=$1
  local -r alphabet="abcdefghijklmnopqrstuvwxyz"
  # History is kept as numeric codes (0 = "no character yet") so nothing has
  # to be re-lowercased or re-converted on later iterations.
  local p5=0 p4=0 p3=0 p2=0 p1=0
  local prev_type=0
  local i ch code letter

  for (( i = 0; i < ${#line}; i++ )); do
    ch=${line:i:1}

    # A leading quote makes printf yield the numeric value of the character.
    # printf -v assigns directly, avoiding a subshell per character.
    printf -v code '%d' "'$ch"

    # Fold A-Z onto a-z numerically; this is independent of the locale.
    if (( code >= 65 && code <= 90 )); then
      (( code += 32 ))
    fi

    printf '%d,%d,%d,%d,%d,%d,%d,%d,%d\n' \
      "$p5" "$p4" "$p3" "$p2" "$p1" "$code" \
      "$(( i == 0 ))" "$prev_type" "$(( i + 1 ))"

    # Classify the current character now; the next row reports it as
    # previous_type.
    if (( code >= 97 && code <= 122 )); then
      letter=${alphabet:$(( code - 97 )):1}
      if [[ "$vowels" == *"$letter"* ]]; then
        prev_type=1
      else
        prev_type=2
      fi
    else
      prev_type=0
    fi

    # Shift the history window by one position.
    p5=$p4
    p4=$p3
    p3=$p2
    p2=$p1
    p1=$code
  done
}

#######################################
# Write the CSV header and the rows for every line of the input file.
# Globals:   FILE_PATH (read), OUT_FILE_PATH (read)
# Outputs:   truncates and rewrites $OUT_FILE_PATH
# Returns:   non-zero if the input cannot be read or the output cannot be
#            opened
#######################################
main() {
  local line

  # The output file is opened once for the whole run instead of once per row.
  {
    printf "previous_5,previous_4,previous_3,previous_2,previous_1,current,is_start,previous_type,word_length\n"

    # Default IFS is kept on purpose: read trims leading/trailing blanks.
    # The "|| [[ -n ... ]]" part keeps a last line without a final newline.
    while read -r line || [[ -n "$line" ]]; do
      # Drop the carriage return left over from CRLF line endings.
      line=${line%$'\r'}
      encode_line "$line"
    done < "$FILE_PATH"
  } > "$OUT_FILE_PATH"
}

main "$@"