見出し画像

TWLogAIAN:安定のbbolt VS 高速なbadger 勝者決定!

今朝は4時から開発開始です。年度末なので年休消化のための本業はお休みです。
さて、今週検討してきたGO言語のキーバリューストアの選択でようやく結論がでました。
まず

の記事に掲載したbboltとbadgerの比較グラフは正しくないことがわかりました。テストプログラムにバグがあって正しくデータが登録されていない状態の比較になっていました。badgerが高速に見えるものはデータの登録されていない影響でした。スコアの計算が遅い原因も予測していた理由でした。
キーバリューストアから読み出しながら保存もする状態が問題でした。
ということで、bboltとbadgerを同じ条件で比較できるようにテストプログラムを改善しました。bboltに関しては、パラメータを調整して速度アップしています。(安定動作を考えなければもう少し高速化できる設定もありますが採用していません)最終的に異常スコアの高い20件を表示して結果が一致していることも確認しています。
その結果は

bboltとbadgerの比較

のような感じになりました。速度だけ見るとbadgerのほうが高速です。しかし、処理中のメモリ使用量や開いているファイルの数は圧倒的にbadgerの方が多い状態です。高速化のためにはリソースを多く必要とする典型のように思います。速度は1.2倍にしかならないのにリソースは数倍使っている状況です。

ほどの差はないようです。
という訳で安定動作するbboltを採用することにしました。(安定以外にも理由はありますが)
1000万件のログの分析にメモリ8GBのMac miniで10分ぐらいです。
テストプログラムを掲載しておきます。

bboltのプログラムは

package main

import (
	"archive/zip"
	"bufio"
	"bytes"
	"encoding/binary"
	"fmt"
	"log"
	"math/rand"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	go_iforest "github.com/codegaudi/go-iforest"
	"github.com/gravwell/gravwell/v3/timegrinder"
	"go.etcd.io/bbolt"
)

// total counts every line read from the input zip; valid counts lines that
// yielded both a timestamp and a usable feature vector. inputData holds the
// sampled vectors used to train the isolation forest.
var total = 0
var valid = 0
var inputData = [][]float64{}

// main runs the whole benchmark pipeline: open the DB, stream the access
// log out of a zip, persist logs/vectors, train an isolation forest on a
// sample, score every vector, and print the top anomalies.
func main() {
	log.Println("start")
	if err := startDB(); err != nil {
		log.Fatalln(err)
	}
	if err := initTimegrinder(); err != nil {
		log.Fatalln(err)
	}
	loadData()
	// Busy-wait until the vector writer goroutine flushes its last batch.
	// NOTE(review): vectorChDone is a plain bool written by another
	// goroutine — a data race under -race; confirm and consider sync/atomic.
	// Nothing waits for logChDone here.
	for !vectorChDone {
		time.Sleep(time.Second)
	}
	makeInputData()
	makeIForest()
	checkData()
	checkScore()
	closeDB()
	log.Println("end")
}

// tg extracts free-form timestamps from raw log lines.
var tg *timegrinder.TimeGrinder

// initTimegrinder creates the shared timegrinder configured to seed on the
// left-most timestamp in a line and to interpret times as local time.
func initTimegrinder() error {
	var err error
	tg, err = timegrinder.New(timegrinder.Config{
		EnableLeftMostSeed: true,
	})
	if err != nil {
		return err
	}
	tg.SetLocalTime()
	return nil
}

// logEnt is one raw log line keyed by its (uniquified) timestamp in ns.
type logEnt struct {
	Key int64
	Val *string
}

// vectorEnt is the feature vector extracted from one log line.
type vectorEnt struct {
	Key int64
	Val []float64
}

// scoreEnt is the anomaly score computed for one vector.
type scoreEnt struct {
	Key   int64
	Score float64
}

// db is the shared bbolt handle; logCh/vectorCh feed the two writer
// goroutines started by startDB (buffered to decouple parsing from I/O).
var db *bbolt.DB
var logCh = make(chan *logEnt, 100000)
var vectorCh = make(chan *vectorEnt, 100000)

// startDB opens log.db, tunes bbolt for bulk loading, creates the three
// buckets, and launches the background writer goroutines.
func startDB() error {
	log.Println("start openDB")
	var err error
	db, err = bbolt.Open("log.db", 0600, nil)
	if err != nil {
		return err
	}
	// Larger file-growth steps and batch sizes speed up bulk inserts.
	db.AllocSize = 64 * 1024 * 1024
	db.MaxBatchSize = 10 * 10000
	buckets := []string{"logs", "vectors", "scores"}
	err = db.Update(func(tx *bbolt.Tx) error {
		for _, b := range buckets {
			_, err := tx.CreateBucketIfNotExists([]byte(b))
			if err != nil {
				return err
			}
		}
		return nil
	})
	if err != nil {
		return err
	}
	go logChProcess()
	go vectorChProcess()
	return nil
}

// closeDB releases the bbolt database handle if one was opened.
func closeDB() {
	if db == nil {
		return
	}
	db.Close()
}

// logChDone is set once logCh is closed and the final batch is flushed.
// NOTE(review): plain bool set from a goroutine — racy if ever read from
// another goroutine (currently it is never read).
var logChDone = false

// logChProcess drains logCh, buffering entries and flushing them to the DB
// in batches of ~10k to amortize transaction overhead.
func logChProcess() {
	logList := []*logEnt{}
	for e := range logCh {
		logList = append(logList, e)
		if len(logList) > 10000 {
			saveLogList(logList)
			logList = []*logEnt{}
		}
	}
	// Flush the final partial batch after the channel is closed.
	if len(logList) > 0 {
		saveLogList(logList)
	}
	logChDone = true
}

// saveLogList persists a batch of raw log lines into the "logs" bucket.
// Keys are the 64-bit timestamp rendered as fixed-width hex so bbolt's
// lexicographic key order matches chronological order.
func saveLogList(list []*logEnt) {
	err := db.Batch(func(tx *bbolt.Tx) error {
		b := tx.Bucket([]byte("logs"))
		for _, e := range list {
			if err := b.Put([]byte(fmt.Sprintf("%016x", e.Key)), []byte(*e.Val)); err != nil {
				return err
			}
		}
		return nil
	})
	// Batch can fail (e.g. disk full); previously the error was silently
	// dropped, leaving gaps in the data with no trace in the logs.
	if err != nil {
		log.Println("saveLogList:", err)
	}
}

// vectorChDone signals main that all vectors have been flushed.
// NOTE(review): written here and polled by main without synchronization —
// a data race under -race; confirm and consider sync/atomic.
var vectorChDone = false

// vectorChProcess drains vectorCh, flushing buffered vectors to the DB in
// batches of ~10k to amortize transaction overhead.
func vectorChProcess() {
	vectorList := []*vectorEnt{}
	for e := range vectorCh {
		vectorList = append(vectorList, e)
		if len(vectorList) > 10000 {
			saveVectorList(vectorList)
			vectorList = []*vectorEnt{}
		}
	}
	// Flush the final partial batch after the channel is closed.
	if len(vectorList) > 0 {
		saveVectorList(vectorList)
	}
	vectorChDone = true
}

// saveVectorList persists a batch of feature vectors into the "vectors"
// bucket, encoding each vector as a sequence of little-endian float64s.
func saveVectorList(list []*vectorEnt) {
	err := db.Batch(func(tx *bbolt.Tx) error {
		b := tx.Bucket([]byte("vectors"))
		for _, e := range list {
			buf := new(bytes.Buffer)
			for _, v := range e.Val {
				if err := binary.Write(buf, binary.LittleEndian, v); err != nil {
					log.Println("binary.Write failed:", err)
					return err
				}
			}
			if err := b.Put([]byte(fmt.Sprintf("%016x", e.Key)), buf.Bytes()); err != nil {
				return err
			}
		}
		return nil
	})
	// Batch can fail at commit time; previously the error was dropped.
	if err != nil {
		log.Println("saveVectorList:", err)
	}
}

// saveScoreList persists anomaly scores into the "scores" bucket, one
// "%f"-formatted value per timestamp key.
func saveScoreList(list []*scoreEnt) {
	err := db.Batch(func(tx *bbolt.Tx) error {
		b := tx.Bucket([]byte("scores"))
		for _, e := range list {
			if err := b.Put([]byte(fmt.Sprintf("%016x", e.Key)), []byte(fmt.Sprintf("%f", e.Score))); err != nil {
				return err
			}
		}
		return nil
	})
	// Batch can fail at commit time; previously the error was dropped.
	if err != nil {
		log.Println("saveScoreList:", err)
	}
}

// loadData streams every file inside ../access.log.zip line by line through
// lineProcess and closes the writer channels once the archive is consumed.
// Any I/O error aborts the process.
func loadData() {
	log.Println("start loadData")
	st := time.Now()
	r, err := zip.OpenReader("../access.log.zip")
	if err != nil {
		log.Fatal(err)
	}
	defer r.Close()
	for _, f := range r.File {
		log.Printf("log file=%s", f.Name)
		file, err := f.Open()
		if err != nil {
			log.Fatal(err)
		}
		// NOTE(review): defer inside a loop — each member stays open until
		// loadData returns; fine for a few files, leaks handles for many.
		defer file.Close()
		scanner := bufio.NewScanner(file)
		for scanner.Scan() {
			l := scanner.Text()
			total++
			if total%1000000 == 0 {
				log.Printf("loadData total=%d valid=%d dur=%s", total, valid, time.Since(st))
			}
			lineProcess(&l)
		}
		if err := scanner.Err(); err != nil {
			log.Fatal(err)
		}
	}
	log.Printf("end loadData total=%d valid=%d dur=%s", total, valid, time.Since(st))
	// Closing the channels lets the writer goroutines flush and finish.
	close(logCh)
	close(vectorCh)
}

// makeInputData samples roughly 200k vectors evenly from the "vectors"
// bucket into inputData as training data for the isolation forest.
func makeInputData() {
	log.Println("start makeInputdata")
	st := time.Now()
	// Take every skip-th record so the sample spans the whole dataset.
	skip := total / 200000
	if skip < 1 {
		skip = 1
	}
	db.View(func(tx *bbolt.Tx) error {
		b := tx.Bucket([]byte("vectors"))
		i := 0
		b.ForEach(func(k, v []byte) error {
			i++
			if i%1000000 == 0 {
				log.Printf("makeInputData i=%d len=%d", i, len(inputData))
			}
			if i%skip != 0 {
				return nil
			}
			// Decode the little-endian float64 sequence back into a vector;
			// the read loop ends at io.EOF (or any other read error).
			vector := []float64{}
			buf := bytes.NewReader(v)
			for {
				var f float64
				err := binary.Read(buf, binary.LittleEndian, &f)
				if err != nil {
					break
				}
				vector = append(vector, f)
			}
			inputData = append(inputData, vector)
			return nil
		})
		return nil
	})
	log.Printf("end makeInputdata skip=%d input=%d dur=%s", skip, len(inputData), time.Since(st))

}

// iforest is the trained isolation-forest model used by checkData.
var iforest *go_iforest.IForest

// makeIForest trains an isolation forest (1000 trees, subsample size 256)
// on the sampled inputData. Exits the process on failure.
func makeIForest() {
	log.Println("makeIForest start")
	st := time.Now()
	rand.Seed(time.Now().UnixNano())
	var err error
	iforest, err = go_iforest.NewIForest(inputData, 1000, 256)
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("makeIForest end dur=%s", time.Since(st))
}

// checkData scores every stored vector with the isolation forest and then
// persists all resulting scores in one batch.
func checkData() {
	log.Println("checkData start")
	st := time.Now()
	var wg sync.WaitGroup
	var mu sync.Mutex
	scoreList := []*scoreEnt{}
	db.View(func(tx *bbolt.Tx) error {
		b := tx.Bucket([]byte("vectors"))
		i := 0
		b.ForEach(func(k, v []byte) error {
			i++
			if i%1000000 == 0 {
				log.Printf("checkData i=%d", i)
			}
			// Decode the little-endian float64s stored for this key.
			vector := []float64{}
			buf := bytes.NewReader(v)
			for {
				var f float64
				err := binary.Read(buf, binary.LittleEndian, &f)
				if err != nil {
					break
				}
				vector = append(vector, f)
			}
			// Keys are fixed-width hex timestamps (see saveLogList).
			key, err := strconv.ParseInt(string(k), 16, 64)
			if err != nil {
				log.Println(err)
				return nil
			}
			// NOTE(review): one goroutine per record with no bound — over
			// millions of rows this can spawn millions of goroutines;
			// consider a fixed-size worker pool.
			wg.Add(1)
			go func(pv *[]float64) {
				defer wg.Done()
				score := iforest.CalculateAnomalyScore(*pv)
				mu.Lock()
				scoreList = append(scoreList, &scoreEnt{
					Key:   key,
					Score: score,
				})
				mu.Unlock()
			}(&vector)
			return nil
		})
		return nil
	})
	wg.Wait()
	log.Printf("checkData end dur=%s", time.Since(st))
	st = time.Now()
	saveScoreList(scoreList)
	log.Printf("saveScoreList end dur=%s", time.Since(st))
}

// checkScore scans the "scores" bucket keeping the 20 highest anomaly
// scores, then prints the matching raw lines from the "logs" bucket.
func checkScore() {
	log.Println("checkScore start")
	st := time.Now()
	scoreList := []*scoreEnt{}
	minScore := 0.0
	db.View(func(tx *bbolt.Tx) error {
		log.Println("checkScore start in view")
		b := tx.Bucket([]byte("scores"))
		i := 0
		b.ForEach(func(k, v []byte) error {
			i++
			if i%1000000 == 0 {
				log.Printf("checkScore i=%d", i)
			}
			key, err := strconv.ParseInt(string(k), 16, 64)
			if err != nil {
				return nil
			}
			s, err := strconv.ParseFloat(string(v), 64)
			if err != nil {
				return nil
			}
			// Maintain a descending top-20 list; minScore is the current
			// cut-off so most records are rejected without re-sorting.
			if len(scoreList) < 20 || s > minScore {
				scoreList = append(scoreList, &scoreEnt{Key: key, Score: s})
				sort.Slice(scoreList, func(i, j int) bool {
					return scoreList[i].Score > scoreList[j].Score
				})
				if len(scoreList) > 20 {
					scoreList = scoreList[:len(scoreList)-1]
				}
				minScore = scoreList[len(scoreList)-1].Score
			}
			return nil
		})
		return nil
	})
	// Resolve each top score back to its original log line.
	db.View(func(tx *bbolt.Tx) error {
		b := tx.Bucket([]byte("logs"))
		for i, e := range scoreList {
			v := b.Get([]byte(fmt.Sprintf("%016x", e.Key)))
			if v != nil {
				log.Printf("%d %f %s", i, e.Score, string(v))
			}
		}
		return nil
	})
	log.Printf("checkScore end %d dur=%s", len(scoreList), time.Since(st))
}

// lineProcess extracts a timestamp and feature vector from one log line and
// queues both for storage. Lines without a parsable timestamp or a usable
// vector are skipped (they count in total but not in valid).
func lineProcess(s *string) {
	ts, ok, err := tg.Extract([]byte(*s))
	if err != nil || !ok {
		return
	}
	v := toVector(s)
	if len(v) > 1 {
		valid++
		// Offset the ns timestamp by the line counter to keep keys unique
		// when many lines share one timestamp.
		// NOTE(review): total%100000 can still collide after 100k lines and
		// skews the key by up to 0.1ms — confirm this is acceptable.
		k := ts.UnixNano() + int64((total % 100000))
		logCh <- &logEnt{
			Key: k,
			Val: s,
		}
		vectorCh <- &vectorEnt{
			Key: k,
			Val: v,
		}
	}
}

// toVector converts one access-log line into a fixed-order feature vector
// for anomaly detection. The order of appends is a contract with the
// trained model and must not change. Returns an empty slice when the line
// has no quoted section or the request has fewer than three fields.
// NOTE(review): a[1] is assumed to be the first double-quoted field (the
// HTTP request line in combined log format) — confirm for other layouts.
func toVector(s *string) []float64 {
	vector := []float64{}
	a := strings.Split(*s, "\"")
	if len(a) < 2 {
		return vector
	}
	f := strings.Fields(a[1])
	if len(f) < 3 {
		return vector
	}
	// Split the request target into path and query string.
	query := ""
	ua := strings.SplitN(f[1], "?", 2)
	path := ua[0]
	if len(ua) > 1 {
		query = ua[1]
	}
	ca := getCharCount(a[1])

	//findex_%
	vector = append(vector, float64(strings.Index(a[1], "%")))

	//findex_:
	vector = append(vector, float64(strings.Index(a[1], ":")))

	// countedCharArray
	for _, c := range []rune{':', '(', ';', '%', '/', '\'', '<', '?', '.', '#'} {
		vector = append(vector, float64(ca[c]))
	}

	//encoded =
	vector = append(vector, float64(strings.Count(a[1], "%3D")+strings.Count(a[1], "%3d")))

	//encoded /
	vector = append(vector, float64(strings.Count(a[1], "%2F")+strings.Count(a[1], "%2f")))

	//encoded \
	vector = append(vector, float64(strings.Count(a[1], "%5C")+strings.Count(a[1], "%5c")))

	//encoded %
	vector = append(vector, float64(strings.Count(a[1], "%25")))

	//%20
	vector = append(vector, float64(strings.Count(a[1], "%20")))

	//POST
	if strings.HasPrefix(a[1], "POST") {
		vector = append(vector, 1)
	} else {
		vector = append(vector, 0)
	}

	//path_nonalnum_count
	vector = append(vector, float64(len(path)-getAlphaNumCount(path)))

	//pvalue_nonalnum_avg
	vector = append(vector, float64(len(query)-getAlphaNumCount(query)))

	//non_alnum_len(max_len)
	vector = append(vector, float64(getMaxNonAlnumLength(a[1])))

	//non_alnum_count
	vector = append(vector, float64(getNonAlnumCount(a[1])))

	// Counts of suspicious path fragments (traversal / double slashes ...).
	for _, p := range []string{"/%", "//", "/.", "..", "=/", "./", "/?"} {
		vector = append(vector, float64(strings.Count(a[1], p)))
	}
	return vector
}

// getCharCount returns a 96-element histogram of rune occurrences,
// tallying only runes in the ASCII range 33..95 at their own code-point
// index; runes outside that range are ignored.
func getCharCount(s string) []int {
	counts := make([]int, 96)
	for _, r := range s {
		if r >= 33 && r <= 95 {
			counts[r]++
		}
	}
	return counts
}

// getAlphaNumCount returns how many runes in s are ASCII letters or digits.
func getAlphaNumCount(s string) int {
	n := 0
	for _, r := range s {
		switch {
		case 'A' <= r && r <= 'Z',
			'a' <= r && r <= 'z',
			'0' <= r && r <= '9':
			n++
		}
	}
	return n
}

// getMaxNonAlnumLength returns the length (in runes) of the longest run of
// consecutive non-alphanumeric runes in s.
func getMaxNonAlnumLength(s string) int {
	best := 0
	run := 0
	for _, r := range s {
		alnum := ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') || ('0' <= r && r <= '9')
		if alnum {
			if run > best {
				best = run
			}
			run = 0
		} else {
			run++
		}
	}
	// A trailing run may extend to the end of the string.
	if run > best {
		best = run
	}
	return best
}

// getNonAlnumCount returns how many runes in s are NOT ASCII letters or
// digits.
func getNonAlnumCount(s string) int {
	count := 0
	for _, r := range s {
		alnum := ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') || ('0' <= r && r <= '9')
		if !alnum {
			count++
		}
	}
	return count
}

badgerの場合は、

package main

import (
	"archive/zip"
	"bufio"
	"bytes"
	"encoding/binary"
	"fmt"
	"log"
	"math/rand"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	go_iforest "github.com/codegaudi/go-iforest"
	badger "github.com/dgraph-io/badger/v3"
	"github.com/gravwell/gravwell/v3/timegrinder"
)

// total counts every line read from the input zip; valid counts lines that
// yielded both a timestamp and a usable feature vector. inputData holds the
// sampled vectors used to train the isolation forest.
var total = 0
var valid = 0
var inputData = [][]float64{}

// main runs the whole benchmark pipeline: open badger, stream the access
// log out of a zip, persist logs/vectors, train an isolation forest on a
// sample, score every vector, and print the top anomalies.
func main() {
	log.Println("start")
	if err := startDB(); err != nil {
		log.Fatalln(err)
	}
	if err := initTimegrinder(); err != nil {
		log.Fatalln(err)
	}
	loadData()
	// Busy-wait until the vector writer goroutine flushes its last batch.
	// NOTE(review): vectorChDone is a plain bool written by another
	// goroutine — a data race under -race; confirm and consider sync/atomic.
	// Nothing waits for logChDone here.
	for !vectorChDone {
		time.Sleep(time.Second)
	}
	makeInputData()
	makeIForest()
	checkData()
	checkScore()
	closeDB()
	log.Println("end")
}

// tg extracts free-form timestamps from raw log lines.
var tg *timegrinder.TimeGrinder

// initTimegrinder creates the shared timegrinder configured to seed on the
// left-most timestamp in a line and to interpret times as local time.
func initTimegrinder() error {
	var err error
	tg, err = timegrinder.New(timegrinder.Config{
		EnableLeftMostSeed: true,
	})
	if err != nil {
		return err
	}
	tg.SetLocalTime()
	return nil
}

// logEnt is one raw log line keyed by its (uniquified) timestamp in ns.
type logEnt struct {
	Key int64
	Val *string
}

// vectorEnt is the feature vector extracted from one log line.
type vectorEnt struct {
	Key int64
	Val []float64
}

// scoreEnt is the anomaly score computed for one vector.
type scoreEnt struct {
	Key   int64
	Score float64
}

// db is the shared badger handle; logCh/vectorCh feed the two writer
// goroutines started by startDB (buffered to decouple parsing from I/O).
// Record types are distinguished by key prefix: "l:", "v:", "s:".
var db *badger.DB
var logCh = make(chan *logEnt, 100000)
var vectorCh = make(chan *vectorEnt, 100000)

// startDB opens the badger store under ./badger (log level raised to
// WARNING to quiet badger's verbose default logger) and launches the
// background writer goroutines.
func startDB() error {
	log.Println("start openDB")
	var err error
	db, err = badger.Open(badger.DefaultOptions("./badger").WithLoggingLevel(badger.WARNING))
	if err != nil {
		return err
	}

	go logChProcess()
	go vectorChProcess()
	return nil
}

// closeDB releases the badger database handle if one was opened.
func closeDB() {
	if db == nil {
		return
	}
	db.Close()
}

// logChDone is set once logCh is closed and the final batch is flushed.
// NOTE(review): plain bool set from a goroutine — racy if ever read from
// another goroutine (currently it is never read).
var logChDone = false

// logChProcess drains logCh, buffering entries and flushing them to the DB
// in batches of ~10k to amortize transaction overhead.
func logChProcess() {
	logList := []*logEnt{}
	for e := range logCh {
		logList = append(logList, e)
		if len(logList) > 10000 {
			saveLogList(logList)
			logList = []*logEnt{}
		}
	}
	// Flush the final partial batch after the channel is closed.
	if len(logList) > 0 {
		saveLogList(logList)
	}
	logChDone = true
}

// saveLogList persists a batch of raw log lines under "l:"-prefixed keys;
// the fixed-width hex timestamp keeps keys in chronological order.
func saveLogList(list []*logEnt) {
	err := db.Update(func(txn *badger.Txn) error {
		for _, e := range list {
			if err := txn.Set([]byte(fmt.Sprintf("l:%016x", e.Key)), []byte(*e.Val)); err != nil {
				log.Println(err)
				return err
			}
		}
		return nil
	})
	// Update can also fail at commit time (after the callback succeeded);
	// previously that error was silently discarded.
	if err != nil {
		log.Println("saveLogList:", err)
	}
}

// vectorChDone signals main that all vectors have been flushed.
// NOTE(review): written here and polled by main without synchronization —
// a data race under -race; confirm and consider sync/atomic.
var vectorChDone = false

// vectorChProcess drains vectorCh, flushing buffered vectors to the DB in
// batches of ~10k to amortize transaction overhead.
func vectorChProcess() {
	vectorList := []*vectorEnt{}
	for e := range vectorCh {
		vectorList = append(vectorList, e)
		if len(vectorList) > 10000 {
			saveVectorList(vectorList)
			vectorList = []*vectorEnt{}
		}
	}
	// Flush the final partial batch after the channel is closed.
	if len(vectorList) > 0 {
		saveVectorList(vectorList)
	}
	vectorChDone = true
}

// saveVectorList persists a batch of feature vectors under "v:"-prefixed
// keys, encoding each vector as a sequence of little-endian float64s.
func saveVectorList(list []*vectorEnt) {
	err := db.Update(func(txn *badger.Txn) error {
		for _, e := range list {
			buf := new(bytes.Buffer)
			for _, v := range e.Val {
				if err := binary.Write(buf, binary.LittleEndian, v); err != nil {
					log.Println(err)
					return err
				}
			}
			if err := txn.Set([]byte(fmt.Sprintf("v:%016x", e.Key)), buf.Bytes()); err != nil {
				return err
			}
		}
		return nil
	})
	// Previously a Set or commit failure vanished silently: the inner error
	// was returned without logging and Update's result was discarded.
	if err != nil {
		log.Println("saveVectorList:", err)
	}
}

// saveScoreList persists anomaly scores under "s:"-prefixed keys using a
// manually managed transaction so it can split the batch when badger's
// per-transaction size limit (ErrTxnTooBig) is hit.
func saveScoreList(list []*scoreEnt) {
	txn := db.NewTransaction(true)
	for _, e := range list {
		key := []byte(fmt.Sprintf("s:%016x", e.Key))
		val := []byte(fmt.Sprintf("%f", e.Score))
		if err := txn.Set(key, val); err != nil {
			if err != badger.ErrTxnTooBig {
				// Unexpected failure: release the pending transaction
				// instead of leaking it (previously it was abandoned).
				log.Println("saveScoreList:", err)
				txn.Discard()
				return
			}
			// Transaction full: commit what we have and retry this entry
			// in a fresh transaction.
			if cerr := txn.Commit(); cerr != nil {
				log.Println("saveScoreList commit:", cerr)
			}
			txn = db.NewTransaction(true)
			if rerr := txn.Set(key, val); rerr != nil {
				// Previously this retry error was discarded with `_ =`.
				log.Println("saveScoreList retry:", rerr)
				txn.Discard()
				return
			}
		}
	}
	if err := txn.Commit(); err != nil {
		log.Println("saveScoreList commit:", err)
	}
}

// loadData streams every file inside ../access.log.zip line by line through
// lineProcess and closes the writer channels once the archive is consumed.
// Any I/O error aborts the process.
func loadData() {
	log.Println("start loadData")
	st := time.Now()
	r, err := zip.OpenReader("../access.log.zip")
	if err != nil {
		log.Fatal(err)
	}
	defer r.Close()
	for _, f := range r.File {
		log.Printf("log file=%s", f.Name)
		file, err := f.Open()
		if err != nil {
			log.Fatal(err)
		}
		// NOTE(review): defer inside a loop — each member stays open until
		// loadData returns; fine for a few files, leaks handles for many.
		defer file.Close()
		scanner := bufio.NewScanner(file)
		for scanner.Scan() {
			l := scanner.Text()
			total++
			if total%1000000 == 0 {
				log.Printf("loadData total=%d valid=%d dur=%s", total, valid, time.Since(st))
			}
			lineProcess(&l)
		}
		if err := scanner.Err(); err != nil {
			log.Fatal(err)
		}
	}
	log.Printf("end loadData total=%d valid=%d dur=%s", total, valid, time.Since(st))
	// Closing the channels lets the writer goroutines flush and finish.
	close(logCh)
	close(vectorCh)
}

// makeInputData samples roughly 200k vectors evenly from the "v:" key range
// into inputData as training data for the isolation forest.
func makeInputData() {
	log.Println("start makeInputdata")
	st := time.Now()
	// Take every skip-th record so the sample spans the whole dataset.
	skip := total / 200000
	if skip < 1 {
		skip = 1
	}
	db.View(func(txn *badger.Txn) error {
		it := txn.NewIterator(badger.DefaultIteratorOptions)
		defer it.Close()
		i := 0
		prefix := []byte("v:")
		for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
			i++
			if i%1000000 == 0 {
				log.Printf("makeInputData i=%d len=%d", i, len(inputData))
			}
			if i%skip != 0 {
				continue
			}
			item := it.Item()
			err := item.Value(func(v []byte) error {
				// Decode the little-endian float64 sequence back into a
				// vector; the read loop ends at io.EOF (or any read error).
				vector := []float64{}
				buf := bytes.NewReader(v)
				for {
					var f float64
					err := binary.Read(buf, binary.LittleEndian, &f)
					if err != nil {
						break
					}
					vector = append(vector, f)
				}
				inputData = append(inputData, vector)
				return nil
			})
			if err != nil {
				return err
			}
		}
		return nil
	})
	log.Printf("end makeInputdata skip=%d input=%d dur=%s", skip, len(inputData), time.Since(st))
}

// iforest is the trained isolation-forest model used by checkData.
var iforest *go_iforest.IForest

// makeIForest trains an isolation forest (1000 trees, subsample size 256)
// on the sampled inputData. Exits the process on failure.
func makeIForest() {
	log.Println("makeIForest start")
	st := time.Now()
	rand.Seed(time.Now().UnixNano())
	var err error
	iforest, err = go_iforest.NewIForest(inputData, 1000, 256)
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("makeIForest end dur=%s", time.Since(st))
}

// checkData scores every stored vector ("v:" keys) with the isolation
// forest and then persists all resulting scores in one batch.
func checkData() {
	log.Println("checkData start")
	st := time.Now()
	var wg sync.WaitGroup
	var mu sync.Mutex
	scoreList := []*scoreEnt{}
	db.View(func(txn *badger.Txn) error {
		it := txn.NewIterator(badger.DefaultIteratorOptions)
		defer it.Close()
		i := 0
		prefix := []byte("v:")
		for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
			i++
			if i%1000000 == 0 {
				log.Printf("checkData i=%d", i)
			}
			item := it.Item()
			k := item.Key()
			// Strip the "v:" prefix and parse the hex timestamp key.
			a := strings.SplitN(string(k), ":", 2)
			if len(a) != 2 {
				continue
			}
			key, err := strconv.ParseInt(a[1], 16, 64)
			if err != nil {
				continue
			}
			err = item.Value(func(v []byte) error {
				// Decode the little-endian float64s stored for this key.
				vector := []float64{}
				buf := bytes.NewReader(v)
				for {
					var f float64
					err := binary.Read(buf, binary.LittleEndian, &f)
					if err != nil {
						break
					}
					vector = append(vector, f)
				}
				// NOTE(review): one goroutine per record with no bound —
				// over millions of rows this can spawn millions of
				// goroutines; consider a fixed-size worker pool.
				wg.Add(1)
				go func(pv *[]float64) {
					defer wg.Done()
					score := iforest.CalculateAnomalyScore(*pv)
					mu.Lock()
					scoreList = append(scoreList, &scoreEnt{
						Key:   key,
						Score: score,
					})
					mu.Unlock()
				}(&vector)
				return nil
			})
			if err != nil {
				return err
			}
		}
		return nil
	})
	wg.Wait()
	log.Printf("checkData end score=%d dur=%s", len(scoreList), time.Since(st))
	st = time.Now()
	saveScoreList(scoreList)
	scoreList = []*scoreEnt{}
	log.Printf("saveScoreList dur=%s", time.Since(st))
}

// checkScore scans the "s:" keys keeping the 20 highest anomaly scores,
// then looks up and prints the matching raw log lines ("l:" keys).
func checkScore() {
	log.Println("checkScore start")
	st := time.Now()
	scoreList := []*scoreEnt{}
	minScore := 0.0
	db.View(func(txn *badger.Txn) error {
		log.Println("checkScore start in view")
		it := txn.NewIterator(badger.DefaultIteratorOptions)
		defer it.Close()
		i := 0
		prefix := []byte("s:")
		for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
			i++
			if i%1000000 == 0 {
				log.Printf("checkScore i=%d", i)
			}
			item := it.Item()
			k := item.Key()
			a := strings.SplitN(string(k), ":", 2)
			if len(a) != 2 {
				// Log and skip a malformed key. The previous log.Panicln
				// made the continue unreachable and aborted the whole run
				// over a single bad record (the bbolt version skips).
				log.Println("checkScore: unexpected key:", string(k))
				continue
			}
			key, err := strconv.ParseInt(a[1], 16, 64)
			if err != nil {
				log.Println(err)
				continue
			}
			item.Value(func(v []byte) error {
				s, err := strconv.ParseFloat(string(v), 64)
				if err != nil {
					log.Println(err)
					return nil
				}
				// Maintain a descending top-20 list; minScore is the
				// current cut-off so most records are rejected cheaply.
				if len(scoreList) < 20 || s > minScore {
					scoreList = append(scoreList, &scoreEnt{Key: key, Score: s})
					sort.Slice(scoreList, func(i, j int) bool {
						return scoreList[i].Score > scoreList[j].Score
					})
					if len(scoreList) > 20 {
						scoreList = scoreList[:len(scoreList)-1]
					}
					minScore = scoreList[len(scoreList)-1].Score
				}
				return nil
			})
		}
		return nil
	})
	// Resolve each top score back to its original log line.
	db.View(func(txn *badger.Txn) error {
		for i, e := range scoreList {
			item, err := txn.Get([]byte(fmt.Sprintf("l:%016x", e.Key)))
			if err != nil {
				log.Printf("%f %016x err=%v", e.Score, e.Key, err)
				continue
			}
			item.Value(func(val []byte) error {
				fmt.Printf("%d %f %s\n", i, e.Score, val)
				return nil
			})
		}
		return nil
	})
	log.Printf("checkScore end %d dur=%s", len(scoreList), time.Since(st))
}

// lineProcess extracts a timestamp and feature vector from one log line and
// queues both for storage. Lines without a parsable timestamp or a usable
// vector are skipped (they count in total but not in valid).
func lineProcess(s *string) {
	ts, ok, err := tg.Extract([]byte(*s))
	if err != nil || !ok {
		return
	}
	v := toVector(s)
	if len(v) > 1 {
		valid++
		// Offset the ns timestamp by the line counter to keep keys unique
		// when many lines share one timestamp.
		// NOTE(review): total%100000 can still collide after 100k lines and
		// skews the key by up to 0.1ms — confirm this is acceptable.
		k := ts.UnixNano() + int64((total % 100000))
		logCh <- &logEnt{
			Key: k,
			Val: s,
		}
		vectorCh <- &vectorEnt{
			Key: k,
			Val: v,
		}
	}
}

// toVector converts one access-log line into a fixed-order feature vector
// for anomaly detection. The order of appends is a contract with the
// trained model and must not change. Returns an empty slice when the line
// has no quoted section or the request has fewer than three fields.
// NOTE(review): a[1] is assumed to be the first double-quoted field (the
// HTTP request line in combined log format) — confirm for other layouts.
func toVector(s *string) []float64 {
	vector := []float64{}
	a := strings.Split(*s, "\"")
	if len(a) < 2 {
		return vector
	}
	f := strings.Fields(a[1])
	if len(f) < 3 {
		return vector
	}
	// Split the request target into path and query string.
	query := ""
	ua := strings.SplitN(f[1], "?", 2)
	path := ua[0]
	if len(ua) > 1 {
		query = ua[1]
	}
	ca := getCharCount(a[1])

	//findex_%
	vector = append(vector, float64(strings.Index(a[1], "%")))

	//findex_:
	vector = append(vector, float64(strings.Index(a[1], ":")))

	// countedCharArray
	for _, c := range []rune{':', '(', ';', '%', '/', '\'', '<', '?', '.', '#'} {
		vector = append(vector, float64(ca[c]))
	}

	//encoded =
	vector = append(vector, float64(strings.Count(a[1], "%3D")+strings.Count(a[1], "%3d")))

	//encoded /
	vector = append(vector, float64(strings.Count(a[1], "%2F")+strings.Count(a[1], "%2f")))

	//encoded \
	vector = append(vector, float64(strings.Count(a[1], "%5C")+strings.Count(a[1], "%5c")))

	//encoded %
	vector = append(vector, float64(strings.Count(a[1], "%25")))

	//%20
	vector = append(vector, float64(strings.Count(a[1], "%20")))

	//POST
	if strings.HasPrefix(a[1], "POST") {
		vector = append(vector, 1)
	} else {
		vector = append(vector, 0)
	}

	//path_nonalnum_count
	vector = append(vector, float64(len(path)-getAlphaNumCount(path)))

	//pvalue_nonalnum_avg
	vector = append(vector, float64(len(query)-getAlphaNumCount(query)))

	//non_alnum_len(max_len)
	vector = append(vector, float64(getMaxNonAlnumLength(a[1])))

	//non_alnum_count
	vector = append(vector, float64(getNonAlnumCount(a[1])))

	// Counts of suspicious path fragments (traversal / double slashes ...).
	for _, p := range []string{"/%", "//", "/.", "..", "=/", "./", "/?"} {
		vector = append(vector, float64(strings.Count(a[1], p)))
	}
	return vector
}

// getCharCount returns a 96-element histogram of rune occurrences,
// tallying only runes in the ASCII range 33..95 at their own code-point
// index; runes outside that range are ignored.
func getCharCount(s string) []int {
	counts := make([]int, 96)
	for _, r := range s {
		if r >= 33 && r <= 95 {
			counts[r]++
		}
	}
	return counts
}

// getAlphaNumCount returns how many runes in s are ASCII letters or digits.
func getAlphaNumCount(s string) int {
	n := 0
	for _, r := range s {
		switch {
		case 'A' <= r && r <= 'Z',
			'a' <= r && r <= 'z',
			'0' <= r && r <= '9':
			n++
		}
	}
	return n
}

// getMaxNonAlnumLength returns the length (in runes) of the longest run of
// consecutive non-alphanumeric runes in s.
func getMaxNonAlnumLength(s string) int {
	best := 0
	run := 0
	for _, r := range s {
		alnum := ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') || ('0' <= r && r <= '9')
		if alnum {
			if run > best {
				best = run
			}
			run = 0
		} else {
			run++
		}
	}
	// A trailing run may extend to the end of the string.
	if run > best {
		best = run
	}
	return best
}

// getNonAlnumCount returns how many runes in s are NOT ASCII letters or
// digits.
func getNonAlnumCount(s string) int {
	count := 0
	for _, r := range s {
		alnum := ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') || ('0' <= r && r <= '9')
		if !alnum {
			count++
		}
	}
	return count
}

です。
方針が決まったので、TWLogAIANに組み込もうと思っています。
助手の猫が天から
「速度に目がくらんで、安定性を失うこともある」
と言ってそうです。

明日に続く


開発のための諸経費(機材、Appleの開発者プログラム、サーバー運用)に利用します。 ソフトウェアのマニュアルをnoteの記事で提供しています。 サポートによりnoteの運営にも貢献できるのでよろしくお願いします。