initial commit
This commit is contained in:
commit
dd48aef782
8 changed files with 1010 additions and 0 deletions
8
.gitignore
vendored
Normal file
8
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
index/*
|
||||
*_templ.go
|
||||
*.db
|
||||
*.json
|
||||
*.html
|
||||
*.png
|
||||
*.jpg
|
||||
sauce
|
||||
21
go.mod
Normal file
21
go.mod
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
module sauce
|
||||
|
||||
go 1.22.4
|
||||
|
||||
require gocv.io/x/gocv v0.37.0
|
||||
|
||||
require (
|
||||
github.com/PuerkitoBio/goquery v1.8.1
|
||||
github.com/a-h/templ v0.2.747
|
||||
gorm.io/driver/sqlite v1.5.6
|
||||
gorm.io/gorm v1.25.11
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/andybalholm/cascadia v1.3.1 // indirect
|
||||
github.com/jinzhu/inflection v1.0.0 // indirect
|
||||
github.com/jinzhu/now v1.1.5 // indirect
|
||||
github.com/mattn/go-sqlite3 v1.14.22 // indirect
|
||||
golang.org/x/net v0.24.0 // indirect
|
||||
golang.org/x/text v0.14.0 // indirect
|
||||
)
|
||||
54
go.sum
Normal file
54
go.sum
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM=
|
||||
github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ=
|
||||
github.com/a-h/templ v0.2.747 h1:D0dQ2lxC3W7Dxl6fxQ/1zZHBQslSkTSvl5FxP/CfdKg=
|
||||
github.com/a-h/templ v0.2.747/go.mod h1:69ObQIbrcuwPCU32ohNaWce3Cb7qM5GMiqN1K+2yop4=
|
||||
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
|
||||
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
|
||||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
|
||||
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
|
||||
github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
|
||||
github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
|
||||
github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
|
||||
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
gocv.io/x/gocv v0.37.0 h1:sISHvnApErjoJodz1Dxb8UAkFdITOB3vXGslbVu6Knk=
|
||||
gocv.io/x/gocv v0.37.0/go.mod h1:lmS802zoQmnNvXETpmGriBqWrENPei2GxYx5KUxJsMA=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
||||
golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w=
|
||||
golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
|
||||
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
gorm.io/driver/sqlite v1.5.6 h1:fO/X46qn5NUEEOZtnjJRWRzZMe8nqJiQ9E+0hi+hKQE=
|
||||
gorm.io/driver/sqlite v1.5.6/go.mod h1:U+J8craQU6Fzkcvu8oLeAQmi50TkwPEhHDEjQZXDah4=
|
||||
gorm.io/gorm v1.25.11 h1:/Wfyg1B/je1hnDx3sMkX+gAlxrlZpn6X0BXRlwXlvHg=
|
||||
gorm.io/gorm v1.25.11/go.mod h1:xh7N7RHfYlNc5EmcI/El95gXusucDrQnHXe0+CgWcLQ=
|
||||
67
index.templ
Normal file
67
index.templ
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
package main
|
||||
|
||||
import "fmt"
|
||||
import "sauce/shared"
|
||||
|
||||
// form renders the landing page body: a heading and a multipart upload form
// that POSTs the selected file to /search in the "search" field.
templ form() {
	<marquee behavior="alternate"><h1>sauce guru</h1></marquee>
	<p>upload your image:</p>
	<form enctype="multipart/form-data" action="/search" method="POST" onsubmit="false">
		<div>
			<input name="search" type="file"/>
		</div>
		<div>
			<button type="submit">find!</button>
		</div>
	</form>
}
|
||||
|
||||
// results renders the search outcome: the uploaded image (originalB64 is used
// verbatim as the <img> src) next to the list of matching pages. The first
// match is drawn larger than the others.
//
// Page URLs are passed through templ.URL so that an unexpected scheme stored
// in the database cannot end up in the href unsanitised.
//
// NOTE(review): img.Url is interpolated into the /src query string without
// query escaping; a URL containing '&' or '#' would be truncated. Escaping it
// needs net/url, which this file does not import yet.
templ results(originalB64 string, images []shared.Page) {
	<div style="display: flex;">
		<div>
			<h2>searched image:</h2>
			<img style="width: 20vw;" src={ originalB64 } crossorigin="anonymous"/>
		</div>
		<hr style="margin: 10px;">
		<div style="margin: 30px;">
			<h2>results:</h2>
			<div style="display: flex; flex-direction: column;">
				for i, img := range images {
					<div style="display: flex">
						<a href={ templ.URL(img.Url) } target="_blank" rel="noopener noreferrer">
							<img
								if i == 0 {
									style="width: 20vw;"
								} else {
									style="width: 10vw;"
								}
								src={ fmt.Sprintf("/src?src=%s", img.Url) }
							/>
						</a>
						<div>
							<p>{ img.Publication.Title }</p>
							<p>{ fmt.Sprintf("page: %d", img.Order) }</p>
						</div>
					</div>
				}
			</div>
		</div>
	</div>
}
|
||||
|
||||
// layout wraps body in the HTML document skeleton shared by every page.
templ layout(body templ.Component) {
	<!DOCTYPE html>
	<html lang="en">
		<head>
			<meta charset="UTF-8">
			<meta name="viewport" content="width=device-width, initial-scale=1">
			<title>sauce guru</title>
			// <link href="css/style.css" rel="stylesheet">
		</head>
		<body>
			@body
		</body>
	</html>
}
|
||||
34
logging.go
Normal file
34
logging.go
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"log"
|
||||
"time"
|
||||
)
|
||||
|
||||
// wrappedWriter decorates an http.ResponseWriter so that the status code sent
// to the client can be read back once the handler has returned.
type wrappedWriter struct {
	http.ResponseWriter
	statusCode  int  // status reported to the client; Logging seeds it with 200
	wroteHeader bool // true once the final status has been committed
}

// WriteHeader forwards the call and remembers the status code that actually
// reaches the client. net/http ignores every WriteHeader after the first final
// one (and any made after the body has started), so only the first final code
// is recorded; interim 1xx responses are forwarded but never recorded.
func (w *wrappedWriter) WriteHeader(statusCode int) {
	w.ResponseWriter.WriteHeader(statusCode)

	interim := statusCode >= 100 && statusCode <= 199 && statusCode != http.StatusSwitchingProtocols
	if w.wroteHeader || interim {
		return
	}

	w.statusCode = statusCode
	w.wroteHeader = true
}

// Write forwards the body bytes. The first Write commits the header
// implicitly, so a later WriteHeader must not change the recorded status.
func (w *wrappedWriter) Write(p []byte) (int, error) {
	w.wroteHeader = true
	return w.ResponseWriter.Write(p)
}

// Unwrap exposes the underlying writer so http.ResponseController can still
// reach optional interfaces (Flusher, Hijacker, ...) hidden by the wrapper.
func (w *wrappedWriter) Unwrap() http.ResponseWriter {
	return w.ResponseWriter
}

// Logging returns a middleware that logs, for every request handled by next,
// the response status, the method, the URL path and the elapsed time.
func Logging(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		start := time.Now()

		wrapped := &wrappedWriter{
			ResponseWriter: w,
			statusCode:     http.StatusOK, // what net/http sends when the handler never sets one
		}

		next.ServeHTTP(wrapped, r)

		log.Println(wrapped.statusCode, r.Method, r.URL.Path, time.Since(start))
	})
}
|
||||
|
||||
|
||||
310
main.go
Normal file
310
main.go
Normal file
|
|
@ -0,0 +1,310 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"cmp"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"image/color"
|
||||
// "sort"
|
||||
|
||||
// "image/png"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
"sauce/shared"
|
||||
"slices"
|
||||
"time"
|
||||
|
||||
"gocv.io/x/gocv"
|
||||
"gocv.io/x/gocv/contrib"
|
||||
"gorm.io/driver/sqlite"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
var (
|
||||
port = ":9393"
|
||||
hashes []contrib.ImgHashBase
|
||||
matcher = gocv.NewBFMatcher()
|
||||
phash = contrib.PHash{}
|
||||
avghash = contrib.AverageHash{}
|
||||
// index = loadIndex()
|
||||
db *gorm.DB
|
||||
)
|
||||
|
||||
type candidate struct {
|
||||
page shared.Page
|
||||
phash, avghash float64
|
||||
matches []gocv.DMatch
|
||||
averageDistance float64
|
||||
publication shared.Publication
|
||||
}
|
||||
|
||||
func loadImageFromDisk(path string) (shared.Page, error) {
|
||||
img := gocv.IMRead(path, gocv.IMReadColor)
|
||||
if img.Empty() {
|
||||
log.Panic("cannot read image", path)
|
||||
}
|
||||
|
||||
return shared.LoadImage(path, img)
|
||||
}
|
||||
|
||||
// func (e *shared.Page) hashImage() {
|
||||
// phash.Compute(e.image, &e.phash)
|
||||
// if e.phash.Empty() {
|
||||
// panic("empty")
|
||||
// }
|
||||
// avghash.Compute(e.image, &e.avghash)
|
||||
// if e.phash.Empty() {
|
||||
// panic("empty")
|
||||
// }
|
||||
// }
|
||||
|
||||
func newCandidate(e shared.Page, p shared.Publication) candidate {
|
||||
return candidate{
|
||||
page: e,
|
||||
publication: p,
|
||||
// phash: phash.Compare(e.phash, b.phash) / 64,
|
||||
// avghash: avghash.Compare(e.avghash, b.avghash) / 64,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *candidate) tryMatch(search shared.Page) {
|
||||
c.matches = matcher.Match(search.Descriptors, c.page.Descriptors)
|
||||
|
||||
slices.SortFunc(c.matches, func(a, b gocv.DMatch) int {
|
||||
return cmp.Compare(a.Distance, b.Distance)
|
||||
})
|
||||
|
||||
var average float64
|
||||
for _, m := range c.matches {
|
||||
average += m.Distance
|
||||
}
|
||||
|
||||
c.averageDistance = average / float64(len(c.matches))
|
||||
}
|
||||
|
||||
// todo: paralelizar
|
||||
func loadIndex() []shared.Publication {
|
||||
|
||||
now := time.Now()
|
||||
log.Println("loading index...")
|
||||
|
||||
const indexFolder = "index"
|
||||
|
||||
indexDir, err := os.ReadDir(indexFolder)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
var index []shared.Publication
|
||||
|
||||
for _, i := range indexDir {
|
||||
if !i.Type().IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
var pages []shared.Page
|
||||
|
||||
cachePath := path.Join(indexFolder, i.Name(), "cache")
|
||||
pagesPath := path.Join(indexFolder, i.Name(), "pages")
|
||||
|
||||
_, err := os.Stat(cachePath) // validade cache
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
|
||||
pagesFolder, err := os.ReadDir(pagesPath)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
|
||||
err = os.Mkdir(cachePath, os.ModePerm)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
for _, p := range pagesFolder {
|
||||
|
||||
e, err := loadImageFromDisk(path.Join(pagesPath, p.Name()))
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
|
||||
e.SaveORBtoDisk(path.Join(cachePath, p.Name()))
|
||||
|
||||
pages = append(pages, e)
|
||||
|
||||
// img, err := e.Descriptors.ToImage()
|
||||
// if err != nil {
|
||||
// panic(err)
|
||||
// }
|
||||
//
|
||||
// cache, err := os.Create(path.Join(cachePath, p.Name()))
|
||||
// if err != nil {
|
||||
// panic(err)
|
||||
// }
|
||||
// err = png.Encode(cache, img)
|
||||
|
||||
}
|
||||
|
||||
} else if err != nil {
|
||||
panic(err)
|
||||
|
||||
} else {
|
||||
|
||||
cacheDir, err := os.ReadDir(cachePath)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
for _, c := range cacheDir {
|
||||
des := gocv.IMRead(path.Join(cachePath, c.Name()), gocv.IMReadAnyColor)
|
||||
|
||||
pages = append(pages, shared.Page{
|
||||
Descriptors: des,
|
||||
Path: path.Join(pagesPath, c.Name()),
|
||||
Name: c.Name(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
index = append(index, shared.Publication{
|
||||
Title: i.Name(),
|
||||
Pages: pages,
|
||||
})
|
||||
}
|
||||
|
||||
log.Println("index loaded in", time.Since(now))
|
||||
return index
|
||||
}
|
||||
|
||||
func drawMatches(a, b shared.Page, matches []gocv.DMatch, path string) {
|
||||
output := gocv.NewMat()
|
||||
gocv.DrawMatches(
|
||||
a.Image, a.Keypoints,
|
||||
b.Image, b.Keypoints,
|
||||
matches[:20],
|
||||
&output,
|
||||
color.RGBA{R: 255}, color.RGBA{R: 255}, nil,
|
||||
gocv.NotDrawSinglePoints,
|
||||
)
|
||||
gocv.IMWrite(path, output)
|
||||
// fmt.Println()
|
||||
// img2 := gocv.NewMat()
|
||||
// gocv.DrawKeyPoints(search.image, kp, &img2, color.RGBA{R: 255}, 0)
|
||||
// gocv.IMWrite("matches.png", img3)
|
||||
}
|
||||
|
||||
func handleSearch(w http.ResponseWriter, req *http.Request) {
|
||||
|
||||
fileReader, _, err := req.FormFile("search")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
file, err := io.ReadAll(fileReader)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
search, err := shared.LoadImageFromBytes(file)
|
||||
|
||||
var candidates []candidate
|
||||
|
||||
rows, err := db.Debug().Model(&shared.Page{}).Preload("publications").Rows()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
for rows.Next() {
|
||||
|
||||
var page shared.Page
|
||||
db.ScanRows(rows, &page)
|
||||
|
||||
page.Descriptors, err = gocv.NewMatFromBytes(500, 32, gocv.MatTypeCV8U, page.DescriptorBlob)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
c := newCandidate(page, shared.Publication{})
|
||||
|
||||
c.tryMatch(search)
|
||||
|
||||
candidates = append(candidates, c)
|
||||
// }
|
||||
}
|
||||
|
||||
slices.SortFunc(candidates, func(a, b candidate) int {
|
||||
return cmp.Compare(a.averageDistance, b.averageDistance)
|
||||
})
|
||||
|
||||
var pages []shared.Page
|
||||
|
||||
for _, c := range candidates[:8] {
|
||||
var pub shared.Publication
|
||||
err = db.Where("id = ?", c.page.UserID).Find(&pub).Error
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
fmt.Println("pub:", pub)
|
||||
c.page.Publication = pub
|
||||
pages = append(pages, c.page)
|
||||
}
|
||||
|
||||
layout(results(search.B64, pages)).Render(context.Background(), w)
|
||||
}
|
||||
|
||||
func main() {
|
||||
|
||||
var err error
|
||||
db, err = gorm.Open(sqlite.Open("test.db"), &gorm.Config{})
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
home := bytes.Buffer{}
|
||||
layout(form()).Render(context.Background(), &home)
|
||||
|
||||
router := http.NewServeMux()
|
||||
|
||||
router.HandleFunc("GET /", func(w http.ResponseWriter, req *http.Request) {
|
||||
w.Write(home.Bytes())
|
||||
})
|
||||
|
||||
router.HandleFunc("POST /search", handleSearch)
|
||||
|
||||
router.HandleFunc("GET /src", func(w http.ResponseWriter, r *http.Request) {
|
||||
url := r.FormValue("src")
|
||||
if url == "" {
|
||||
panic(url)
|
||||
}
|
||||
|
||||
resp1, err := http.Get(url)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if resp1.StatusCode != 200 {
|
||||
panic(resp1.Status)
|
||||
}
|
||||
defer resp1.Body.Close()
|
||||
|
||||
_, err = io.Copy(w, resp1.Body)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
})
|
||||
|
||||
router.Handle("GET /index/", http.StripPrefix("/index/", http.FileServer(http.Dir("index"))))
|
||||
|
||||
server := http.Server{
|
||||
Addr: port,
|
||||
Handler: Logging(router),
|
||||
}
|
||||
|
||||
fmt.Println("http://localhost" + port)
|
||||
log.Fatal(server.ListenAndServe())
|
||||
}
|
||||
359
scraper/scraper.go
Normal file
359
scraper/scraper.go
Normal file
|
|
@ -0,0 +1,359 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"sauce/shared"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
// "sync"
|
||||
"time"
|
||||
|
||||
"gorm.io/driver/sqlite"
|
||||
"gorm.io/gorm"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/mattn/go-sqlite3" // ???
|
||||
)
|
||||
|
||||
var categoryMap = map[string]shared.Category {
|
||||
"doujinshi": shared.Doujinshi,
|
||||
"manga": shared.Doujinshi,
|
||||
}
|
||||
|
||||
var languageMap = map[string]shared.Language {
|
||||
"japanese": shared.Jp,
|
||||
"chinese": shared.Cn,
|
||||
"english": shared.En,
|
||||
}
|
||||
|
||||
// extractor is the behaviour expected from a site scraper.
//
// NOTE(review): nhentai.extract takes a single string, not a variadic list,
// so nhentai does not satisfy this interface as declared.
type extractor interface {
	// extract scrapes the publications found at the given URLs.
	extract(urls ...string) error
	// discover collects publication URLs reachable from url.
	discover(url string) error
}
|
||||
|
||||
type nhentai struct {
|
||||
db *gorm.DB
|
||||
}
|
||||
|
||||
// Queue is one pending gallery URL waiting to be extracted. The unique
// constraint on Url keeps the same gallery from being queued twice.
type Queue struct {
	ID uint `gorm:"primarykey"`

	CreatedAt time.Time
	UpdatedAt time.Time

	Url string `gorm:"unique"`
}
|
||||
|
||||
// extractGalleryUrl matches a thumbnail-host image URL and captures the
// gallery id (group 1) and the file extension (group 2).
var extractGalleryUrl = regexp.MustCompile(`https?://t\d?\.nhentai\.net/galleries/(\d+)/.+\.(\w+)`)
|
||||
|
||||
func (n nhentai) discover(url string) (error) {
|
||||
|
||||
root := "https://nhentai.net"
|
||||
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Load the HTML document
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
doc.Find(".cover").Each(func(i int, s *goquery.Selection) {
|
||||
href, ok := s.Attr("href")
|
||||
if !ok {
|
||||
panic(ok)
|
||||
}
|
||||
url := root + href
|
||||
|
||||
fmt.Println(url)
|
||||
|
||||
var source shared.Publication
|
||||
err = n.db.Where("source = ?", url).First(&source).Error
|
||||
if err != nil && err != gorm.ErrRecordNotFound {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
err = n.db.Create(&Queue{ Url: url}).Error
|
||||
if _, ok := err.(sqlite3.Error); ok { // fixme
|
||||
return
|
||||
|
||||
} else if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
})
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
func (n nhentai) download(url, name string) (shared.Page, error) {
|
||||
|
||||
resp1, err := http.Get(url)
|
||||
if err != nil {
|
||||
return shared.Page{}, err
|
||||
}
|
||||
if resp1.StatusCode >= 400 && resp1.StatusCode < 500 {
|
||||
return shared.Page{}, fmt.Errorf("status code: %s", resp1.Status)
|
||||
}
|
||||
defer resp1.Body.Close()
|
||||
|
||||
blob, err := io.ReadAll(resp1.Body)
|
||||
if err != nil {
|
||||
return shared.Page{}, err
|
||||
}
|
||||
|
||||
img, err := shared.LoadImageFromBytes(blob)
|
||||
if err != nil {
|
||||
return shared.Page{}, err
|
||||
}
|
||||
img.Url = url
|
||||
img.Name = name
|
||||
|
||||
// publication.Pages = append(publication.Pages, img)
|
||||
|
||||
return img, nil
|
||||
}
|
||||
|
||||
func (n nhentai) extract(url string) error {
|
||||
|
||||
fmt.Println("extracting:", url)
|
||||
|
||||
// url := root + strings.Join(path,
|
||||
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Load the HTML document
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
|
||||
publication := shared.Publication {
|
||||
Title: doc.Find(".title .pretty").Text(),
|
||||
Source: url,
|
||||
Host: "nhentai",
|
||||
}
|
||||
|
||||
if publication.Title == "" {
|
||||
return fmt.Errorf("missing title")
|
||||
}
|
||||
|
||||
coverUrl, ok := doc.Find("#cover > a > img").Attr("data-src")
|
||||
if !ok {
|
||||
panic(coverUrl)
|
||||
}
|
||||
|
||||
var pagesTotal int
|
||||
// galleryUrl := extractGalleryUrl.FindStringSubmatch(coverUrl)
|
||||
// fmt.Println(galleryUrl, coverUrl)
|
||||
// galleryId := galleryUrl[1]
|
||||
// galleryExt := galleryUrl[2]
|
||||
|
||||
doc.Find(".tag-container").Each(func(i int, s *goquery.Selection) {
|
||||
tag := strings.TrimSpace(s.Contents().First().Text())
|
||||
|
||||
switch tag {
|
||||
case "Artists:":
|
||||
field := s.Find(".name").First().Text()
|
||||
artists := strings.Split(field, " | ")
|
||||
|
||||
var authors []shared.Author
|
||||
for _, name := range artists {
|
||||
var author shared.Author
|
||||
|
||||
err := n.db.Where("name = ?", name).First(&author).Error
|
||||
if err == gorm.ErrRecordNotFound {
|
||||
author = shared.Author{ Name: name, }
|
||||
} else if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
authors = append(authors, author)
|
||||
}
|
||||
|
||||
publication.Authors = append(publication.Authors, authors...)
|
||||
case "Tags:":
|
||||
s.Find("a").Each(func(i int, s *goquery.Selection) {
|
||||
name := s.Children().First().Text()
|
||||
var tag shared.Tag
|
||||
|
||||
err := n.db.Where("name = ?", name).First(&tag).Error
|
||||
if err == gorm.ErrRecordNotFound {
|
||||
tag = shared.Tag{
|
||||
Name: name,
|
||||
}
|
||||
} else if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
publication.Tags = append(publication.Tags, tag)
|
||||
})
|
||||
case "Categories:":
|
||||
s.Find("a").Each(func(i int, s *goquery.Selection) {
|
||||
|
||||
category, ok := categoryMap[s.Children().First().Text()]
|
||||
|
||||
if !ok {
|
||||
panic(category)
|
||||
}
|
||||
publication.Category = &category
|
||||
|
||||
})
|
||||
case "Languages:":
|
||||
s.Find("a").Each(func(i int, s *goquery.Selection) {
|
||||
|
||||
lang, ok := languageMap[s.Children().First().Text()]
|
||||
|
||||
if !ok {
|
||||
lang = shared.None
|
||||
}
|
||||
|
||||
publication.Language = &lang
|
||||
})
|
||||
case "Pages:":
|
||||
s.Find("a .name").Each(func(i int, s *goquery.Selection) {
|
||||
pagesTotal, err = strconv.Atoi(s.Text())
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}) }
|
||||
})
|
||||
|
||||
// var wg sync.WaitGroup
|
||||
// var mut sync.Mutex
|
||||
// errors := make(chan error, pagesTotal)
|
||||
|
||||
for pageNumber := range pagesTotal {
|
||||
|
||||
url := fmt.Sprintf("%s%d", url, pageNumber+1)
|
||||
// fmt.Println("source:", url)
|
||||
resp2, err := http.Get(url)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if resp2.StatusCode != 200 {
|
||||
panic(resp2.Status)
|
||||
}
|
||||
doc2, err := goquery.NewDocumentFromReader(resp2.Body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
src, ok := doc2.Find("#image-container > a > img").Attr("src")
|
||||
if !ok {
|
||||
panic(src)
|
||||
}
|
||||
// url := fmt.Sprintf("https://i.nhentai.net/galleries/%s/%d.%s", galleryId, pageNumber+1, galleryExt)
|
||||
outputName := fmt.Sprintf("%d.jpg", pageNumber+1)
|
||||
|
||||
fmt.Println("requested:", src)
|
||||
page, err := n.download(src, outputName)
|
||||
if err != nil {
|
||||
if errors.Is(err, shared.BlankImage) {
|
||||
fmt.Println("blank image:", src)
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
fmt.Println("finished:", src)
|
||||
page.Order = pageNumber
|
||||
|
||||
publication.Pages = append(publication.Pages, page)
|
||||
|
||||
// time.Sleep(time.Second) // good guying
|
||||
// wg.Add(1)
|
||||
|
||||
// go func() {
|
||||
// defer wg.Done()
|
||||
//
|
||||
// fmt.Println("requested:", url)
|
||||
// page, err := n.download(url, outputName)
|
||||
// if err != nil {
|
||||
// errors <- err
|
||||
// return
|
||||
// }
|
||||
// page.Order = order
|
||||
//
|
||||
// mut.Lock()
|
||||
// publication.Pages = append(publication.Pages, page)
|
||||
// mut.Unlock()
|
||||
//
|
||||
// fmt.Println("finished:", url)
|
||||
// errors <- nil
|
||||
// }()
|
||||
}
|
||||
|
||||
// wg.Wait()
|
||||
// close(errors)
|
||||
|
||||
// for err := range errors {
|
||||
// if err == shared.BlankImage {
|
||||
// continue
|
||||
// }
|
||||
// if err != nil {
|
||||
// return err
|
||||
// }
|
||||
// }
|
||||
|
||||
err = n.db.Debug().Create(&publication).Error
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
db, err := gorm.Open(sqlite.Open("test.db"), &gorm.Config{})
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
db.AutoMigrate(&Queue{})
|
||||
db.AutoMigrate(&shared.Author{}, &shared.Page{}, &shared.Tag{}, &shared.Publication{})
|
||||
|
||||
// nhentai{db: db}.discover("https://nhentai.net/artist/mda-starou/")
|
||||
// return
|
||||
|
||||
for {
|
||||
|
||||
var dequeue Queue
|
||||
tx := db.Begin()
|
||||
|
||||
nhentai := nhentai{ db: tx }
|
||||
|
||||
err = tx.First(&dequeue).Error
|
||||
if err == gorm.ErrRecordNotFound {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
err = nhentai.extract(dequeue.Url)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
err = tx.Delete(&dequeue).Error
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
tx.Commit()
|
||||
}
|
||||
}
|
||||
157
shared/shared.go
Normal file
157
shared/shared.go
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
package shared
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"image/png"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"gocv.io/x/gocv"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
type Page struct {
|
||||
gorm.Model
|
||||
Name, Path, Url string
|
||||
DescriptorBlob []byte
|
||||
UserID uint
|
||||
Publication Publication `gorm:"foreignKey:UserID"`
|
||||
Order int
|
||||
|
||||
Image gocv.Mat `gorm:"-:all"`
|
||||
Phash, Avghash gocv.Mat `gorm:"-:all"`
|
||||
Keypoints []gocv.KeyPoint `gorm:"-:all"`
|
||||
Descriptors gocv.Mat `gorm:"-:all"`
|
||||
B64 string `gorm:"-:all"`
|
||||
// b64, mime string
|
||||
}
|
||||
|
||||
var (
|
||||
BlankImage = fmt.Errorf("blank image")
|
||||
)
|
||||
var (
|
||||
nomask gocv.Mat = gocv.NewMat()
|
||||
orb gocv.ORB = gocv.NewORB()
|
||||
)
|
||||
|
||||
// Language is the language a publication is written in.
type Language string

// Languages known to the scraper. None is used when the label is unknown.
const (
	Jp   Language = "japanese"
	Cn   Language = "chinese"
	En   Language = "english"
	None Language = "none"
)

// Category is the kind of a publication.
type Category string

// Doujinshi is the only category defined so far.
const Doujinshi Category = "doujinshi"
|
||||
|
||||
type Tag struct {
|
||||
gorm.Model
|
||||
Name string `gorm:"unique"`
|
||||
}
|
||||
|
||||
type Author struct {
|
||||
gorm.Model
|
||||
Name string `gorm:"unique"`
|
||||
}
|
||||
|
||||
type Publication struct {
|
||||
gorm.Model
|
||||
Title string
|
||||
Source string // url original
|
||||
Host string
|
||||
Favorites int
|
||||
Authors []Author `gorm:"many2many:publication_authors"`
|
||||
Language *Language
|
||||
Category *Category
|
||||
Tags []Tag `gorm:"many2many:publication_tags"`
|
||||
Pages []Page `gorm:"foreignKey:UserID"`
|
||||
}
|
||||
|
||||
func (p Publication) SaveInfoToFile(dir string) {
|
||||
bytes, err := json.Marshal(p)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
file, err := os.Create(filepath.Join(dir, "info.json"))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
file.Write(bytes)
|
||||
}
|
||||
|
||||
func LoadImageFromBytes(bytes []byte) (Page, error) {
|
||||
|
||||
fileMat, err := gocv.IMDecode(bytes, gocv.IMReadColor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
img, err := LoadImage("", fileMat)
|
||||
if err != nil {
|
||||
return Page{}, err
|
||||
}
|
||||
|
||||
img.B64 = fmt.Sprintf(
|
||||
"data:%s;base64,%s",
|
||||
http.DetectContentType(bytes),
|
||||
base64.StdEncoding.EncodeToString(bytes),
|
||||
)
|
||||
|
||||
return img, nil
|
||||
}
|
||||
|
||||
func LoadImage(path string, img gocv.Mat) (Page, error) {
|
||||
// img := gocv.IMRead(path, gocv.IMReadColor)
|
||||
// if img.Empty() {
|
||||
// log.Panic("cannot read image", path)
|
||||
// }
|
||||
keypoints, descriptors := orb.DetectAndCompute(img, nomask)
|
||||
|
||||
if len(keypoints) == 0 {
|
||||
return Page{}, BlankImage
|
||||
}
|
||||
|
||||
e := Page{
|
||||
Image: img,
|
||||
Name: filepath.Base(path),
|
||||
Path: "/" + path,
|
||||
// Phash: gocv.NewMat(),
|
||||
// Avghash: gocv.NewMat(),
|
||||
Keypoints: keypoints,
|
||||
Descriptors: descriptors,
|
||||
DescriptorBlob: descriptors.ToBytes(),
|
||||
// b64: base64.StdEncoding.EncodeToString(img.ToBytes()),
|
||||
// mime: http.DetectContentType(img.ToBytes()),
|
||||
}
|
||||
|
||||
// e.hashImage()
|
||||
|
||||
return e, nil
|
||||
}
|
||||
|
||||
func (e Page) SaveORBtoDisk(path string) {
|
||||
img, err := e.Descriptors.ToImage()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
cache, err := os.Create(path)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
defer cache.Close()
|
||||
err = png.Encode(cache, img)
|
||||
}
|
||||
|
||||
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue