commit dd48aef782944a6f48b654b48842bd9189839dd5 Author: silva guimaraes Date: Sat Aug 23 19:48:04 2025 -0300 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c4958fe --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +index/* +*_templ.go +*.db +*.json +*.html +*.png +*.jpg +sauce diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..428f773 --- /dev/null +++ b/go.mod @@ -0,0 +1,21 @@ +module sauce + +go 1.22.4 + +require gocv.io/x/gocv v0.37.0 + +require ( + github.com/PuerkitoBio/goquery v1.8.1 + github.com/a-h/templ v0.2.747 + gorm.io/driver/sqlite v1.5.6 + gorm.io/gorm v1.25.11 +) + +require ( + github.com/andybalholm/cascadia v1.3.1 // indirect + github.com/jinzhu/inflection v1.0.0 // indirect + github.com/jinzhu/now v1.1.5 // indirect + github.com/mattn/go-sqlite3 v1.14.22 // indirect + golang.org/x/net v0.24.0 // indirect + golang.org/x/text v0.14.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..0c348cc --- /dev/null +++ b/go.sum @@ -0,0 +1,54 @@ +github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM= +github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ= +github.com/a-h/templ v0.2.747 h1:D0dQ2lxC3W7Dxl6fxQ/1zZHBQslSkTSvl5FxP/CfdKg= +github.com/a-h/templ v0.2.747/go.mod h1:69ObQIbrcuwPCU32ohNaWce3Cb7qM5GMiqN1K+2yop4= +github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= +github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= +github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= +github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= 
+github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= +github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= +github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +gocv.io/x/gocv v0.37.0 h1:sISHvnApErjoJodz1Dxb8UAkFdITOB3vXGslbVu6Knk= +gocv.io/x/gocv v0.37.0/go.mod h1:lmS802zoQmnNvXETpmGriBqWrENPei2GxYx5KUxJsMA= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= +golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys 
v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gorm.io/driver/sqlite v1.5.6 h1:fO/X46qn5NUEEOZtnjJRWRzZMe8nqJiQ9E+0hi+hKQE= +gorm.io/driver/sqlite v1.5.6/go.mod h1:U+J8craQU6Fzkcvu8oLeAQmi50TkwPEhHDEjQZXDah4= +gorm.io/gorm v1.25.11 
h1:/Wfyg1B/je1hnDx3sMkX+gAlxrlZpn6X0BXRlwXlvHg= +gorm.io/gorm v1.25.11/go.mod h1:xh7N7RHfYlNc5EmcI/El95gXusucDrQnHXe0+CgWcLQ= diff --git a/index.templ b/index.templ new file mode 100644 index 0000000..d53549c --- /dev/null +++ b/index.templ @@ -0,0 +1,67 @@ +package main + +import "fmt" +import "sauce/shared" + +templ form() { +

sauce guru

+

upload your image:

+
+
+ +
+
+ +
+
+} + +templ results(originalB64 string, images []shared.Page) { +
+
+

searched image:

+ +
+
+
+

results:

+
+ for i, img := range images { +
+ + + +
+

{img.Publication.Title}

+

{fmt.Sprintf("page: %d", img.Order)}

+
+
+ } +
+
+
+}
+
+templ layout(body templ.Component) {
+
+
+
+
+
+ sauce guru
+ //
+
+
+ @body
+
+
+}
diff --git a/logging.go b/logging.go
new file mode 100644
index 0000000..b34696a
--- /dev/null
+++ b/logging.go
@@ -0,0 +1,34 @@
+package main
+
+import (
+	"log"
+	"net/http"
+	"time"
+)
+
+// wrappedWriter decorates an http.ResponseWriter so that the status code
+// written by the wrapped handler can be read back after it returns.
+type wrappedWriter struct {
+	http.ResponseWriter
+	statusCode int
+}
+
+// WriteHeader forwards the status code to the underlying writer and
+// remembers it for the access log.
+func (w *wrappedWriter) WriteHeader(statusCode int) {
+	w.ResponseWriter.WriteHeader(statusCode)
+	w.statusCode = statusCode
+}
+
+// Logging returns a handler that calls next and then logs one line per
+// request: status code, method, URL path and elapsed time.
+func Logging(next http.Handler) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		start := time.Now()
+
+		// Handlers that never call WriteHeader implicitly answer 200, so
+		// that is the default recorded status.
+		wrapped := &wrappedWriter{
+			ResponseWriter: w,
+			statusCode:     http.StatusOK,
+		}
+
+		next.ServeHTTP(wrapped, r)
+
+		log.Println(wrapped.statusCode, r.Method, r.URL.Path, time.Since(start))
+	})
+}
+
+
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..dd1643c
--- /dev/null
+++ b/main.go
@@ -0,0 +1,310 @@
+package main
+
+import (
+	"bytes"
+	"cmp"
+	"context"
+	"errors"
+	"fmt"
+	"image/color"
+	// "sort"
+
+	// "image/png"
+	"io"
+	"log"
+	"net/http"
+	"os"
+	"path"
+	"sauce/shared"
+	"slices"
+	"time"
+
+	"gocv.io/x/gocv"
+	"gocv.io/x/gocv/contrib"
+	"gorm.io/driver/sqlite"
+	"gorm.io/gorm"
+)
+
+var (
+	port   = ":9393"
+	hashes []contrib.ImgHashBase
+	// The descriptors being matched come from ORB (see shared.LoadImage).
+	// ORB descriptors are binary strings, for which OpenCV recommends the
+	// Hamming norm instead of the brute-force matcher's default L2 norm.
+	matcher = gocv.NewBFMatcherWithParams(gocv.NormHamming, false)
+	phash   = contrib.PHash{}
+	avghash = contrib.AverageHash{}
+	// index = loadIndex()
+	db *gorm.DB
+)
+
+// candidate pairs an indexed page with the outcome of matching it against
+// the searched image.
+type candidate struct {
+	page            shared.Page
+	phash, avghash  float64
+	matches         []gocv.DMatch
+	averageDistance float64
+	publication     shared.Publication
+}
+
+// loadImageFromDisk reads the image at path and runs it through
+// shared.LoadImage. It returns an error when the file cannot be read as an
+// image, or whatever error shared.LoadImage reports.
+func loadImageFromDisk(path string) (shared.Page, error) {
+	img := gocv.IMRead(path, gocv.IMReadColor)
+	if img.Empty() {
+		// One unreadable file must not take the whole process down; the
+		// caller decides whether to skip it.
+		img.Close()
+		return shared.Page{}, fmt.Errorf("cannot read image %q", path)
+	}
+
+	page, err := shared.LoadImage(path, img)
+	if err != nil {
+		// The failed Page does not keep the Mat, so release it here.
+		img.Close()
+		return shared.Page{}, err
+	}
+
+	return page, nil
+}
+
+// func (e *shared.Page) hashImage() {
+// 	phash.Compute(e.image, &e.phash)
+// 	if e.phash.Empty() {
+// 		panic("empty")
+// 	}
+// 	avghash.Compute(e.image, &e.avghash)
+// 	if
e.phash.Empty() {
+// 		panic("empty")
+// 	}
+// }
+
+// noMatchDistance is the average distance given to a candidate that produced
+// no matches at all, so that it sorts after every candidate that did match.
+const noMatchDistance = 1e300
+
+// descriptorCols is the number of columns of a stored descriptor matrix; it
+// is the width this file has always used when rebuilding the Mat from
+// Page.DescriptorBlob.
+const descriptorCols = 32
+
+// maxResults caps how many of the best candidates are rendered.
+const maxResults = 8
+
+// newCandidate wraps page e of publication p in a candidate that has not
+// been matched yet.
+func newCandidate(e shared.Page, p shared.Publication) candidate {
+	return candidate{
+		page:        e,
+		publication: p,
+		// phash: phash.Compare(e.phash, b.phash) / 64,
+		// avghash: avghash.Compare(e.avghash, b.avghash) / 64,
+	}
+}
+
+// tryMatch matches the descriptors of search against the candidate's page,
+// stores the matches sorted by ascending distance and records their mean
+// distance in averageDistance.
+func (c *candidate) tryMatch(search shared.Page) {
+	c.matches = matcher.Match(search.Descriptors, c.page.Descriptors)
+
+	if len(c.matches) == 0 {
+		// Dividing by zero would yield NaN, and cmp.Compare orders NaN
+		// before every other value, which would rank this candidate first.
+		c.averageDistance = noMatchDistance
+		return
+	}
+
+	slices.SortFunc(c.matches, func(a, b gocv.DMatch) int {
+		return cmp.Compare(a.Distance, b.Distance)
+	})
+
+	var total float64
+	for _, m := range c.matches {
+		total += m.Distance
+	}
+
+	c.averageDistance = total / float64(len(c.matches))
+}
+
+// loadIndex walks the "index" folder and returns one Publication per
+// sub-directory. Descriptors are computed from "<dir>/pages" the first time
+// and stored in "<dir>/cache"; later runs read the cache instead.
+//
+// todo: parallelize
+func loadIndex() []shared.Publication {
+
+	now := time.Now()
+	log.Println("loading index...")
+
+	const indexFolder = "index"
+
+	indexDir, err := os.ReadDir(indexFolder)
+	if err != nil {
+		panic(err)
+	}
+
+	var index []shared.Publication
+
+	for _, i := range indexDir {
+		if !i.Type().IsDir() {
+			continue
+		}
+
+		var pages []shared.Page
+
+		cachePath := path.Join(indexFolder, i.Name(), "cache")
+		pagesPath := path.Join(indexFolder, i.Name(), "pages")
+
+		_, err := os.Stat(cachePath) // validate cache
+		switch {
+		case errors.Is(err, os.ErrNotExist):
+			// No cache yet: compute descriptors from the page images.
+			pagesFolder, err := os.ReadDir(pagesPath)
+			if err != nil {
+				log.Println(err)
+				continue
+			}
+
+			err = os.Mkdir(cachePath, os.ModePerm)
+			if err != nil {
+				panic(err)
+			}
+
+			for _, p := range pagesFolder {
+
+				e, err := loadImageFromDisk(path.Join(pagesPath, p.Name()))
+				if err != nil {
+					log.Println(err)
+					continue
+				}
+
+				e.SaveORBtoDisk(path.Join(cachePath, p.Name()))
+
+				pages = append(pages, e)
+
+				// img, err := e.Descriptors.ToImage()
+				// if err != nil {
+				// 	panic(err)
+				// }
+				//
+				// cache, err := os.Create(path.Join(cachePath, p.Name()))
+				// if err != nil {
+				// 	panic(err)
+				// }
+				// err = png.Encode(cache, img)
+
+			}
+
+		case err != nil:
+			panic(err)
+
+		default:
+			// Cache present: every file in it is a stored descriptor matrix.
+			cacheDir, err := os.ReadDir(cachePath)
+			if err != nil {
+				panic(err)
+			}
+
+			for _, c := range cacheDir {
+				des := gocv.IMRead(path.Join(cachePath, c.Name()), gocv.IMReadAnyColor)
+				if des.Empty() {
+					// Unreadable cache entry; it could never match anything.
+					log.Println("skipping unreadable cache entry", path.Join(cachePath, c.Name()))
+					des.Close()
+					continue
+				}
+
+				pages = append(pages, shared.Page{
+					Descriptors: des,
+					Path:        path.Join(pagesPath, c.Name()),
+					Name:        c.Name(),
+				})
+			}
+		}
+
+		index = append(index, shared.Publication{
+			Title: i.Name(),
+			Pages: pages,
+		})
+	}
+
+	log.Println("index loaded in", time.Since(now))
+	return index
+}
+
+// drawMatches renders up to the first 20 matches between pages a and b side
+// by side and writes the picture to path.
+func drawMatches(a, b shared.Page, matches []gocv.DMatch, path string) {
+	output := gocv.NewMat()
+	defer output.Close()
+
+	// Slicing a fixed [:20] panics when fewer matches were found.
+	shown := matches[:min(20, len(matches))]
+
+	gocv.DrawMatches(
+		a.Image, a.Keypoints,
+		b.Image, b.Keypoints,
+		shown,
+		&output,
+		color.RGBA{R: 255}, color.RGBA{R: 255}, nil,
+		gocv.NotDrawSinglePoints,
+	)
+	gocv.IMWrite(path, output)
+	// fmt.Println()
+	// img2 := gocv.NewMat()
+	// gocv.DrawKeyPoints(search.image, kp, &img2, color.RGBA{R: 255}, 0)
+	// gocv.IMWrite("matches.png", img3)
+}
+
+// serverError logs err together with what was being done and answers the
+// request with a generic 500.
+func serverError(w http.ResponseWriter, what string, err error) {
+	log.Println("search:", what+":", err)
+	http.Error(w, "internal server error", http.StatusInternalServerError)
+}
+
+// handleSearch handles POST /search. It reads the uploaded "search" image,
+// matches it against every stored page and renders the closest pages (at
+// most maxResults) with their publications.
+func handleSearch(w http.ResponseWriter, req *http.Request) {
+
+	fileReader, _, err := req.FormFile("search")
+	if err != nil {
+		http.Error(w, "missing \"search\" file upload", http.StatusBadRequest)
+		return
+	}
+	defer fileReader.Close()
+
+	file, err := io.ReadAll(fileReader)
+	if err != nil {
+		http.Error(w, "cannot read uploaded file", http.StatusBadRequest)
+		return
+	}
+
+	search, err := shared.LoadImageFromBytes(file)
+	if err != nil {
+		// For example shared.BlankImage when no features were detected.
+		http.Error(w, "cannot use uploaded image: "+err.Error(), http.StatusUnprocessableEntity)
+		return
+	}
+	// The native matrices are only needed while matching; B64 is a string.
+	defer search.Image.Close()
+	defer search.Descriptors.Close()
+
+	// NOTE(review): Preload probably has no effect together with Rows();
+	// publications are fetched explicitly further down. Confirm and drop.
+	rows, err := db.Debug().Model(&shared.Page{}).Preload("publications").Rows()
+	if err != nil {
+		serverError(w, "querying pages", err)
+		return
+	}
+	defer rows.Close()
+
+	var candidates []candidate
+
+	for rows.Next() {
+
+		var page shared.Page
+		if err := db.ScanRows(rows, &page); err != nil {
+			serverError(w, "scanning page row", err)
+			return
+		}
+
+		// Derive the row count from the blob instead of assuming 500: a
+		// shorter blob would otherwise be read past its end.
+		descriptorRows := len(page.DescriptorBlob) / descriptorCols
+		if descriptorRows == 0 {
+			continue
+		}
+
+		page.Descriptors, err = gocv.NewMatFromBytes(
+			descriptorRows, descriptorCols, gocv.MatTypeCV8U,
+			page.DescriptorBlob[:descriptorRows*descriptorCols],
+		)
+		if err != nil {
+			serverError(w, "rebuilding descriptors", err)
+			return
+		}
+
+		c := newCandidate(page, shared.Publication{})
+
+		c.tryMatch(search)
+
+		// Only the distances are needed from here on; release the native
+		// matrix now rather than leaking one per stored page per request.
+		c.page.Descriptors.Close()
+
+		if len(c.matches) == 0 {
+			continue
+		}
+
+		candidates = append(candidates, c)
+		// }
+	}
+	if err := rows.Err(); err != nil {
+		serverError(w, "iterating page rows", err)
+		return
+	}
+
+	slices.SortFunc(candidates, func(a, b candidate) int {
+		return cmp.Compare(a.averageDistance, b.averageDistance)
+	})
+
+	// Fewer than maxResults candidates is normal for a small database.
+	best := candidates[:min(maxResults, len(candidates))]
+	pages := make([]shared.Page, 0, len(best))
+
+	for _, c := range best {
+		var pub shared.Publication
+		err = db.Where("id = ?", c.page.UserID).Find(&pub).Error
+		if err != nil {
+			serverError(w, "loading publication", err)
+			return
+		}
+		fmt.Println("pub:", pub)
+		c.page.Publication = pub
+		pages = append(pages, c.page)
+	}
+
+	if err := layout(results(search.B64, pages)).Render(req.Context(), w); err != nil {
+		log.Println("search: rendering results:", err)
+	}
+}
+
+// main opens the database, pre-renders the upload form and serves the web
+// interface on port.
+func main() {
+
+	var err error
+	db, err = gorm.Open(sqlite.Open("test.db"), &gorm.Config{})
+	if err != nil {
+		panic(err)
+	}
+
+	// The form never changes, so it is rendered once at start-up.
+	home := bytes.Buffer{}
+	if err := layout(form()).Render(context.Background(), &home); err != nil {
+		panic(err)
+	}
+
+	router := http.NewServeMux()
+
+	router.HandleFunc("GET /", func(w http.ResponseWriter, req *http.Request) {
+		w.Write(home.Bytes())
+	})
+
+	router.HandleFunc("POST /search", handleSearch)
+
+	// GET /src?src=<url> relays the body of a remote resource.
+	//
+	// SECURITY(review): the URL is entirely client-controlled, which makes
+	// this an open proxy that can also reach hosts only visible to the
+	// server (SSRF). Restrict it to the expected image hosts.
+	router.HandleFunc("GET /src", func(w http.ResponseWriter, r *http.Request) {
+		src := r.FormValue("src")
+		if src == "" {
+			http.Error(w, "missing \"src\" parameter", http.StatusBadRequest)
+			return
+		}
+
+		// Tie the upstream request to the client's, so it is cancelled
+		// when the client goes away.
+		upstream, err := http.NewRequestWithContext(r.Context(), http.MethodGet, src, nil)
+		if err != nil {
+			http.Error(w, "invalid \"src\" parameter", http.StatusBadRequest)
+			return
+		}
+
+		resp1, err := http.DefaultClient.Do(upstream)
+		if err != nil {
+			http.Error(w, "cannot fetch source", http.StatusBadGateway)
+			return
+		}
+		// Registered before the status check so the body is closed on
+		// every path.
+		defer resp1.Body.Close()
+
+		if resp1.StatusCode != http.StatusOK {
+			http.Error(w, "source answered "+resp1.Status, http.StatusBadGateway)
+			return
+		}
+
+		if _, err = io.Copy(w, resp1.Body); err != nil {
+			// Headers are already sent; all that is left is to log it.
+			log.Println("src: relaying body:", err)
+		}
+	})
+
+	router.Handle("GET /index/", http.StripPrefix("/index/", http.FileServer(http.Dir("index"))))
+
+	server := http.Server{
+		Addr:    port,
+		Handler: Logging(router),
+		// Bounds how long a client may take to send its request headers.
+		ReadHeaderTimeout: 10 * time.Second,
+	}
+
+	fmt.Println("http://localhost" + port)
+	log.Fatal(server.ListenAndServe())
+}
diff --git a/scraper/scraper.go b/scraper/scraper.go
new file mode 100644
index 0000000..25f15e9
--- /dev/null
+++ b/scraper/scraper.go
@@ -0,0 +1,359 @@
+package main
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"regexp"
+	"sauce/shared"
+	"strconv"
+	"strings"
+
+	// "sync"
+	"time"
+
+	"gorm.io/driver/sqlite"
+	"gorm.io/gorm"
+
+	"github.com/PuerkitoBio/goquery"
+	"github.com/mattn/go-sqlite3" // ???
+)
+
+// categoryMap translates the category label shown on a gallery page into a
+// shared.Category.
+//
+// NOTE(review): "manga" maps to shared.Doujinshi, the only category that
+// exists so far; confirm whether a dedicated value is wanted.
+var categoryMap = map[string]shared.Category{
+	"doujinshi": shared.Doujinshi,
+	"manga":     shared.Doujinshi,
+}
+
+// languageMap translates the language label shown on a gallery page into a
+// shared.Language. Labels missing here are stored as shared.None.
+var languageMap = map[string]shared.Language{
+	"japanese": shared.Jp,
+	"chinese":  shared.Cn,
+	"english":  shared.En,
+}
+
+// extractor describes a site scraper: discover queues galleries, extract
+// imports them.
+//
+// NOTE(review): nhentai.extract takes a single string, so nhentai does not
+// currently satisfy this interface.
+type extractor interface {
+	extract(...string) error
+	discover(string) error
+}
+
+// nhentai scrapes nhentai.net and stores what it finds through db.
+type nhentai struct {
+	db *gorm.DB
+}
+
+// Queue is one gallery URL waiting to be extracted.
+type Queue struct {
+	ID        uint `gorm:"primarykey"`
+	CreatedAt time.Time
+	UpdatedAt time.Time
+	Url       string `gorm:"unique"`
+}
+
+var extractGalleryUrl = regexp.MustCompile(`https?://t\d?\.nhentai\.net/galleries/(\d+)/.+\.(\w+)`)
+
+// discover fetches the listing page at url and queues every gallery linked
+// from a ".cover" element. Galleries that were already extracted or are
+// already queued are skipped. It stops at the first failure and returns it.
+func (n nhentai) discover(url string) error {
+
+	root := "https://nhentai.net"
+
+	resp, err := http.Get(url)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("fetching %s: %s", url, resp.Status)
+	}
+
+	// Load the HTML document
+	doc, err := goquery.NewDocumentFromReader(resp.Body)
+	if err != nil {
+		return fmt.Errorf("parsing %s: %w", url, err)
+	}
+
+	var firstErr error
+
+	doc.Find(".cover").EachWithBreak(func(i int, s *goquery.Selection) bool {
+		href, ok := s.Attr("href")
+		if !ok {
+			firstErr = fmt.Errorf("cover %d on %s has no href", i, url)
+			return false
+		}
+		gallery := root + href
+
+		fmt.Println(gallery)
+
+		// extract stores the gallery URL in Publication.Source, so a hit
+		// here means this gallery was imported before.
+		var source shared.Publication
+		err := n.db.Where("source = ?", gallery).First(&source).Error
+		if err == nil {
+			return true
+		}
+		if !errors.Is(err, gorm.ErrRecordNotFound) {
+			firstErr = err
+			return false
+		}
+
+		err = n.db.Create(&Queue{Url: gallery}).Error
+
+		// Queue.Url is unique: a constraint violation only means the URL
+		// is queued already. Any other database error is a real failure.
+		var sqliteErr sqlite3.Error
+		if errors.As(err, &sqliteErr) && sqliteErr.Code == sqlite3.ErrConstraint {
+			return true
+		}
+		if err != nil {
+			firstErr = err
+			return false
+		}
+
+		return true
+	})
+
+	return firstErr
+}
+
+
+// download fetches the image at url, computes its descriptors and returns
+// it as a Page named name. The error is shared.BlankImage when no features
+// were found in the image.
+func (n nhentai) download(url, name string) (shared.Page, error) {
+
+	resp1, err := http.Get(url)
+	if err != nil {
+		return shared.Page{}, err
+	}
+	// Registered before the status check so the body is closed on every path.
+	defer resp1.Body.Close()
+
+	// Anything but 200 carries an error document, not an image.
+	if resp1.StatusCode != http.StatusOK {
+		return shared.Page{}, fmt.Errorf("status code: %s", resp1.Status)
+	}
+
+	blob, err := io.ReadAll(resp1.Body)
+	if err != nil {
+		return shared.Page{}, err
+	}
+
+	img, err := shared.LoadImageFromBytes(blob)
+	if err != nil {
+		return shared.Page{}, err
+	}
+	img.Url = url
+	img.Name = name
+
+	// publication.Pages = append(publication.Pages, img)
+
+	return img, nil
+}
+
+// pageImageSrc fetches one reader page and returns the address of the image
+// shown on it.
+func (n nhentai) pageImageSrc(pageURL string) (string, error) {
+	resp, err := http.Get(pageURL)
+	if err != nil {
+		return "", err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return "", fmt.Errorf("fetching %s: %s", pageURL, resp.Status)
+	}
+
+	doc, err := goquery.NewDocumentFromReader(resp.Body)
+	if err != nil {
+		return "", fmt.Errorf("parsing %s: %w", pageURL, err)
+	}
+
+	src, ok := doc.Find("#image-container > a > img").Attr("src")
+	if !ok {
+		return "", fmt.Errorf("no image found on %s", pageURL)
+	}
+
+	return src, nil
+}
+
+// extract imports the gallery at url: it reads title, authors, tags,
+// category, language and page count from the gallery page, downloads every
+// page image and stores the resulting Publication. Pages without detectable
+// features (shared.BlankImage) are skipped.
+func (n nhentai) extract(url string) error {
+
+	fmt.Println("extracting:", url)
+
+	// url := root + strings.Join(path,
+
+	resp, err := http.Get(url)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("fetching %s: %s", url, resp.Status)
+	}
+
+	// Load the HTML document
+	doc, err := goquery.NewDocumentFromReader(resp.Body)
+	if err != nil {
+		return fmt.Errorf("parsing %s: %w", url, err)
+	}
+
+
+	publication := shared.Publication{
+		Title:  doc.Find(".title .pretty").Text(),
+		Source: url,
+		Host:   "nhentai",
+	}
+
+	if publication.Title == "" {
+		return fmt.Errorf("missing title")
+	}
+
+	// The cover address itself is unused for now; its absence means the
+	// page does not have the expected layout.
+	if _, ok := doc.Find("#cover > a > img").Attr("data-src"); !ok {
+		return fmt.Errorf("missing cover image on %s", url)
+	}
+
+	var pagesTotal int
+	// galleryUrl := extractGalleryUrl.FindStringSubmatch(coverUrl)
+	// fmt.Println(galleryUrl, coverUrl)
+	// galleryId := galleryUrl[1]
+	// galleryExt := galleryUrl[2]
+
+	// The goquery callbacks cannot return an error, so the first failure is
+	// parked here and reported once the walk is over.
+	var parseErr error
+	fail := func(err error) {
+		if parseErr == nil {
+			parseErr = err
+		}
+	}
+
+	doc.Find(".tag-container").Each(func(i int, s *goquery.Selection) {
+		if parseErr != nil {
+			return
+		}
+
+		tag := strings.TrimSpace(s.Contents().First().Text())
+
+		switch tag {
+		case "Artists:":
+			field := s.Find(".name").First().Text()
+
+			for _, name := range strings.Split(field, " | ") {
+				// Reuse the stored author when there is one.
+				var author shared.Author
+
+				err := n.db.Where("name = ?", name).First(&author).Error
+				if errors.Is(err, gorm.ErrRecordNotFound) {
+					author = shared.Author{Name: name}
+				} else if err != nil {
+					fail(err)
+					return
+				}
+
+				publication.Authors = append(publication.Authors, author)
+			}
+		case "Tags:":
+			s.Find("a").Each(func(i int, s *goquery.Selection) {
+				name := s.Children().First().Text()
+				var tag shared.Tag
+
+				err := n.db.Where("name = ?", name).First(&tag).Error
+				if errors.Is(err, gorm.ErrRecordNotFound) {
+					tag = shared.Tag{
+						Name: name,
+					}
+				} else if err != nil {
+					fail(err)
+					return
+				}
+
+				publication.Tags = append(publication.Tags, tag)
+			})
+		case "Categories:":
+			s.Find("a").Each(func(i int, s *goquery.Selection) {
+				name := s.Children().First().Text()
+
+				category, ok := categoryMap[name]
+				if !ok {
+					fail(fmt.Errorf("unknown category %q", name))
+					return
+				}
+				publication.Category = &category
+
+			})
+		case "Languages:":
+			s.Find("a").Each(func(i int, s *goquery.Selection) {
+
+				lang, ok := languageMap[s.Children().First().Text()]
+
+				if !ok {
+					lang = shared.None
+				}
+
+				publication.Language = &lang
+			})
+		case "Pages:":
+			s.Find("a .name").Each(func(i int, s *goquery.Selection) {
+				total, err := strconv.Atoi(s.Text())
+				if err != nil {
+					fail(fmt.Errorf("page count %q: %w", s.Text(), err))
+					return
+				}
+				pagesTotal = total
+			})
+		}
+	})
+
+	if parseErr != nil {
+		return parseErr
+	}
+
+	// var wg sync.WaitGroup
+	// var mut sync.Mutex
+	// errors := make(chan error, pagesTotal)
+
+	// Reader pages live at <gallery>/<n>; make sure exactly one slash
+	// separates the two.
+	base := strings.TrimSuffix(url, "/") + "/"
+
+	for pageNumber := range pagesTotal {
+
+		pageURL := fmt.Sprintf("%s%d", base, pageNumber+1)
+		// fmt.Println("source:", url)
+		src, err := n.pageImageSrc(pageURL)
+		if err != nil {
+			return err
+		}
+		// url := fmt.Sprintf("https://i.nhentai.net/galleries/%s/%d.%s", galleryId, pageNumber+1, galleryExt)
+		outputName := fmt.Sprintf("%d.jpg", pageNumber+1)
+
+		fmt.Println("requested:", src)
+		page, err := n.download(src, outputName)
+		if err != nil {
+			if errors.Is(err, shared.BlankImage) {
+				fmt.Println("blank image:", src)
+				continue
+			}
+			return err
+		}
+		fmt.Println("finished:", src)
+		page.Order = pageNumber
+
+		// Only DescriptorBlob is persisted. Release the native matrices
+		// now so a long gallery does not keep every decoded image alive.
+		page.Image.Close()
+		page.Descriptors.Close()
+
+		publication.Pages = append(publication.Pages, page)
+
+		// time.Sleep(time.Second) // good guying
+		// wg.Add(1)
+
+		// go func() {
+		// 	defer wg.Done()
+		//
+		// 	fmt.Println("requested:", url)
+		// 	page, err := n.download(url, outputName)
+		// 	if err != nil {
+		// 		errors <- err
+		// 		return
+		// 	}
+		// 	page.Order = order
+		//
+		// 	mut.Lock()
+		// 	publication.Pages = append(publication.Pages, page)
+		// 	mut.Unlock()
+		//
+		// 	fmt.Println("finished:", url)
+		// 	errors <- nil
+		// }()
+	}
+
+	// wg.Wait()
+	// close(errors)
+
+	// for err := range errors {
+	// 	if err == shared.BlankImage {
+	// 		continue
+	// 	}
+	// 	if err != nil {
+	// 		return err
+	// 	}
+	// }
+
+	err = n.db.Debug().Create(&publication).Error
+	if err != nil {
+		return fmt.Errorf("storing %s: %w", url, err)
+	}
+
+	return nil
+}
+
+// main migrates the schema and then works through the queue: each entry is
+// extracted and removed inside one transaction. It stops when the queue is
+// empty and panics on the first failure.
+func main() {
+	db, err := gorm.Open(sqlite.Open("test.db"), &gorm.Config{})
+	if err != nil {
+		panic(err)
+	}
+
+	if err := db.AutoMigrate(&Queue{}); err != nil {
+		panic(err)
+	}
+	if err := db.AutoMigrate(&shared.Author{}, &shared.Page{}, &shared.Tag{}, &shared.Publication{}); err != nil {
+		panic(err)
+	}
+
+	// nhentai{db: db}.discover("https://nhentai.net/artist/mda-starou/")
+	// return
+
+	for {
+
+		var dequeue Queue
+		tx := db.Begin()
+		if tx.Error != nil {
+			panic(tx.Error)
+		}
+
+		site := nhentai{db: tx}
+
+		err = tx.First(&dequeue).Error
+		if errors.Is(err, gorm.ErrRecordNotFound) {
+			// Queue drained; do not leave the transaction open.
+			tx.Rollback()
+			break
+		}
+		if err != nil {
+			tx.Rollback()
+			panic(err)
+		}
+
+		err = site.extract(dequeue.Url)
+		if err != nil {
+			tx.Rollback()
+			panic(err)
+		}
+
+		err = tx.Delete(&dequeue).Error
+		if err != nil {
+			tx.Rollback()
+			panic(err)
+		}
+
+		if err = tx.Commit().Error; err != nil {
+			panic(err)
+		}
+	}
+}
diff --git a/shared/shared.go b/shared/shared.go
new file mode 100644
index 0000000..c7276e9
--- /dev/null
+++ b/shared/shared.go
@@ -0,0 +1,157 @@
+package shared
+
+import (
+	"encoding/base64"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"image/png"
+	"net/http"
+	"os"
+	"path/filepath"
+
+	"gocv.io/x/gocv"
+	"gorm.io/gorm"
+)
+
+// Page is one image of a Publication. The fields tagged `gorm:"-:all"` are
+// working data that is not stored; the descriptors are persisted as raw
+// bytes in DescriptorBlob.
+type Page struct {
+	gorm.Model
+	Name, Path, Url string
+	DescriptorBlob  []byte
+	UserID          uint // id of the owning Publication
+	Publication     Publication `gorm:"foreignKey:UserID"`
+	Order           int
+
+	Image          gocv.Mat        `gorm:"-:all"`
+	Phash, Avghash gocv.Mat        `gorm:"-:all"`
+	Keypoints      []gocv.KeyPoint `gorm:"-:all"`
+	Descriptors    gocv.Mat        `gorm:"-:all"`
+	B64            string          `gorm:"-:all"`
+	// b64, mime string
+}
+
+var (
+	// BlankImage is returned when no keypoints were detected in an image.
+	BlankImage = errors.New("blank image")
+)
+var (
+	nomask gocv.Mat = gocv.NewMat()
+	orb    gocv.ORB = gocv.NewORB()
+)
+
+// Language is the language a publication is written in.
+type Language string
+
+// Category is the kind of a publication.
+type Category string
+
+const (
+	Jp   Language = "japanese"
+	Cn   Language = "chinese"
+	En   Language = "english"
+	None Language = "none"
+)
+
+const (
+	Doujinshi Category = "doujinshi"
+)
+
+// Tag is a uniquely named label attached to publications.
+type Tag struct {
+	gorm.Model
+	Name string `gorm:"unique"`
+}
+
+// Author is a uniquely named creator of publications.
+type Author struct {
+	gorm.Model
+	Name string `gorm:"unique"`
+}
+
+// Publication is a scraped work together with its metadata and pages.
+type Publication struct {
+	gorm.Model
+	Title     string
+	Source    string // original URL
+	Host      string
+	Favorites int
+	Authors   []Author `gorm:"many2many:publication_authors"`
+	Language  *Language
+	Category  *Category
+	Tags      []Tag  `gorm:"many2many:publication_tags"`
+	Pages     []Page `gorm:"foreignKey:UserID"`
+}
+
+// SaveInfoToFile writes the publication as JSON to "info.json" inside dir.
+// It panics when the publication cannot be encoded or the file cannot be
+// written.
+func (p Publication) SaveInfoToFile(dir string) {
+	bytes, err := json.Marshal(p)
+	if err != nil {
+		panic(err)
+	}
+
+	// os.WriteFile creates or truncates the file like os.Create did, and
+	// also reports a failed write instead of dropping it.
+	if err := os.WriteFile(filepath.Join(dir, "info.json"), bytes, 0o666); err != nil {
+		panic(err)
+	}
+}
+
+// LoadImageFromBytes decodes an encoded image, computes its descriptors and
+// returns it as a Page whose B64 field holds the input as a data URI. The
+// error is BlankImage when no keypoints were detected.
+func LoadImageFromBytes(bytes []byte) (Page, error) {
+
+	fileMat, err := gocv.IMDecode(bytes, gocv.IMReadColor)
+	if err != nil {
+		// The input is usually an upload or a download; report the failure
+		// to the caller instead of crashing the process.
+		return Page{}, fmt.Errorf("decoding image: %w", err)
+	}
+
+	img, err := LoadImage("", fileMat)
+	if err != nil {
+		// The failed Page does not keep the Mat, so release it here.
+		fileMat.Close()
+		return Page{}, err
+	}
+
+	img.B64 = fmt.Sprintf(
+		"data:%s;base64,%s",
+		http.DetectContentType(bytes),
+		base64.StdEncoding.EncodeToString(bytes),
+	)
+
+	return img, nil
+}
+
+// LoadImage detects ORB keypoints and descriptors in img and returns a Page
+// holding them; path only fills the Name and Path fields. It returns
+// BlankImage when no keypoints were detected.
+func LoadImage(path string, img gocv.Mat) (Page, error) {
+	// img := gocv.IMRead(path, gocv.IMReadColor)
+	// if img.Empty() {
+	// 	log.Panic("cannot read image", path)
+	// }
+	keypoints, descriptors := orb.DetectAndCompute(img, nomask)
+
+	if len(keypoints) == 0 {
+		// Nothing keeps the descriptor matrix on this path.
+		descriptors.Close()
+		return Page{}, BlankImage
+	}
+
+	e := Page{
+		Image: img,
+		Name:  filepath.Base(path),
+		Path:  "/" + path,
+		// Phash: gocv.NewMat(),
+		// Avghash: gocv.NewMat(),
+		Keypoints:      keypoints,
+		Descriptors:    descriptors,
+		DescriptorBlob: descriptors.ToBytes(),
+		// b64: base64.StdEncoding.EncodeToString(img.ToBytes()),
+		// mime: http.DetectContentType(img.ToBytes()),
+	}
+
+	// e.hashImage()
+
+	return e, nil
+}
+
+// SaveORBtoDisk writes the page's descriptor matrix to path as a PNG image.
+// It panics when the matrix cannot be converted or the file cannot be
+// written.
+func (e Page) SaveORBtoDisk(path string) {
+	img, err := e.Descriptors.ToImage()
+	if err != nil {
+		panic(err)
+	}
+
+	cache, err := os.Create(path)
+	if err != nil {
+		panic(err)
+	}
+	defer cache.Close()
+
+	// A failed encode would leave a truncated cache entry behind.
+	if err := png.Encode(cache, img); err != nil {
+		panic(err)
+	}
+}
+
+
+