package main

import (
	"errors"
	"fmt"
	"io"
	"net/http"
	"regexp"
	"strconv"
	"strings"
	// "sync"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/mattn/go-sqlite3" // needed for the sqlite3.Error check in discover
	"gorm.io/driver/sqlite"
	"gorm.io/gorm"

	"sauce/shared"
)

// categoryMap translates the category label text read from a gallery page
// into a shared.Category.
// NOTE(review): "manga" maps to shared.Doujinshi — confirm this is intended.
var categoryMap = map[string]shared.Category{
	"doujinshi": shared.Doujinshi,
	"manga":     shared.Doujinshi,
}

// languageMap translates the language label text read from a gallery page
// into a shared.Language. Labels missing from this map are handled by the
// caller (see readMetadata).
var languageMap = map[string]shared.Language{
	"japanese": shared.Jp,
	"chinese":  shared.Cn,
	"english":  shared.En,
}

// extractor describes a site scraper.
// NOTE(review): extract is declared variadic here while nhentai.extract takes
// a single string, so nhentai does not currently satisfy this interface.
type extractor interface {
	extract(...string) error
	discover(string) error
}

// nhentai scrapes nhentai.net and stores what it finds through db.
type nhentai struct {
	db *gorm.DB
}

// Queue is one gallery URL waiting to be extracted. discover inserts rows and
// main removes them once extract succeeds. Url is unique, so the same gallery
// cannot be queued twice.
type Queue struct {
	ID        uint `gorm:"primarykey"`
	CreatedAt time.Time
	UpdatedAt time.Time
	Url       string `gorm:"unique"`
}

// extractGalleryUrl captures the gallery id and the file extension from a
// thumbnail URL. It is not referenced by the code in this file at the moment.
var extractGalleryUrl = regexp.MustCompile(`https?://t\d?\.nhentai\.net/galleries/(\d+)/.+\.(\w+)`)

// fetchDocument requests url and parses the response body as HTML.
// It returns an error for transport failures, for any status other than
// 200 OK, and for bodies goquery cannot parse. The response body is always
// closed before returning.
func fetchDocument(url string) (*goquery.Document, error) {
	resp, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("fetching %s: unexpected status %s", url, resp.Status)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("parsing %s: %w", url, err)
	}
	return doc, nil
}

// discover loads the listing page at url and queues every gallery linked from
// a ".cover" element. Galleries that already exist as a publication, or that
// are already queued, are skipped. The first failure stops the scan and is
// returned.
func (n nhentai) discover(url string) error {
	const root = "https://nhentai.net"

	doc, err := fetchDocument(url)
	if err != nil {
		return err
	}

	var firstErr error
	doc.Find(".cover").EachWithBreak(func(_ int, s *goquery.Selection) bool {
		href, ok := s.Attr("href")
		if !ok {
			firstErr = fmt.Errorf("cover element without href on %s", url)
			return false
		}
		galleryURL := root + href
		fmt.Println(galleryURL)

		// Do not queue a gallery that has already been stored.
		var existing shared.Publication
		err := n.db.Where("source = ?", galleryURL).First(&existing).Error
		switch {
		case err == nil:
			return true
		case !errors.Is(err, gorm.ErrRecordNotFound):
			firstErr = fmt.Errorf("looking up publication %s: %w", galleryURL, err)
			return false
		}

		err = n.db.Create(&Queue{Url: galleryURL}).Error
		var sqliteErr sqlite3.Error
		if errors.As(err, &sqliteErr) && sqliteErr.Code == sqlite3.ErrConstraint {
			// The unique constraint on Queue.Url fired: already queued.
			return true
		}
		if err != nil {
			firstErr = fmt.Errorf("queueing %s: %w", galleryURL, err)
			return false
		}
		return true
	})
	return firstErr
}

// download fetches the image at url and decodes it with
// shared.LoadImageFromBytes. The returned page has Url and Name set to the
// given url and name. Errors from the decoder are returned unwrapped so that
// callers can compare them against shared.BlankImage.
func (n nhentai) download(url, name string) (shared.Page, error) {
	resp, err := http.Get(url)
	if err != nil {
		return shared.Page{}, err
	}
	// Registered before the status check so the body is closed on every path.
	defer resp.Body.Close()

	if resp.StatusCode >= 400 {
		return shared.Page{}, fmt.Errorf("status code: %s", resp.Status)
	}

	blob, err := io.ReadAll(resp.Body)
	if err != nil {
		return shared.Page{}, err
	}

	img, err := shared.LoadImageFromBytes(blob)
	if err != nil {
		return shared.Page{}, err
	}
	img.Url = url
	img.Name = name
	return img, nil
}

// readMetadata walks the ".tag-container" sections of a gallery page and
// fills Authors, Tags, Category and Language on publication. Authors and tags
// that already exist in the database (matched by name) are reused; the others
// are added as new values. It returns the page count read from the "Pages:"
// section, or the first error encountered.
func (n nhentai) readMetadata(doc *goquery.Document, publication *shared.Publication) (int, error) {
	var (
		pagesTotal int
		firstErr   error
	)

	doc.Find(".tag-container").EachWithBreak(func(_ int, s *goquery.Selection) bool {
		switch strings.TrimSpace(s.Contents().First().Text()) {
		case "Artists:":
			field := s.Find(".name").First().Text()
			for _, name := range strings.Split(field, " | ") {
				if name == "" {
					// Splitting an empty field yields one empty name.
					continue
				}
				var author shared.Author
				err := n.db.Where("name = ?", name).First(&author).Error
				if errors.Is(err, gorm.ErrRecordNotFound) {
					author = shared.Author{Name: name}
				} else if err != nil {
					firstErr = fmt.Errorf("looking up author %q: %w", name, err)
					return false
				}
				publication.Authors = append(publication.Authors, author)
			}

		case "Tags:":
			s.Find("a").EachWithBreak(func(_ int, a *goquery.Selection) bool {
				name := a.Children().First().Text()
				var tag shared.Tag
				err := n.db.Where("name = ?", name).First(&tag).Error
				if errors.Is(err, gorm.ErrRecordNotFound) {
					tag = shared.Tag{Name: name}
				} else if err != nil {
					firstErr = fmt.Errorf("looking up tag %q: %w", name, err)
					return false
				}
				publication.Tags = append(publication.Tags, tag)
				return true
			})

		case "Categories:":
			s.Find("a").EachWithBreak(func(_ int, a *goquery.Selection) bool {
				label := a.Children().First().Text()
				category, ok := categoryMap[label]
				if !ok {
					firstErr = fmt.Errorf("unknown category %q", label)
					return false
				}
				publication.Category = &category
				return true
			})

		case "Languages:":
			s.Find("a").Each(func(_ int, a *goquery.Selection) {
				lang, ok := languageMap[a.Children().First().Text()]
				if !ok {
					if publication.Language != nil {
						// Keep what an earlier link set instead of
						// overwriting it with shared.None.
						return
					}
					lang = shared.None
				}
				publication.Language = &lang
			})

		case "Pages:":
			s.Find("a .name").EachWithBreak(func(_ int, c *goquery.Selection) bool {
				total, err := strconv.Atoi(c.Text())
				if err != nil {
					firstErr = fmt.Errorf("parsing page count: %w", err)
					return false
				}
				pagesTotal = total
				return true
			})
		}
		return firstErr == nil
	})

	return pagesTotal, firstErr
}

// extract scrapes the gallery at url, downloads its pages one after another
// and stores the result as a shared.Publication through n.db. Pages for which
// download reports shared.BlankImage are skipped. Any other failure aborts
// the extraction and is returned; nothing is stored in that case.
func (n nhentai) extract(url string) error {
	fmt.Println("extracting:", url)

	doc, err := fetchDocument(url)
	if err != nil {
		return err
	}

	publication := shared.Publication{
		Title:  doc.Find(".title .pretty").Text(),
		Source: url,
		Host:   "nhentai",
	}
	if publication.Title == "" {
		return fmt.Errorf("missing title")
	}

	// The cover URL itself is not used; its absence means the page does not
	// have the expected layout.
	if _, ok := doc.Find("#cover > a > img").Attr("data-src"); !ok {
		return fmt.Errorf("missing cover image on %s", url)
	}

	pagesTotal, err := n.readMetadata(doc, &publication)
	if err != nil {
		return err
	}

	// Page URLs are the gallery URL followed by the 1-based page number.
	// Trimming first makes this work with or without a trailing slash.
	base := strings.TrimSuffix(url, "/")
	for pageNumber := range pagesTotal {
		pageURL := fmt.Sprintf("%s/%d", base, pageNumber+1)
		pageDoc, err := fetchDocument(pageURL)
		if err != nil {
			return err
		}

		src, ok := pageDoc.Find("#image-container > a > img").Attr("src")
		if !ok {
			return fmt.Errorf("missing image on %s", pageURL)
		}

		outputName := fmt.Sprintf("%d.jpg", pageNumber+1)
		fmt.Println("requested:", src)
		page, err := n.download(src, outputName)
		if err != nil {
			if errors.Is(err, shared.BlankImage) {
				fmt.Println("blank image:", src)
				continue
			}
			return err
		}
		fmt.Println("finished:", src)

		page.Order = pageNumber
		publication.Pages = append(publication.Pages, page)
	}

	if err := n.db.Debug().Create(&publication).Error; err != nil {
		return fmt.Errorf("saving publication %s: %w", url, err)
	}
	return nil
}

// main opens test.db, migrates the schema and then extracts queued galleries
// one at a time until the queue is empty. Each gallery is handled in its own
// transaction, so a queue row is deleted only if its publication was stored.
func main() {
	db, err := gorm.Open(sqlite.Open("test.db"), &gorm.Config{})
	if err != nil {
		panic(err)
	}

	if err := db.AutoMigrate(&Queue{}); err != nil {
		panic(err)
	}
	if err := db.AutoMigrate(&shared.Author{}, &shared.Page{}, &shared.Tag{}, &shared.Publication{}); err != nil {
		panic(err)
	}

	// To fill the queue, run discover once, for example:
	// nhentai{db: db}.discover("https://nhentai.net/artist/mda-starou/")
	// return

	for {
		queueEmpty := false
		err := db.Transaction(func(tx *gorm.DB) error {
			var next Queue
			err := tx.First(&next).Error
			if errors.Is(err, gorm.ErrRecordNotFound) {
				queueEmpty = true
				return nil
			}
			if err != nil {
				return err
			}

			if err := (nhentai{db: tx}).extract(next.Url); err != nil {
				return err
			}
			return tx.Delete(&next).Error
		})
		if err != nil {
			panic(err)
		}
		if queueEmpty {
			break
		}
	}
}