package main

import (
	"context"
	"database/sql"
	"log"
	"strings"
	"sync"
	"time"

	"git.rpuzonas.com/rpuzonas/go-rss-aggregator/internal/database"
)

// pubDateLayouts lists the date layouts tried, in order, when parsing an RSS
// item's publication date. RSS 2.0 dates follow RFC 822, so real feeds mix
// numeric offsets with zone names, one- and two-digit days, and two- and
// four-digit years. time.RFC1123Z stays first so that every value accepted
// before the fallbacks were added parses to the same result.
var pubDateLayouts = []string{
	time.RFC1123Z,
	time.RFC1123,
	"Mon, 2 Jan 2006 15:04:05 -0700",
	"Mon, 2 Jan 2006 15:04:05 MST",
	time.RFC822Z,
	time.RFC822,
}

// parsePubDate parses value using each layout in pubDateLayouts and returns
// the first successful result. If no layout matches, it returns the zero
// time and the error produced by the first layout.
func parsePubDate(value string) (time.Time, error) {
	var firstErr error
	for _, layout := range pubDateLayouts {
		parsed, err := time.Parse(layout, value)
		if err == nil {
			return parsed, nil
		}
		if firstErr == nil {
			firstErr = err
		}
	}
	return time.Time{}, firstErr
}

// startScraping runs the scrape loop and never returns.
//
// One batch is processed immediately, and another after every tick of
// timeBetweenRequests. For each batch it asks the database for the next feeds
// to fetch, passing concurrency as the query argument (presumably the row
// limit; confirm against the GetNextFeedToFetch query), starts one goroutine
// per returned feed, and waits for all of them before waiting for the next
// tick. A failed feed query is logged and the batch is skipped.
func startScraping(
	db *database.Queries,
	concurrency int,
	timeBetweenRequests time.Duration,
) {
	log.Printf("Scraping on %v goroutines every %s duration", concurrency, timeBetweenRequests)

	ticker := time.NewTicker(timeBetweenRequests)
	// The empty init and condition make the first iteration run right away;
	// the post statement then blocks on the ticker between batches.
	for ; ; <-ticker.C {
		feeds, err := db.GetNextFeedToFetch(context.Background(), int64(concurrency))
		if err != nil {
			log.Println("Error fetching feeds:", err)
			continue
		}

		var wg sync.WaitGroup
		for _, feed := range feeds {
			wg.Add(1)
			go scrapeFeed(&wg, db, feed)
		}
		wg.Wait()
	}
}

// scrapeFeed downloads a single feed and stores its items as posts.
//
// It calls wg.Done when it returns. The feed is marked as fetched before the
// download is attempted. Items whose publication date cannot be parsed are
// logged and skipped. Inserts that fail on the posts.url uniqueness
// constraint are treated as already-stored posts and ignored silently; any
// other insert error is logged and processing continues with the next item.
func scrapeFeed(wg *sync.WaitGroup, db *database.Queries, feed database.Feed) {
	defer wg.Done()

	ctx := context.Background()

	if _, err := db.MarkFeedAsFetched(ctx, feed.ID); err != nil {
		log.Println("Error marking feed as fetched:", err)
		return
	}

	rssFeed, err := urlToFeed(feed.Url)
	if err != nil {
		log.Println("Error fetching feed:", err)
		return
	}

	for _, item := range rssFeed.Channel.Items {
		// An empty description is stored as SQL NULL rather than "".
		description := sql.NullString{}
		if item.Description != "" {
			description.String = item.Description
			description.Valid = true
		}

		publishedAt, err := parsePubDate(item.PubDate)
		if err != nil {
			log.Printf("Couldn't parse date %v with err %v\n", item.PubDate, err)
			continue
		}

		now := time.Now().UTC()
		_, err = db.CreatePost(ctx, database.CreatePostParams{
			UpdatedAt:   now,
			CreatedAt:   now,
			Title:       item.Title,
			Description: description,
			PublishedAt: publishedAt,
			Url:         item.Link,
			FeedID:      feed.ID,
		})
		if err != nil {
			// A duplicate URL means the post was stored by an earlier run.
			if strings.Contains(err.Error(), "UNIQUE constraint failed: posts.url") {
				continue
			}
			log.Println("Failed to add post:", err)
		}
	}

	log.Printf("Feed %s collected, %v posts found", feed.Name, len(rssFeed.Channel.Items))
}