1
0
go-rss-aggregator/scraper.go

56 lines
1.2 KiB
Go

package main
import (
"context"
"log"
"sync"
"time"
"git.rpuzonas.com/rpuzonas/go-rss-aggregator/internal/database"
)
// startScraping runs forever, waking every timeBetweenRequests to pull the
// next batch of feeds (at most concurrency of them) from the database and
// scrape them in parallel. Each batch is fully drained before the next tick
// is consumed. The first iteration runs immediately, before the first tick.
func startScraping(
	db *database.Queries,
	concurrency int,
	timeBetweenRequests time.Duration,
) {
	log.Printf("Scraping on %v goroutines every %s duration", concurrency, timeBetweenRequests)

	ticker := time.NewTicker(timeBetweenRequests)
	for ; ; <-ticker.C {
		batch, err := db.GetNextFeedToFetch(context.Background(), int64(concurrency))
		if err != nil {
			log.Println("Error fetching feeds:", err)
			continue
		}

		// Fan out one goroutine per feed and wait for the whole batch.
		var wg sync.WaitGroup
		for _, f := range batch {
			wg.Add(1)
			go scrapeFeed(&wg, db, f)
		}
		wg.Wait()
	}
}
// scrapeFeed marks feed as fetched in the database, downloads its RSS
// document, and logs each post title found. It is intended to run as a
// goroutine and signals completion on wg. Errors are logged and abort the
// scrape of this feed only.
func scrapeFeed(wg *sync.WaitGroup, db *database.Queries, feed database.Feed) {
	defer wg.Done()

	// Mark the feed as fetched up front so GetNextFeedToFetch does not
	// hand the same feed out again while this scrape is in flight.
	_, err := db.MarkFeedAsFetched(context.Background(), feed.ID)
	if err != nil {
		// Fixed typo: "makrking" -> "marking".
		log.Println("Error marking feed as fetched:", err)
		return
	}

	rssFeed, err := urlToFeed(feed.Url)
	if err != nil {
		log.Println("Error fetching feed:", err)
		return
	}

	for _, item := range rssFeed.Channel.Items {
		log.Println("Found post:", item.Title, "on feed", feed.Name)
	}
	log.Printf("Feed %s collected, %v posts found", feed.Name, len(rssFeed.Channel.Items))
}