56 lines
1.2 KiB
Go
56 lines
1.2 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"log"
|
|
"sync"
|
|
"time"
|
|
|
|
"git.rpuzonas.com/rpuzonas/go-rss-aggregator/internal/database"
|
|
)
|
|
|
|
func startScraping(
|
|
db *database.Queries,
|
|
concurrency int,
|
|
timeBetweenRequests time.Duration,
|
|
) {
|
|
log.Printf("Scraping on %v goroutines every %s duration", concurrency, timeBetweenRequests)
|
|
|
|
ticker := time.NewTicker(timeBetweenRequests)
|
|
for ; ; <-ticker.C {
|
|
feeds, err := db.GetNextFeedToFetch(context.Background(), int64(concurrency))
|
|
if err != nil {
|
|
log.Println("Error fetching feeds:", err)
|
|
continue
|
|
}
|
|
|
|
wg := sync.WaitGroup{}
|
|
for _, feed := range feeds {
|
|
wg.Add(1)
|
|
go scrapeFeed(&wg, db, feed)
|
|
}
|
|
wg.Wait()
|
|
}
|
|
}
|
|
|
|
func scrapeFeed(wg *sync.WaitGroup, db *database.Queries, feed database.Feed) {
|
|
defer wg.Done()
|
|
|
|
_, err := db.MarkFeedAsFetched(context.Background(), feed.ID)
|
|
if err != nil {
|
|
log.Println("Error makrking feed as fetched:", err)
|
|
return
|
|
}
|
|
|
|
rssFeed, err := urlToFeed(feed.Url)
|
|
if err != nil {
|
|
log.Println("Error fetching feed:", err)
|
|
return
|
|
}
|
|
|
|
for _, item := range rssFeed.Channel.Items {
|
|
log.Println("Found post:", item.Title, "on feed", feed.Name)
|
|
}
|
|
log.Printf("Feed %s collected, %v posts found", feed.Name, len(rssFeed.Channel.Items))
|
|
}
|