pkg/rss/rss.go
package rss
import (
"bytes"
"errors"
"fmt"
"io"
"io/ioutil"
"net"
"net/http"
"strings"
"text/tabwriter"
"time"
)
// Parse decodes raw feed data into a Feed, choosing the parser by sniffing
// the document for a format marker:
//
//   - data containing "<rss" is parsed as RSS 2.0;
//   - data containing the RSS 1.0 default namespace declaration
//     (xmlns="http://purl.org/rss/1.0/") is parsed as RSS 1.0;
//   - anything else is handed to the Atom parser.
//
// The markers are matched anywhere in data, not only on the root element.
// The search runs directly on the byte slice, so the document is not copied
// into a string for every check. When debug is enabled the chosen format is
// printed to standard output.
func Parse(data []byte) (*Feed, error) {
	switch {
	case bytes.Contains(data, []byte("<rss")):
		if debug {
			fmt.Println("[i] Parsing as RSS 2.0")
		}
		return parseRSS2(data)
	case bytes.Contains(data, []byte(`xmlns="http://purl.org/rss/1.0/"`)):
		if debug {
			fmt.Println("[i] Parsing as RSS 1.0")
		}
		return parseRSS1(data)
	default:
		if debug {
			fmt.Println("[i] Parsing as Atom")
		}
		return parseAtom(data)
	}
}
// A FetchFunc is a function that fetches a feed for the given URL and returns
// the raw HTTP response. FetchByFunc reads the response body in full and
// closes it, so an implementation must return a response with a readable
// Body whenever err is nil.
type FetchFunc func(url string) (resp *http.Response, err error)
// DefaultFetchFunc is the fetcher used when no other one is supplied: it
// issues a GET request for url through http.DefaultClient.
var DefaultFetchFunc = func(url string) (*http.Response, error) {
	return http.DefaultClient.Get(url)
}
// Fetch downloads and parses the feed at the given URL using
// DefaultFetchFunc. It is shorthand for FetchByFunc(DefaultFetchFunc, url).
func Fetch(url string) (*Feed, error) {
return FetchByFunc(DefaultFetchFunc, url)
}
// FetchByClient downloads and parses the feed at url, sending the GET request
// through the supplied client. The fetcher handed to FetchByFunc is bound to
// that client.
func FetchByClient(url string, client *http.Client) (*Feed, error) {
	return FetchByFunc(func(target string) (*http.Response, error) {
		return client.Get(target)
	}, url)
}
// FetchByFunc downloads the feed at url using fetchFunc and parses it.
//
// A nil fetchFunc is replaced by DefaultFetchFunc. The response body is read
// in full and closed before parsing. On success the returned feed has
// UpdateURL set to url and FetchFunc set to the fetcher that was used; if the
// parsed feed has an empty Link, Link is set to url.
//
// An error is returned when the fetch fails or yields no response body, when
// the response carries a 4xx or 5xx status, when the body cannot be read, or
// when the data cannot be parsed. Errors from the fetcher, the reader and the
// parser are passed through unchanged.
func FetchByFunc(fetchFunc FetchFunc, url string) (*Feed, error) {
	if fetchFunc == nil {
		fetchFunc = DefaultFetchFunc
	}

	resp, err := fetchFunc(url)
	if err != nil {
		return nil, err
	}
	// Guard against custom fetchers that report success without a response.
	if resp == nil || resp.Body == nil {
		return nil, fmt.Errorf("fetching %q: no response returned", url)
	}
	defer resp.Body.Close()

	// Reject error pages instead of feeding them to the parser. Only 4xx/5xx
	// are refused, so hand-built responses that leave StatusCode at zero are
	// still accepted.
	if resp.StatusCode >= http.StatusBadRequest {
		return nil, fmt.Errorf("fetching %q: unexpected HTTP status %d %s",
			url, resp.StatusCode, http.StatusText(resp.StatusCode))
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	out, err := Parse(body)
	if err != nil {
		return nil, err
	}

	if out.Link == "" {
		out.Link = url
	}
	out.UpdateURL = url
	out.FetchFunc = fetchFunc
	return out, nil
}
// Feed is the top-level structure: one parsed feed, its items, and the
// bookkeeping used when the feed is refreshed.
type Feed struct {
Nickname string `json:"nickname"` // This is not set by the package, but could be helpful.
Title string `json:"title"` // Replaced on every successful update.
Description string `json:"description"` // Replaced on every successful update.
Link string `json:"link"` // Link to the creator's website. FetchByFunc falls back to the feed URL when it is empty.
UpdateURL string `json:"updateurl"` // URL of the feed itself. The update methods fail when it is empty.
HubURL string `json:"huburl"` // URL of the WebSub hub.
Image *Image `json:"image"` // Feed icon.
Items []*Item `json:"items"` // Updates append items whose ID has not been seen before.
ItemMap map[string]struct{} `json:"itemmap"` // Set of item IDs, used in checking whether an item has been seen before. Built on demand by the update methods.
Refresh time.Time `json:"refresh"` // Earliest time this feed should next be checked. Updates attempted before it are refused.
Unread uint32 `json:"unread"` // Number of unread items. Used by aggregators. Incremented for every item an update adds.
FetchFunc FetchFunc `json:"-"` // Fetcher used by Update. Excluded from JSON.
}
// refreshError is a string-backed error used when a feed refresh is declined.
// It satisfies net.Error and always reports itself as temporary and never as
// a timeout.
type refreshError string

// errUpdateNotReady is the error value for an update that was attempted too
// soon.
var errUpdateNotReady = refreshError("not ready to update: too soon to refresh")

// Compile-time check that refreshError implements net.Error.
var _ net.Error = refreshError("")

// Error returns the error text.
func (e refreshError) Error() string { return string(e) }

// Timeout always reports false.
func (e refreshError) Timeout() bool { return false }

// Temporary always reports true.
func (e refreshError) Temporary() bool { return true }
// Update fetches any new items and updates f.
//
// The feed is fetched with f.FetchFunc; when that field is nil it is first
// set to DefaultFetchFunc. The result of UpdateByFunc is returned unchanged.
func (f *Feed) Update() error {
if f.FetchFunc == nil {
f.FetchFunc = DefaultFetchFunc
}
return f.UpdateByFunc(f.FetchFunc)
}
// UpdateWithContent parses content as a feed document and merges the result
// into f through UpdateByFeed.
//
// A parse failure is returned as is and leaves f untouched. Otherwise the
// result of UpdateByFeed is returned, so callers also learn when the update
// was rejected instead of silently receiving nil.
func (f *Feed) UpdateWithContent(content []byte) error {
	feed, err := Parse(content)
	if err != nil {
		// Do not hand a failed parse result to UpdateByFeed.
		return err
	}
	return f.UpdateByFeed(feed)
}
// UpdateByFeed merges an already parsed feed into f.
//
// f takes over update's Refresh time, Title and Description. Every item of
// update whose ID f has not seen yet is appended to f.Items, recorded in
// f.ItemMap and counted in f.Unread; items with a known ID are skipped.
//
// It returns errUpdateNotReady while f.Refresh is still in the future, and a
// plain error when f has no UpdateURL or update is nil. In all of these cases
// f is left unmodified.
func (f *Feed) UpdateByFeed(update *Feed) error {
	// Check that we don't update too often.
	if f.Refresh.After(time.Now()) {
		return errUpdateNotReady
	}
	if f.UpdateURL == "" {
		return errors.New("feed has no URL")
	}
	// A nil update would otherwise be dereferenced below.
	if update == nil {
		return errors.New("no feed to update from")
	}

	// Build the set of known IDs on first use.
	if f.ItemMap == nil {
		f.ItemMap = make(map[string]struct{}, len(f.Items))
		for _, item := range f.Items {
			f.ItemMap[item.ID] = struct{}{}
		}
	}

	f.Refresh = update.Refresh
	f.Title = update.Title
	f.Description = update.Description

	for _, item := range update.Items {
		if _, seen := f.ItemMap[item.ID]; seen {
			continue
		}
		f.Items = append(f.Items, item)
		f.ItemMap[item.ID] = struct{}{}
		f.Unread++
	}
	return nil
}
// UpdateByFunc downloads the feed at f.UpdateURL with fetchFunc and merges
// the result into f through UpdateByFeed, so both update paths share a single
// merge implementation.
//
// It returns errUpdateNotReady while f.Refresh is still in the future, an
// error when f has no UpdateURL, any error from fetching or parsing, and
// otherwise the result of UpdateByFeed. f is not modified unless the merge
// takes place.
func (f *Feed) UpdateByFunc(fetchFunc FetchFunc) error {
	// Reject early so that no request is made for an update that would be
	// refused anyway. UpdateByFeed repeats these checks before merging.
	if f.Refresh.After(time.Now()) {
		return errUpdateNotReady
	}
	if f.UpdateURL == "" {
		return errors.New("feed has no URL")
	}

	update, err := FetchByFunc(fetchFunc, f.UpdateURL)
	if err != nil {
		return err
	}
	return f.UpdateByFeed(update)
}
// String renders the feed and all of its items as human-readable text.
//
// When debug is enabled the output is a verbose, tab-aligned listing of the
// feed's fields followed by every item; otherwise it is a compact summary.
// Times are formatted with the DATE layout. In debug mode a feed without an
// Image prints "<nil>" for that field instead of dereferencing a nil pointer.
func (f *Feed) String() string {
	var buf bytes.Buffer

	if !debug {
		fmt.Fprintf(&buf, "Feed %q\n", f.Title)
		fmt.Fprintf(&buf, "\t%q\n", f.Description)
		fmt.Fprintf(&buf, "\t%q\n", f.Link)
		fmt.Fprintf(&buf, "\t%s\n", f.Image)
		fmt.Fprintf(&buf, "\tRefresh at %s\n", f.Refresh.Format(DATE))
		fmt.Fprintf(&buf, "\tUnread: %d\n", f.Unread)
		fmt.Fprintf(&buf, "\tItems:\n")
		for _, item := range f.Items {
			fmt.Fprintf(&buf, "\t%s\n", item.Format(2))
		}
		return buf.String()
	}

	// Text bracketed by \xff is escaped for the tabwriter: it passes through
	// without being treated as a column separator, and StripEscape removes
	// the \xff bytes themselves. This keeps the leading indent tabs literal.
	w := tabwriter.NewWriter(&buf, 0, 8, 0, '\t', tabwriter.StripEscape)
	fmt.Fprintf(w, "Feed {\n")
	fmt.Fprintf(w, "\xff\t\xffNickname:\t%q\n", f.Nickname)
	fmt.Fprintf(w, "\xff\t\xffTitle:\t%q\n", f.Title)
	fmt.Fprintf(w, "\xff\t\xffDescription:\t%q\n", f.Description)
	fmt.Fprintf(w, "\xff\t\xffLink:\t%q\n", f.Link)
	fmt.Fprintf(w, "\xff\t\xffUpdateURL:\t%q\n", f.UpdateURL)
	if f.Image != nil {
		fmt.Fprintf(w, "\xff\t\xffImage:\t%q (%s)\n", f.Image.Title, f.Image.URL)
	} else {
		fmt.Fprintf(w, "\xff\t\xffImage:\t<nil>\n")
	}
	fmt.Fprintf(w, "\xff\t\xffRefresh:\t%s\n", f.Refresh.Format(DATE))
	fmt.Fprintf(w, "\xff\t\xffUnread:\t%d\n", f.Unread)
	fmt.Fprintf(w, "\xff\t\xffItems:\t(%d) {\n", len(f.Items))
	for _, item := range f.Items {
		fmt.Fprintf(w, "%s\n", item.Format(2))
	}
	fmt.Fprintf(w, "\xff\t\xff}\n}\n")
	w.Flush()
	return buf.String()
}
// Item represents a single story of a feed.
type Item struct {
Title string `json:"title"`
Summary string `json:"summary"`
Content string `json:"content"`
Category string `json:"category"`
Link string `json:"link"`
Date time.Time `json:"date"`
// DateValid carries no json tag, so it is encoded under its Go field name.
// NOTE(review): presumably reports whether Date was parsed successfully;
// confirm against the parsers.
DateValid bool
ID string `json:"id"` // Key used by the Feed update methods to detect items that were already seen.
Enclosures []*Enclosure `json:"enclosures"`
Read bool `json:"read"` // NOTE(review): not modified anywhere in this part of the file; presumably maintained by callers.
}
// String returns the item formatted without any extra indentation; it is
// equivalent to Format(0).
func (i *Item) String() string {
return i.Format(0)
}
// Format renders the item as text. The heading line is prefixed with indent
// tabs and every field line with one tab more.
//
// When debug is enabled the output is a labelled, tab-aligned listing of
// Title, Summary, Category, Link, Date, ID, Read and Content; otherwise it is
// a compact form showing Title, Link, Date, ID, Read and Content. Dates are
// formatted with the DATE layout.
func (i *Item) Format(indent int) string {
	headPad := strings.Repeat("\t", indent)
	fieldPad := headPad + "\t"

	var out bytes.Buffer

	if !debug {
		fmt.Fprintf(&out, "%sItem %q\n", headPad, i.Title)
		fmt.Fprintf(&out, "%s%q\n", fieldPad, i.Link)
		fmt.Fprintf(&out, "%s%s\n", fieldPad, i.Date.Format(DATE))
		fmt.Fprintf(&out, "%s%q\n", fieldPad, i.ID)
		fmt.Fprintf(&out, "%sRead: %v\n", fieldPad, i.Read)
		fmt.Fprintf(&out, "%s%q\n", fieldPad, i.Content)
		return out.String()
	}

	// The indentation is wrapped in \xff so the tabwriter passes it through
	// as escaped text rather than treating its tabs as column separators.
	tw := tabwriter.NewWriter(&out, 0, 8, 0, '\t', tabwriter.StripEscape)
	fmt.Fprintf(tw, "\xff%s\xffItem {\n", headPad)
	fmt.Fprintf(tw, "\xff%s\xffTitle:\t%q\n", fieldPad, i.Title)
	fmt.Fprintf(tw, "\xff%s\xffSummary:\t%q\n", fieldPad, i.Summary)
	fmt.Fprintf(tw, "\xff%s\xffCategory:\t%q\n", fieldPad, i.Category)
	fmt.Fprintf(tw, "\xff%s\xffLink:\t%s\n", fieldPad, i.Link)
	fmt.Fprintf(tw, "\xff%s\xffDate:\t%s\n", fieldPad, i.Date.Format(DATE))
	fmt.Fprintf(tw, "\xff%s\xffID:\t%s\n", fieldPad, i.ID)
	fmt.Fprintf(tw, "\xff%s\xffRead:\t%v\n", fieldPad, i.Read)
	fmt.Fprintf(tw, "\xff%s\xffContent:\t%q\n", fieldPad, i.Content)
	fmt.Fprintf(tw, "\xff%s\xff}\n", headPad)
	tw.Flush()
	return out.String()
}
// Enclosure describes one enclosure attached to an item: where it lives, its
// declared type and its declared length.
type Enclosure struct {
	URL    string `json:"url"`
	Type   string `json:"type"`
	Length uint   `json:"length"`
}

// Get fetches the enclosure with http.Get and returns the response body,
// which the caller is responsible for closing. A nil receiver or an empty URL
// yields an error without any request being made.
func (e *Enclosure) Get() (io.ReadCloser, error) {
	if e != nil && e.URL != "" {
		resp, err := http.Get(e.URL)
		if err != nil {
			return nil, err
		}
		return resp.Body, nil
	}
	return nil, errors.New("No enclosure")
}
// Image describes a feed image: its title, location and dimensions.
type Image struct {
	Title  string `json:"title"`
	URL    string `json:"url"`
	Height uint32 `json:"height"`
	Width  uint32 `json:"width"`
}

// Get fetches the image with http.Get and returns the response body, which
// the caller is responsible for closing. A nil receiver or an empty URL
// yields an error without any request being made.
func (img *Image) Get() (io.ReadCloser, error) {
	if img != nil && img.URL != "" {
		resp, err := http.Get(img.URL)
		if err != nil {
			return nil, err
		}
		return resp.Body, nil
	}
	return nil, errors.New("No image")
}

// String returns a short description made of the word "Image" followed by the
// quoted title.
func (img *Image) String() string {
	title := img.Title
	return fmt.Sprintf("Image %q", title)
}