From a3151074722294f050e8d37fa973c55ed93ffdd2 Mon Sep 17 00:00:00 2001 From: Wayne-Cole <77279425+Wacky404@users.noreply.github.com> Date: Mon, 19 May 2025 22:09:45 -0500 Subject: feat: testing first job --- Makefile | 8 ++++++ cmd/lurchers/main.go | 19 ++++++++++++--- data/configs/indeed.go | 34 ++++++++++++++++++++------ data/jobs.go | 38 ++++++++++++++++------------- data/literature.go | 66 +++++++++++++++++++++++++------------------------- 5 files changed, 103 insertions(+), 62 deletions(-) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..5867a0b --- /dev/null +++ b/Makefile @@ -0,0 +1,8 @@ +BINARY=bin/lurchers +SRC=cmd/lurchers/main.go + +run: $(BINARY) + ./$(BINARY) + +$(BINARY): $(SRC) + go build -o $(BINARY) $(SRC) diff --git a/cmd/lurchers/main.go b/cmd/lurchers/main.go index 2a127f8..85db16b 100644 --- a/cmd/lurchers/main.go +++ b/cmd/lurchers/main.go @@ -6,6 +6,8 @@ import ( "log/slog" "time" + "github.com/Wacky404/lurchers/data/configs" + "github.com/Wacky404/lurchers/evade" "github.com/Wacky404/lurchers/util" "github.com/gocolly/colly" "github.com/joho/godotenv" @@ -27,11 +29,20 @@ func main() { slog.Error("error loading .env file", slog.Any("error", err)) } + // our buffed collector for indeed + i := configs.IndeedConfig() + evade.NewUserAgent(ctx, i.C) + proxies := []string{util.GetVar("TOR", "socks5://127.0.0.1:9050")} + err = evade.RotateProxy(i.C, &proxies) + if err != nil { + slog.Error("error configuring the RotateProxy", slog.Any("error", err)) + } + // before making a request print "Visiting..." - c.OnRequest(func(r *colly.Request) { - slog.Info("Visiting", slog.String("Request URL", r.URL.String())) + i.C.OnRequest(func(r *colly.Request) { + slog.Info("Going to website", slog.String("Request URL", r.URL.String())) }) // start scraping on website(s) - c.Visit("https://store.crunchyroll.com/collections/manga-books/?srule=Most-Popular") - c.Wait() + i.C.Visit(i.Data.Posting.Url) + i.C.Wait() } diff --git a/data/configs/indeed.go b/data/configs/indeed.go index 3fad0fd..d1ba4e4 100644 --- a/data/configs/indeed.go +++ b/data/configs/indeed.go @@ -1,20 +1,38 @@ package configs import ( + "log/slog" + "github.com/Wacky404/lurchers/data" + "github.com/gocolly/colly" ) -func indeedConfig() *colly.Collector { - job := new(data.Job) - c := colly.NewCollector() - c.OnHTML("a[id^='job_']", func(e *colly.HTMLElement) { +type CollyCfg struct { + C *colly.Collector + Data *data.Job +} + +func newCollyCfg() *CollyCfg { + return &CollyCfg{ + C: colly.NewCollector(colly.Async(true)), + Data: data.NewJob(), + } +} + +func IndeedConfig() *CollyCfg { + cfg := newCollyCfg() + cfg.Data.Posting.Website = "https://indeed.com/" + // testing this out; will need to build this + cfg.Data.Posting.Url = "https://www.indeed.com/jobs?q=%2B&l=Little+Rock%2C+AR&fromage=7&salaryType=%2440%2C000%2B&radius=5&jlid=68f779f7b0e38e09&rbl=Little+Rock%2C+AR&from=searchOnDesktopSerp&vjk=7a14f77130c03202" + cfg.C.OnHTML("a[id^='job_']", func(e *colly.HTMLElement) { link := e.Attr("href") - c.Visit(e.Request.AbsoluteURL(link)) + cfg.C.Visit(e.Request.AbsoluteURL(link)) }) - c.OnHTML("h1[class^='jobserch-JobInfoHeader-title']", func(e *colly.HTMLElement) { - + cfg.C.OnHTML("h1[class^='jobserch-JobInfoHeader-title']", func(e *colly.HTMLElement) { + jobPosition := e.Text + slog.Info("Job Found", slog.String("Position", jobPosition)) }) - return c + return cfg } diff --git a/data/jobs.go b/data/jobs.go index c47d142..9984cbe 100644 --- a/data/jobs.go +++ b/data/jobs.go @@ -6,27 +6,31 @@ import ( ) type Job struct { - posting JobPosting - details JobDetails + Posting *JobPosting + Details *JobDetails +} + +func NewJob() *Job { + return &Job{Posting: &JobPosting{}, Details: &JobDetails{}} } type JobPosting struct { - website string - url string - location string - company string - position string - jobType string - workShift string - workSetting string - lastModified time.Time + Website string + Url string + Location string + Company string + Position string + JobType string + WorkShift string + WorkSetting string + LastModified time.Time } type JobDetails struct { - skills json.Marshaler - licenses json.Marshaler - certs json.Marshaler - education json.Marshaler - benefits json.Marshaler - fullJobDescription string + Skills json.Marshaler + Licenses json.Marshaler + Certs json.Marshaler + Education json.Marshaler + Benefits json.Marshaler + FullJobDescription string } diff --git a/data/literature.go b/data/literature.go index f984bd0..ff6c4ec 100644 --- a/data/literature.go +++ b/data/literature.go @@ -6,49 +6,49 @@ import ( ) type Author struct { - name string - description string - photo image.Image + Name string + Description string + Photo image.Image } type Category struct { - name string - subCategory SubCategory + Name string + SubCategory SubCategory } type SubCategory struct { - name string + Name string } type Publisher struct { - name string - logo image.Image + Name string + Logo image.Image } type Book struct { - title string - author Author - category Category - series string - description string - cover image.Image - publisher Publisher - year_published uint16 - print_length uint16 - language string - isbn_10 uint32 - isbn_13 uint32 - reviews string - rating float32 - hcPriceUS map[string]uint16 - hcPriceCAN map[string]uint16 - pbPriceUS map[string]uint16 - pbPriceCAN map[string]uint16 - ebPriceUS map[string]uint16 - ebPriceCAN map[string]uint16 - audioPriceUS uint16 - audioPriceCAN uint16 - priceLastModified time.Time - cheapestPrice map[string]uint16 - cheapPriceLastModified time.Time + Title string + Author Author + Category Category + Series string + Description string + Cover image.Image + Publisher Publisher + Year_published uint16 + Print_length uint16 + Language string + Isbn_10 uint32 + Isbn_13 uint32 + Reviews string + Rating float32 + HcPriceUS map[string]uint16 + HcPriceCAN map[string]uint16 + PbPriceUS map[string]uint16 + PbPriceCAN map[string]uint16 + EbPriceUS map[string]uint16 + EbPriceCAN map[string]uint16 + AudioPriceUS uint16 + AudioPriceCAN uint16 + PriceLastModified time.Time + CheapestPrice map[string]uint16 + CheapPriceLastModified time.Time } -- cgit v1.3-3-g829e